add direct
parent 5b75e03b35
commit 5dc9939cf0
2 changed files with 325 additions and 0 deletions
0    direct/README.md    Normal file
325  direct/birect.py    Normal file
@@ -0,0 +1,325 @@
#!/usr/bin/env python3
# based on hamming_exact3.py, but with all debug stuff removed

def birect(
    obj,
    lo,
    hi,
    *,
    min_diag,
    min_error=None,
    max_evals=None,
    max_iters=None,
    by_longest=False,
    pruning=0,
    F=float,
):
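    # Overview (inferred from the code below, not original documentation): this
    # looks like a DIRECT-style ("DIviding RECTangles") global minimizer that
    # bisects boxes instead of trisecting them -- hence "birect" -- sampling two
    # points per box at 1/3 and 2/3 of its main diagonal, with box corners kept
    # in exact rational form (integer numerator, power-of-two denominator).
    #   obj        -- objective; takes a list of coordinates, returns a scalar.
    #   lo, hi     -- per-dimension lower/upper bounds of the search box.
    #   min_diag   -- boxes whose diagonal falls below this are not split further.
    #   min_error / max_evals / max_iters -- stopping conditions (need at least one).
    #   by_longest -- select candidates by longest side rather than by diagonal.
    #   pruning    -- strength of an optional heuristic that drops small boxes; 0 = off.
    #   F          -- numeric type for the floating-point parts (e.g. float, np.float64).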
    assert len(lo) == len(hi), "dimensional mismatch"

    assert not (
        min_error is None and max_evals is None and max_iters is None
    ), "at least one stopping condition must be specified"

    # variables prefixed with v_ are to be one-dimensional vectors. [a, b, c]
    # variables prefixed with vw_ are to be two-dimensional vectors. [[a, b], [c, d]]
    # variables prefixed with w_ are to be one-dimensional vectors of pairs.
    # aside: xmin should actually be called v_xmin, but it's not!

    def fun(w_t):
        # xs = [l + (h - l) * t for t in w_t]
        v_x = [F(num) / F(den) for num, den in w_t]
        v_x = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, v_x)]
        res = obj(v_x)
        return res

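    # ab_to_lu maps a box, stored as its two corners w_a and w_b in
    # (numerator, log2-denominator) form, to the two sample points w_l and w_u
    # that sit 1/3 and 2/3 of the way along the diagonal from a to b. The
    # returned pairs are plain (numerator, denominator) fractions with
    # denominator 3 * 2**k, which fun() converts to F for evaluation.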
    def ab_to_lu(w_a, w_b):
        w_l = [(a[0] + a[0] + b[0], (1 << a[1]) * 3) for a, b in zip(w_a, w_b)]
        w_u = [(a[0] + b[0] + b[0], (1 << b[1]) * 3) for a, b in zip(w_a, w_b)]
        return w_l, w_u

    dims = len(lo)

    w_a, w_b = [(0, 0)] * dims, [(1, 0)] * dims

    w_l, w_u = ab_to_lu(w_a, w_b)
    fl = fun(w_l)
    fu = fun(w_u)
    if fl <= fu:
        xmin, fmin = w_l, fl
    else:
        xmin, fmin = w_u, fu
    imin = 0  # index of the minimum -- only one point so far, so it's that one.

    # sample coordinates and their values:
    vw_a, vw_b = [w_a], [w_b]
    v_fl = [fl]  # remember that "l" and "u" are arbitrary shorthand used by the paper,
    v_fu = [fu]  # and one isn't necessarily above or below the other.
    v_active = {0: True}  # indices of hyper-rectangles that have yet to be subdivided
    v_depth = [0]  # increments every split
    n = 1  # how many indices are in use

    del w_a, w_b, w_l, w_u, fl, fu  # prevent accidental re-use

    def precision_met():
        return min_error is not None and fmin <= min_error

    def no_more_evals():
        return max_evals is not None and n + 1 >= max_evals

    avg = lambda a, b: (a + b) / 2  # TODO: delete me!
    diff = lambda a, b: -abs(a - b) / 2  # TODO: delete me!
    cycle_funs = [min, max, avg, diff]  # TODO: delete me!
    cycle_funs = [min, avg, max, diff, max, avg]  # TODO: delete me!
    cycle_funs = [min]  # TODO: delete me!
    # interesting. using cycle_funs = [max] seems optimal for objective2210.

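    # gather_potential() picks the "potentially optimal" boxes to split next.
    # Like classic DIRECT, it keeps (roughly) the lower convex hull of the
    # points (box size, best objective value inside the box), so that both
    # large unexplored boxes and small promising boxes get subdivided.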
    def gather_potential(v_i):
        # crappy algorithm for finding the convex hull of the plot where
        # x = diameter of hyper-rectangle
        # y = minimum loss of the two points (v_fl, v_fu) within it

        cycle_fun = cycle_funs[outer % len(cycle_funs)]  # TODO: delete me!

        # TODO: make this faster. use a sorted queue and peek at the best for each depth.
        # start by finding the arg-minimum for each set of equal-diameter rects.
        bests = {}  # mapping of depth to arg-minimum value (i.e. its index)
        for i in v_i:
            fl, fu = v_fl[i], v_fu[i]
            f = cycle_fun(fl, fu)  # TODO: use min(fl, fu)!
            depth = v_depth[i]
            if cycle_fun == diff:  # TODO: delete me!
                # f = f * (1 << depth)  # TODO: delete me!
                f = f / diagonal_cache[depth]  # TODO: delete me!
            if by_longest:
                depth = depth // dims * dims
                # assert depth < depth_limit
            best = bests.get(depth)
            # TODO: handle f == best case.
            if best is None or f < best[1]:
                bests[depth] = (i, f)

        if len(bests) == 1:  # nothing to compare
            return [i for i, f in bests.values()]

        asc = sorted(bests.items(), key=lambda t: -t[0])  # sort by length, ascending

        # first, remove points that slope downwards.
        # this yields a pareto front, which isn't necessarily convex.
        old = asc
        new = [old[-1]]
        smallest = old[-1][1][1]
        for i in reversed(range(len(old) - 1)):
            f = old[i][1][1]
            if f <= smallest:
                smallest = f
                new.append(old[i])
        new = new[::-1]

        # second, convert depths to lengths.
        if by_longest:  # TODO: does this branch make any difference?
            new = [(longest_cache[depth],) + t for depth, t in new]
        else:
            new = [(diagonal_cache[depth],) + t for depth, t in new]

        # third, remove points that fall under a previous slope.
        old = new
        skip = [False] * len(old)
        for i in range(len(old)):
            if skip[i]:
                continue
            len0, i0, f0 = old[i]
            smallest_slope = None
            for j in range(i + 1, len(old)):
                if skip[j]:
                    continue
                len1, i1, f1 = old[j]
                num = f1 - f0
                den = len1 - len0
                # this factor of 3/2 comes from the denominator;
                # each length should be multiplied by 2/3:
                # the furthest relative distance from a corner to a center point.
                slope = num / den  # * F(3 / 2)
                if smallest_slope is None:
                    smallest_slope = slope
                elif slope < smallest_slope:
                    for k in range(i + 1, j):
                        skip[k] = True
                    smallest_slope = slope

        new = [entry for entry, skipping in zip(old, skip) if not skipping]

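        # The pruning step below is an extra heuristic layered on the hull
        # (it is not part of classic DIRECT, as far as I can tell): `offset`
        # is an aspiration value a little below the current best, and only
        # boxes at least as large as the one minimizing (f - offset) / size
        # are kept, discarding smaller boxes that would improve too slowly.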
        if pruning:
            v_f = sorted(min(fl, fu) for fl, fu in zip(v_fl, v_fu))
            fmedian = v_f[len(v_f) // 2]

            offset = fmin - pruning * (fmedian - fmin)
            start = 0
            K_slope = None
            for i in range(len(new)):
                len0, i0, f0 = new[i]
                new_slope = (f0 - offset) / len0
                if K_slope is None or new_slope < K_slope:
                    # if new_slope >= 0:
                    K_slope = new_slope
                    start = i
            # if start: print(end=f"[starting at {i} with slope {K_slope:.3f}]")
            new = new[start:]

        return [i for len, i, f in new]

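    # determine_longest() returns the dimension to split next: the one whose
    # side has been halved the fewest times so far (smallest denominator
    # exponent), i.e. the currently longest side, with ties going to the
    # lower index.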
    def determine_longest(w_a, w_b):
        # the index of the dimension is used as a tie-breaker (considered longer).
        # e.g. a box has lengths (2, 3, 3). the index returned is then 1.
        # TODO: alternate way of stating that comment: biased towards smaller indices.
        longest = 0

        invlen = None
        for i, (a, b) in enumerate(zip(w_a, w_b)):
            den_a = 1 << a[1]
            den_b = 1 << b[1]
            den = max(den_a, den_b)  # TODO: always the same, right?
            if invlen is None or den < invlen:
                invlen = den
                longest = i

        return longest

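    # split_it() halves box i along dimension d and registers one new child
    # box; `which` selects which half. Each child inherits one of the parent's
    # two sample points (and its cached objective value) and evaluates its
    # other sample point fresh, so every new box costs one objective call.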
    def split_it(i, which, *, w_a, w_b, d):
        nonlocal n, xmin, fmin
        new, n = n, n + 1  # designate an index for the new hyper-rectangle
        w_new_a = w_a.copy()
        w_new_b = w_b.copy()

        den = w_a[d][1]  # should be equal to w_b[d][1] as well
        num_a = w_a[d][0]
        num_b = w_b[d][0]
        if which:
            w_new_a[d] = (num_b + num_b, den + 1)  # swap
            w_new_b[d] = (num_a + num_b, den + 1)  # slide
        else:
            w_new_a[d] = (num_a + num_b, den + 1)  # slide
            w_new_b[d] = (num_a + num_a, den + 1)  # swap

        w_l, w_u = ab_to_lu(w_new_a, w_new_b)
        fl = fun(w_l) if which else v_fl[i]
        fu = v_fu[i] if which else fun(w_u)
        vw_a.append(w_new_a)
        vw_b.append(w_new_b)
        v_fl.append(fl)
        v_fu.append(fu)
        v_depth.append(v_depth[i] + 1)
        if which:
            if fl < fmin:
                xmin, fmin = w_l, fl
        else:
            if fu < fmin:
                xmin, fmin = w_u, fu
        return new

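    # split_rectangles() splits every selected box into its two halves along
    # its longest dimension, checking the stopping conditions after each new
    # evaluation so the max_evals budget is not overshot by a whole batch.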
    def split_rectangles(v_i):  # returns new indices
        v_new = []
        for i in v_i:
            w_a = vw_a[i]
            w_b = vw_b[i]
            d = determine_longest(w_a, w_b)

            v_new.append(split_it(i, 0, w_a=w_a, w_b=w_b, d=d))
            if precision_met() or no_more_evals():
                break
            v_new.append(split_it(i, 1, w_a=w_a, w_b=w_b, d=d))
            if precision_met() or no_more_evals():
                break

        assert len(vw_a) == n, "internal error: vw_a has invalid length"
        assert len(vw_b) == n, "internal error: vw_b has invalid length"
        assert len(v_fl) == n, "internal error: v_fl has invalid length"
        assert len(v_fu) == n, "internal error: v_fu has invalid length"
        assert len(v_depth) == n, "internal error: v_depth has invalid length"
        return v_new

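    # Because every box at a given split depth has the same shape, diagonal
    # lengths can be tabulated once per depth instead of once per box. The
    # helper below walks one box down through `limit` splits and records the
    # squared diagonal at each depth, converting to F lengths at the end.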
    def precompute_diagonals_by_limit(limit):
        diags = []
        w_a = vw_a[0].copy()
        w_b = vw_b[0].copy()
        for depth in range(limit):
            sq_dist = 0
            for a, b in zip(w_a, w_b):
                delta = b[0] - a[0]
                sq_dist += (delta * delta) << (2 * (limit - a[1]))
            diags.append(sq_dist)

            d = depth % dims
            a_d = w_a[d]
            b_d = w_b[d]
            large = max(a_d[0], b_d[0])
            small = min(a_d[0], b_d[0])
            w_a[d] = (large * 2 - 0, a_d[1] + 1)
            w_b[d] = (small * 2 + 1, b_d[1] + 1)
        return [F(diag) ** F(0.5) / F(1 << limit) for diag in diags]

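    # Same walk as above, but instead of a fixed depth limit it stops once the
    # diagonal drops below `limit` (i.e. min_diag), and it also records the
    # longest side at each depth for the by_longest grouping.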
    def precompute_diagonals_by_length(limit):
        diags, longests = [], []
        w_a = vw_a[0].copy()
        w_b = vw_b[0].copy()
        for depth in range(1_000_000):
            bits = max(max(a[1], b[1]) for a, b in zip(w_a, w_b))
            sq_dist, longest = 0, 0
            for a, b in zip(w_a, w_b):
                delta = b[0] - a[0]
                sq_dist += (delta * delta) << (2 * (bits - a[1]))
                longest = max(longest, abs(delta) << (bits - a[1]))
            diag = F(sq_dist) ** F(0.5) / F(1 << bits)
            if diag < limit:
                break
            longest = F(longest) / F(1 << bits)
            diags.append(diag)
            longests.append(longest)

            d = depth % dims
            a_d = w_a[d]
            b_d = w_b[d]
            large = max(a_d[0], b_d[0])
            small = min(a_d[0], b_d[0])
            w_a[d] = (large * 2 - 0, a_d[1] + 1)
            w_b[d] = (small * 2 + 1, b_d[1] + 1)
        return diags, longests

    diagonal_cache, longest_cache = precompute_diagonals_by_length(min_diag)
    depth_limit = len(diagonal_cache)
    diagonal_cache = precompute_diagonals_by_limit(depth_limit)

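    # main loop: pick the potentially-optimal boxes, split each into two
    # halves, retire the parents, and keep only the children that are still
    # large enough (depth below depth_limit, i.e. diagonal above min_diag).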
    for outer in range(1_000_000 if max_iters is None else max_iters):
        if precision_met() or no_more_evals():
            break

        v_potential = gather_potential(v_active)
        v_new = split_rectangles(v_potential)

        for j in v_potential:
            del v_active[j]
        for j in v_new:
            if v_depth[j] < depth_limit:
                v_active[j] = True

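    # convert the best sample point from its exact unit-cube fractions back
    # into the caller's [lo, hi] coordinates.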
    tmin = [F(x[0]) / F(x[1]) for x in xmin]
    argmin = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, tmin)]
    return argmin, fmin


if __name__ == "__main__":
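    # smoke test: arsd_objectives appears to be an external test module that
    # is not part of this commit; objective2210 is presumably a 2-D benchmark
    # objective evaluated on numpy arrays.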
    from arsd_objectives import objective2210
    import numpy as np

    F = np.float64
    res = birect(
        lambda a: objective2210(np.array(a, F)),
        [0, 0],
        [5, 5],
        min_diag=F(1e-8 / 5),
        # max_evals=50_000,
        max_evals=2_000,
        F=F,
        by_longest=True,
    )
    print("", "birect result:", *res, sep="\n")
    print("", "double checked:", objective2210(np.array(res[0], F)), sep="\n")