add direct

2022-06-07 07:02:35 +02:00 · 2022-06-07 07:02:35 +02:00 · 5dc9939cf0
commit 5dc9939cf0
parent 5b75e03b35
2 changed files with 325 additions and 0 deletions
--- a/direct/README.md
+++ b/direct/README.md
--- a/direct/birect.py
+++ b/direct/birect.py
@ -0,0 +1,325 @@
+#!/usr/bin/env python3
+# based on hamming_exact3.py, but with all debug stuff removed
+
+def birect(
+    obj,
+    lo,
+    hi,
+    *,
+    min_diag,
+    min_error=None,
+    max_evals=None,
+    max_iters=None,
+    by_longest=False,
+    pruning=0,
+    F=float,
+):
+    assert len(lo) == len(hi), "dimensional mismatch"
+
+    assert not (
+        min_error is None and max_evals is None and max_iters is None
+    ), "at least one stopping condition must be specified"
+
+    # variables prefixed with v_ are to be one-dimensional vectors. [a, b, c]
+    # variables prefixed with vw_ are to be two-dimensional vectors. [[a, b], [c, d]]
+    # variables prefixed with w_ are to be one-dimensional vectors of pairs.
+    # aside: xmin should actually be called v_xmin, but it's not!
+
+    def fun(w_t):
+        # xs = [l + (h - l) * t for t in w_t]
+        v_x = [F(num) / F(den) for num, den in w_t]
+        v_x = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, v_x)]
+        res = obj(v_x)
+        return res
+
+    def ab_to_lu(w_a, w_b):
+        w_l = [(a[0] + a[0] + b[0], (1 << a[1]) * 3) for a, b in zip(w_a, w_b)]
+        w_u = [(a[0] + b[0] + b[0], (1 << b[1]) * 3) for a, b in zip(w_a, w_b)]
+        return w_l, w_u
+
+    dims = len(lo)
+
+    w_a, w_b = [(0, 0)] * dims, [(1, 0)] * dims
+
+    w_l, w_u = ab_to_lu(w_a, w_b)
+    fl = fun(w_l)
+    fu = fun(w_u)
+    if fl <= fu:
+        xmin, fmin = w_l, fl
+    else:
+        xmin, fmin = w_u, fu
+    imin = 0  # index of the minimum -- only one point so far, so it's that one.
+
+    # sample coordinates and their values:
+    vw_a, vw_b = [w_a], [w_b]
+    v_fl = [fl]  # remember that "l" and "u" are arbitrary shorthand used by the paper,
+    v_fu = [fu]  # and one isn't necessarily above or below the other.
+    v_active = {0: True}  # indices of hyper-rectangles that have yet to be subdivided
+    v_depth = [0]  # increments every split
+    n = 1  # how many indices are in use
+
+    del w_a, w_b, w_l, w_u, fl, fu  # prevent accidental re-use
+
+    def precision_met():
+        return min_error is not None and fmin <= min_error
+
+    def no_more_evals():
+        return max_evals is not None and n + 1 >= max_evals
+
+    avg = lambda a, b: (a + b) / 2  # TODO: delete me!
+    diff = lambda a, b: -abs(a - b) / 2  # TODO: delete me!
+    cycle_funs = [min, max, avg, diff]  # TODO: delete me!
+    cycle_funs = [min, avg, max, diff, max, avg]  # TODO: delete me!
+    cycle_funs = [min]  # TODO: delete me!
+    # interesting. using cycle_funs = [max] seems optimal for objective2210.
+
+    def gather_potential(v_i):
+        # crappy algorithm for finding the convex hull of the plot where
+        # x = diameter of hyper-rectangle
+        # y = minimum loss of the two points (v_fl, v_fu) within it
+
+        cycle_fun = cycle_funs[outer % len(cycle_funs)]  # TODO: delete me!
+
+        # TODO: make this faster. use a sorted queue and peek at the best for each depth.
+        # start by finding the arg-minimum for each set of equal-diameter rects.
+        bests = {}  # mapping of depth to arg-minimum value (i.e. its index)
+        for i in v_i:
+            fl, fu = v_fl[i], v_fu[i]
+            f = cycle_fun(fl, fu)  # TODO: use min(fl, fu)!
+            depth = v_depth[i]
+            if cycle_fun == diff:  # TODO: delete me!
+                # f = f * (1 << depth)  # TODO: delete me!
+                f = f / diagonal_cache[depth]  # TODO: delete me!
+            if by_longest:
+                depth = depth // dims * dims
+            # assert depth < depth_limit
+            best = bests.get(depth)
+            # TODO: handle f == best case.
+            if best is None or f < best[1]:
+                bests[depth] = (i, f)
+
+        if len(bests) == 1:  # nothing to compare
+            return [i for i, f in bests.values()]
+
+        asc = sorted(bests.items(), key=lambda t: -t[0])  # sort by length, ascending
+
+        # first, remove points that slope downwards.
+        # this yields a pareto front, which isn't necessarily convex.
+        old = asc
+        new = [old[-1]]
+        smallest = old[-1][1][1]
+        for i in reversed(range(len(old) - 1)):
+            f = old[i][1][1]
+            if f <= smallest:
+                smallest = f
+                new.append(old[i])
+        new = new[::-1]
+
+        # second, convert depths to lengths.
+        if by_longest:  # TODO: does this branch make any difference?
+            new = [(longest_cache[depth],) + t for depth, t in new]
+        else:
+            new = [(diagonal_cache[depth],) + t for depth, t in new]
+
+        # third, remove points that fall under a previous slope.
+        old = new
+        skip = [False] * len(old)
+        for i in range(len(old)):
+            if skip[i]:
+                continue
+            len0, i0, f0 = old[i]
+            smallest_slope = None
+            for j in range(i + 1, len(old)):
+                if skip[j]:
+                    continue
+                len1, i1, f1 = old[j]
+                num = f1 - f0
+                den = len1 - len0
+                # this factor of 3/2 comes from the denominator;
+                # each length should be multiplied by 2/3:
+                # the furthest relative distance from a corner to a center point.
+                slope = num / den  # * F(3 / 2)
+                if smallest_slope is None:
+                    smallest_slope = slope
+                elif slope < smallest_slope:
+                    for k in range(i + 1, j):
+                        skip[k] = True
+                    smallest_slope = slope
+
+        new = [entry for entry, skipping in zip(old, skip) if not skipping]
+
+        if pruning:
+            v_f = sorted(min(fl, fu) for fl, fu in zip(v_fl, v_fu))
+            fmedian = v_f[len(v_f) // 2]
+
+            offset = fmin - pruning * (fmedian - fmin)
+            start = 0
+            K_slope = None
+            for i in range(len(new)):
+                len0, i0, f0 = new[i]
+                new_slope = (f0 - offset) / len0
+                if K_slope is None or new_slope < K_slope:
+                    # if new_slope >= 0:
+                    K_slope = new_slope
+                    start = i
+            # if start: print(end=f"[starting at {i} with slope {K_slope:.3f}]")
+            new = new[start:]
+
+        return [i for len, i, f in new]
+
+    def determine_longest(w_a, w_b):
+        # the index of the dimension is used as a tie-breaker (considered longer).
+        # e.g. a box has lengths (2, 3, 3). the index returned is then 1.
+        # TODO: alternate way of stating that comment: biased towards smaller indices.
+        longest = 0
+
+        invlen = None
+        for i, (a, b) in enumerate(zip(w_a, w_b)):
+            den_a = 1 << a[1]
+            den_b = 1 << b[1]
+            den = max(den_a, den_b)  # TODO: always the same, right?
+            if invlen is None or den < invlen:
+                invlen = den
+                longest = i
+
+        return longest
+
+    def split_it(i, which, *, w_a, w_b, d):
+        nonlocal n, xmin, fmin
+        new, n = n, n + 1  # designate an index for the new hyper-rectangle
+        w_new_a = w_a.copy()
+        w_new_b = w_b.copy()
+
+        den = w_a[d][1]  # should be equal to w_b[d][1] as well
+        num_a = w_a[d][0]
+        num_b = w_b[d][0]
+        if which:
+            w_new_a[d] = (num_b + num_b, den + 1)  # swap
+            w_new_b[d] = (num_a + num_b, den + 1)  # slide
+        else:
+            w_new_a[d] = (num_a + num_b, den + 1)  # slide
+            w_new_b[d] = (num_a + num_a, den + 1)  # swap
+
+        w_l, w_u = ab_to_lu(w_new_a, w_new_b)
+        fl = fun(w_l) if which else v_fl[i]
+        fu = v_fu[i] if which else fun(w_u)
+        vw_a.append(w_new_a)
+        vw_b.append(w_new_b)
+        v_fl.append(fl)
+        v_fu.append(fu)
+        v_depth.append(v_depth[i] + 1)
+        if which:
+            if fl < fmin:
+                xmin, fmin = w_l, fl
+        else:
+            if fu < fmin:
+                xmin, fmin = w_u, fu
+        return new
+
+    def split_rectangles(v_i):  # returns new indices
+        v_new = []
+        for i in v_i:
+            w_a = vw_a[i]
+            w_b = vw_b[i]
+            d = determine_longest(w_a, w_b)
+
+            v_new.append(split_it(i, 0, w_a=w_a, w_b=w_b, d=d))
+            if precision_met() or no_more_evals():
+                break
+            v_new.append(split_it(i, 1, w_a=w_a, w_b=w_b, d=d))
+            if precision_met() or no_more_evals():
+                break
+
+        assert len(vw_a) == n, "internal error: vw_a has invalid length"
+        assert len(vw_b) == n, "internal error: vw_b has invalid length"
+        assert len(v_fl) == n, "internal error: v_fl has invalid length"
+        assert len(v_fu) == n, "internal error: v_fu has invalid length"
+        assert len(v_depth) == n, "internal error: v_depth has invalid length"
+        return v_new
+
+    def precompute_diagonals_by_limit(limit):
+        diags = []
+        w_a = vw_a[0].copy()
+        w_b = vw_b[0].copy()
+        for depth in range(limit):
+            sq_dist = 0
+            for a, b in zip(w_a, w_b):
+                delta = b[0] - a[0]
+                sq_dist += (delta * delta) << (2 * (limit - a[1]))
+            diags.append(sq_dist)
+
+            d = depth % dims
+            a_d = w_a[d]
+            b_d = w_b[d]
+            large = max(a_d[0], b_d[0])
+            small = min(a_d[0], b_d[0])
+            w_a[d] = (large * 2 - 0, a_d[1] + 1)
+            w_b[d] = (small * 2 + 1, b_d[1] + 1)
+        return [F(diag) ** F(0.5) / F(1 << limit) for diag in diags]
+
+    def precompute_diagonals_by_length(limit):
+        diags, longests = [], []
+        w_a = vw_a[0].copy()
+        w_b = vw_b[0].copy()
+        for depth in range(1_000_000):
+            bits = max(max(a[1], b[1]) for a, b in zip(w_a, w_b))
+            sq_dist, longest = 0, 0
+            for a, b in zip(w_a, w_b):
+                delta = b[0] - a[0]
+                sq_dist += (delta * delta) << (2 * (bits - a[1]))
+                longest = max(longest, abs(delta) << (bits - a[1]))
+            diag = F(sq_dist) ** F(0.5) / F(1 << bits)
+            if diag < limit:
+                break
+            longest = F(longest) / F(1 << bits)
+            diags.append(diag)
+            longests.append(longest)
+
+            d = depth % dims
+            a_d = w_a[d]
+            b_d = w_b[d]
+            large = max(a_d[0], b_d[0])
+            small = min(a_d[0], b_d[0])
+            w_a[d] = (large * 2 - 0, a_d[1] + 1)
+            w_b[d] = (small * 2 + 1, b_d[1] + 1)
+        return diags, longests
+
+    diagonal_cache, longest_cache = precompute_diagonals_by_length(min_diag)
+    depth_limit = len(diagonal_cache)
+    diagonal_cache = precompute_diagonals_by_limit(depth_limit)
+
+    for outer in range(1_000_000 if max_iters is None else max_iters):
+        if precision_met() or no_more_evals():
+            break
+
+        v_potential = gather_potential(v_active)
+        v_new = split_rectangles(v_potential)
+
+        for j in v_potential:
+            del v_active[j]
+        for j in v_new:
+            if v_depth[j] < depth_limit:
+                v_active[j] = True
+
+    tmin = [F(x[0]) / F(x[1]) for x in xmin]
+    argmin = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, tmin)]
+    return argmin, fmin
+
+
+if __name__ == "__main__":
+    from arsd_objectives import objective2210
+    import numpy as np
+
+    F = np.float64
+    res = birect(
+        lambda a: objective2210(np.array(a, F)),
+        [0, 0],
+        [5, 5],
+        min_diag=F(1e-8 / 5),
+        # max_evals=50_000,
+        max_evals=2_000,
+        F=F,
+        by_longest=True,
+    )
+    print("", "birect result:", *res, sep="\n")
+    print("", "double checked:", objective2210(np.array(res[0], F)), sep="\n")