add my own optimizers (birect, soo, mercury)

2023-05-04 15:35:23 -07:00 · 2023-05-04 15:35:23 -07:00 · 970260783d
commit 970260783d
parent eaa6c5de04
6 changed files with 823 additions and 0 deletions
--- a/birect.py
+++ b/birect.py
@ -0,0 +1,360 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2022 Connor Olding
+
+# Permission to use, copy, modify, and/or distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+
+# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+
+def birect(
+    obj,  # objective function to find the minimum value of
+    lo,  # list of lower bounds for each problem dimension
+    hi,  # list of upper bounds for each problem dimension
+    *,
+    min_diag,  # never subdivide hyper-rectangles below this length
+    min_error=None,  # exit when the objective function achieves this error
+    max_evals=None,  # exit when the objective function has been run this many times
+    max_iters=None,  # exit when the optimization procedure iterates this many times
+    by_longest=False,  # measure by rects by longest edge instead of their diagonal
+    pruning=0,
+    F=float,  # can be float, decimal.Decimal, numpy.float64, or anything float-like
+):
+    assert len(lo) == len(hi), "dimensional mismatch"
+
+    assert not (
+        min_error is None and max_evals is None and max_iters is None
+    ), "at least one stopping condition must be specified"
+
+    # variables prefixed with v_ are to be one-dimensional vectors. [a, b, c]
+    # variables prefixed with w_ are to be one-dimensional vectors of pairs. [a, b]
+    # variables prefixed with vw_ are to be two-dimensional vectors. [[a, b], [c, d]]
+    # aside: xmin should actually be called v_xmin, but it's not!
+
+    def fun(w_t):
+        # final conversion from exact fraction to possibly-inexact F-type:
+        v_x = [F(num) / F(den) for num, den in w_t]
+        # linearly interpolate within the bounds of the function:
+        v_x = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, v_x)]
+        res = obj(v_x)
+        return res
+
+    def ab_to_lu(w_a, w_b):  # converts corner points to midpoints, also denominators
+        # (2 * a + b) / (den * 3) = point inbetween corner "a" and the center
+        w_l = [(a[0] + a[0] + b[0], (1 << a[1]) * 3) for a, b in zip(w_a, w_b)]
+        # (a + 2 * b) / (den * 3) = point inbetween corner "b" and the center
+        w_u = [(a[0] + b[0] + b[0], (1 << b[1]) * 3) for a, b in zip(w_a, w_b)]
+        return w_l, w_u
+
+    dims = len(lo)  # already asserted that len(lo) == len(hi)
+
+    # initial corner points:
+    # note that the denominators are encoded as the exponents of a power of two.
+    # therefore, coordinate = pair[0] / (1 << pair[1]).
+    w_a, w_b = [(0, 0)] * dims, [(1, 0)] * dims
+
+    # initial points halfway between the each of the two corners and the center:
+    # note that the denominators are identity here.
+    # therefore, coordinate = pair[0] / pair[1].
+    w_l, w_u = ab_to_lu(w_a, w_b)
+
+    # initial function evaluations:
+    fl = fun(w_l)
+    fu = fun(w_u)
+
+    # initial minimum of all evaluated points so far:
+    if fl <= fu:
+        xmin, fmin = w_l, fl
+    else:
+        xmin, fmin = w_u, fu
+
+    # construct lists to hold all sample coordinates and their values:
+    vw_a, vw_b = [w_a], [w_b]
+    v_fl = [fl]  # remember that "l" and "u" are arbitrary shorthand used by the paper,
+    v_fu = [fu]  # and one isn't necessarily above or below the other.
+    v_active = {0: True}  # indices of hyper-rectangles that have yet to be subdivided
+    v_depth = [0]  # increments every split
+    n = 1  # how many indices are in use
+
+    del w_a, w_b, w_l, w_u, fl, fu  # prevent accidental re-use
+
+    def precision_met():  # returns True when the optimization procedure should exit
+        return min_error is not None and fmin <= min_error
+
+    def no_more_evals():  # returns True when the optimization procedure should exit
+        return max_evals is not None and n + 1 >= max_evals
+
+    def gather_potential(v_i):
+        # crappy algorithm for finding the convex hull of a line plot where
+        # the x axis is the diameter of hyper-rectangle, and
+        # the y axis is the minimum loss of the two points (v_fl, v_fu) within it.
+
+        # start by finding the arg-minimum for each set of equal-diameter rects.
+        # TODO: make this faster. use a sorted queue and peek at the best for each depth.
+        bests = {}  # mapping of depth to arg-minimum value (i.e. its index)
+        for i in v_i:
+            fl, fu = v_fl[i], v_fu[i]
+            f = min(fl, fu)
+            depth = v_depth[i]
+            if by_longest:
+                depth = depth // dims * dims
+            # assert depth < depth_limit
+            best = bests.get(depth)
+            # TODO: handle f == best case.
+            if best is None or f < best[1]:
+                bests[depth] = (i, f)
+
+        if len(bests) == 1:  # nothing to compare it to
+            return [i for i, f in bests.values()]
+
+        asc = sorted(bests.items(), key=lambda t: -t[0])  # sort by length, ascending
+
+        # first, remove points that slope downwards.
+        # this yields a pareto front, which isn't necessarily convex.
+        old = asc
+        new = [old[-1]]
+        smallest = old[-1][1][1]
+        for i in reversed(range(len(old) - 1)):
+            f = old[i][1][1]
+            if f <= smallest:
+                smallest = f
+                new.append(old[i])
+        new = new[::-1]
+
+        # second, convert depths to lengths.
+        if by_longest:  # TODO: does this branch make any difference?
+            new = [(longest_cache[depth],) + t for depth, t in new]
+        else:
+            new = [(diagonal_cache[depth],) + t for depth, t in new]
+
+        # third, remove points that fall under a previous slope.
+        old = new
+        skip = [False] * len(old)
+        for i in range(len(old)):
+            if skip[i]:
+                continue
+            len0, i0, f0 = old[i]
+            smallest_slope = None
+            for j in range(i + 1, len(old)):
+                if skip[j]:
+                    continue
+                len1, i1, f1 = old[j]
+                num = f1 - f0
+                den = len1 - len0
+                # this factor of 3/2 comes from the denominator;
+                # each length should be multiplied by 2/3:
+                # the furthest relative distance from a corner to a center point.
+                slope = num / den  # * F(3 / 2)
+                if smallest_slope is None:
+                    smallest_slope = slope
+                elif slope < smallest_slope:
+                    for k in range(i + 1, j):
+                        skip[k] = True
+                    smallest_slope = slope
+
+        new = [entry for entry, skipping in zip(old, skip) if not skipping]
+
+        if pruning:
+            v_f = sorted(min(fl, fu) for fl, fu in zip(v_fl, v_fu))
+            fmedian = v_f[len(v_f) // 2]
+
+            offset = fmin - pruning * (fmedian - fmin)
+            start = 0
+            K_slope = None
+            for i in range(len(new)):
+                len0, i0, f0 = new[i]
+                new_slope = (f0 - offset) / len0
+                if K_slope is None or new_slope < K_slope:
+                    K_slope = new_slope
+                    start = i
+            new = new[start:]
+
+        return [i for len, i, f in new]
+
+    def determine_longest(w_a, w_b):
+        # the index of the dimension is used as a tie-breaker (considered longer).
+        # e.g. a box has lengths (2, 3, 3). the index returned is then 1.
+        # TODO: alternate way of stating that comment: biased towards smaller indices.
+        longest = 0
+
+        invlen = None
+        for i, (a, b) in enumerate(zip(w_a, w_b)):
+            den_a = 1 << a[1]
+            den_b = 1 << b[1]
+            den = max(den_a, den_b)  # TODO: always the same, right?
+            if invlen is None or den < invlen:
+                invlen = den
+                longest = i
+
+        return longest
+
+    # Hyper-rectangle subdivision demonstration: (2D example)
+    #
+    #      Initial         Split Once        Split Twice       Split Both
+    #                     ↓b    ↓b               a↓b               a↓
+    #   ┌───────────┐←b   ┌─────╥─────┐     ┌─────╥─────┐     ┌─────╥─────┐←a
+    #   │           │     │     ║     │     │   l ║     │     │   l ║   l │
+    #   │   ①   u   │  →  │ u ② ║ u   │  →  │ u ③ ║ u   │  →  │ u   ║ u   │
+    #   │           │     │     ║     │   b→╞═════╣←a   │   b→╞═════╬═════╡←a
+    #   │   l       │  →  │   l ║   l │  →  │   l ║   l │  →  │   l ║   l │
+    #   │           │     │     ║     │     │ u   ║     │     │ u   ║ u   │
+    # a→└───────────┘     └─────╨─────┘   b→└─────╨─────┘   b→└─────╨─────┘
+    #                           ↑a    ↑a                ↑a          ↑b
+
+    def split_it(i, which, *, w_a, w_b, d):
+        nonlocal n, xmin, fmin
+        new, n = n, n + 1  # designate an index for the new hyper-rectangle
+        w_new_a = w_a.copy()
+        w_new_b = w_b.copy()
+
+        den = w_a[d][1]  # should be equal to w_b[d][1] as well
+        num_a = w_a[d][0]
+        num_b = w_b[d][0]
+        if which:
+            w_new_a[d] = (num_b + num_b, den + 1)  # swap
+            w_new_b[d] = (num_a + num_b, den + 1)  # slide
+        else:
+            w_new_a[d] = (num_a + num_b, den + 1)  # slide
+            w_new_b[d] = (num_a + num_a, den + 1)  # swap
+
+        w_l, w_u = ab_to_lu(w_new_a, w_new_b)
+        fl = fun(w_l) if which else v_fl[i]
+        fu = v_fu[i] if which else fun(w_u)
+        vw_a.append(w_new_a)
+        vw_b.append(w_new_b)
+        v_fl.append(fl)
+        v_fu.append(fu)
+        v_depth.append(v_depth[i] + 1)
+        if which:
+            if fl < fmin:
+                xmin, fmin = w_l, fl
+        else:
+            if fu < fmin:
+                xmin, fmin = w_u, fu
+        return new
+
+    def split_rectangles(v_i):  # returns new indices
+        v_new = []
+        for i in v_i:
+            w_a = vw_a[i]
+            w_b = vw_b[i]
+            d = determine_longest(w_a, w_b)
+
+            v_new.append(split_it(i, 0, w_a=w_a, w_b=w_b, d=d))
+            if precision_met() or no_more_evals():
+                break
+            v_new.append(split_it(i, 1, w_a=w_a, w_b=w_b, d=d))
+            if precision_met() or no_more_evals():
+                break
+
+        assert len(vw_a) == n, "internal error: vw_a has invalid length"
+        assert len(vw_b) == n, "internal error: vw_b has invalid length"
+        assert len(v_fl) == n, "internal error: v_fl has invalid length"
+        assert len(v_fu) == n, "internal error: v_fu has invalid length"
+        assert len(v_depth) == n, "internal error: v_depth has invalid length"
+        return v_new
+
+    def _arbitrary_subdivision(w_a, w_b, d):
+        # shrink the coordinates as if they were subdivided and a single
+        # subdivision was selected. which subdivision is chosen doesn't matter.
+        a_d = w_a[d]
+        b_d = w_b[d]
+        large = max(a_d[0], b_d[0])
+        small = min(a_d[0], b_d[0])
+        w_a[d] = (large * 2 - 0, a_d[1] + 1)
+        w_b[d] = (small * 2 + 1, b_d[1] + 1)
+
+    def precompute_diagonals_by_limit(limit):
+        diags = []
+        w_a = vw_a[0].copy()
+        w_b = vw_b[0].copy()
+        for depth in range(limit):
+            sq_dist = 0
+            for a, b in zip(w_a, w_b):
+                delta = b[0] - a[0]
+                sq_dist += (delta * delta) << (2 * (limit - a[1]))
+            diags.append(sq_dist)
+            _arbitrary_subdivision(w_a, w_b, depth % dims)
+        return [F(diag) ** F(0.5) / F(1 << limit) for diag in diags]
+
+    def precompute_diagonals_by_length(minlen):
+        diags, longests = [], []
+        w_a = vw_a[0].copy()
+        w_b = vw_b[0].copy()
+        for depth in range(1_000_000):
+            bits = max(max(a[1], b[1]) for a, b in zip(w_a, w_b))
+            sq_dist, longest = 0, 0
+            for a, b in zip(w_a, w_b):
+                delta = b[0] - a[0]
+                sq_dist += (delta * delta) << (2 * (bits - a[1]))
+                longest = max(longest, abs(delta) << (bits - a[1]))
+            diag = F(sq_dist) ** F(0.5) / F(1 << bits)
+            if diag < minlen:
+                break
+            longest = F(longest) / F(1 << bits)
+            diags.append(diag)
+            longests.append(longest)
+            _arbitrary_subdivision(w_a, w_b, depth % dims)
+        return diags, longests
+
+    diagonal_cache, longest_cache = precompute_diagonals_by_length(min_diag)
+    depth_limit = len(diagonal_cache)
+    if depth_limit <= 500:  # prevents OverflowError: int too large to convert to float
+        diagonal_cache = precompute_diagonals_by_limit(depth_limit)
+
+    for outer in range(1_000_000 if max_iters is None else max_iters):
+        if precision_met() or no_more_evals():  # check stopping conditions
+            break
+
+        # perform the actual *di*viding *rect*angles algorithm:
+        v_potential = gather_potential(v_active)
+        v_new = split_rectangles(v_potential)
+
+        for j in v_potential:
+            del v_active[j]  # these were just subdivided a moment ago, so remove them
+        for j in v_new:
+            if v_depth[j] < depth_limit:  # TODO: is checking this late wasting evals?
+                v_active[j] = True  # these were just created, so add them
+
+    tmin = [F(x[0]) / F(x[1]) for x in xmin]
+    argmin = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, tmin)]
+    return argmin, fmin
+
+
+if __name__ == "__main__":
+    import numpy as np
+
+    def objective2210(x):
+        # another linear transformation of the rosenbrock banana function.
+        assert len(x) > 1, len(x)
+        a, b = x[:-1], x[1:]
+
+        a = a / 4.0 * 2 - 12 / 15
+        b = b / 1.5 * 2 - 43 / 15
+        # solution: 3.60      1.40
+
+        return (
+            np.sum(100 * np.square(np.square(a) + b) + np.square(a - 1))
+            / 499.0444444444444
+        )
+
+    F = np.float64
+    res = birect(
+        lambda a: objective2210(np.array(a, F)),
+        [0, 0],
+        [5, 5],
+        min_diag=F(1e-8 / 5),
+        max_evals=2_000,
+        F=F,
+        by_longest=True,
+    )
+    print("", "birect result:", *res, sep="\n")
+    print("", "double checked:", objective2210(np.array(res[0], F)), sep="\n")
--- a/hg.py
+++ b/hg.py
@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# ars_lips_release3.py, but renamed, and fucked with.
+
+import numpy as np
+import sys
+
+
+def minimize(
+    objective,
+    init,
+    iterations,
+    sigma=1.0,
+    popsize=None,  # FIXME: None isn't actually supported!
+    rate=1.0,
+    parties=None,
+    true_objective=None,
+    seed=None,
+    _flags=0,
+):
+    F = lambda a: np.array(a, np.float64)
+    center = F(init)
+    central = objective(center)
+    evals = 1
+    history = []
+    prng = np.random.default_rng(seed)
+
+    dims = len(center)
+    if parties is None:
+        parties = 1 if popsize is None else (popsize + dims + 1) // (dims + 2)
+    elif parties < (popsize + dims + 1) // (dims + 2):
+        print("WARNING: parties is too small for chosen popsize.", sys.stderr)
+
+    def track():
+        cost = central if true_objective is None else true_objective(center)
+        history.append(cost)
+
+    track()
+
+    offset = np.sqrt(3 / 2)
+    weight = np.sqrt(np.pi) / 6
+    both = offset * weight
+
+    # seems to perform better without, at least for the 1220-based heuristic.
+    magic_d = np.sqrt(3 / 2) if _flags & 1 else 1  # good?
+    magic_a = 1 / magic_d + 1 / 2
+    magic_b = 1 / magic_d - 1 / 2
+
+    for i in range(iterations):
+        sigma_i = sigma(i) if callable(sigma) else sigma
+        rate_i = rate(i) if callable(rate) else rate
+
+        if _flags & 2:  # bad
+            assert not _flags & 16, f"flags 2 and 16 are incompatible ({_flags=})"
+            directions = prng.standard_normal((popsize, dims))
+        elif _flags & 16:
+            assert not _flags & 2, f"flags 2 and 16 are incompatible ({_flags=})"
+            directions = prng.standard_normal((popsize, dims + 2))
+            directions /= np.linalg.norm(directions, axis=1, keepdims=True)
+            directions = directions[:, :-2]  # drop coordinate
+        else:
+            directions = np.zeros((parties * (dims + 2), dims))
+            for i in range(parties):
+                new_dirs = np.linalg.qr(prng.standard_normal((dims + 2, dims + 2)))[0]
+                directions[i * (dims + 2) : (i + 1) * (dims + 2)] = new_dirs[:, :-2]
+            if popsize is not None:
+                directions = directions[:popsize]
+
+        scale = np.sqrt(2) * sigma_i
+        deltas = scale * offset * directions
+        c0 = F([objective(center - delta) for delta in deltas]) - central
+        c1 = F([objective(center + delta) for delta in deltas]) - central
+        if _flags & 8:
+            l0 = np.abs(c1 * magic_a + c0 * magic_b)
+            l1 = np.abs(c1 * magic_b + c0 * magic_a)
+            peak = (l0 + l1) / 2
+        else:
+            # peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0))
+            # peak *= np.sqrt(np.pi) / 5
+            l0 = np.square(c1 * magic_a + c0 * magic_b)
+            l1 = np.square(c1 * magic_b + c0 * magic_a)
+            peak = np.sqrt((l0 + l1) / 2)
+        peak[peak == 0.0] = 1.0
+        difference = ((c1 - c0) / peak)[:, None]
+
+        evals += len(deltas) * 2
+
+        # gradscale = both / (scale / 2 * np.sqrt(np.pi)) * (offset * sigma_i / parties)
+        # gradscale = 2 / np.sqrt(2) / np.sqrt(np.pi) * offset * offset * weight / sigma_i * sigma_i / parties
+        # gradscale = 1 / (np.sqrt(8) * parties)
+        # print(gradscale, 0.35355339059327373)
+        dd = directions * difference
+        if _flags & 4:
+            gradscale = 1 / (np.sqrt(8) * parties)
+            step = sigma_i * gradscale * np.sum(dd, axis=0)
+        else:
+            # NOTE: this is a different scale, but i figure it should work better:
+            step = sigma_i / np.sqrt(12) / parties * np.sum(dd, axis=0)
+
+        center = center - rate_i * step
+        central = objective(center)
+        evals += 1
+
+        track()
+
+    # print("popsize:", popsize)
+    # print("parties:", parties)
+    # print("evals:", evals)
+
+    return center, history
+
+
+if __name__ == "__main__":
+    # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
+    from hipsterplot import plot
+
+    prng = np.random.default_rng(42070)
+
+    problem_size = 100
+    rotation, _ = np.linalg.qr(prng.standard_normal((problem_size, problem_size)))
+    init = prng.uniform(-5, 5, problem_size)
+
+    def ellipsoid_problem(x):
+        return np.sum(10 ** (6 * np.linspace(0, 1, len(x))) * np.square(x))
+
+    def rotated_problem(x):
+        return ellipsoid_problem(rotation @ x)
+
+    def noisy_problem(x):
+        multiplicative_noise = prng.uniform(0.707, 1.414)
+        additive_noise = np.abs(prng.normal(scale=50000))
+        return rotated_problem(x) * multiplicative_noise + additive_noise
+
+    objective, true_objective = noisy_problem, rotated_problem
+    # optimized, history = minimize(
+    #     objective, init, 1000, sigma=0.3, true_objective=true_objective
+    # )
+    optimized, history = minimize(
+        objective,
+        init,
+        1000,
+        sigma=2.0,
+        popsize=10,
+        true_objective=true_objective,
+        seed=42069,
+        # _flags=13,
+    )
+
+    print(" " * 11 + "plot of log10-losses over time")
+    plot(np.log10(history), num_y_chars=23)
+
+    print("loss, before optimization: {:9.6f}".format(true_objective(init)))
+    print("loss,  after optimization: {:9.6f}".format(true_objective(optimized)))
--- a/notwacube.py
+++ b/notwacube.py
@ -1,4 +1,5 @@
 from dlibcube2 import dlib_cube
+from notwacube2 import make_birect, make_mercury, make_soo
 from nloptcube2 import nlopt_neldermead_cube
 from randomcube2 import another_random_cube, quasirandom_cube
 from scipycube2 import (
@ -29,6 +30,8 @@ from scipycube2 import (
    # scipy_trustncg_cube,
 )

+import tinytweaks as tt
+
 BASELINE_OPTIMIZERS = [
    another_random_cube,
    nlopt_neldermead_cube,
@ -42,6 +45,15 @@ SHGO_OPTIMIZERS = [
    make_shgo(method="slsqp", mei=True),
 ]

+NOTWA_OPTIMIZERS = [
+    make_birect(deepness=38, longest=True, pruning=False),
+    make_mercury(13, isigma=tt.S3),
+    make_mercury(29, isigma=tt.S3),
+    make_soo(deepness=23, K=4),
+    make_soo(deepness=35, K=3),
+    make_soo(deepness=53, K=2),
+]
+

 def collect_everything():
    G = globals().values()
--- a/notwacube2.py
+++ b/notwacube2.py
@ -0,0 +1,103 @@
+from utils import wrap_untrustworthy, check, final
+import numpy as np
+import tinytweaks as tt
+
+
+def make_birect(deepness=23, *, longest=False, pruning=False):
+    from birect import birect
+
+    def f(objective, n_trials, n_dim, with_count):
+        feval_count = 0
+
+        def _objective(x):
+            nonlocal feval_count
+            feval_count += 1
+            return objective(x)
+
+        xopt, fopt = birect(
+            _objective,
+            (0,) * n_dim,
+            (1,) * n_dim,
+            min_diag=2.0 ** -float(deepness),
+            max_evals=n_trials,
+            by_longest=longest,
+            pruning=pruning,
+            F=np.float64,
+        )
+        return (fopt, xopt, feval_count) if with_count else (fopt, xopt)
+
+    name = f"birect{deepness:02}"
+    name += "_longest" if longest else ""
+    name += "_pruning" if pruning else ""
+    f.__name__ = name + "_cube"
+    return f
+
+
+def make_soo(deepness=None, *, K=3):
+    if deepness is None:
+        deepness = int(31 * np.log(2) / np.log(K) - 1e-8)
+    assert K >= 2
+    from soo import soo
+
+    def f(objective, n_trials, n_dim, with_count):
+        feval_count = 0
+
+        def _objective(x):
+            nonlocal feval_count
+            feval_count += 1
+            return objective(x)
+
+        xopt, history = soo(
+            _objective, np.full(n_dim, 0.5), 0.5, n_trials, K=K, h_max=deepness
+        )
+        fopt = history[-1]
+        return (fopt, xopt, feval_count) if with_count else (fopt, xopt)
+
+    name = f"soo{deepness}_k{K}"
+    f.__name__ = name + "_cube"
+    return f
+
+
+def make_mercury(
+    flags, bounding="clip", *, isigma=tt.IV, popsize=2, irate=1, seed=None
+):
+    from hg import minimize as hg
+
+    def f(objective, n_trials, n_dim, with_count):
+        _objective = wrap_untrustworthy(objective, n_trials, bounding=bounding)
+
+        init = (0.5,) * n_dim
+        iterations = (n_trials - 1 + popsize * 2) // (popsize * 2 + 1)
+        center, history = hg(
+            _objective,
+            init,
+            iterations,
+            sigma=isigma if callable(isigma) else 1 / isigma,
+            popsize=popsize,
+            rate=irate if callable(irate) else 1 / irate,
+            seed=seed,
+            _flags=flags,
+        )
+
+        # use our remaining evals on something, anything.
+        feval_count = _objective(check)
+        wasted = max(0, -(feval_count - n_trials))
+        if wasted:
+            print(f"wasting {wasted} of {n_trials} evaluations")
+        for _ in range(wasted):
+            prng = np.random.default_rng()
+            x = prng.uniform(size=n_dim)
+            fx = _objective(x)
+
+        fopt, xopt, feval_count = _objective(final)
+        return (fopt, xopt, feval_count) if with_count else (fopt, xopt)
+
+    name = f"hg{flags:02}"
+    name += f"_{bounding}" if bounding != "clip" else ""
+    name += f"_ps{popsize:02}" if popsize != 2 else ""
+    if isigma != tt.IV:
+        name += f"_is{isigma.__name__}" if callable(isigma) else f"_is{isigma:02}"
+    if irate != 1:
+        name += f"_ir{irate.__name__}" if callable(irate) else f"_ir{irate:02}"
+    f.__name__ = name + "_cube"
+    return f
--- a/soo.py
+++ b/soo.py
@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+from collections import namedtuple
+from queue import PriorityQueue
+import numpy as np
+
+Result = namedtuple("Result", ["h", "p", "value"])
+
+
+def checked_fma(x, m, a):
+    # assumes x, m, and a are integers.
+    # assumes m >= 1 and a >= 0.
+    y = x * m
+    assert y >= x, "integer overflow"
+    z = y + a
+    assert z >= y, "integer overflow"
+    return z
+
+
+# Simultaneous Optimistic Optimization
+def soo(obj, origin, sigma, evals, K=3, h_max=None, iters=None, _adapt=None):
+    assert K > 1, "K must be at least 2"
+
+    origin = np.array(origin, float)
+    dims = len(origin)
+    h_max = int(31 * np.log(2) / np.log(K) - 1e-8) if h_max is None else h_max
+
+    # Tree (coordinates)
+    Ts = [PriorityQueue() for _ in range(h_max + 1)]
+
+    def coords(h, p):
+        return (np.array(p, int) + 0.5) * (sigma * 2) / K ** np.array(h, int) - sigma
+
+    samples = 0
+
+    def query(h, p):
+        nonlocal samples
+        depth = np.max(h)
+        value = obj(coords(h, p) + origin)
+        Ts[depth].put((float(value), samples, list(h), list(p)))
+        samples += 1
+        return Result(h, p, value)
+
+    # kickstart with middle point at the root node.
+    best_ever = query([0] * dims, [0] * dims)
+
+    history, stopping = [], False
+    for t in range(1_000_000 if iters is None else iters):
+        best = np.inf
+        lim = h_max if _adapt is None else min(_adapt(t), h_max)
+        for depth in range(lim):
+            if Ts[depth].empty():
+                continue
+
+            # select a node at this depth.
+            tup = Ts[depth].get(block=False)
+            value, _, h, p = tup
+            if value > best:
+                Ts[depth].put(tup, block=False)
+                continue
+            best = value
+
+            # split this node, potentially increasing its overall depth.
+            inc = 0
+            for j in reversed(range(1, dims)):
+                if h[j] < h[j - 1]:
+                    inc = j
+
+            if K & 1 == 1:  # when odd,
+                # maintain center point but split it differently next time.
+                h[inc] += 1
+                p[inc] = checked_fma(p[inc], K, K // 2)
+
+                new_depth = np.max(h)
+                Ts[new_depth].put(tup, block=False)
+
+                for i in range(K):
+                    if i == K // 2:
+                        continue
+                    p_new = p.copy()
+                    p_new[inc] += i - K // 2
+                    candidate = query(h.copy(), p_new)
+                    if candidate.value < best_ever.value:
+                        best_ever = candidate
+                    if samples >= evals:
+                        stopping = True
+                        break
+
+            else:  # when even,
+                # discard center point and only use its subdivisions.
+                for i in range(K):
+                    h_new, p_new = h.copy(), p.copy()
+                    h_new[inc] += 1
+                    p_new[inc] = checked_fma(p_new[inc], K, i)
+                    candidate = query(h_new, p_new)
+                    if candidate.value < best_ever.value:
+                        best_ever = candidate
+                    if samples >= evals:
+                        stopping = True
+                        break
+
+            if stopping:
+                break
+
+        history.append(best_ever.value)
+        if stopping:
+            break
+
+    h, p, _ = best_ever
+
+    return coords(h, p) + origin, history
+
+
+if __name__ == "__main__":
+    # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
+    from hipsterplot import plot
+    import numpy as np
+
+    def objective2210(x):
+        # another linear transformation of the rosenbrock banana function.
+        assert len(x) > 1, len(x)
+        a, b = x[:-1], x[1:]
+
+        a = a / 4.0 * 2 - 12 / 15
+        b = b / 1.5 * 2 - 43 / 15
+        # solution: 3.60      1.40
+
+        return (
+            np.sum(100 * np.square(np.square(a) + b) + np.square(a - 1))
+            / 499.0444444444444
+        )
+
+    optimized, history = soo(
+        objective2210,
+        origin=np.array([2.5, 2.5]),
+        sigma=np.array([2.5, 2.5]),
+        evals=2_000,
+        # K=3,
+        # h_max=19,  # should be equivalent-ish to min_diag=1e-8 in birect.py
+        K=2,
+        h_max=27,  # should be equivalent-ish to min_diag=1e-8 in birect.py
+    )
+    print(" " * 11 + "plot of log10-losses over time")
+    plot(np.log10(history), num_y_chars=23)
+    print("", "soo result:", list(optimized), history[-1], sep="\n")
+    print("", "double checked:", objective2210(optimized), sep="\n")
--- a/tinytweaks.py
+++ b/tinytweaks.py
@ -0,0 +1,51 @@
+import numpy as np
+
+
+def IV(i):
+    fib = (0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597)
+    inv = tuple(range(-2, 16))
+    return 1 / max(3, max(b for a, b in zip(fib, inv) if i >= a))
+
+
+def T2(i):
+    return IV(i) * 2
+
+
+def T3(i):
+    return IV(i) * 3
+
+
+def D2(i):
+    return IV(i) / 2
+
+
+def D3(i):
+    return IV(i) / 3
+
+
+def P3(i):
+    return IV(i) ** 3
+
+
+def X3(i):
+    return i * 3
+
+
+def C2(i):  # crude workaround for my own broken interface
+    return 2.0
+
+
+def H2(i):  # throwaway
+    return 0.5 if i >= 65 // 5 else 1.0
+
+
+def II(i):
+    return 1 / (i + 1)
+
+
+def S2(i):
+    return 1 / (2 * np.sqrt(i + 2))
+
+
+def S3(i):
+    return 1 / (3 * np.sqrt(i + 3))