From 970260783d548e33c02618e99e00be8af0758838 Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Thu, 4 May 2023 15:35:23 -0700 Subject: [PATCH] add my own optimizers (birect, soo, mercury) --- birect.py | 360 ++++++++++++++++++++++++++++++++++++++++++++++++++ hg.py | 152 +++++++++++++++++++++ notwacube.py | 12 ++ notwacube2.py | 103 +++++++++++++++ soo.py | 145 ++++++++++++++++++++ tinytweaks.py | 51 +++++++ 6 files changed, 823 insertions(+) create mode 100644 birect.py create mode 100644 hg.py create mode 100644 notwacube2.py create mode 100644 soo.py create mode 100644 tinytweaks.py diff --git a/birect.py b/birect.py new file mode 100644 index 0000000..568f7f1 --- /dev/null +++ b/birect.py @@ -0,0 +1,360 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022 Connor Olding + +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. + +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +def birect( + obj, # objective function to find the minimum value of + lo, # list of lower bounds for each problem dimension + hi, # list of upper bounds for each problem dimension + *, + min_diag, # never subdivide hyper-rectangles below this length + min_error=None, # exit when the objective function achieves this error + max_evals=None, # exit when the objective function has been run this many times + max_iters=None, # exit when the optimization procedure iterates this many times + by_longest=False, # measure by rects by longest edge instead of their diagonal + pruning=0, + F=float, # can be float, decimal.Decimal, numpy.float64, or anything float-like +): + assert len(lo) == len(hi), "dimensional mismatch" + + assert not ( + min_error is None and max_evals is None and max_iters is None + ), "at least one stopping condition must be specified" + + # variables prefixed with v_ are to be one-dimensional vectors. [a, b, c] + # variables prefixed with w_ are to be one-dimensional vectors of pairs. [a, b] + # variables prefixed with vw_ are to be two-dimensional vectors. [[a, b], [c, d]] + # aside: xmin should actually be called v_xmin, but it's not! + + def fun(w_t): + # final conversion from exact fraction to possibly-inexact F-type: + v_x = [F(num) / F(den) for num, den in w_t] + # linearly interpolate within the bounds of the function: + v_x = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, v_x)] + res = obj(v_x) + return res + + def ab_to_lu(w_a, w_b): # converts corner points to midpoints, also denominators + # (2 * a + b) / (den * 3) = point inbetween corner "a" and the center + w_l = [(a[0] + a[0] + b[0], (1 << a[1]) * 3) for a, b in zip(w_a, w_b)] + # (a + 2 * b) / (den * 3) = point inbetween corner "b" and the center + w_u = [(a[0] + b[0] + b[0], (1 << b[1]) * 3) for a, b in zip(w_a, w_b)] + return w_l, w_u + + dims = len(lo) # already asserted that len(lo) == len(hi) + + # initial corner points: + # note that the denominators are encoded as the exponents of a power of two. + # therefore, coordinate = pair[0] / (1 << pair[1]). + w_a, w_b = [(0, 0)] * dims, [(1, 0)] * dims + + # initial points halfway between the each of the two corners and the center: + # note that the denominators are identity here. + # therefore, coordinate = pair[0] / pair[1]. + w_l, w_u = ab_to_lu(w_a, w_b) + + # initial function evaluations: + fl = fun(w_l) + fu = fun(w_u) + + # initial minimum of all evaluated points so far: + if fl <= fu: + xmin, fmin = w_l, fl + else: + xmin, fmin = w_u, fu + + # construct lists to hold all sample coordinates and their values: + vw_a, vw_b = [w_a], [w_b] + v_fl = [fl] # remember that "l" and "u" are arbitrary shorthand used by the paper, + v_fu = [fu] # and one isn't necessarily above or below the other. + v_active = {0: True} # indices of hyper-rectangles that have yet to be subdivided + v_depth = [0] # increments every split + n = 1 # how many indices are in use + + del w_a, w_b, w_l, w_u, fl, fu # prevent accidental re-use + + def precision_met(): # returns True when the optimization procedure should exit + return min_error is not None and fmin <= min_error + + def no_more_evals(): # returns True when the optimization procedure should exit + return max_evals is not None and n + 1 >= max_evals + + def gather_potential(v_i): + # crappy algorithm for finding the convex hull of a line plot where + # the x axis is the diameter of hyper-rectangle, and + # the y axis is the minimum loss of the two points (v_fl, v_fu) within it. + + # start by finding the arg-minimum for each set of equal-diameter rects. + # TODO: make this faster. use a sorted queue and peek at the best for each depth. + bests = {} # mapping of depth to arg-minimum value (i.e. its index) + for i in v_i: + fl, fu = v_fl[i], v_fu[i] + f = min(fl, fu) + depth = v_depth[i] + if by_longest: + depth = depth // dims * dims + # assert depth < depth_limit + best = bests.get(depth) + # TODO: handle f == best case. + if best is None or f < best[1]: + bests[depth] = (i, f) + + if len(bests) == 1: # nothing to compare it to + return [i for i, f in bests.values()] + + asc = sorted(bests.items(), key=lambda t: -t[0]) # sort by length, ascending + + # first, remove points that slope downwards. + # this yields a pareto front, which isn't necessarily convex. + old = asc + new = [old[-1]] + smallest = old[-1][1][1] + for i in reversed(range(len(old) - 1)): + f = old[i][1][1] + if f <= smallest: + smallest = f + new.append(old[i]) + new = new[::-1] + + # second, convert depths to lengths. + if by_longest: # TODO: does this branch make any difference? + new = [(longest_cache[depth],) + t for depth, t in new] + else: + new = [(diagonal_cache[depth],) + t for depth, t in new] + + # third, remove points that fall under a previous slope. + old = new + skip = [False] * len(old) + for i in range(len(old)): + if skip[i]: + continue + len0, i0, f0 = old[i] + smallest_slope = None + for j in range(i + 1, len(old)): + if skip[j]: + continue + len1, i1, f1 = old[j] + num = f1 - f0 + den = len1 - len0 + # this factor of 3/2 comes from the denominator; + # each length should be multiplied by 2/3: + # the furthest relative distance from a corner to a center point. + slope = num / den # * F(3 / 2) + if smallest_slope is None: + smallest_slope = slope + elif slope < smallest_slope: + for k in range(i + 1, j): + skip[k] = True + smallest_slope = slope + + new = [entry for entry, skipping in zip(old, skip) if not skipping] + + if pruning: + v_f = sorted(min(fl, fu) for fl, fu in zip(v_fl, v_fu)) + fmedian = v_f[len(v_f) // 2] + + offset = fmin - pruning * (fmedian - fmin) + start = 0 + K_slope = None + for i in range(len(new)): + len0, i0, f0 = new[i] + new_slope = (f0 - offset) / len0 + if K_slope is None or new_slope < K_slope: + K_slope = new_slope + start = i + new = new[start:] + + return [i for len, i, f in new] + + def determine_longest(w_a, w_b): + # the index of the dimension is used as a tie-breaker (considered longer). + # e.g. a box has lengths (2, 3, 3). the index returned is then 1. + # TODO: alternate way of stating that comment: biased towards smaller indices. + longest = 0 + + invlen = None + for i, (a, b) in enumerate(zip(w_a, w_b)): + den_a = 1 << a[1] + den_b = 1 << b[1] + den = max(den_a, den_b) # TODO: always the same, right? + if invlen is None or den < invlen: + invlen = den + longest = i + + return longest + + # Hyper-rectangle subdivision demonstration: (2D example) + # + # Initial Split Once Split Twice Split Both + # ↓b ↓b a↓b a↓ + # ┌───────────┐←b ┌─────╥─────┐ ┌─────╥─────┐ ┌─────╥─────┐←a + # │ │ │ ║ │ │ l ║ │ │ l ║ l │ + # │ ① u │ → │ u ② ║ u │ → │ u ③ ║ u │ → │ u ║ u │ + # │ │ │ ║ │ b→╞═════╣←a │ b→╞═════╬═════╡←a + # │ l │ → │ l ║ l │ → │ l ║ l │ → │ l ║ l │ + # │ │ │ ║ │ │ u ║ │ │ u ║ u │ + # a→└───────────┘ └─────╨─────┘ b→└─────╨─────┘ b→└─────╨─────┘ + # ↑a ↑a ↑a ↑b + + def split_it(i, which, *, w_a, w_b, d): + nonlocal n, xmin, fmin + new, n = n, n + 1 # designate an index for the new hyper-rectangle + w_new_a = w_a.copy() + w_new_b = w_b.copy() + + den = w_a[d][1] # should be equal to w_b[d][1] as well + num_a = w_a[d][0] + num_b = w_b[d][0] + if which: + w_new_a[d] = (num_b + num_b, den + 1) # swap + w_new_b[d] = (num_a + num_b, den + 1) # slide + else: + w_new_a[d] = (num_a + num_b, den + 1) # slide + w_new_b[d] = (num_a + num_a, den + 1) # swap + + w_l, w_u = ab_to_lu(w_new_a, w_new_b) + fl = fun(w_l) if which else v_fl[i] + fu = v_fu[i] if which else fun(w_u) + vw_a.append(w_new_a) + vw_b.append(w_new_b) + v_fl.append(fl) + v_fu.append(fu) + v_depth.append(v_depth[i] + 1) + if which: + if fl < fmin: + xmin, fmin = w_l, fl + else: + if fu < fmin: + xmin, fmin = w_u, fu + return new + + def split_rectangles(v_i): # returns new indices + v_new = [] + for i in v_i: + w_a = vw_a[i] + w_b = vw_b[i] + d = determine_longest(w_a, w_b) + + v_new.append(split_it(i, 0, w_a=w_a, w_b=w_b, d=d)) + if precision_met() or no_more_evals(): + break + v_new.append(split_it(i, 1, w_a=w_a, w_b=w_b, d=d)) + if precision_met() or no_more_evals(): + break + + assert len(vw_a) == n, "internal error: vw_a has invalid length" + assert len(vw_b) == n, "internal error: vw_b has invalid length" + assert len(v_fl) == n, "internal error: v_fl has invalid length" + assert len(v_fu) == n, "internal error: v_fu has invalid length" + assert len(v_depth) == n, "internal error: v_depth has invalid length" + return v_new + + def _arbitrary_subdivision(w_a, w_b, d): + # shrink the coordinates as if they were subdivided and a single + # subdivision was selected. which subdivision is chosen doesn't matter. + a_d = w_a[d] + b_d = w_b[d] + large = max(a_d[0], b_d[0]) + small = min(a_d[0], b_d[0]) + w_a[d] = (large * 2 - 0, a_d[1] + 1) + w_b[d] = (small * 2 + 1, b_d[1] + 1) + + def precompute_diagonals_by_limit(limit): + diags = [] + w_a = vw_a[0].copy() + w_b = vw_b[0].copy() + for depth in range(limit): + sq_dist = 0 + for a, b in zip(w_a, w_b): + delta = b[0] - a[0] + sq_dist += (delta * delta) << (2 * (limit - a[1])) + diags.append(sq_dist) + _arbitrary_subdivision(w_a, w_b, depth % dims) + return [F(diag) ** F(0.5) / F(1 << limit) for diag in diags] + + def precompute_diagonals_by_length(minlen): + diags, longests = [], [] + w_a = vw_a[0].copy() + w_b = vw_b[0].copy() + for depth in range(1_000_000): + bits = max(max(a[1], b[1]) for a, b in zip(w_a, w_b)) + sq_dist, longest = 0, 0 + for a, b in zip(w_a, w_b): + delta = b[0] - a[0] + sq_dist += (delta * delta) << (2 * (bits - a[1])) + longest = max(longest, abs(delta) << (bits - a[1])) + diag = F(sq_dist) ** F(0.5) / F(1 << bits) + if diag < minlen: + break + longest = F(longest) / F(1 << bits) + diags.append(diag) + longests.append(longest) + _arbitrary_subdivision(w_a, w_b, depth % dims) + return diags, longests + + diagonal_cache, longest_cache = precompute_diagonals_by_length(min_diag) + depth_limit = len(diagonal_cache) + if depth_limit <= 500: # prevents OverflowError: int too large to convert to float + diagonal_cache = precompute_diagonals_by_limit(depth_limit) + + for outer in range(1_000_000 if max_iters is None else max_iters): + if precision_met() or no_more_evals(): # check stopping conditions + break + + # perform the actual *di*viding *rect*angles algorithm: + v_potential = gather_potential(v_active) + v_new = split_rectangles(v_potential) + + for j in v_potential: + del v_active[j] # these were just subdivided a moment ago, so remove them + for j in v_new: + if v_depth[j] < depth_limit: # TODO: is checking this late wasting evals? + v_active[j] = True # these were just created, so add them + + tmin = [F(x[0]) / F(x[1]) for x in xmin] + argmin = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, tmin)] + return argmin, fmin + + +if __name__ == "__main__": + import numpy as np + + def objective2210(x): + # another linear transformation of the rosenbrock banana function. + assert len(x) > 1, len(x) + a, b = x[:-1], x[1:] + + a = a / 4.0 * 2 - 12 / 15 + b = b / 1.5 * 2 - 43 / 15 + # solution: 3.60 1.40 + + return ( + np.sum(100 * np.square(np.square(a) + b) + np.square(a - 1)) + / 499.0444444444444 + ) + + F = np.float64 + res = birect( + lambda a: objective2210(np.array(a, F)), + [0, 0], + [5, 5], + min_diag=F(1e-8 / 5), + max_evals=2_000, + F=F, + by_longest=True, + ) + print("", "birect result:", *res, sep="\n") + print("", "double checked:", objective2210(np.array(res[0], F)), sep="\n") diff --git a/hg.py b/hg.py new file mode 100644 index 0000000..67e5ed2 --- /dev/null +++ b/hg.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +# ars_lips_release3.py, but renamed, and fucked with. + +import numpy as np +import sys + + +def minimize( + objective, + init, + iterations, + sigma=1.0, + popsize=None, # FIXME: None isn't actually supported! + rate=1.0, + parties=None, + true_objective=None, + seed=None, + _flags=0, +): + F = lambda a: np.array(a, np.float64) + center = F(init) + central = objective(center) + evals = 1 + history = [] + prng = np.random.default_rng(seed) + + dims = len(center) + if parties is None: + parties = 1 if popsize is None else (popsize + dims + 1) // (dims + 2) + elif parties < (popsize + dims + 1) // (dims + 2): + print("WARNING: parties is too small for chosen popsize.", sys.stderr) + + def track(): + cost = central if true_objective is None else true_objective(center) + history.append(cost) + + track() + + offset = np.sqrt(3 / 2) + weight = np.sqrt(np.pi) / 6 + both = offset * weight + + # seems to perform better without, at least for the 1220-based heuristic. + magic_d = np.sqrt(3 / 2) if _flags & 1 else 1 # good? + magic_a = 1 / magic_d + 1 / 2 + magic_b = 1 / magic_d - 1 / 2 + + for i in range(iterations): + sigma_i = sigma(i) if callable(sigma) else sigma + rate_i = rate(i) if callable(rate) else rate + + if _flags & 2: # bad + assert not _flags & 16, f"flags 2 and 16 are incompatible ({_flags=})" + directions = prng.standard_normal((popsize, dims)) + elif _flags & 16: + assert not _flags & 2, f"flags 2 and 16 are incompatible ({_flags=})" + directions = prng.standard_normal((popsize, dims + 2)) + directions /= np.linalg.norm(directions, axis=1, keepdims=True) + directions = directions[:, :-2] # drop coordinate + else: + directions = np.zeros((parties * (dims + 2), dims)) + for i in range(parties): + new_dirs = np.linalg.qr(prng.standard_normal((dims + 2, dims + 2)))[0] + directions[i * (dims + 2) : (i + 1) * (dims + 2)] = new_dirs[:, :-2] + if popsize is not None: + directions = directions[:popsize] + + scale = np.sqrt(2) * sigma_i + deltas = scale * offset * directions + c0 = F([objective(center - delta) for delta in deltas]) - central + c1 = F([objective(center + delta) for delta in deltas]) - central + if _flags & 8: + l0 = np.abs(c1 * magic_a + c0 * magic_b) + l1 = np.abs(c1 * magic_b + c0 * magic_a) + peak = (l0 + l1) / 2 + else: + # peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0)) + # peak *= np.sqrt(np.pi) / 5 + l0 = np.square(c1 * magic_a + c0 * magic_b) + l1 = np.square(c1 * magic_b + c0 * magic_a) + peak = np.sqrt((l0 + l1) / 2) + peak[peak == 0.0] = 1.0 + difference = ((c1 - c0) / peak)[:, None] + + evals += len(deltas) * 2 + + # gradscale = both / (scale / 2 * np.sqrt(np.pi)) * (offset * sigma_i / parties) + # gradscale = 2 / np.sqrt(2) / np.sqrt(np.pi) * offset * offset * weight / sigma_i * sigma_i / parties + # gradscale = 1 / (np.sqrt(8) * parties) + # print(gradscale, 0.35355339059327373) + dd = directions * difference + if _flags & 4: + gradscale = 1 / (np.sqrt(8) * parties) + step = sigma_i * gradscale * np.sum(dd, axis=0) + else: + # NOTE: this is a different scale, but i figure it should work better: + step = sigma_i / np.sqrt(12) / parties * np.sum(dd, axis=0) + + center = center - rate_i * step + central = objective(center) + evals += 1 + + track() + + # print("popsize:", popsize) + # print("parties:", parties) + # print("evals:", evals) + + return center, history + + +if __name__ == "__main__": + # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py + from hipsterplot import plot + + prng = np.random.default_rng(42070) + + problem_size = 100 + rotation, _ = np.linalg.qr(prng.standard_normal((problem_size, problem_size))) + init = prng.uniform(-5, 5, problem_size) + + def ellipsoid_problem(x): + return np.sum(10 ** (6 * np.linspace(0, 1, len(x))) * np.square(x)) + + def rotated_problem(x): + return ellipsoid_problem(rotation @ x) + + def noisy_problem(x): + multiplicative_noise = prng.uniform(0.707, 1.414) + additive_noise = np.abs(prng.normal(scale=50000)) + return rotated_problem(x) * multiplicative_noise + additive_noise + + objective, true_objective = noisy_problem, rotated_problem + # optimized, history = minimize( + # objective, init, 1000, sigma=0.3, true_objective=true_objective + # ) + optimized, history = minimize( + objective, + init, + 1000, + sigma=2.0, + popsize=10, + true_objective=true_objective, + seed=42069, + # _flags=13, + ) + + print(" " * 11 + "plot of log10-losses over time") + plot(np.log10(history), num_y_chars=23) + + print("loss, before optimization: {:9.6f}".format(true_objective(init))) + print("loss, after optimization: {:9.6f}".format(true_objective(optimized))) diff --git a/notwacube.py b/notwacube.py index 355b14b..97e42bb 100644 --- a/notwacube.py +++ b/notwacube.py @@ -1,4 +1,5 @@ from dlibcube2 import dlib_cube +from notwacube2 import make_birect, make_mercury, make_soo from nloptcube2 import nlopt_neldermead_cube from randomcube2 import another_random_cube, quasirandom_cube from scipycube2 import ( @@ -29,6 +30,8 @@ from scipycube2 import ( # scipy_trustncg_cube, ) +import tinytweaks as tt + BASELINE_OPTIMIZERS = [ another_random_cube, nlopt_neldermead_cube, @@ -42,6 +45,15 @@ SHGO_OPTIMIZERS = [ make_shgo(method="slsqp", mei=True), ] +NOTWA_OPTIMIZERS = [ + make_birect(deepness=38, longest=True, pruning=False), + make_mercury(13, isigma=tt.S3), + make_mercury(29, isigma=tt.S3), + make_soo(deepness=23, K=4), + make_soo(deepness=35, K=3), + make_soo(deepness=53, K=2), +] + def collect_everything(): G = globals().values() diff --git a/notwacube2.py b/notwacube2.py new file mode 100644 index 0000000..d688896 --- /dev/null +++ b/notwacube2.py @@ -0,0 +1,103 @@ +from utils import wrap_untrustworthy, check, final +import numpy as np +import tinytweaks as tt + + +def make_birect(deepness=23, *, longest=False, pruning=False): + from birect import birect + + def f(objective, n_trials, n_dim, with_count): + feval_count = 0 + + def _objective(x): + nonlocal feval_count + feval_count += 1 + return objective(x) + + xopt, fopt = birect( + _objective, + (0,) * n_dim, + (1,) * n_dim, + min_diag=2.0 ** -float(deepness), + max_evals=n_trials, + by_longest=longest, + pruning=pruning, + F=np.float64, + ) + return (fopt, xopt, feval_count) if with_count else (fopt, xopt) + + name = f"birect{deepness:02}" + name += "_longest" if longest else "" + name += "_pruning" if pruning else "" + f.__name__ = name + "_cube" + return f + + +def make_soo(deepness=None, *, K=3): + if deepness is None: + deepness = int(31 * np.log(2) / np.log(K) - 1e-8) + assert K >= 2 + from soo import soo + + def f(objective, n_trials, n_dim, with_count): + feval_count = 0 + + def _objective(x): + nonlocal feval_count + feval_count += 1 + return objective(x) + + xopt, history = soo( + _objective, np.full(n_dim, 0.5), 0.5, n_trials, K=K, h_max=deepness + ) + fopt = history[-1] + return (fopt, xopt, feval_count) if with_count else (fopt, xopt) + + name = f"soo{deepness}_k{K}" + f.__name__ = name + "_cube" + return f + + +def make_mercury( + flags, bounding="clip", *, isigma=tt.IV, popsize=2, irate=1, seed=None +): + from hg import minimize as hg + + def f(objective, n_trials, n_dim, with_count): + _objective = wrap_untrustworthy(objective, n_trials, bounding=bounding) + + init = (0.5,) * n_dim + iterations = (n_trials - 1 + popsize * 2) // (popsize * 2 + 1) + center, history = hg( + _objective, + init, + iterations, + sigma=isigma if callable(isigma) else 1 / isigma, + popsize=popsize, + rate=irate if callable(irate) else 1 / irate, + seed=seed, + _flags=flags, + ) + + # use our remaining evals on something, anything. + feval_count = _objective(check) + wasted = max(0, -(feval_count - n_trials)) + if wasted: + print(f"wasting {wasted} of {n_trials} evaluations") + for _ in range(wasted): + prng = np.random.default_rng() + x = prng.uniform(size=n_dim) + fx = _objective(x) + + fopt, xopt, feval_count = _objective(final) + return (fopt, xopt, feval_count) if with_count else (fopt, xopt) + + name = f"hg{flags:02}" + name += f"_{bounding}" if bounding != "clip" else "" + name += f"_ps{popsize:02}" if popsize != 2 else "" + if isigma != tt.IV: + name += f"_is{isigma.__name__}" if callable(isigma) else f"_is{isigma:02}" + if irate != 1: + name += f"_ir{irate.__name__}" if callable(irate) else f"_ir{irate:02}" + f.__name__ = name + "_cube" + return f diff --git a/soo.py b/soo.py new file mode 100644 index 0000000..73136c3 --- /dev/null +++ b/soo.py @@ -0,0 +1,145 @@ +#!/usr/bin/env python3 +from collections import namedtuple +from queue import PriorityQueue +import numpy as np + +Result = namedtuple("Result", ["h", "p", "value"]) + + +def checked_fma(x, m, a): + # assumes x, m, and a are integers. + # assumes m >= 1 and a >= 0. + y = x * m + assert y >= x, "integer overflow" + z = y + a + assert z >= y, "integer overflow" + return z + + +# Simultaneous Optimistic Optimization +def soo(obj, origin, sigma, evals, K=3, h_max=None, iters=None, _adapt=None): + assert K > 1, "K must be at least 2" + + origin = np.array(origin, float) + dims = len(origin) + h_max = int(31 * np.log(2) / np.log(K) - 1e-8) if h_max is None else h_max + + # Tree (coordinates) + Ts = [PriorityQueue() for _ in range(h_max + 1)] + + def coords(h, p): + return (np.array(p, int) + 0.5) * (sigma * 2) / K ** np.array(h, int) - sigma + + samples = 0 + + def query(h, p): + nonlocal samples + depth = np.max(h) + value = obj(coords(h, p) + origin) + Ts[depth].put((float(value), samples, list(h), list(p))) + samples += 1 + return Result(h, p, value) + + # kickstart with middle point at the root node. + best_ever = query([0] * dims, [0] * dims) + + history, stopping = [], False + for t in range(1_000_000 if iters is None else iters): + best = np.inf + lim = h_max if _adapt is None else min(_adapt(t), h_max) + for depth in range(lim): + if Ts[depth].empty(): + continue + + # select a node at this depth. + tup = Ts[depth].get(block=False) + value, _, h, p = tup + if value > best: + Ts[depth].put(tup, block=False) + continue + best = value + + # split this node, potentially increasing its overall depth. + inc = 0 + for j in reversed(range(1, dims)): + if h[j] < h[j - 1]: + inc = j + + if K & 1 == 1: # when odd, + # maintain center point but split it differently next time. + h[inc] += 1 + p[inc] = checked_fma(p[inc], K, K // 2) + + new_depth = np.max(h) + Ts[new_depth].put(tup, block=False) + + for i in range(K): + if i == K // 2: + continue + p_new = p.copy() + p_new[inc] += i - K // 2 + candidate = query(h.copy(), p_new) + if candidate.value < best_ever.value: + best_ever = candidate + if samples >= evals: + stopping = True + break + + else: # when even, + # discard center point and only use its subdivisions. + for i in range(K): + h_new, p_new = h.copy(), p.copy() + h_new[inc] += 1 + p_new[inc] = checked_fma(p_new[inc], K, i) + candidate = query(h_new, p_new) + if candidate.value < best_ever.value: + best_ever = candidate + if samples >= evals: + stopping = True + break + + if stopping: + break + + history.append(best_ever.value) + if stopping: + break + + h, p, _ = best_ever + + return coords(h, p) + origin, history + + +if __name__ == "__main__": + # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py + from hipsterplot import plot + import numpy as np + + def objective2210(x): + # another linear transformation of the rosenbrock banana function. + assert len(x) > 1, len(x) + a, b = x[:-1], x[1:] + + a = a / 4.0 * 2 - 12 / 15 + b = b / 1.5 * 2 - 43 / 15 + # solution: 3.60 1.40 + + return ( + np.sum(100 * np.square(np.square(a) + b) + np.square(a - 1)) + / 499.0444444444444 + ) + + optimized, history = soo( + objective2210, + origin=np.array([2.5, 2.5]), + sigma=np.array([2.5, 2.5]), + evals=2_000, + # K=3, + # h_max=19, # should be equivalent-ish to min_diag=1e-8 in birect.py + K=2, + h_max=27, # should be equivalent-ish to min_diag=1e-8 in birect.py + ) + print(" " * 11 + "plot of log10-losses over time") + plot(np.log10(history), num_y_chars=23) + print("", "soo result:", list(optimized), history[-1], sep="\n") + print("", "double checked:", objective2210(optimized), sep="\n") diff --git a/tinytweaks.py b/tinytweaks.py new file mode 100644 index 0000000..5313929 --- /dev/null +++ b/tinytweaks.py @@ -0,0 +1,51 @@ +import numpy as np + + +def IV(i): + fib = (0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597) + inv = tuple(range(-2, 16)) + return 1 / max(3, max(b for a, b in zip(fib, inv) if i >= a)) + + +def T2(i): + return IV(i) * 2 + + +def T3(i): + return IV(i) * 3 + + +def D2(i): + return IV(i) / 2 + + +def D3(i): + return IV(i) / 3 + + +def P3(i): + return IV(i) ** 3 + + +def X3(i): + return i * 3 + + +def C2(i): # crude workaround for my own broken interface + return 2.0 + + +def H2(i): # throwaway + return 0.5 if i >= 65 // 5 else 1.0 + + +def II(i): + return 1 / (i + 1) + + +def S2(i): + return 1 / (2 * np.sqrt(i + 2)) + + +def S3(i): + return 1 / (3 * np.sqrt(i + 3))