add my own optimizers (birect, soo, mercury)
This commit is contained in:
parent
eaa6c5de04
commit
970260783d
6 changed files with 823 additions and 0 deletions
360
birect.py
Normal file
360
birect.py
Normal file
|
@ -0,0 +1,360 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
# Copyright (C) 2022 Connor Olding
|
||||
|
||||
# Permission to use, copy, modify, and/or distribute this software for any
|
||||
# purpose with or without fee is hereby granted, provided that the above
|
||||
# copyright notice and this permission notice appear in all copies.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
|
||||
def birect(
|
||||
obj, # objective function to find the minimum value of
|
||||
lo, # list of lower bounds for each problem dimension
|
||||
hi, # list of upper bounds for each problem dimension
|
||||
*,
|
||||
min_diag, # never subdivide hyper-rectangles below this length
|
||||
min_error=None, # exit when the objective function achieves this error
|
||||
max_evals=None, # exit when the objective function has been run this many times
|
||||
max_iters=None, # exit when the optimization procedure iterates this many times
|
||||
by_longest=False, # measure by rects by longest edge instead of their diagonal
|
||||
pruning=0,
|
||||
F=float, # can be float, decimal.Decimal, numpy.float64, or anything float-like
|
||||
):
|
||||
assert len(lo) == len(hi), "dimensional mismatch"
|
||||
|
||||
assert not (
|
||||
min_error is None and max_evals is None and max_iters is None
|
||||
), "at least one stopping condition must be specified"
|
||||
|
||||
# variables prefixed with v_ are to be one-dimensional vectors. [a, b, c]
|
||||
# variables prefixed with w_ are to be one-dimensional vectors of pairs. [a, b]
|
||||
# variables prefixed with vw_ are to be two-dimensional vectors. [[a, b], [c, d]]
|
||||
# aside: xmin should actually be called v_xmin, but it's not!
|
||||
|
||||
def fun(w_t):
|
||||
# final conversion from exact fraction to possibly-inexact F-type:
|
||||
v_x = [F(num) / F(den) for num, den in w_t]
|
||||
# linearly interpolate within the bounds of the function:
|
||||
v_x = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, v_x)]
|
||||
res = obj(v_x)
|
||||
return res
|
||||
|
||||
def ab_to_lu(w_a, w_b): # converts corner points to midpoints, also denominators
|
||||
# (2 * a + b) / (den * 3) = point inbetween corner "a" and the center
|
||||
w_l = [(a[0] + a[0] + b[0], (1 << a[1]) * 3) for a, b in zip(w_a, w_b)]
|
||||
# (a + 2 * b) / (den * 3) = point inbetween corner "b" and the center
|
||||
w_u = [(a[0] + b[0] + b[0], (1 << b[1]) * 3) for a, b in zip(w_a, w_b)]
|
||||
return w_l, w_u
|
||||
|
||||
dims = len(lo) # already asserted that len(lo) == len(hi)
|
||||
|
||||
# initial corner points:
|
||||
# note that the denominators are encoded as the exponents of a power of two.
|
||||
# therefore, coordinate = pair[0] / (1 << pair[1]).
|
||||
w_a, w_b = [(0, 0)] * dims, [(1, 0)] * dims
|
||||
|
||||
# initial points halfway between the each of the two corners and the center:
|
||||
# note that the denominators are identity here.
|
||||
# therefore, coordinate = pair[0] / pair[1].
|
||||
w_l, w_u = ab_to_lu(w_a, w_b)
|
||||
|
||||
# initial function evaluations:
|
||||
fl = fun(w_l)
|
||||
fu = fun(w_u)
|
||||
|
||||
# initial minimum of all evaluated points so far:
|
||||
if fl <= fu:
|
||||
xmin, fmin = w_l, fl
|
||||
else:
|
||||
xmin, fmin = w_u, fu
|
||||
|
||||
# construct lists to hold all sample coordinates and their values:
|
||||
vw_a, vw_b = [w_a], [w_b]
|
||||
v_fl = [fl] # remember that "l" and "u" are arbitrary shorthand used by the paper,
|
||||
v_fu = [fu] # and one isn't necessarily above or below the other.
|
||||
v_active = {0: True} # indices of hyper-rectangles that have yet to be subdivided
|
||||
v_depth = [0] # increments every split
|
||||
n = 1 # how many indices are in use
|
||||
|
||||
del w_a, w_b, w_l, w_u, fl, fu # prevent accidental re-use
|
||||
|
||||
def precision_met(): # returns True when the optimization procedure should exit
|
||||
return min_error is not None and fmin <= min_error
|
||||
|
||||
def no_more_evals(): # returns True when the optimization procedure should exit
|
||||
return max_evals is not None and n + 1 >= max_evals
|
||||
|
||||
def gather_potential(v_i):
|
||||
# crappy algorithm for finding the convex hull of a line plot where
|
||||
# the x axis is the diameter of hyper-rectangle, and
|
||||
# the y axis is the minimum loss of the two points (v_fl, v_fu) within it.
|
||||
|
||||
# start by finding the arg-minimum for each set of equal-diameter rects.
|
||||
# TODO: make this faster. use a sorted queue and peek at the best for each depth.
|
||||
bests = {} # mapping of depth to arg-minimum value (i.e. its index)
|
||||
for i in v_i:
|
||||
fl, fu = v_fl[i], v_fu[i]
|
||||
f = min(fl, fu)
|
||||
depth = v_depth[i]
|
||||
if by_longest:
|
||||
depth = depth // dims * dims
|
||||
# assert depth < depth_limit
|
||||
best = bests.get(depth)
|
||||
# TODO: handle f == best case.
|
||||
if best is None or f < best[1]:
|
||||
bests[depth] = (i, f)
|
||||
|
||||
if len(bests) == 1: # nothing to compare it to
|
||||
return [i for i, f in bests.values()]
|
||||
|
||||
asc = sorted(bests.items(), key=lambda t: -t[0]) # sort by length, ascending
|
||||
|
||||
# first, remove points that slope downwards.
|
||||
# this yields a pareto front, which isn't necessarily convex.
|
||||
old = asc
|
||||
new = [old[-1]]
|
||||
smallest = old[-1][1][1]
|
||||
for i in reversed(range(len(old) - 1)):
|
||||
f = old[i][1][1]
|
||||
if f <= smallest:
|
||||
smallest = f
|
||||
new.append(old[i])
|
||||
new = new[::-1]
|
||||
|
||||
# second, convert depths to lengths.
|
||||
if by_longest: # TODO: does this branch make any difference?
|
||||
new = [(longest_cache[depth],) + t for depth, t in new]
|
||||
else:
|
||||
new = [(diagonal_cache[depth],) + t for depth, t in new]
|
||||
|
||||
# third, remove points that fall under a previous slope.
|
||||
old = new
|
||||
skip = [False] * len(old)
|
||||
for i in range(len(old)):
|
||||
if skip[i]:
|
||||
continue
|
||||
len0, i0, f0 = old[i]
|
||||
smallest_slope = None
|
||||
for j in range(i + 1, len(old)):
|
||||
if skip[j]:
|
||||
continue
|
||||
len1, i1, f1 = old[j]
|
||||
num = f1 - f0
|
||||
den = len1 - len0
|
||||
# this factor of 3/2 comes from the denominator;
|
||||
# each length should be multiplied by 2/3:
|
||||
# the furthest relative distance from a corner to a center point.
|
||||
slope = num / den # * F(3 / 2)
|
||||
if smallest_slope is None:
|
||||
smallest_slope = slope
|
||||
elif slope < smallest_slope:
|
||||
for k in range(i + 1, j):
|
||||
skip[k] = True
|
||||
smallest_slope = slope
|
||||
|
||||
new = [entry for entry, skipping in zip(old, skip) if not skipping]
|
||||
|
||||
if pruning:
|
||||
v_f = sorted(min(fl, fu) for fl, fu in zip(v_fl, v_fu))
|
||||
fmedian = v_f[len(v_f) // 2]
|
||||
|
||||
offset = fmin - pruning * (fmedian - fmin)
|
||||
start = 0
|
||||
K_slope = None
|
||||
for i in range(len(new)):
|
||||
len0, i0, f0 = new[i]
|
||||
new_slope = (f0 - offset) / len0
|
||||
if K_slope is None or new_slope < K_slope:
|
||||
K_slope = new_slope
|
||||
start = i
|
||||
new = new[start:]
|
||||
|
||||
return [i for len, i, f in new]
|
||||
|
||||
def determine_longest(w_a, w_b):
|
||||
# the index of the dimension is used as a tie-breaker (considered longer).
|
||||
# e.g. a box has lengths (2, 3, 3). the index returned is then 1.
|
||||
# TODO: alternate way of stating that comment: biased towards smaller indices.
|
||||
longest = 0
|
||||
|
||||
invlen = None
|
||||
for i, (a, b) in enumerate(zip(w_a, w_b)):
|
||||
den_a = 1 << a[1]
|
||||
den_b = 1 << b[1]
|
||||
den = max(den_a, den_b) # TODO: always the same, right?
|
||||
if invlen is None or den < invlen:
|
||||
invlen = den
|
||||
longest = i
|
||||
|
||||
return longest
|
||||
|
||||
# Hyper-rectangle subdivision demonstration: (2D example)
|
||||
#
|
||||
# Initial Split Once Split Twice Split Both
|
||||
# ↓b ↓b a↓b a↓
|
||||
# ┌───────────┐←b ┌─────╥─────┐ ┌─────╥─────┐ ┌─────╥─────┐←a
|
||||
# │ │ │ ║ │ │ l ║ │ │ l ║ l │
|
||||
# │ ① u │ → │ u ② ║ u │ → │ u ③ ║ u │ → │ u ║ u │
|
||||
# │ │ │ ║ │ b→╞═════╣←a │ b→╞═════╬═════╡←a
|
||||
# │ l │ → │ l ║ l │ → │ l ║ l │ → │ l ║ l │
|
||||
# │ │ │ ║ │ │ u ║ │ │ u ║ u │
|
||||
# a→└───────────┘ └─────╨─────┘ b→└─────╨─────┘ b→└─────╨─────┘
|
||||
# ↑a ↑a ↑a ↑b
|
||||
|
||||
def split_it(i, which, *, w_a, w_b, d):
|
||||
nonlocal n, xmin, fmin
|
||||
new, n = n, n + 1 # designate an index for the new hyper-rectangle
|
||||
w_new_a = w_a.copy()
|
||||
w_new_b = w_b.copy()
|
||||
|
||||
den = w_a[d][1] # should be equal to w_b[d][1] as well
|
||||
num_a = w_a[d][0]
|
||||
num_b = w_b[d][0]
|
||||
if which:
|
||||
w_new_a[d] = (num_b + num_b, den + 1) # swap
|
||||
w_new_b[d] = (num_a + num_b, den + 1) # slide
|
||||
else:
|
||||
w_new_a[d] = (num_a + num_b, den + 1) # slide
|
||||
w_new_b[d] = (num_a + num_a, den + 1) # swap
|
||||
|
||||
w_l, w_u = ab_to_lu(w_new_a, w_new_b)
|
||||
fl = fun(w_l) if which else v_fl[i]
|
||||
fu = v_fu[i] if which else fun(w_u)
|
||||
vw_a.append(w_new_a)
|
||||
vw_b.append(w_new_b)
|
||||
v_fl.append(fl)
|
||||
v_fu.append(fu)
|
||||
v_depth.append(v_depth[i] + 1)
|
||||
if which:
|
||||
if fl < fmin:
|
||||
xmin, fmin = w_l, fl
|
||||
else:
|
||||
if fu < fmin:
|
||||
xmin, fmin = w_u, fu
|
||||
return new
|
||||
|
||||
def split_rectangles(v_i): # returns new indices
|
||||
v_new = []
|
||||
for i in v_i:
|
||||
w_a = vw_a[i]
|
||||
w_b = vw_b[i]
|
||||
d = determine_longest(w_a, w_b)
|
||||
|
||||
v_new.append(split_it(i, 0, w_a=w_a, w_b=w_b, d=d))
|
||||
if precision_met() or no_more_evals():
|
||||
break
|
||||
v_new.append(split_it(i, 1, w_a=w_a, w_b=w_b, d=d))
|
||||
if precision_met() or no_more_evals():
|
||||
break
|
||||
|
||||
assert len(vw_a) == n, "internal error: vw_a has invalid length"
|
||||
assert len(vw_b) == n, "internal error: vw_b has invalid length"
|
||||
assert len(v_fl) == n, "internal error: v_fl has invalid length"
|
||||
assert len(v_fu) == n, "internal error: v_fu has invalid length"
|
||||
assert len(v_depth) == n, "internal error: v_depth has invalid length"
|
||||
return v_new
|
||||
|
||||
def _arbitrary_subdivision(w_a, w_b, d):
|
||||
# shrink the coordinates as if they were subdivided and a single
|
||||
# subdivision was selected. which subdivision is chosen doesn't matter.
|
||||
a_d = w_a[d]
|
||||
b_d = w_b[d]
|
||||
large = max(a_d[0], b_d[0])
|
||||
small = min(a_d[0], b_d[0])
|
||||
w_a[d] = (large * 2 - 0, a_d[1] + 1)
|
||||
w_b[d] = (small * 2 + 1, b_d[1] + 1)
|
||||
|
||||
def precompute_diagonals_by_limit(limit):
|
||||
diags = []
|
||||
w_a = vw_a[0].copy()
|
||||
w_b = vw_b[0].copy()
|
||||
for depth in range(limit):
|
||||
sq_dist = 0
|
||||
for a, b in zip(w_a, w_b):
|
||||
delta = b[0] - a[0]
|
||||
sq_dist += (delta * delta) << (2 * (limit - a[1]))
|
||||
diags.append(sq_dist)
|
||||
_arbitrary_subdivision(w_a, w_b, depth % dims)
|
||||
return [F(diag) ** F(0.5) / F(1 << limit) for diag in diags]
|
||||
|
||||
def precompute_diagonals_by_length(minlen):
|
||||
diags, longests = [], []
|
||||
w_a = vw_a[0].copy()
|
||||
w_b = vw_b[0].copy()
|
||||
for depth in range(1_000_000):
|
||||
bits = max(max(a[1], b[1]) for a, b in zip(w_a, w_b))
|
||||
sq_dist, longest = 0, 0
|
||||
for a, b in zip(w_a, w_b):
|
||||
delta = b[0] - a[0]
|
||||
sq_dist += (delta * delta) << (2 * (bits - a[1]))
|
||||
longest = max(longest, abs(delta) << (bits - a[1]))
|
||||
diag = F(sq_dist) ** F(0.5) / F(1 << bits)
|
||||
if diag < minlen:
|
||||
break
|
||||
longest = F(longest) / F(1 << bits)
|
||||
diags.append(diag)
|
||||
longests.append(longest)
|
||||
_arbitrary_subdivision(w_a, w_b, depth % dims)
|
||||
return diags, longests
|
||||
|
||||
diagonal_cache, longest_cache = precompute_diagonals_by_length(min_diag)
|
||||
depth_limit = len(diagonal_cache)
|
||||
if depth_limit <= 500: # prevents OverflowError: int too large to convert to float
|
||||
diagonal_cache = precompute_diagonals_by_limit(depth_limit)
|
||||
|
||||
for outer in range(1_000_000 if max_iters is None else max_iters):
|
||||
if precision_met() or no_more_evals(): # check stopping conditions
|
||||
break
|
||||
|
||||
# perform the actual *di*viding *rect*angles algorithm:
|
||||
v_potential = gather_potential(v_active)
|
||||
v_new = split_rectangles(v_potential)
|
||||
|
||||
for j in v_potential:
|
||||
del v_active[j] # these were just subdivided a moment ago, so remove them
|
||||
for j in v_new:
|
||||
if v_depth[j] < depth_limit: # TODO: is checking this late wasting evals?
|
||||
v_active[j] = True # these were just created, so add them
|
||||
|
||||
tmin = [F(x[0]) / F(x[1]) for x in xmin]
|
||||
argmin = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, tmin)]
|
||||
return argmin, fmin
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import numpy as np
|
||||
|
||||
def objective2210(x):
|
||||
# another linear transformation of the rosenbrock banana function.
|
||||
assert len(x) > 1, len(x)
|
||||
a, b = x[:-1], x[1:]
|
||||
|
||||
a = a / 4.0 * 2 - 12 / 15
|
||||
b = b / 1.5 * 2 - 43 / 15
|
||||
# solution: 3.60 1.40
|
||||
|
||||
return (
|
||||
np.sum(100 * np.square(np.square(a) + b) + np.square(a - 1))
|
||||
/ 499.0444444444444
|
||||
)
|
||||
|
||||
F = np.float64
|
||||
res = birect(
|
||||
lambda a: objective2210(np.array(a, F)),
|
||||
[0, 0],
|
||||
[5, 5],
|
||||
min_diag=F(1e-8 / 5),
|
||||
max_evals=2_000,
|
||||
F=F,
|
||||
by_longest=True,
|
||||
)
|
||||
print("", "birect result:", *res, sep="\n")
|
||||
print("", "double checked:", objective2210(np.array(res[0], F)), sep="\n")
|
152
hg.py
Normal file
152
hg.py
Normal file
|
@ -0,0 +1,152 @@
|
|||
#!/usr/bin/env python3
|
||||
# ars_lips_release3.py, but renamed, and fucked with.
|
||||
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
|
||||
def minimize(
|
||||
objective,
|
||||
init,
|
||||
iterations,
|
||||
sigma=1.0,
|
||||
popsize=None, # FIXME: None isn't actually supported!
|
||||
rate=1.0,
|
||||
parties=None,
|
||||
true_objective=None,
|
||||
seed=None,
|
||||
_flags=0,
|
||||
):
|
||||
F = lambda a: np.array(a, np.float64)
|
||||
center = F(init)
|
||||
central = objective(center)
|
||||
evals = 1
|
||||
history = []
|
||||
prng = np.random.default_rng(seed)
|
||||
|
||||
dims = len(center)
|
||||
if parties is None:
|
||||
parties = 1 if popsize is None else (popsize + dims + 1) // (dims + 2)
|
||||
elif parties < (popsize + dims + 1) // (dims + 2):
|
||||
print("WARNING: parties is too small for chosen popsize.", sys.stderr)
|
||||
|
||||
def track():
|
||||
cost = central if true_objective is None else true_objective(center)
|
||||
history.append(cost)
|
||||
|
||||
track()
|
||||
|
||||
offset = np.sqrt(3 / 2)
|
||||
weight = np.sqrt(np.pi) / 6
|
||||
both = offset * weight
|
||||
|
||||
# seems to perform better without, at least for the 1220-based heuristic.
|
||||
magic_d = np.sqrt(3 / 2) if _flags & 1 else 1 # good?
|
||||
magic_a = 1 / magic_d + 1 / 2
|
||||
magic_b = 1 / magic_d - 1 / 2
|
||||
|
||||
for i in range(iterations):
|
||||
sigma_i = sigma(i) if callable(sigma) else sigma
|
||||
rate_i = rate(i) if callable(rate) else rate
|
||||
|
||||
if _flags & 2: # bad
|
||||
assert not _flags & 16, f"flags 2 and 16 are incompatible ({_flags=})"
|
||||
directions = prng.standard_normal((popsize, dims))
|
||||
elif _flags & 16:
|
||||
assert not _flags & 2, f"flags 2 and 16 are incompatible ({_flags=})"
|
||||
directions = prng.standard_normal((popsize, dims + 2))
|
||||
directions /= np.linalg.norm(directions, axis=1, keepdims=True)
|
||||
directions = directions[:, :-2] # drop coordinate
|
||||
else:
|
||||
directions = np.zeros((parties * (dims + 2), dims))
|
||||
for i in range(parties):
|
||||
new_dirs = np.linalg.qr(prng.standard_normal((dims + 2, dims + 2)))[0]
|
||||
directions[i * (dims + 2) : (i + 1) * (dims + 2)] = new_dirs[:, :-2]
|
||||
if popsize is not None:
|
||||
directions = directions[:popsize]
|
||||
|
||||
scale = np.sqrt(2) * sigma_i
|
||||
deltas = scale * offset * directions
|
||||
c0 = F([objective(center - delta) for delta in deltas]) - central
|
||||
c1 = F([objective(center + delta) for delta in deltas]) - central
|
||||
if _flags & 8:
|
||||
l0 = np.abs(c1 * magic_a + c0 * magic_b)
|
||||
l1 = np.abs(c1 * magic_b + c0 * magic_a)
|
||||
peak = (l0 + l1) / 2
|
||||
else:
|
||||
# peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0))
|
||||
# peak *= np.sqrt(np.pi) / 5
|
||||
l0 = np.square(c1 * magic_a + c0 * magic_b)
|
||||
l1 = np.square(c1 * magic_b + c0 * magic_a)
|
||||
peak = np.sqrt((l0 + l1) / 2)
|
||||
peak[peak == 0.0] = 1.0
|
||||
difference = ((c1 - c0) / peak)[:, None]
|
||||
|
||||
evals += len(deltas) * 2
|
||||
|
||||
# gradscale = both / (scale / 2 * np.sqrt(np.pi)) * (offset * sigma_i / parties)
|
||||
# gradscale = 2 / np.sqrt(2) / np.sqrt(np.pi) * offset * offset * weight / sigma_i * sigma_i / parties
|
||||
# gradscale = 1 / (np.sqrt(8) * parties)
|
||||
# print(gradscale, 0.35355339059327373)
|
||||
dd = directions * difference
|
||||
if _flags & 4:
|
||||
gradscale = 1 / (np.sqrt(8) * parties)
|
||||
step = sigma_i * gradscale * np.sum(dd, axis=0)
|
||||
else:
|
||||
# NOTE: this is a different scale, but i figure it should work better:
|
||||
step = sigma_i / np.sqrt(12) / parties * np.sum(dd, axis=0)
|
||||
|
||||
center = center - rate_i * step
|
||||
central = objective(center)
|
||||
evals += 1
|
||||
|
||||
track()
|
||||
|
||||
# print("popsize:", popsize)
|
||||
# print("parties:", parties)
|
||||
# print("evals:", evals)
|
||||
|
||||
return center, history
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
|
||||
from hipsterplot import plot
|
||||
|
||||
prng = np.random.default_rng(42070)
|
||||
|
||||
problem_size = 100
|
||||
rotation, _ = np.linalg.qr(prng.standard_normal((problem_size, problem_size)))
|
||||
init = prng.uniform(-5, 5, problem_size)
|
||||
|
||||
def ellipsoid_problem(x):
|
||||
return np.sum(10 ** (6 * np.linspace(0, 1, len(x))) * np.square(x))
|
||||
|
||||
def rotated_problem(x):
|
||||
return ellipsoid_problem(rotation @ x)
|
||||
|
||||
def noisy_problem(x):
|
||||
multiplicative_noise = prng.uniform(0.707, 1.414)
|
||||
additive_noise = np.abs(prng.normal(scale=50000))
|
||||
return rotated_problem(x) * multiplicative_noise + additive_noise
|
||||
|
||||
objective, true_objective = noisy_problem, rotated_problem
|
||||
# optimized, history = minimize(
|
||||
# objective, init, 1000, sigma=0.3, true_objective=true_objective
|
||||
# )
|
||||
optimized, history = minimize(
|
||||
objective,
|
||||
init,
|
||||
1000,
|
||||
sigma=2.0,
|
||||
popsize=10,
|
||||
true_objective=true_objective,
|
||||
seed=42069,
|
||||
# _flags=13,
|
||||
)
|
||||
|
||||
print(" " * 11 + "plot of log10-losses over time")
|
||||
plot(np.log10(history), num_y_chars=23)
|
||||
|
||||
print("loss, before optimization: {:9.6f}".format(true_objective(init)))
|
||||
print("loss, after optimization: {:9.6f}".format(true_objective(optimized)))
|
12
notwacube.py
12
notwacube.py
|
@ -1,4 +1,5 @@
|
|||
from dlibcube2 import dlib_cube
|
||||
from notwacube2 import make_birect, make_mercury, make_soo
|
||||
from nloptcube2 import nlopt_neldermead_cube
|
||||
from randomcube2 import another_random_cube, quasirandom_cube
|
||||
from scipycube2 import (
|
||||
|
@ -29,6 +30,8 @@ from scipycube2 import (
|
|||
# scipy_trustncg_cube,
|
||||
)
|
||||
|
||||
import tinytweaks as tt
|
||||
|
||||
BASELINE_OPTIMIZERS = [
|
||||
another_random_cube,
|
||||
nlopt_neldermead_cube,
|
||||
|
@ -42,6 +45,15 @@ SHGO_OPTIMIZERS = [
|
|||
make_shgo(method="slsqp", mei=True),
|
||||
]
|
||||
|
||||
NOTWA_OPTIMIZERS = [
|
||||
make_birect(deepness=38, longest=True, pruning=False),
|
||||
make_mercury(13, isigma=tt.S3),
|
||||
make_mercury(29, isigma=tt.S3),
|
||||
make_soo(deepness=23, K=4),
|
||||
make_soo(deepness=35, K=3),
|
||||
make_soo(deepness=53, K=2),
|
||||
]
|
||||
|
||||
|
||||
def collect_everything():
|
||||
G = globals().values()
|
||||
|
|
103
notwacube2.py
Normal file
103
notwacube2.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
from utils import wrap_untrustworthy, check, final
|
||||
import numpy as np
|
||||
import tinytweaks as tt
|
||||
|
||||
|
||||
def make_birect(deepness=23, *, longest=False, pruning=False):
|
||||
from birect import birect
|
||||
|
||||
def f(objective, n_trials, n_dim, with_count):
|
||||
feval_count = 0
|
||||
|
||||
def _objective(x):
|
||||
nonlocal feval_count
|
||||
feval_count += 1
|
||||
return objective(x)
|
||||
|
||||
xopt, fopt = birect(
|
||||
_objective,
|
||||
(0,) * n_dim,
|
||||
(1,) * n_dim,
|
||||
min_diag=2.0 ** -float(deepness),
|
||||
max_evals=n_trials,
|
||||
by_longest=longest,
|
||||
pruning=pruning,
|
||||
F=np.float64,
|
||||
)
|
||||
return (fopt, xopt, feval_count) if with_count else (fopt, xopt)
|
||||
|
||||
name = f"birect{deepness:02}"
|
||||
name += "_longest" if longest else ""
|
||||
name += "_pruning" if pruning else ""
|
||||
f.__name__ = name + "_cube"
|
||||
return f
|
||||
|
||||
|
||||
def make_soo(deepness=None, *, K=3):
|
||||
if deepness is None:
|
||||
deepness = int(31 * np.log(2) / np.log(K) - 1e-8)
|
||||
assert K >= 2
|
||||
from soo import soo
|
||||
|
||||
def f(objective, n_trials, n_dim, with_count):
|
||||
feval_count = 0
|
||||
|
||||
def _objective(x):
|
||||
nonlocal feval_count
|
||||
feval_count += 1
|
||||
return objective(x)
|
||||
|
||||
xopt, history = soo(
|
||||
_objective, np.full(n_dim, 0.5), 0.5, n_trials, K=K, h_max=deepness
|
||||
)
|
||||
fopt = history[-1]
|
||||
return (fopt, xopt, feval_count) if with_count else (fopt, xopt)
|
||||
|
||||
name = f"soo{deepness}_k{K}"
|
||||
f.__name__ = name + "_cube"
|
||||
return f
|
||||
|
||||
|
||||
def make_mercury(
|
||||
flags, bounding="clip", *, isigma=tt.IV, popsize=2, irate=1, seed=None
|
||||
):
|
||||
from hg import minimize as hg
|
||||
|
||||
def f(objective, n_trials, n_dim, with_count):
|
||||
_objective = wrap_untrustworthy(objective, n_trials, bounding=bounding)
|
||||
|
||||
init = (0.5,) * n_dim
|
||||
iterations = (n_trials - 1 + popsize * 2) // (popsize * 2 + 1)
|
||||
center, history = hg(
|
||||
_objective,
|
||||
init,
|
||||
iterations,
|
||||
sigma=isigma if callable(isigma) else 1 / isigma,
|
||||
popsize=popsize,
|
||||
rate=irate if callable(irate) else 1 / irate,
|
||||
seed=seed,
|
||||
_flags=flags,
|
||||
)
|
||||
|
||||
# use our remaining evals on something, anything.
|
||||
feval_count = _objective(check)
|
||||
wasted = max(0, -(feval_count - n_trials))
|
||||
if wasted:
|
||||
print(f"wasting {wasted} of {n_trials} evaluations")
|
||||
for _ in range(wasted):
|
||||
prng = np.random.default_rng()
|
||||
x = prng.uniform(size=n_dim)
|
||||
fx = _objective(x)
|
||||
|
||||
fopt, xopt, feval_count = _objective(final)
|
||||
return (fopt, xopt, feval_count) if with_count else (fopt, xopt)
|
||||
|
||||
name = f"hg{flags:02}"
|
||||
name += f"_{bounding}" if bounding != "clip" else ""
|
||||
name += f"_ps{popsize:02}" if popsize != 2 else ""
|
||||
if isigma != tt.IV:
|
||||
name += f"_is{isigma.__name__}" if callable(isigma) else f"_is{isigma:02}"
|
||||
if irate != 1:
|
||||
name += f"_ir{irate.__name__}" if callable(irate) else f"_ir{irate:02}"
|
||||
f.__name__ = name + "_cube"
|
||||
return f
|
145
soo.py
Normal file
145
soo.py
Normal file
|
@ -0,0 +1,145 @@
|
|||
#!/usr/bin/env python3
|
||||
from collections import namedtuple
|
||||
from queue import PriorityQueue
|
||||
import numpy as np
|
||||
|
||||
Result = namedtuple("Result", ["h", "p", "value"])
|
||||
|
||||
|
||||
def checked_fma(x, m, a):
|
||||
# assumes x, m, and a are integers.
|
||||
# assumes m >= 1 and a >= 0.
|
||||
y = x * m
|
||||
assert y >= x, "integer overflow"
|
||||
z = y + a
|
||||
assert z >= y, "integer overflow"
|
||||
return z
|
||||
|
||||
|
||||
# Simultaneous Optimistic Optimization
|
||||
def soo(obj, origin, sigma, evals, K=3, h_max=None, iters=None, _adapt=None):
|
||||
assert K > 1, "K must be at least 2"
|
||||
|
||||
origin = np.array(origin, float)
|
||||
dims = len(origin)
|
||||
h_max = int(31 * np.log(2) / np.log(K) - 1e-8) if h_max is None else h_max
|
||||
|
||||
# Tree (coordinates)
|
||||
Ts = [PriorityQueue() for _ in range(h_max + 1)]
|
||||
|
||||
def coords(h, p):
|
||||
return (np.array(p, int) + 0.5) * (sigma * 2) / K ** np.array(h, int) - sigma
|
||||
|
||||
samples = 0
|
||||
|
||||
def query(h, p):
|
||||
nonlocal samples
|
||||
depth = np.max(h)
|
||||
value = obj(coords(h, p) + origin)
|
||||
Ts[depth].put((float(value), samples, list(h), list(p)))
|
||||
samples += 1
|
||||
return Result(h, p, value)
|
||||
|
||||
# kickstart with middle point at the root node.
|
||||
best_ever = query([0] * dims, [0] * dims)
|
||||
|
||||
history, stopping = [], False
|
||||
for t in range(1_000_000 if iters is None else iters):
|
||||
best = np.inf
|
||||
lim = h_max if _adapt is None else min(_adapt(t), h_max)
|
||||
for depth in range(lim):
|
||||
if Ts[depth].empty():
|
||||
continue
|
||||
|
||||
# select a node at this depth.
|
||||
tup = Ts[depth].get(block=False)
|
||||
value, _, h, p = tup
|
||||
if value > best:
|
||||
Ts[depth].put(tup, block=False)
|
||||
continue
|
||||
best = value
|
||||
|
||||
# split this node, potentially increasing its overall depth.
|
||||
inc = 0
|
||||
for j in reversed(range(1, dims)):
|
||||
if h[j] < h[j - 1]:
|
||||
inc = j
|
||||
|
||||
if K & 1 == 1: # when odd,
|
||||
# maintain center point but split it differently next time.
|
||||
h[inc] += 1
|
||||
p[inc] = checked_fma(p[inc], K, K // 2)
|
||||
|
||||
new_depth = np.max(h)
|
||||
Ts[new_depth].put(tup, block=False)
|
||||
|
||||
for i in range(K):
|
||||
if i == K // 2:
|
||||
continue
|
||||
p_new = p.copy()
|
||||
p_new[inc] += i - K // 2
|
||||
candidate = query(h.copy(), p_new)
|
||||
if candidate.value < best_ever.value:
|
||||
best_ever = candidate
|
||||
if samples >= evals:
|
||||
stopping = True
|
||||
break
|
||||
|
||||
else: # when even,
|
||||
# discard center point and only use its subdivisions.
|
||||
for i in range(K):
|
||||
h_new, p_new = h.copy(), p.copy()
|
||||
h_new[inc] += 1
|
||||
p_new[inc] = checked_fma(p_new[inc], K, i)
|
||||
candidate = query(h_new, p_new)
|
||||
if candidate.value < best_ever.value:
|
||||
best_ever = candidate
|
||||
if samples >= evals:
|
||||
stopping = True
|
||||
break
|
||||
|
||||
if stopping:
|
||||
break
|
||||
|
||||
history.append(best_ever.value)
|
||||
if stopping:
|
||||
break
|
||||
|
||||
h, p, _ = best_ever
|
||||
|
||||
return coords(h, p) + origin, history
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
|
||||
from hipsterplot import plot
|
||||
import numpy as np
|
||||
|
||||
def objective2210(x):
|
||||
# another linear transformation of the rosenbrock banana function.
|
||||
assert len(x) > 1, len(x)
|
||||
a, b = x[:-1], x[1:]
|
||||
|
||||
a = a / 4.0 * 2 - 12 / 15
|
||||
b = b / 1.5 * 2 - 43 / 15
|
||||
# solution: 3.60 1.40
|
||||
|
||||
return (
|
||||
np.sum(100 * np.square(np.square(a) + b) + np.square(a - 1))
|
||||
/ 499.0444444444444
|
||||
)
|
||||
|
||||
optimized, history = soo(
|
||||
objective2210,
|
||||
origin=np.array([2.5, 2.5]),
|
||||
sigma=np.array([2.5, 2.5]),
|
||||
evals=2_000,
|
||||
# K=3,
|
||||
# h_max=19, # should be equivalent-ish to min_diag=1e-8 in birect.py
|
||||
K=2,
|
||||
h_max=27, # should be equivalent-ish to min_diag=1e-8 in birect.py
|
||||
)
|
||||
print(" " * 11 + "plot of log10-losses over time")
|
||||
plot(np.log10(history), num_y_chars=23)
|
||||
print("", "soo result:", list(optimized), history[-1], sep="\n")
|
||||
print("", "double checked:", objective2210(optimized), sep="\n")
|
51
tinytweaks.py
Normal file
51
tinytweaks.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
import numpy as np
|
||||
|
||||
|
||||
def IV(i):
|
||||
fib = (0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597)
|
||||
inv = tuple(range(-2, 16))
|
||||
return 1 / max(3, max(b for a, b in zip(fib, inv) if i >= a))
|
||||
|
||||
|
||||
def T2(i):
|
||||
return IV(i) * 2
|
||||
|
||||
|
||||
def T3(i):
|
||||
return IV(i) * 3
|
||||
|
||||
|
||||
def D2(i):
|
||||
return IV(i) / 2
|
||||
|
||||
|
||||
def D3(i):
|
||||
return IV(i) / 3
|
||||
|
||||
|
||||
def P3(i):
|
||||
return IV(i) ** 3
|
||||
|
||||
|
||||
def X3(i):
|
||||
return i * 3
|
||||
|
||||
|
||||
def C2(i): # crude workaround for my own broken interface
|
||||
return 2.0
|
||||
|
||||
|
||||
def H2(i): # throwaway
|
||||
return 0.5 if i >= 65 // 5 else 1.0
|
||||
|
||||
|
||||
def II(i):
|
||||
return 1 / (i + 1)
|
||||
|
||||
|
||||
def S2(i):
|
||||
return 1 / (2 * np.sqrt(i + 2))
|
||||
|
||||
|
||||
def S3(i):
|
||||
return 1 / (3 * np.sqrt(i + 3))
|
Loading…
Reference in a new issue