153 lines
5.1 KiB
Python
153 lines
5.1 KiB
Python
|
#!/usr/bin/env python3
|
||
|
# ars_lips_release3.py, but renamed, and fucked with.
|
||
|
|
||
|
import numpy as np
|
||
|
import sys
|
||
|
|
||
|
|
||
|
def minimize(
|
||
|
objective,
|
||
|
init,
|
||
|
iterations,
|
||
|
sigma=1.0,
|
||
|
popsize=None, # FIXME: None isn't actually supported!
|
||
|
rate=1.0,
|
||
|
parties=None,
|
||
|
true_objective=None,
|
||
|
seed=None,
|
||
|
_flags=0,
|
||
|
):
|
||
|
F = lambda a: np.array(a, np.float64)
|
||
|
center = F(init)
|
||
|
central = objective(center)
|
||
|
evals = 1
|
||
|
history = []
|
||
|
prng = np.random.default_rng(seed)
|
||
|
|
||
|
dims = len(center)
|
||
|
if parties is None:
|
||
|
parties = 1 if popsize is None else (popsize + dims + 1) // (dims + 2)
|
||
|
elif parties < (popsize + dims + 1) // (dims + 2):
|
||
|
print("WARNING: parties is too small for chosen popsize.", sys.stderr)
|
||
|
|
||
|
def track():
|
||
|
cost = central if true_objective is None else true_objective(center)
|
||
|
history.append(cost)
|
||
|
|
||
|
track()
|
||
|
|
||
|
offset = np.sqrt(3 / 2)
|
||
|
weight = np.sqrt(np.pi) / 6
|
||
|
both = offset * weight
|
||
|
|
||
|
# seems to perform better without, at least for the 1220-based heuristic.
|
||
|
magic_d = np.sqrt(3 / 2) if _flags & 1 else 1 # good?
|
||
|
magic_a = 1 / magic_d + 1 / 2
|
||
|
magic_b = 1 / magic_d - 1 / 2
|
||
|
|
||
|
for i in range(iterations):
|
||
|
sigma_i = sigma(i) if callable(sigma) else sigma
|
||
|
rate_i = rate(i) if callable(rate) else rate
|
||
|
|
||
|
if _flags & 2: # bad
|
||
|
assert not _flags & 16, f"flags 2 and 16 are incompatible ({_flags=})"
|
||
|
directions = prng.standard_normal((popsize, dims))
|
||
|
elif _flags & 16:
|
||
|
assert not _flags & 2, f"flags 2 and 16 are incompatible ({_flags=})"
|
||
|
directions = prng.standard_normal((popsize, dims + 2))
|
||
|
directions /= np.linalg.norm(directions, axis=1, keepdims=True)
|
||
|
directions = directions[:, :-2] # drop coordinate
|
||
|
else:
|
||
|
directions = np.zeros((parties * (dims + 2), dims))
|
||
|
for i in range(parties):
|
||
|
new_dirs = np.linalg.qr(prng.standard_normal((dims + 2, dims + 2)))[0]
|
||
|
directions[i * (dims + 2) : (i + 1) * (dims + 2)] = new_dirs[:, :-2]
|
||
|
if popsize is not None:
|
||
|
directions = directions[:popsize]
|
||
|
|
||
|
scale = np.sqrt(2) * sigma_i
|
||
|
deltas = scale * offset * directions
|
||
|
c0 = F([objective(center - delta) for delta in deltas]) - central
|
||
|
c1 = F([objective(center + delta) for delta in deltas]) - central
|
||
|
if _flags & 8:
|
||
|
l0 = np.abs(c1 * magic_a + c0 * magic_b)
|
||
|
l1 = np.abs(c1 * magic_b + c0 * magic_a)
|
||
|
peak = (l0 + l1) / 2
|
||
|
else:
|
||
|
# peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0))
|
||
|
# peak *= np.sqrt(np.pi) / 5
|
||
|
l0 = np.square(c1 * magic_a + c0 * magic_b)
|
||
|
l1 = np.square(c1 * magic_b + c0 * magic_a)
|
||
|
peak = np.sqrt((l0 + l1) / 2)
|
||
|
peak[peak == 0.0] = 1.0
|
||
|
difference = ((c1 - c0) / peak)[:, None]
|
||
|
|
||
|
evals += len(deltas) * 2
|
||
|
|
||
|
# gradscale = both / (scale / 2 * np.sqrt(np.pi)) * (offset * sigma_i / parties)
|
||
|
# gradscale = 2 / np.sqrt(2) / np.sqrt(np.pi) * offset * offset * weight / sigma_i * sigma_i / parties
|
||
|
# gradscale = 1 / (np.sqrt(8) * parties)
|
||
|
# print(gradscale, 0.35355339059327373)
|
||
|
dd = directions * difference
|
||
|
if _flags & 4:
|
||
|
gradscale = 1 / (np.sqrt(8) * parties)
|
||
|
step = sigma_i * gradscale * np.sum(dd, axis=0)
|
||
|
else:
|
||
|
# NOTE: this is a different scale, but i figure it should work better:
|
||
|
step = sigma_i / np.sqrt(12) / parties * np.sum(dd, axis=0)
|
||
|
|
||
|
center = center - rate_i * step
|
||
|
central = objective(center)
|
||
|
evals += 1
|
||
|
|
||
|
track()
|
||
|
|
||
|
# print("popsize:", popsize)
|
||
|
# print("parties:", parties)
|
||
|
# print("evals:", evals)
|
||
|
|
||
|
return center, history
|
||
|
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
# get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
|
||
|
from hipsterplot import plot
|
||
|
|
||
|
prng = np.random.default_rng(42070)
|
||
|
|
||
|
problem_size = 100
|
||
|
rotation, _ = np.linalg.qr(prng.standard_normal((problem_size, problem_size)))
|
||
|
init = prng.uniform(-5, 5, problem_size)
|
||
|
|
||
|
def ellipsoid_problem(x):
|
||
|
return np.sum(10 ** (6 * np.linspace(0, 1, len(x))) * np.square(x))
|
||
|
|
||
|
def rotated_problem(x):
|
||
|
return ellipsoid_problem(rotation @ x)
|
||
|
|
||
|
def noisy_problem(x):
|
||
|
multiplicative_noise = prng.uniform(0.707, 1.414)
|
||
|
additive_noise = np.abs(prng.normal(scale=50000))
|
||
|
return rotated_problem(x) * multiplicative_noise + additive_noise
|
||
|
|
||
|
objective, true_objective = noisy_problem, rotated_problem
|
||
|
# optimized, history = minimize(
|
||
|
# objective, init, 1000, sigma=0.3, true_objective=true_objective
|
||
|
# )
|
||
|
optimized, history = minimize(
|
||
|
objective,
|
||
|
init,
|
||
|
1000,
|
||
|
sigma=2.0,
|
||
|
popsize=10,
|
||
|
true_objective=true_objective,
|
||
|
seed=42069,
|
||
|
# _flags=13,
|
||
|
)
|
||
|
|
||
|
print(" " * 11 + "plot of log10-losses over time")
|
||
|
plot(np.log10(history), num_y_chars=23)
|
||
|
|
||
|
print("loss, before optimization: {:9.6f}".format(true_objective(init)))
|
||
|
print("loss, after optimization: {:9.6f}".format(true_objective(optimized)))
|