#!/usr/bin/env python3 # ars_lips_release3.py, but renamed, and fucked with. import numpy as np import sys def minimize( objective, init, iterations, sigma=1.0, popsize=None, # FIXME: None isn't actually supported! rate=1.0, parties=None, true_objective=None, seed=None, _flags=0, ): F = lambda a: np.array(a, np.float64) center = F(init) central = objective(center) evals = 1 history = [] prng = np.random.default_rng(seed) dims = len(center) if parties is None: parties = 1 if popsize is None else (popsize + dims + 1) // (dims + 2) elif parties < (popsize + dims + 1) // (dims + 2): print("WARNING: parties is too small for chosen popsize.", sys.stderr) def track(): cost = central if true_objective is None else true_objective(center) history.append(cost) track() offset = np.sqrt(3 / 2) weight = np.sqrt(np.pi) / 6 both = offset * weight # seems to perform better without, at least for the 1220-based heuristic. magic_d = np.sqrt(3 / 2) if _flags & 1 else 1 # good? magic_a = 1 / magic_d + 1 / 2 magic_b = 1 / magic_d - 1 / 2 for i in range(iterations): sigma_i = sigma(i) if callable(sigma) else sigma rate_i = rate(i) if callable(rate) else rate if _flags & 2: # bad assert not _flags & 16, f"flags 2 and 16 are incompatible ({_flags=})" directions = prng.standard_normal((popsize, dims)) elif _flags & 16: assert not _flags & 2, f"flags 2 and 16 are incompatible ({_flags=})" directions = prng.standard_normal((popsize, dims + 2)) directions /= np.linalg.norm(directions, axis=1, keepdims=True) directions = directions[:, :-2] # drop coordinate else: directions = np.zeros((parties * (dims + 2), dims)) for i in range(parties): new_dirs = np.linalg.qr(prng.standard_normal((dims + 2, dims + 2)))[0] directions[i * (dims + 2) : (i + 1) * (dims + 2)] = new_dirs[:, :-2] if popsize is not None: directions = directions[:popsize] scale = np.sqrt(2) * sigma_i deltas = scale * offset * directions c0 = F([objective(center - delta) for delta in deltas]) - central c1 = F([objective(center + delta) for delta in deltas]) - central if _flags & 8: l0 = np.abs(c1 * magic_a + c0 * magic_b) l1 = np.abs(c1 * magic_b + c0 * magic_a) peak = (l0 + l1) / 2 else: # peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0)) # peak *= np.sqrt(np.pi) / 5 l0 = np.square(c1 * magic_a + c0 * magic_b) l1 = np.square(c1 * magic_b + c0 * magic_a) peak = np.sqrt((l0 + l1) / 2) peak[peak == 0.0] = 1.0 difference = ((c1 - c0) / peak)[:, None] evals += len(deltas) * 2 # gradscale = both / (scale / 2 * np.sqrt(np.pi)) * (offset * sigma_i / parties) # gradscale = 2 / np.sqrt(2) / np.sqrt(np.pi) * offset * offset * weight / sigma_i * sigma_i / parties # gradscale = 1 / (np.sqrt(8) * parties) # print(gradscale, 0.35355339059327373) dd = directions * difference if _flags & 4: gradscale = 1 / (np.sqrt(8) * parties) step = sigma_i * gradscale * np.sum(dd, axis=0) else: # NOTE: this is a different scale, but i figure it should work better: step = sigma_i / np.sqrt(12) / parties * np.sum(dd, axis=0) center = center - rate_i * step central = objective(center) evals += 1 track() # print("popsize:", popsize) # print("parties:", parties) # print("evals:", evals) return center, history if __name__ == "__main__": # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py from hipsterplot import plot prng = np.random.default_rng(42070) problem_size = 100 rotation, _ = np.linalg.qr(prng.standard_normal((problem_size, problem_size))) init = prng.uniform(-5, 5, problem_size) def ellipsoid_problem(x): return np.sum(10 ** (6 * np.linspace(0, 1, len(x))) * np.square(x)) def rotated_problem(x): return ellipsoid_problem(rotation @ x) def noisy_problem(x): multiplicative_noise = prng.uniform(0.707, 1.414) additive_noise = np.abs(prng.normal(scale=50000)) return rotated_problem(x) * multiplicative_noise + additive_noise objective, true_objective = noisy_problem, rotated_problem # optimized, history = minimize( # objective, init, 1000, sigma=0.3, true_objective=true_objective # ) optimized, history = minimize( objective, init, 1000, sigma=2.0, popsize=10, true_objective=true_objective, seed=42069, # _flags=13, ) print(" " * 11 + "plot of log10-losses over time") plot(np.log10(history), num_y_chars=23) print("loss, before optimization: {:9.6f}".format(true_objective(init))) print("loss, after optimization: {:9.6f}".format(true_objective(optimized)))