From 246a5cb1d26eb98aa6331e085601235a9297e152 Mon Sep 17 00:00:00 2001
From: Connor Olding
Date: Tue, 7 Jun 2022 07:15:31 +0200
Subject: [PATCH] add rs_dissection

---
 rs_dissection/README.md            |   6 ++
 rs_dissection/ars_lips_release.py  |  95 ++++++++++++++++++++++++
 rs_dissection/ars_lips_release2.py | 115 +++++++++++++++++++++++++++++
 3 files changed, 216 insertions(+)
 create mode 100644 rs_dissection/README.md
 create mode 100644 rs_dissection/ars_lips_release.py
 create mode 100644 rs_dissection/ars_lips_release2.py

diff --git a/rs_dissection/README.md b/rs_dissection/README.md
new file mode 100644
index 0000000..4b652e1
--- /dev/null
+++ b/rs_dissection/README.md
@@ -0,0 +1,6 @@
+experiment with a highly-heuristic optimizer.
+
+there is no theory, only practice, and the practice ain't even good.
+
+you will need the files from the library directory,
+and possibly [dlib](https://pypi.org/project/dlib/).
diff --git a/rs_dissection/ars_lips_release.py b/rs_dissection/ars_lips_release.py
new file mode 100644
index 0000000..76a517f
--- /dev/null
+++ b/rs_dissection/ars_lips_release.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+# the basis of this algorithm is Augmented Random Search (ARS),
+# introduced by Mania, Guy, and Recht: https://arxiv.org/abs/1803.07055
+# see also https://www.argmin.net/
+
+import numpy as np
+
+def heuristic(costs, deltas, center_cost):
+    # this draws a quadratic through each antithetically-sampled pair of
+    # points (plus the single center point shared by every pair in the
+    # generation), then divides by that quadratic's peak absolute derivative.
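+    # to sketch the algebra behind l0 and l1: fit q(t) = a*t*t + b*t with
+    # q(+d) = c0 and q(-d) = c1 (q(0) = 0, since the costs are centered
+    # below), giving a = (c0 + c1) / (2*d*d) and b = (c0 - c1) / (2*d).
+    # q' is linear, so |q'| peaks at an endpoint of [-d, d]:
+    # q'(+d) = (3*c0 + c1) / (2*d) and q'(-d) = -(c0 + 3*c1) / (2*d),
+    # i.e. l1 / (2*d) and l0 / (2*d) below.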
+    d = np.linalg.norm(deltas, axis=1)
+    c0 = costs[:, 0] - center_cost
+    c1 = costs[:, 1] - center_cost
+    l0 = np.abs(3 * c1 + c0)
+    l1 = np.abs(c1 + 3 * c0)
+    peak = np.maximum(l0, l1) / (2 * d)
+    #peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0)) / (2 * d)
+    #peak *= np.sqrt(np.pi / 4)  # keeps the area consistent with the parallelogram
+    return costs / np.where(np.abs(peak) < 1e-7, 1, peak)[:, None]
+
+def populate(f, center, popsize, sigma):
+    costs, deltas = [], []
+    for p in range(popsize):
+        delta = np.random.normal(scale=sigma, size=center.shape)
+        cost_pos = f(center + delta)
+        cost_neg = f(center - delta)
+        costs.append((cost_pos, cost_neg))
+        deltas.append(delta)
+    return np.array(costs, float), np.array(deltas, float)
+
+def minimize(objective, init, iterations,
+             sigma=0.1, popsize=None, step_size=1.0,
+             true_objective=None):
+    if popsize is None:
+        # a rough default; it's still better to provide one yourself.
+        popsize = int(np.sqrt(len(init)))
+
+    center = np.array(init, float, copy=True)
+    center_cost = objective(center)
+    history = []
+
+    def track():
+        if true_objective is None:
+            cost = center_cost
+        else:
+            cost = true_objective(center)
+        history.append(cost)
+
+    track()
+
+    for i in range(iterations):
+        costs, deltas = populate(objective, center, popsize, sigma)
+        costs = heuristic(costs, deltas, center_cost)
+
+        # positive flat_costs mean the positive delta was worse,
+        # so stepping against the average pushes the center downhill.
+        flat_costs = costs[:, 0] - costs[:, 1]
+        step = np.average(deltas / sigma * flat_costs[:, None], axis=0)
+        center -= step_size * step
+        center_cost = objective(center)
+
+        track()
+
+    return center, history
+
+if __name__ == '__main__':
+    # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
+    from hipsterplot import plot
+
+    np.random.seed(42070)
+
+    problem_size = 100
+    rotation, _ = np.linalg.qr(np.random.randn(problem_size, problem_size))
+    init = np.random.uniform(-5, 5, problem_size)
+
+    def ellipsoid_problem(x):
+        return np.sum(10**(6 * np.linspace(0, 1, len(x))) * np.square(x))
+
+    def rotated_problem(x):
+        return ellipsoid_problem(rotation @ x)
+
+    def noisy_problem(x):
+        multiplicative_noise = np.random.uniform(0.707, 1.414)
+        additive_noise = np.abs(np.random.normal(scale=50000))
+        return rotated_problem(x) * multiplicative_noise + additive_noise
+
+    objective, true_objective = noisy_problem, rotated_problem
+    optimized, history = minimize(objective, init, 1000,
+                                  step_size=0.25, sigma=0.3,
+                                  true_objective=true_objective)
+
+    print(" " * 11 + "plot of log10-losses over time")
+    plot(np.log10(history), num_y_chars=23)
+
+    print("loss, before optimization: {:9.6f}".format(true_objective(init)))
+    print("loss, after  optimization: {:9.6f}".format(true_objective(optimized)))
diff --git a/rs_dissection/ars_lips_release2.py b/rs_dissection/ars_lips_release2.py
new file mode 100644
index 0000000..01cbd0e
--- /dev/null
+++ b/rs_dissection/ars_lips_release2.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+# now with a bunch of experimental junk.
+
+import numpy as np
+
+
+def minimize(
+    objective, init, iterations, sigma=1.0, popsize=None, parties=1, true_objective=None
+):
+    F = lambda a: np.array(a, np.float64)
+    center = F(init)
+    central = objective(center)
+    evals = 1
+    history = []
+
+    def track():
+        cost = central if true_objective is None else true_objective(center)
+        history.append(cost)
+
+    track()
+
+    dims = len(center)
+    # offset = 1.2247448713915890  # via np.polynomial.hermite.hermgauss(3)[0][2]
+    # weight = 0.2954089751509194  # via np.polynomial.hermite.hermgauss(3)[1][2]
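+    # an aside on those constants: they are the positive node and its weight
+    # from 3-point Gauss-Hermite quadrature, which integrates
+    # exp(-x * x) * f(x) exactly for polynomials f of degree <= 5.
+    # a quick check:
+    #   nodes, weights = np.polynomial.hermite.hermgauss(3)
+    #   assert np.isclose(nodes[2], np.sqrt(3 / 2))
+    #   assert np.isclose(weights[2], np.sqrt(np.pi) / 6)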
+    offset = np.sqrt(3 / 2)
+    weight = np.sqrt(np.pi) / 6
+    both = offset * weight
+
+    # seems to perform better without, at least for the 1220-based heuristic.
+    magic_d = 1  # np.sqrt(3 / 2)
+    magic_a = 1 / magic_d + 1 / 2
+    magic_b = 1 / magic_d - 1 / 2
+
+    for i in range(iterations):
+        # if i == iterations // 2:
+        #     sigma /= 2
+
+        # sample directions as rows of random orthogonal matrices,
+        # so each party's directions are mutually orthogonal.
+        directions = np.zeros((parties * (dims + 1), dims))
+        for party in range(parties):
+            new_dirs = np.linalg.qr(np.random.randn(dims + 1, dims + 1))[0]
+            directions[party * (dims + 1) : (party + 1) * (dims + 1)] = new_dirs[:, :-1]
+        if popsize is not None:
+            directions = directions[:popsize]
+
+        scale = np.sqrt(2) * sigma
+        deltas = scale * offset * directions
+        c0 = F([objective(center - delta) for delta in deltas]) - central
+        c1 = F([objective(center + delta) for delta in deltas]) - central
+        if 0:  # an absolute-value variant of the peak estimate, kept for reference.
+            l0 = np.abs(c1 * magic_a + c0 * magic_b)
+            l1 = np.abs(c1 * magic_b + c0 * magic_a)
+            peak = (l0 + l1) / 2
+        else:
+            # peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0))
+            # peak *= np.sqrt(np.pi) / 5
+            l0 = np.square(c1 * magic_a + c0 * magic_b)
+            l1 = np.square(c1 * magic_b + c0 * magic_a)
+            peak = np.sqrt((l0 + l1) / 2)
+        # guard against a (near-)zero peak, as in ars_lips_release.py.
+        peak = np.where(np.abs(peak) < 1e-7, 1, peak)
+        difference = ((c1 - c0) / peak)[:, None]
+
+        evals += len(deltas) * 2
+
+        # gradscale = both / (scale / 2 * np.sqrt(np.pi)) * (offset * sigma / parties)
+        # gradscale = 2 / np.sqrt(2) / np.sqrt(np.pi) * offset * offset * weight / sigma * sigma / parties
+        # gradscale = 1 / (np.sqrt(8) * parties)
+        # print(gradscale, 0.35355339059327373)
+        # step = sigma * gradscale * np.sum(directions * difference, axis=0)
+        # NOTE: this is a different scale, but i figure it should work better:
+        step = sigma / np.sqrt(12) / parties * np.sum(directions * difference, axis=0)
+
+        center = center - step
+        central = objective(center)
+        evals += 1
+
+        track()
+
+    print("evals:", evals)
+
+    return center, history
+
+
+if __name__ == "__main__":
+    # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
+    from hipsterplot import plot
+
+    np.random.seed(42070)
+
+    problem_size = 100
+    rotation, _ = np.linalg.qr(np.random.randn(problem_size, problem_size))
+    init = np.random.uniform(-5, 5, problem_size)
+
+    def ellipsoid_problem(x):
+        return np.sum(10 ** (6 * np.linspace(0, 1, len(x))) * np.square(x))
+
+    def rotated_problem(x):
+        return ellipsoid_problem(rotation @ x)
+
+    def noisy_problem(x):
+        multiplicative_noise = np.random.uniform(0.707, 1.414)
+        additive_noise = np.abs(np.random.normal(scale=50000))
+        return rotated_problem(x) * multiplicative_noise + additive_noise
+
+    objective, true_objective = noisy_problem, rotated_problem
+    # optimized, history = minimize(
+    #     objective, init, 1000, sigma=0.3, true_objective=true_objective
+    # )
+    optimized, history = minimize(
+        objective, init, 1000, sigma=2.0, popsize=10, true_objective=true_objective
+    )
+
+    print(" " * 11 + "plot of log10-losses over time")
+    plot(np.log10(history), num_y_chars=23)
+
+    print("loss, before optimization: {:9.6f}".format(true_objective(init)))
+    print("loss, after  optimization: {:9.6f}".format(true_objective(optimized)))
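+
+    # a loose sanity check: the noisy run should at least stay finite;
+    # if it doesn't, lowering sigma or raising popsize usually helps.
+    assert np.all(np.isfinite(history))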