add rs_dissection

parent deeb9792ba
commit 246a5cb1d2

3 changed files with 216 additions and 0 deletions

rs_dissection/README.md (new file, 6 lines)
@@ -0,0 +1,6 @@
experiment with a highly-heuristic optimizer.

there is no theory, only practice, and the practice ain't even good.

you will need the files from the library directory,
and possibly [dlib](https://pypi.org/project/dlib/).
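
a quick smoke test, assuming hipsterplot.py (linked inside the scripts) sits
next to them: `python3 ars_lips_release.py` runs a noisy-ellipsoid demo and
plots its log10-losses.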

rs_dissection/ars_lips_release.py (new file, 95 lines)
@@ -0,0 +1,95 @@
#!/usr/bin/env python3
# the basis of this algorithm is described by Ben Recht et al.:
# Augmented Random Search: https://arxiv.org/abs/1803.07055
# see also https://www.argmin.net/

import numpy as np

def heuristic(costs, deltas, center_cost):
    # this is drawing a quadratic through antithetically-sampled points
    # (and the single center point shared with each pair in that generation),
    # then dividing by the quadratic's peak absolute derivative.
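    # (a sketch of the arithmetic: with unit-spaced samples, fit
    # q(t) = a*t**2 + b*t through q(0) = 0, q(1) = c0, q(-1) = c1, so that
    # a = (c0 + c1) / 2 and b = (c0 - c1) / 2; the slopes at the endpoints are
    # |q'(1)| = |3*c0 + c1| / 2 and |q'(-1)| = |3*c1 + c0| / 2, and rescaling
    # t by the actual step length d gives the `peak` computed below.)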
    d = np.linalg.norm(deltas, axis=1)
    c0 = costs[:, 0] - center_cost
    c1 = costs[:, 1] - center_cost
    l0 = np.abs(3 * c1 + c0)
    l1 = np.abs(c1 + 3 * c0)
    peak = np.maximum(l0, l1) / (2 * d)
    #peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0)) / (2 * d)
    #peak *= np.sqrt(np.pi / 4)  # keeps the area consistent with the parallelogram
    return costs / np.where(np.abs(peak) < 1e-7, 1, peak)[:, None]

def populate(f, center, popsize, sigma):
    # antithetic sampling: evaluate the objective at center + delta and
    # center - delta for each Gaussian perturbation delta.
    costs, deltas = [], []
    for p in range(popsize):
        delta = np.random.normal(scale=sigma, size=center.shape)
        cost_pos = f(center + delta)
        cost_neg = f(center - delta)
        costs.append((cost_pos, cost_neg))
        deltas.append(delta)
    return np.array(costs, float), np.array(deltas, float)

def minimize(objective, init, iterations,
             sigma=0.1, popsize=None, step_size=1.0,
             true_objective=None):
    if popsize is None:
        # it's still better to provide one yourself.
        popsize = int(np.sqrt(len(init)))

    center = np.array(init, float, copy=True)
    center_cost = objective(center)
    history = []

    def track():
        if true_objective is None:
            cost = center_cost
        else:
            cost = true_objective(center)
        history.append(cost)

    track()

    for i in range(iterations):
        costs, deltas = populate(objective, center, popsize, sigma)
        costs = heuristic(costs, deltas, center_cost)
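
        # each antithetic pair votes along its own delta, weighted by its
        # (normalized) cost difference; the average acts as a finite-difference
        # gradient estimate, as in vanilla ARS.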
        flat_costs = costs[:, 0] - costs[:, 1]
        step = np.average(deltas / sigma * flat_costs[:, None], axis=0)
        center -= step_size * step
        center_cost = objective(center)

        track()

    return center, history

if __name__ == '__main__':
    # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
    from hipsterplot import plot

    np.random.seed(42070)

    problem_size = 100
    # a random rotation, so the problem isn't axis-aligned.
    rotation, _ = np.linalg.qr(np.random.randn(problem_size, problem_size))
    init = np.random.uniform(-5, 5, problem_size)

    def ellipsoid_problem(x):
        # ill-conditioned quadratic: axis scales sweep from 1 to 1e6.
        return np.sum(10**(6 * np.linspace(0, 1, len(x))) * np.square(x))

    def rotated_problem(x):
        return ellipsoid_problem(rotation @ x)

    def noisy_problem(x):
        multiplicative_noise = np.random.uniform(0.707, 1.414)
        additive_noise = np.abs(np.random.normal(scale=50000))
        return rotated_problem(x) * multiplicative_noise + additive_noise

    objective, true_objective = noisy_problem, rotated_problem
    optimized, history = minimize(objective, init, 1000,
                                  step_size=0.25, sigma=0.3,
                                  true_objective=true_objective)

    print(" " * 11 + "plot of log10-losses over time")
    plot(np.log10(history), num_y_chars=23)

    print("loss, before optimization: {:9.6f}".format(true_objective(init)))
    print("loss, after optimization:  {:9.6f}".format(true_objective(optimized)))

rs_dissection/ars_lips_release2.py (new file, 115 lines)
@@ -0,0 +1,115 @@
#!/usr/bin/env python3
# now with a bunch of experimental junk.

import numpy as np


def minimize(
    objective, init, iterations, sigma=1.0, popsize=None, parties=1, true_objective=None
):
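    # note: `parties` sets how many orthonormal frames of probe directions are
    # drawn per iteration; each frame contributes dims + 1 directions (see the
    # QR step in the main loop).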
    F = lambda a: np.array(a, np.float64)
    center = F(init)
    central = objective(center)
    evals = 1
    history = []

    def track():
        cost = central if true_objective is None else true_objective(center)
        history.append(cost)

    track()

    dims = len(center)
    # offset = 1.2247448713915890  # via np.polynomial.hermite.hermgauss(3)[0][2]
    # weight = 0.2954089751509194  # via np.polynomial.hermite.hermgauss(3)[1][2]
    offset = np.sqrt(3 / 2)
    weight = np.sqrt(np.pi) / 6
    both = offset * weight
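    # (sanity check: 3-point Gauss-Hermite quadrature has nodes 0 and
    # ±sqrt(3/2); the outer nodes carry weight sqrt(pi)/6 and the center
    # carries 2*sqrt(pi)/3, matching the hermgauss(3) values quoted above.)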

    # seems to perform better without, at least for the 1220-based heuristic.
    magic_d = 1  # np.sqrt(3 / 2)
    magic_a = 1 / magic_d + 1 / 2
    magic_b = 1 / magic_d - 1 / 2

    for i in range(iterations):
        # if i == iterations // 2:
        #     sigma /= 2

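        # sample a random orthogonal matrix via QR of a Gaussian, then drop its
        # last column: its dims + 1 rows give near-orthogonal, roughly
        # unit-length probe directions in the dims-dimensional parameter space.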
        directions = np.zeros((parties * (dims + 1), dims))
        for p in range(parties):
            new_dirs = np.linalg.qr(np.random.randn(dims + 1, dims + 1))[0]
            directions[p * (dims + 1) : (p + 1) * (dims + 1)] = new_dirs[:, :-1]
        if popsize is not None:
            directions = directions[:popsize]

        scale = np.sqrt(2) * sigma
        deltas = scale * offset * directions
        c0 = F([objective(center - delta) for delta in deltas]) - central
        c1 = F([objective(center + delta) for delta in deltas]) - central
        if 0:
            l0 = np.abs(c1 * magic_a + c0 * magic_b)
            l1 = np.abs(c1 * magic_b + c0 * magic_a)
            peak = (l0 + l1) / 2
        else:
            # peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0))
            # peak *= np.sqrt(np.pi) / 5
            l0 = np.square(c1 * magic_a + c0 * magic_b)
            l1 = np.square(c1 * magic_b + c0 * magic_a)
            peak = np.sqrt((l0 + l1) / 2)
        difference = ((c1 - c0) / peak)[:, None]
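        # (as in ars_lips_release.py, dividing by `peak` normalizes each pair's
        # cost difference by the quadratic fit's largest slope, so the step is
        # roughly invariant to the local scale of the objective.)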

        evals += len(deltas) * 2

        # gradscale = both / (scale / 2 * np.sqrt(np.pi)) * (offset * sigma / parties)
        # gradscale = 2 / np.sqrt(2) / np.sqrt(np.pi) * offset * offset * weight / sigma * sigma / parties
        # gradscale = 1 / (np.sqrt(8) * parties)
        # print(gradscale, 0.35355339059327373)
        # step = sigma * gradscale * np.sum(directions * difference, axis=0)
        # NOTE: this is a different scale, but i figure it should work better:
        step = sigma / np.sqrt(12) / parties * np.sum(directions * difference, axis=0)

        center = center - step
        central = objective(center)
        evals += 1

        track()

    print("evals:", evals)

    return center, history


if __name__ == "__main__":
    # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
    from hipsterplot import plot

    np.random.seed(42070)

    problem_size = 100
    rotation, _ = np.linalg.qr(np.random.randn(problem_size, problem_size))
    init = np.random.uniform(-5, 5, problem_size)

    def ellipsoid_problem(x):
        return np.sum(10 ** (6 * np.linspace(0, 1, len(x))) * np.square(x))

    def rotated_problem(x):
        return ellipsoid_problem(rotation @ x)

    def noisy_problem(x):
        multiplicative_noise = np.random.uniform(0.707, 1.414)
        additive_noise = np.abs(np.random.normal(scale=50000))
        return rotated_problem(x) * multiplicative_noise + additive_noise

    objective, true_objective = noisy_problem, rotated_problem
    # optimized, history = minimize(
    #     objective, init, 1000, sigma=0.3, true_objective=true_objective
    # )
    optimized, history = minimize(
        objective, init, 1000, sigma=2.0, popsize=10, true_objective=true_objective
    )

    print(" " * 11 + "plot of log10-losses over time")
    plot(np.log10(history), num_y_chars=23)

    print("loss, before optimization: {:9.6f}".format(true_objective(init)))
    print("loss, after optimization:  {:9.6f}".format(true_objective(optimized)))