backyard/rs_dissection/ars_lips_release2.py
2022-06-07 07:15:31 +02:00

115 lines
4 KiB
Python

#!/usr/bin/env python3
# now with a bunch of experimental junk.
import numpy as np
def minimize(
objective, init, iterations, sigma=1.0, popsize=None, parties=1, true_objective=None
):
F = lambda a: np.array(a, np.float64)
center = F(init)
central = objective(center)
evals = 1
history = []
def track():
cost = central if true_objective is None else true_objective(center)
history.append(cost)
track()
dims = len(center)
# offset = 1.2247448713915890 # via np.polynomial.hermite.hermgauss(3)[0][2]
# weight = 0.2954089751509194 # via np.polynomial.hermite.hermgauss(3)[1][2]
offset = np.sqrt(3 / 2)
weight = np.sqrt(np.pi) / 6
both = offset * weight
# seems to perform better without, at least for the 1220-based heuristic.
magic_d = 1#np.sqrt(3 / 2)
magic_a = 1 / magic_d + 1 / 2
magic_b = 1 / magic_d - 1 / 2
for i in range(iterations):
# if i == iterations // 2:
# sigma /= 2
directions = np.zeros((parties * (dims + 1), dims))
for i in range(parties):
new_dirs = np.linalg.qr(np.random.randn(dims + 1, dims + 1))[0]
directions[i * (dims + 1) : (i + 1) * (dims + 1)] = new_dirs[:, :-1]
if popsize is not None:
directions = directions[:popsize]
scale = np.sqrt(2) * sigma
deltas = scale * offset * directions
c0 = F([objective(center - delta) for delta in deltas]) - central
c1 = F([objective(center + delta) for delta in deltas]) - central
if 0:
l0 = np.abs(c1 * magic_a + c0 * magic_b)
l1 = np.abs(c1 * magic_b + c0 * magic_a)
peak = (l0 + l1) / 2
else:
# peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0))
# peak *= np.sqrt(np.pi) / 5
l0 = np.square(c1 * magic_a + c0 * magic_b)
l1 = np.square(c1 * magic_b + c0 * magic_a)
peak = np.sqrt((l0 + l1) / 2)
difference = ((c1 - c0) / peak)[:, None]
evals += len(deltas) * 2
# gradscale = both / (scale / 2 * np.sqrt(np.pi)) * (offset * sigma / parties)
# gradscale = 2 / np.sqrt(2) / np.sqrt(np.pi) * offset * offset * weight / sigma * sigma / parties
# gradscale = 1 / (np.sqrt(8) * parties)
# print(gradscale, 0.35355339059327373)
# step = sigma * gradscale * np.sum(directions * difference, axis=0)
# NOTE: this is a different scale, but i figure it should work better:
step = sigma / np.sqrt(12) / parties * np.sum(directions * difference, axis=0)
center = center - step
central = objective(center)
evals += 1
track()
print("evals:", evals)
return center, history
if __name__ == "__main__":
# get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
from hipsterplot import plot
np.random.seed(42070)
problem_size = 100
rotation, _ = np.linalg.qr(np.random.randn(problem_size, problem_size))
init = np.random.uniform(-5, 5, problem_size)
def ellipsoid_problem(x):
return np.sum(10 ** (6 * np.linspace(0, 1, len(x))) * np.square(x))
def rotated_problem(x):
return ellipsoid_problem(rotation @ x)
def noisy_problem(x):
multiplicative_noise = np.random.uniform(0.707, 1.414)
additive_noise = np.abs(np.random.normal(scale=50000))
return rotated_problem(x) * multiplicative_noise + additive_noise
objective, true_objective = noisy_problem, rotated_problem
# optimized, history = minimize(
# objective, init, 1000, sigma=0.3, true_objective=true_objective
# )
optimized, history = minimize(
objective, init, 1000, sigma=2.0, popsize=10, true_objective=true_objective
)
print(" " * 11 + "plot of log10-losses over time")
plot(np.log10(history), num_y_chars=23)
print("loss, before optimization: {:9.6f}".format(true_objective(init)))
print("loss, after optimization: {:9.6f}".format(true_objective(optimized)))