#!/usr/bin/env python3 # the basis of this algorithm is described by Ben Recht et al. # Augmented Random Search: https://arxiv.org/abs/1803.07055 # see also https://www.argmin.net/ import numpy as np def heuristic(costs, deltas, center_cost): # this is drawing a quadratic through antithetically-sampled points # (and the single center point shared with each pair in that generation), # then dividing by the quadratic's peak absolute derivative. d = np.linalg.norm(deltas, axis=1) c0 = costs[:, 0] - center_cost c1 = costs[:, 1] - center_cost l0 = np.abs(3 * c1 + c0) l1 = np.abs(c1 + 3 * c0) peak = np.maximum(l0, l1) / (2 * d) #peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0)) / (2 * d) #peak *= np.sqrt(np.pi / 4) # keeps the area consistent with the parallelogram return costs / np.where(np.abs(peak) < 1e-7, 1, peak)[:, None] def populate(f, center, popsize, sigma): costs, deltas = [], [] for p in range(popsize): delta = np.random.normal(scale=sigma, size=center.shape) cost_pos = f(center + delta) cost_neg = f(center - delta) costs.append((cost_pos, cost_neg)) deltas.append(delta) return np.array(costs, float), np.array(deltas, float) def minimize(objective, init, iterations, sigma=0.1, popsize=None, step_size=1.0, true_objective=None): if popsize is None: # it's still better to provide one yourself. popsize = int(np.sqrt(len(init))) center = np.array(init, float, copy=True) center_cost = objective(center) history = [] def track(): if true_objective is None: cost = center_cost else: cost = true_objective(center) history.append(cost) track() for i in range(iterations): costs, deltas = populate(objective, center, popsize, sigma) costs = heuristic(costs, deltas, center_cost) flat_costs = costs[:, 0] - costs[:, 1] step = np.average(deltas / sigma * flat_costs[:, None], axis=0) center -= step_size * step center_cost = objective(center) track() return center, history if __name__ == '__main__': # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py from hipsterplot import plot np.random.seed(42070) problem_size = 100 rotation, _ = np.linalg.qr(np.random.randn(problem_size, problem_size)) init = np.random.uniform(-5, 5, problem_size) def ellipsoid_problem(x): return np.sum(10**(6 * np.linspace(0, 1, len(x))) * np.square(x)) def rotated_problem(x): return ellipsoid_problem(rotation @ x) def noisy_problem(x): multiplicative_noise = np.random.uniform(0.707, 1.414) additive_noise = np.abs(np.random.normal(scale=50000)) return rotated_problem(x) * multiplicative_noise + additive_noise objective, true_objective = noisy_problem, rotated_problem optimized, history = minimize(objective, init, 1000, step_size=0.25, sigma=0.3, true_objective=true_objective) print(" " * 11 + "plot of log10-losses over time") plot(np.log10(history), num_y_chars=23) print("loss, before optimization: {:9.6f}".format(true_objective(init))) print("loss, after optimization: {:9.6f}".format(true_objective(optimized)))