#!/usr/bin/env python3
# ars_lips_release3.py, but renamed, and fucked with.

import numpy as np
import sys


def minimize(
    objective,
    init,
    iterations,
    sigma=1.0,
    popsize=None,  # FIXME: None isn't actually supported!
    rate=1.0,
    parties=None,
    true_objective=None,
    seed=None,
    _flags=0,
):
    F = lambda a: np.array(a, np.float64)
    center = F(init)
    central = objective(center)
    evals = 1
    history = []
    prng = np.random.default_rng(seed)

    dims = len(center)
    if parties is None:
        parties = 1 if popsize is None else (popsize + dims + 1) // (dims + 2)
    elif parties < (popsize + dims + 1) // (dims + 2):
        print("WARNING: parties is too small for chosen popsize.", sys.stderr)

    def track():
        cost = central if true_objective is None else true_objective(center)
        history.append(cost)

    track()

    offset = np.sqrt(3 / 2)
    weight = np.sqrt(np.pi) / 6
    both = offset * weight

    # seems to perform better without, at least for the 1220-based heuristic.
    magic_d = np.sqrt(3 / 2) if _flags & 1 else 1  # good?
    magic_a = 1 / magic_d + 1 / 2
    magic_b = 1 / magic_d - 1 / 2

    for i in range(iterations):
        sigma_i = sigma(i) if callable(sigma) else sigma
        rate_i = rate(i) if callable(rate) else rate

        if _flags & 2:  # bad
            assert not _flags & 16, f"flags 2 and 16 are incompatible ({_flags=})"
            directions = prng.standard_normal((popsize, dims))
        elif _flags & 16:
            assert not _flags & 2, f"flags 2 and 16 are incompatible ({_flags=})"
            directions = prng.standard_normal((popsize, dims + 2))
            directions /= np.linalg.norm(directions, axis=1, keepdims=True)
            directions = directions[:, :-2]  # drop coordinate
        else:
            directions = np.zeros((parties * (dims + 2), dims))
            for i in range(parties):
                new_dirs = np.linalg.qr(prng.standard_normal((dims + 2, dims + 2)))[0]
                directions[i * (dims + 2) : (i + 1) * (dims + 2)] = new_dirs[:, :-2]
            if popsize is not None:
                directions = directions[:popsize]

        scale = np.sqrt(2) * sigma_i
        deltas = scale * offset * directions
        c0 = F([objective(center - delta) for delta in deltas]) - central
        c1 = F([objective(center + delta) for delta in deltas]) - central
        if _flags & 8:
            l0 = np.abs(c1 * magic_a + c0 * magic_b)
            l1 = np.abs(c1 * magic_b + c0 * magic_a)
            peak = (l0 + l1) / 2
        else:
            # peak = np.sqrt(np.square(3 * c1 + c0) + np.square(c1 + 3 * c0))
            # peak *= np.sqrt(np.pi) / 5
            l0 = np.square(c1 * magic_a + c0 * magic_b)
            l1 = np.square(c1 * magic_b + c0 * magic_a)
            peak = np.sqrt((l0 + l1) / 2)
        peak[peak == 0.0] = 1.0
        difference = ((c1 - c0) / peak)[:, None]

        evals += len(deltas) * 2

        # gradscale = both / (scale / 2 * np.sqrt(np.pi)) * (offset * sigma_i / parties)
        # gradscale = 2 / np.sqrt(2) / np.sqrt(np.pi) * offset * offset * weight / sigma_i * sigma_i / parties
        # gradscale = 1 / (np.sqrt(8) * parties)
        # print(gradscale, 0.35355339059327373)
        dd = directions * difference
        if _flags & 4:
            gradscale = 1 / (np.sqrt(8) * parties)
            step = sigma_i * gradscale * np.sum(dd, axis=0)
        else:
            # NOTE: this is a different scale, but i figure it should work better:
            step = sigma_i / np.sqrt(12) / parties * np.sum(dd, axis=0)

        center = center - rate_i * step
        central = objective(center)
        evals += 1

        track()

    # print("popsize:", popsize)
    # print("parties:", parties)
    # print("evals:", evals)

    return center, history


if __name__ == "__main__":
    # get this here: https://github.com/imh/hipsterplot/blob/master/hipsterplot.py
    from hipsterplot import plot

    prng = np.random.default_rng(42070)

    problem_size = 100
    rotation, _ = np.linalg.qr(prng.standard_normal((problem_size, problem_size)))
    init = prng.uniform(-5, 5, problem_size)

    def ellipsoid_problem(x):
        return np.sum(10 ** (6 * np.linspace(0, 1, len(x))) * np.square(x))

    def rotated_problem(x):
        return ellipsoid_problem(rotation @ x)

    def noisy_problem(x):
        multiplicative_noise = prng.uniform(0.707, 1.414)
        additive_noise = np.abs(prng.normal(scale=50000))
        return rotated_problem(x) * multiplicative_noise + additive_noise

    objective, true_objective = noisy_problem, rotated_problem
    # optimized, history = minimize(
    #     objective, init, 1000, sigma=0.3, true_objective=true_objective
    # )
    optimized, history = minimize(
        objective,
        init,
        1000,
        sigma=2.0,
        popsize=10,
        true_objective=true_objective,
        seed=42069,
        # _flags=13,
    )

    print(" " * 11 + "plot of log10-losses over time")
    plot(np.log10(history), num_y_chars=23)

    print("loss, before optimization: {:9.6f}".format(true_objective(init)))
    print("loss,  after optimization: {:9.6f}".format(true_objective(optimized)))