#!/usr/bin/env python3

# Copyright (C) 2022 Connor Olding
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.


def birect(
    obj,  # objective function to find the minimum value of
    lo,  # list of lower bounds for each problem dimension
    hi,  # list of upper bounds for each problem dimension
    *,
    min_diag,  # never subdivide hyper-rectangles below this length
    min_error=None,  # exit when the objective function achieves this error
    max_evals=None,  # exit when the objective function has been run this many times
    max_iters=None,  # exit when the optimization procedure iterates this many times
    by_longest=False,  # measure rects by their longest edge instead of their diagonal
    pruning=0,  # slope-based pruning factor for potentially-optimal rects (0 disables)
    F=float,  # can be float, decimal.Decimal, numpy.float64, or anything float-like
):
    """Minimize `obj` over the box [lo, hi] with a bisecting variant of the
    *di*viding *rect*angles (DIRECT-family) algorithm.

    Coordinates are tracked as exact (dyadic) fractions until the moment they
    are handed to `obj`, so repeated subdivision accumulates no rounding error.
    At least one of `min_error`, `max_evals`, or `max_iters` must be given;
    the search also stops on its own once every remaining hyper-rectangle has
    shrunk below `min_diag`.

    Returns a tuple `(argmin, fmin)`: the coordinates of the best point
    evaluated so far, and the objective value at that point.
    """
    assert len(lo) == len(hi), "dimensional mismatch"
    assert not (
        min_error is None and max_evals is None and max_iters is None
    ), "at least one stopping condition must be specified"

    # variables prefixed with v_ are to be one-dimensional vectors. [a, b, c]
    # variables prefixed with w_ are to be one-dimensional vectors of pairs. [a, b]
    # variables prefixed with vw_ are to be two-dimensional vectors. [[a, b], [c, d]]
    # aside: xmin should actually be called v_xmin, but it's not!

    def fun(w_t):
        # final conversion from exact fraction to possibly-inexact F-type:
        v_x = [F(num) / F(den) for num, den in w_t]
        # linearly interpolate within the bounds of the function:
        v_x = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, v_x)]
        res = obj(v_x)
        return res

    def ab_to_lu(w_a, w_b):
        # converts corner points to midpoints, also denominators
        # (2 * a + b) / (den * 3) = point in between corner "a" and the center
        w_l = [(a[0] + a[0] + b[0], (1 << a[1]) * 3) for a, b in zip(w_a, w_b)]
        # (a + 2 * b) / (den * 3) = point in between corner "b" and the center
        w_u = [(a[0] + b[0] + b[0], (1 << b[1]) * 3) for a, b in zip(w_a, w_b)]
        return w_l, w_u

    dims = len(lo)  # already asserted that len(lo) == len(hi)

    # initial corner points:
    # note that the denominators are encoded as the exponents of a power of two.
    # therefore, coordinate = pair[0] / (1 << pair[1]).
    w_a, w_b = [(0, 0)] * dims, [(1, 0)] * dims

    # initial points halfway between each of the two corners and the center:
    # note that the denominators are identity here.
    # therefore, coordinate = pair[0] / pair[1].
    w_l, w_u = ab_to_lu(w_a, w_b)

    # initial function evaluations:
    fl = fun(w_l)
    fu = fun(w_u)

    # initial minimum of all evaluated points so far:
    if fl <= fu:
        xmin, fmin = w_l, fl
    else:
        xmin, fmin = w_u, fu

    # construct lists to hold all sample coordinates and their values:
    vw_a, vw_b = [w_a], [w_b]
    v_fl = [fl]  # remember that "l" and "u" are arbitrary shorthand used by the paper,
    v_fu = [fu]  # and one isn't necessarily above or below the other.
    v_active = {0: True}  # indices of hyper-rectangles that have yet to be subdivided
    v_depth = [0]  # increments every split
    n = 1  # how many indices are in use
    del w_a, w_b, w_l, w_u, fl, fu  # prevent accidental re-use

    def precision_met():  # returns True when the optimization procedure should exit
        return min_error is not None and fmin <= min_error

    def no_more_evals():  # returns True when the optimization procedure should exit
        return max_evals is not None and n + 1 >= max_evals

    def gather_potential(v_i):
        # crappy algorithm for finding the convex hull of a line plot where
        # the x axis is the diameter of hyper-rectangle, and
        # the y axis is the minimum loss of the two points (v_fl, v_fu) within it.

        # start by finding the arg-minimum for each set of equal-diameter rects.
        # TODO: make this faster. use a sorted queue and peek at the best for each depth.
        bests = {}  # mapping of depth to arg-minimum value (i.e. its index)
        for i in v_i:
            fl, fu = v_fl[i], v_fu[i]
            f = min(fl, fu)
            depth = v_depth[i]
            if by_longest:
                depth = depth // dims * dims
            # assert depth < depth_limit
            best = bests.get(depth)
            # TODO: handle f == best case.
            if best is None or f < best[1]:
                bests[depth] = (i, f)

        if not bests:  # no candidates at all; nothing is potentially optimal
            return []

        if len(bests) == 1:  # nothing to compare it to
            return [i for i, f in bests.values()]

        asc = sorted(bests.items(), key=lambda t: -t[0])  # sort by length, ascending

        # first, remove points that slope downwards.
        # this yields a pareto front, which isn't necessarily convex.
        old = asc
        new = [old[-1]]
        smallest = old[-1][1][1]
        for i in reversed(range(len(old) - 1)):
            f = old[i][1][1]
            if f <= smallest:
                smallest = f
                new.append(old[i])
        new = new[::-1]

        # second, convert depths to lengths.
        if by_longest:  # TODO: does this branch make any difference?
            new = [(longest_cache[depth],) + t for depth, t in new]
        else:
            new = [(diagonal_cache[depth],) + t for depth, t in new]

        # third, remove points that fall under a previous slope.
        old = new
        skip = [False] * len(old)
        for i in range(len(old)):
            if skip[i]:
                continue
            len0, i0, f0 = old[i]
            smallest_slope = None
            for j in range(i + 1, len(old)):
                if skip[j]:
                    continue
                len1, i1, f1 = old[j]
                num = f1 - f0
                den = len1 - len0
                # this factor of 3/2 comes from the denominator;
                # each length should be multiplied by 2/3:
                # the furthest relative distance from a corner to a center point.
                slope = num / den  # * F(3 / 2)
                if smallest_slope is None:
                    smallest_slope = slope
                elif slope < smallest_slope:
                    for k in range(i + 1, j):
                        skip[k] = True
                    smallest_slope = slope

        new = [entry for entry, skipping in zip(old, skip) if not skipping]

        if pruning:
            # drop hull entries whose slope toward an optimistic target value
            # (extrapolated below fmin by `pruning` times the fmin-to-median
            # spread) is worse than a smaller rect's.
            v_f = sorted(min(fl, fu) for fl, fu in zip(v_fl, v_fu))
            fmedian = v_f[len(v_f) // 2]
            offset = fmin - pruning * (fmedian - fmin)
            start = 0
            K_slope = None
            for i in range(len(new)):
                len0, i0, f0 = new[i]
                new_slope = (f0 - offset) / len0
                if K_slope is None or new_slope < K_slope:
                    K_slope = new_slope
                    start = i
            new = new[start:]

        # note: loop variable renamed from `len` so the builtin isn't shadowed.
        return [i for length, i, f in new]

    def determine_longest(w_a, w_b):
        # the index of the dimension is used as a tie-breaker (considered longer).
        # e.g. a box has lengths (2, 3, 3). the index returned is then 1.
        # TODO: alternate way of stating that comment: biased towards smaller indices.
        longest = 0
        invlen = None
        for i, (a, b) in enumerate(zip(w_a, w_b)):
            den_a = 1 << a[1]
            den_b = 1 << b[1]
            den = max(den_a, den_b)  # TODO: always the same, right?
            if invlen is None or den < invlen:
                invlen = den
                longest = i
        return longest

    # Hyper-rectangle subdivision demonstration: (2D example)
    #
    #  Initial           Split Once         Split Twice        Split Both
    #        ↓b                ↓b               a↓b               a↓
    #  ┌───────────┐←b   ┌─────╥─────┐    ┌─────╥─────┐    ┌─────╥─────┐←a
    #  │           │     │     ║     │    │  l  ║     │    │  l  ║  l  │
    #  │    ① u    │  →  │ u ② ║  u  │  → │ u ③ ║  u  │  → │ u   ║ u   │
    #  │           │     │     ║     │  b→╞═════╣←a   │  b→╞═════╬═════╡←a
    #  │   l       │  →  │   l ║  l  │  → │   l ║  l  │  → │   l ║  l  │
    #  │           │     │     ║     │    │  u  ║     │    │  u  ║  u  │
    # a→└───────────┘    └─────╨─────┘  b→└─────╨─────┘  b→└─────╨─────┘
    #   ↑a                ↑a               ↑a               ↑b

    def split_it(i, which, *, w_a, w_b, d):
        # bisect rect `i` along dimension `d` and record one of the two halves
        # (`which` selects the half); reuses the parent's sample on that side.
        nonlocal n, xmin, fmin
        new, n = n, n + 1  # designate an index for the new hyper-rectangle
        w_new_a = w_a.copy()
        w_new_b = w_b.copy()
        den = w_a[d][1]  # should be equal to w_b[d][1] as well
        num_a = w_a[d][0]
        num_b = w_b[d][0]
        if which:
            w_new_a[d] = (num_b + num_b, den + 1)  # swap
            w_new_b[d] = (num_a + num_b, den + 1)  # slide
        else:
            w_new_a[d] = (num_a + num_b, den + 1)  # slide
            w_new_b[d] = (num_a + num_a, den + 1)  # swap
        w_l, w_u = ab_to_lu(w_new_a, w_new_b)
        fl = fun(w_l) if which else v_fl[i]
        fu = v_fu[i] if which else fun(w_u)
        vw_a.append(w_new_a)
        vw_b.append(w_new_b)
        v_fl.append(fl)
        v_fu.append(fu)
        v_depth.append(v_depth[i] + 1)
        if which:
            if fl < fmin:
                xmin, fmin = w_l, fl
        else:
            if fu < fmin:
                xmin, fmin = w_u, fu
        return new

    def split_rectangles(v_i):  # returns new indices
        v_new = []
        for i in v_i:
            w_a = vw_a[i]
            w_b = vw_b[i]
            d = determine_longest(w_a, w_b)
            v_new.append(split_it(i, 0, w_a=w_a, w_b=w_b, d=d))
            if precision_met() or no_more_evals():
                break
            v_new.append(split_it(i, 1, w_a=w_a, w_b=w_b, d=d))
            if precision_met() or no_more_evals():
                break
        assert len(vw_a) == n, "internal error: vw_a has invalid length"
        assert len(vw_b) == n, "internal error: vw_b has invalid length"
        assert len(v_fl) == n, "internal error: v_fl has invalid length"
        assert len(v_fu) == n, "internal error: v_fu has invalid length"
        assert len(v_depth) == n, "internal error: v_depth has invalid length"
        return v_new

    def _arbitrary_subdivision(w_a, w_b, d):
        # shrink the coordinates as if they were subdivided and a single
        # subdivision was selected. which subdivision is chosen doesn't matter.
        a_d = w_a[d]
        b_d = w_b[d]
        large = max(a_d[0], b_d[0])
        small = min(a_d[0], b_d[0])
        w_a[d] = (large * 2 - 0, a_d[1] + 1)
        w_b[d] = (small * 2 + 1, b_d[1] + 1)

    def precompute_diagonals_by_limit(limit):
        # recompute the diagonal lengths using a single shared denominator
        # (1 << limit), which is exact as long as `limit` is small.
        diags = []
        w_a = vw_a[0].copy()
        w_b = vw_b[0].copy()
        for depth in range(limit):
            sq_dist = 0
            for a, b in zip(w_a, w_b):
                delta = b[0] - a[0]
                sq_dist += (delta * delta) << (2 * (limit - a[1]))
            diags.append(sq_dist)
            _arbitrary_subdivision(w_a, w_b, depth % dims)
        return [F(diag) ** F(0.5) / F(1 << limit) for diag in diags]

    def precompute_diagonals_by_length(minlen):
        # walk down the subdivision sequence until the diagonal drops below
        # `minlen`; also track the longest edge at each depth for by_longest.
        diags, longests = [], []
        w_a = vw_a[0].copy()
        w_b = vw_b[0].copy()
        for depth in range(1_000_000):
            bits = max(max(a[1], b[1]) for a, b in zip(w_a, w_b))
            sq_dist, longest = 0, 0
            for a, b in zip(w_a, w_b):
                delta = b[0] - a[0]
                sq_dist += (delta * delta) << (2 * (bits - a[1]))
                longest = max(longest, abs(delta) << (bits - a[1]))
            diag = F(sq_dist) ** F(0.5) / F(1 << bits)
            if diag < minlen:
                break
            longest = F(longest) / F(1 << bits)
            diags.append(diag)
            longests.append(longest)
            _arbitrary_subdivision(w_a, w_b, depth % dims)
        return diags, longests

    diagonal_cache, longest_cache = precompute_diagonals_by_length(min_diag)
    depth_limit = len(diagonal_cache)
    if depth_limit <= 500:  # prevents OverflowError: int too large to convert to float
        diagonal_cache = precompute_diagonals_by_limit(depth_limit)

    for outer in range(1_000_000 if max_iters is None else max_iters):
        if precision_met() or no_more_evals():  # check stopping conditions
            break

        # every remaining rect has reached depth_limit (i.e. shrunk below
        # min_diag); previously this case crashed inside gather_potential.
        if not v_active:
            break

        # perform the actual *di*viding *rect*angles algorithm:
        v_potential = gather_potential(v_active)
        v_new = split_rectangles(v_potential)
        for j in v_potential:
            del v_active[j]  # these were just subdivided a moment ago, so remove them
        for j in v_new:
            if v_depth[j] < depth_limit:  # TODO: is checking this late wasting evals?
                v_active[j] = True  # these were just created, so add them

    # convert the best point's exact fractions back to user-space coordinates:
    tmin = [F(x[0]) / F(x[1]) for x in xmin]
    argmin = [l * (1 - t) + h * t for l, h, t in zip(lo, hi, tmin)]
    return argmin, fmin


if __name__ == "__main__":
    import numpy as np

    def objective2210(x):
        # another linear transformation of the rosenbrock banana function.
        assert len(x) > 1, len(x)
        a, b = x[:-1], x[1:]
        a = a / 4.0 * 2 - 12 / 15
        b = b / 1.5 * 2 - 43 / 15
        # solution: 3.60 1.40
        return (
            np.sum(100 * np.square(np.square(a) + b) + np.square(a - 1))
            / 499.0444444444444
        )

    F = np.float64
    res = birect(
        lambda a: objective2210(np.array(a, F)),
        [0, 0],
        [5, 5],
        min_diag=F(1e-8 / 5),
        max_evals=2_000,
        F=F,
        by_longest=True,
    )
    print("", "birect result:", *res, sep="\n")
    print("", "double checked:", objective2210(np.array(res[0], F)), sep="\n")