From 422468dd472989208fb70d3b68202f284161994f Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Thu, 14 Jun 2018 22:15:49 +0200 Subject: [PATCH] more comments --- ars.lua | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ars.lua b/ars.lua index b631533..2cc6422 100644 --- a/ars.lua +++ b/ars.lua @@ -1,6 +1,9 @@ -- Augmented Random Search -- https://arxiv.org/abs/1803.07055 --- with some tweaks (lips) by myself. +-- with some tweaks (lipschitz stuff) by myself. +-- i also added an option for graycode sampling, +-- borrowed from a (1+1) optimizer, +-- but i haven't yet found a case where it performs better. local abs = math.abs local exp = math.exp @@ -46,6 +49,9 @@ local function collect_best_indices(scored, top, antithetic) end local function kinda_lipschitz(dir, pos, neg, mid) + -- based on the local lipschitz constant of a quadratic curve + -- drawn through the 3 sampled points: positive, negative, and unperturbed. + -- it kinda helps? there's probably a better function to base it around. local _, dev = calc_mean_dev(dir) local c0 = neg - mid local c1 = pos - mid @@ -133,6 +139,9 @@ function Ars:tell(scored, unperturbed_score) top_rewards[sind + 1] = scored[sind + 1] end else + -- ARS is built around antithetic sampling, + -- but we can still do something without. + -- this is getting to be very similar to SNES however. for _, i in ipairs(indices) do top_rewards[i] = scored[i] end -- note: although this normalizes the scale, it's later -- re-normalized differently by reward_dev anyway.