more comments

2018-06-14 22:15:49 +02:00 · 2018-06-14 22:15:49 +02:00 · 422468dd47
commit 422468dd47
parent 63583789c3
1 changed files with 10 additions and 1 deletions
--- a/ars.lua
+++ b/ars.lua
@@ -1,6 +1,9 @@
 -- Augmented Random Search
 -- https://arxiv.org/abs/1803.07055
--- with some tweaks (lips) by myself.
+-- with some tweaks (lipschitz stuff) by myself.
+-- i also added an option for graycode sampling,
+-- borrowed from a (1+1) optimizer,
+-- but i haven't yet found a case where it performs better.

 local abs = math.abs
 local exp = math.exp
@@ -46,6 +49,9 @@ local function collect_best_indices(scored, top, antithetic)
 end

 local function kinda_lipschitz(dir, pos, neg, mid)
+    -- based on the local lipschitz constant of a quadratic curve
+    -- drawn through the 3 sampled points: positive, negative, and unperturbed.
+    -- it kinda helps? there's probably a better function to base it around.
    local _, dev = calc_mean_dev(dir)
    local c0 = neg - mid
    local c1 = pos - mid
@@ -133,6 +139,9 @@ function Ars:tell(scored, unperturbed_score)
            top_rewards[sind + 1] = scored[sind + 1]
        end
    else
+        -- ARS is built around antithetic sampling,
+        -- but we can still do something without.
+        -- this is getting to be very similar to SNES however.
        for _, i in ipairs(indices) do top_rewards[i] = scored[i] end
        -- note: although this normalizes the scale, it's later
        --       re-normalized differently by reward_dev anyway.