From 422468dd472989208fb70d3b68202f284161994f Mon Sep 17 00:00:00 2001
From: Connor Olding <cloningdonor@gmail.com>
Date: Thu, 14 Jun 2018 22:15:49 +0200
Subject: [PATCH] more comments

---
 ars.lua | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/ars.lua b/ars.lua
index b631533..2cc6422 100644
--- a/ars.lua
+++ b/ars.lua
@@ -1,6 +1,9 @@
 -- Augmented Random Search
 -- https://arxiv.org/abs/1803.07055
--- with some tweaks (lips) by myself.
+-- with some tweaks (Lipschitz stuff) by myself.
+-- i also added an option for Gray code sampling,
+-- borrowed from a (1+1) optimizer,
+-- but i haven't yet found a case where it performs better.
 
 local abs = math.abs
 local exp = math.exp
@@ -46,6 +49,9 @@ local function collect_best_indices(scored, top, antithetic)
 end
 
 local function kinda_lipschitz(dir, pos, neg, mid)
+    -- based on the local Lipschitz constant of a quadratic curve
+    -- drawn through the 3 sampled points: positive, negative, and unperturbed.
+    -- it kinda helps? there's probably a better function to base it around.
     local _, dev = calc_mean_dev(dir)
     local c0 = neg - mid
     local c1 = pos - mid
@@ -133,6 +139,9 @@ function Ars:tell(scored, unperturbed_score)
             top_rewards[sind + 1] = scored[sind + 1]
         end
     else
+        -- ARS is built around antithetic sampling,
+        -- but we can still do something without.
+        -- this is getting to be very similar to SNES, however.
         for _, i in ipairs(indices) do top_rewards[i] = scored[i] end
         -- note: although this normalizes the scale, it's later
         --       re-normalized differently by reward_dev anyway.