more comments

This commit is contained in:
Connor Olding 2018-06-14 22:15:49 +02:00
parent 63583789c3
commit 422468dd47

11
ars.lua
View File

@ -1,6 +1,9 @@
-- Augmented Random Search
-- https://arxiv.org/abs/1803.07055
-- with some tweaks (lips) by myself.
-- with some tweaks (Lipschitz stuff) by myself.
-- i also added an option for Gray-code sampling,
-- borrowed from a (1+1) optimizer,
-- but i haven't yet found a case where it performs better.
local abs = math.abs
local exp = math.exp
@ -46,6 +49,9 @@ local function collect_best_indices(scored, top, antithetic)
end
local function kinda_lipschitz(dir, pos, neg, mid)
-- based on the local Lipschitz constant of a quadratic curve
-- drawn through the 3 sampled points: positive, negative, and unperturbed.
-- it seems to help somewhat; there's probably a better function to base it on.
local _, dev = calc_mean_dev(dir)
local c0 = neg - mid
local c1 = pos - mid
@ -133,6 +139,9 @@ function Ars:tell(scored, unperturbed_score)
top_rewards[sind + 1] = scored[sind + 1]
end
else
-- ARS is built around antithetic sampling,
-- but we can still do something without it.
-- however, this is getting to be very similar to SNES.
for _, i in ipairs(indices) do top_rewards[i] = scored[i] end
-- note: although this normalizes the scale, it's later
-- re-normalized differently by reward_dev anyway.