more comments
This commit is contained in:
parent
63583789c3
commit
422468dd47
1 changed files with 10 additions and 1 deletions
11
ars.lua
11
ars.lua
|
@ -1,6 +1,9 @@
|
|||
-- Augmented Random Search
|
||||
-- https://arxiv.org/abs/1803.07055
|
||||
-- with some tweaks (lips) by myself.
|
||||
-- with some tweaks (Lipschitz stuff) by myself.
|
||||
-- i also added an option for Gray code sampling,
|
||||
-- borrowed from a (1+1) optimizer,
|
||||
-- but i haven't yet found a case where it performs better.
|
||||
|
||||
local abs = math.abs
|
||||
local exp = math.exp
|
||||
|
@ -46,6 +49,9 @@ local function collect_best_indices(scored, top, antithetic)
|
|||
end
|
||||
|
||||
local function kinda_lipschitz(dir, pos, neg, mid)
|
||||
-- based on the local Lipschitz constant of a quadratic curve
|
||||
-- drawn through the 3 sampled points: positive, negative, and unperturbed.
|
||||
-- it kinda helps? there's probably a better function to base it around.
|
||||
local _, dev = calc_mean_dev(dir)
|
||||
local c0 = neg - mid
|
||||
local c1 = pos - mid
|
||||
|
@ -133,6 +139,9 @@ function Ars:tell(scored, unperturbed_score)
|
|||
top_rewards[sind + 1] = scored[sind + 1]
|
||||
end
|
||||
else
|
||||
-- ARS is built around antithetic sampling,
|
||||
-- but we can still do something without.
|
||||
-- however, this is getting to be very similar to SNES.
|
||||
for _, i in ipairs(indices) do top_rewards[i] = scored[i] end
|
||||
-- note: although this normalizes the scale, it's later
|
||||
-- re-normalized differently by reward_dev anyway.
|
||||
|
|
Loading…
Add table
Reference in a new issue