more comments
This commit is contained in:
parent
63583789c3
commit
422468dd47
1 changed file with 10 additions and 1 deletion
11
ars.lua
11
ars.lua
|
@ -1,6 +1,9 @@
|
||||||
-- Augmented Random Search
|
-- Augmented Random Search
|
||||||
-- https://arxiv.org/abs/1803.07055
|
-- https://arxiv.org/abs/1803.07055
|
||||||
-- with some tweaks (lips) by myself.
|
-- with some tweaks (lipschitz stuff) by myself.
|
||||||
|
-- i also added an option for Gray code sampling,
|
||||||
|
-- borrowed from a (1+1) optimizer,
|
||||||
|
-- but i haven't yet found a case where it performs better.
|
||||||
|
|
||||||
local abs = math.abs
|
local abs = math.abs
|
||||||
local exp = math.exp
|
local exp = math.exp
|
||||||
|
@ -46,6 +49,9 @@ local function collect_best_indices(scored, top, antithetic)
|
||||||
end
|
end
|
||||||
|
|
||||||
local function kinda_lipschitz(dir, pos, neg, mid)
|
local function kinda_lipschitz(dir, pos, neg, mid)
|
||||||
|
-- based on the local lipschitz constant of a quadratic curve
|
||||||
|
-- drawn through the 3 sampled points: positive, negative, and unperturbed.
|
||||||
|
-- it kinda helps? there's probably a better function to base it around.
|
||||||
local _, dev = calc_mean_dev(dir)
|
local _, dev = calc_mean_dev(dir)
|
||||||
local c0 = neg - mid
|
local c0 = neg - mid
|
||||||
local c1 = pos - mid
|
local c1 = pos - mid
|
||||||
|
@ -133,6 +139,9 @@ function Ars:tell(scored, unperturbed_score)
|
||||||
top_rewards[sind + 1] = scored[sind + 1]
|
top_rewards[sind + 1] = scored[sind + 1]
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
|
-- ARS is built around antithetic sampling,
|
||||||
|
-- but we can still do something without.
|
||||||
|
-- this is getting to be very similar to SNES, however.
|
||||||
for _, i in ipairs(indices) do top_rewards[i] = scored[i] end
|
for _, i in ipairs(indices) do top_rewards[i] = scored[i] end
|
||||||
-- note: although this normalizes the scale, it's later
|
-- note: although this normalizes the scale, it's later
|
||||||
-- re-normalized differently by reward_dev anyway.
|
-- re-normalized differently by reward_dev anyway.
|
||||||
|
|
Loading…
Add table
Reference in a new issue