add graycode-like distribution option

This commit is contained in:
Connor Olding 2018-04-02 16:29:12 +02:00
parent 6a01f609a9
commit b453438055
4 changed files with 37 additions and 6 deletions

View file

@ -8,13 +8,14 @@ local cfg = {
playable_mode = false, playable_mode = false,
start_big = false, --true start_big = false, --true
starting_lives = 0, --1 starting_lives = 0, --1
--
init_zeros = true, -- instead of he_normal noise or whatever. init_zeros = true, -- instead of he_normal noise or whatever.
frameskip = 4, frameskip = 4,
-- true epsilon-greedy has both deterministic and det_epsilon set. -- true epsilon-greedy has both deterministic and det_epsilon set.
deterministic = true, -- use argmax on outputs instead of random sampling. deterministic = true, -- use argmax on outputs instead of random sampling.
det_epsilon = false, -- take random actions with probability eps. det_epsilon = false, -- take random actions with probability eps.
--
graycode = true,
epoch_trials = 50, epoch_trials = 50,
epoch_top_trials = 25, -- new with ARS. epoch_top_trials = 25, -- new with ARS.
unperturbed_trial = true, -- do a trial without any noise. unperturbed_trial = true, -- do a trial without any noise.
@ -27,7 +28,7 @@ local cfg = {
--learning_rate = 0.0032 / approx_cossim(66573) --learning_rate = 0.0032 / approx_cossim(66573)
--learning_rate = 0.0056 / approx_cossim(66573) --learning_rate = 0.0056 / approx_cossim(66573)
weight_decay = 0.00032, --0.001 --0.0023 weight_decay = 0.00032, --0.001 --0.0023
--
cap_time = 200, --400 cap_time = 200, --400
timer_loser = 1/2, timer_loser = 1/2,
decrement_reward = false, -- bad idea, encourages mario to kill himself decrement_reward = false, -- bad idea, encourages mario to kill himself

View file

@ -1,3 +1,6 @@
-- "gameconfig" is kind of a misnomer, to be honest.
-- it's more like things the end user shouldn't have to change.
local gcfg = { local gcfg = {
input_size = 60 + 4, -- TODO: let the script figure this out for us. input_size = 60 + 4, -- TODO: let the script figure this out for us.
tile_count = 17 * 13, tile_count = 17 * 13,

View file

@ -61,6 +61,7 @@ local ceil = math.ceil
local min = math.min local min = math.min
local max = math.max local max = math.max
local exp = math.exp local exp = math.exp
local pow = math.pow
local log = math.log local log = math.log
local sqrt = math.sqrt local sqrt = math.sqrt
local random = math.random local random = math.random
@ -83,6 +84,8 @@ local arshift = bit.arshift
local rol = bit.rol local rol = bit.rol
local ror = bit.ror local ror = bit.ror
local gui = gui
-- utilities. -- utilities.
local function boolean_xor(a, b) local function boolean_xor(a, b)
@ -423,13 +426,36 @@ local function prepare_epoch()
base_params = network:collect() base_params = network:collect()
empty(trial_noise) empty(trial_noise)
empty(trial_rewards) empty(trial_rewards)
-- TODO: save memory. generate noise as needed by saving the seed
-- TODO: (optionally) save memory.
-- generate noise as needed by saving the seed
-- (the os.time() as of here) and calling nn.normal() each trial. -- (the os.time() as of here) and calling nn.normal() each trial.
-- of course this doubles the amount of time we spend generating noise,
-- but that's a fair tradeoff for dividing memory used by noise by `epoch_trials`.
local precision = (pow(cfg.deviation, 1/-0.51175585) - 8.68297257) / 1.66484392
print(("chosen precision: %.2f"):format(precision))
for i = 1, cfg.epoch_trials do for i = 1, cfg.epoch_trials do
local noise = nn.zeros(#base_params) local noise = nn.zeros(#base_params)
-- NOTE: change in implementation: deviation is multiplied here -- NOTE: change in implementation: deviation is multiplied here
-- and ONLY here now. -- and ONLY here now.
for j = 1, #base_params do noise[j] = cfg.deviation * nn.normal() end if cfg.graycode then
--local precision = 1 / cfg.deviation
--print(cfg.deviation, precision)
for j = 1, #base_params do
noise[j] = exp(-precision * nn.uniform())
end
for j = 1, #base_params do
noise[j] = nn.uniform() < 0.5 and noise[j] or -noise[j]
end
-- TODO? wrap/bound domain to [-1,1].
-- dunno if this will work with the learning rate stuff.
else
for j = 1, #base_params do
noise[j] = cfg.deviation * nn.normal()
end
end
trial_noise[i] = noise trial_noise[i] = noise
end end
trial_i = -1 trial_i = -1
@ -722,7 +748,7 @@ local function init()
joypad_mash('start') joypad_mash('start')
emu.frameadvance() emu.frameadvance()
end end
print(emu.framecount()) --print(emu.framecount())
local res, err = pcall(network.load, network) local res, err = pcall(network.load, network)
if res == false then print(err) end if res == false then print(err) end

1
nn.lua
View file

@ -747,6 +747,7 @@ return {
indexof = indexof, indexof = indexof,
contains = contains, contains = contains,
prod = prod, prod = prod,
uniform = uniform,
normal = normal, normal = normal,
zeros = zeros, zeros = zeros,
arange = arange, arange = arange,