From b45343805576000d315e18894a412cf400613f14 Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Mon, 2 Apr 2018 16:29:12 +0200 Subject: [PATCH] add graycode-like distribution option --- config.lua | 7 ++++--- gameconfig.lua | 3 +++ main.lua | 32 +++++++++++++++++++++++++++++--- nn.lua | 1 + 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/config.lua b/config.lua index a619aa8..e8cb96d 100644 --- a/config.lua +++ b/config.lua @@ -8,13 +8,14 @@ local cfg = { playable_mode = false, start_big = false, --true starting_lives = 0, --1 - -- + init_zeros = true, -- instead of he_normal noise or whatever. frameskip = 4, -- true greedy epsilon has both deterministic and det_epsilon set. deterministic = true, -- use argmax on outputs instead of random sampling. det_epsilon = false, -- take random actions with probability eps. - -- + + graycode = true, epoch_trials = 50, epoch_top_trials = 25, -- new with ARS. unperturbed_trial = true, -- do a trial without any noise. @@ -27,7 +28,7 @@ local cfg = { --learning_rate = 0.0032 / approx_cossim(66573) --learning_rate = 0.0056 / approx_cossim(66573) weight_decay = 0.00032, --0.001 --0.0023 - -- + cap_time = 200, --400 timer_loser = 1/2, decrement_reward = false, -- bad idea, encourages mario to kill himself diff --git a/gameconfig.lua b/gameconfig.lua index 3ec69ac..34e1b5a 100644 --- a/gameconfig.lua +++ b/gameconfig.lua @@ -1,3 +1,6 @@ +-- "gameconfig" is kind of a misnomer, to be honest. +-- it's more like things the end user shouldn't have to change. + local gcfg = { input_size = 60 + 4, -- TODO: let the script figure this out for us. tile_count = 17 * 13, diff --git a/main.lua b/main.lua index ac09e90..4ab813f 100644 --- a/main.lua +++ b/main.lua @@ -61,6 +61,7 @@ local ceil = math.ceil local min = math.min local max = math.max local exp = math.exp +local pow = math.pow local log = math.log local sqrt = math.sqrt local random = math.random @@ -83,6 +84,8 @@ local arshift = bit.arshift local rol = bit.rol local ror = bit.ror +local gui = gui + -- utilities. local function boolean_xor(a, b) @@ -423,13 +426,36 @@ local function prepare_epoch() base_params = network:collect() empty(trial_noise) empty(trial_rewards) - -- TODO: save memory. generate noise as needed by saving the seed + + -- TODO: (optionally) save memory. + -- generate noise as needed by saving the seed -- (the os.time() as of here) and calling nn.normal() each trial. + -- of course this doubles the amount of time we spend generating noise, + -- but that's a fair tradeoff for dividing memory used by noise by `epoch_trials`. + + local precision = (pow(cfg.deviation, 1/-0.51175585) - 8.68297257) / 1.66484392 + print(("chosen precision: %.2f"):format(precision)) + for i = 1, cfg.epoch_trials do local noise = nn.zeros(#base_params) -- NOTE: change in implementation: deviation is multiplied here -- and ONLY here now. - for j = 1, #base_params do noise[j] = cfg.deviation * nn.normal() end + if cfg.graycode then + --local precision = 1 / cfg.deviation + --print(cfg.deviation, precision) + for j = 1, #base_params do + noise[j] = exp(-precision * nn.uniform()) + end + for j = 1, #base_params do + noise[j] = nn.uniform() < 0.5 and noise[j] or -noise[j] + end + -- TODO? wrap/bound domain to [-1,1]. + -- dunno if this will work with the learning rate stuff. + else + for j = 1, #base_params do + noise[j] = cfg.deviation * nn.normal() + end + end trial_noise[i] = noise end trial_i = -1 @@ -722,7 +748,7 @@ local function init() joypad_mash('start') emu.frameadvance() end - print(emu.framecount()) + --print(emu.framecount()) local res, err = pcall(network.load, network) if res == false then print(err) end diff --git a/nn.lua b/nn.lua index 8fcb092..85b9a10 100644 --- a/nn.lua +++ b/nn.lua @@ -747,6 +747,7 @@ return { indexof = indexof, contains = contains, prod = prod, + uniform = uniform, normal = normal, zeros = zeros, arange = arange,