diff --git a/main.lua b/main.lua
index 1f2e0b7..0eee2ca 100644
--- a/main.lua
+++ b/main.lua
@@ -2,8 +2,6 @@ local globalize = require("strict")
 
 -- configuration.
 
---randomseed(11)
-
 local cfg = require("config")
 local gcfg = require("gameconfig")
 
@@ -93,6 +91,9 @@ local lerp = util.lerp
 local softchoice = util.softchoice
 local unperturbed_rank = util.unperturbed_rank
 
+local game = require("smb")
+game.overlay = cfg.enable_overlay
+
 -- utilities.
 
 local log_map = {
@@ -158,12 +159,6 @@ local function make_network(input_size)
     return nn.Model({nn_x, nn_tx}, {nn_z})
 end
 
--- and here we go with the game stuff.
--- which was all refactored out, so this comment looks a little silly now.
-
-local game = require("smb")
-game.overlay = cfg.enable_overlay
-
 -- learning and evaluation.
 
 local function prepare_epoch()
@@ -174,12 +169,6 @@ local function prepare_epoch()
     empty(trial_noise)
     empty(trial_rewards)
 
-    -- TODO: (optionally) save memory.
-    -- generate noise as needed by saving the seed
-    -- (the os.time() as of here) and calling nn.normal() each trial.
-    -- of course this doubles the amount of time we spend generating noise,
-    -- but that's a fair tradeoff for dividing memory used by noise by `epoch_trials`.
-
     local precision = (pow(cfg.deviation, 1/-0.51175585) - 8.68297257) / 1.66484392
     if cfg.graycode then
         print(("chosen precision: %.2f"):format(precision))
@@ -187,8 +176,6 @@ local function prepare_epoch()
 
     for i = 1, cfg.epoch_trials do
         local noise = nn.zeros(#base_params)
-        -- NOTE: change in implementation: deviation is multiplied here
-        -- and ONLY here now.
         if cfg.graycode then
             --local precision = 1 / cfg.deviation
             --print(cfg.deviation, precision)
@@ -198,8 +185,6 @@ local function prepare_epoch()
             for j = 1, #base_params do
                 noise[j] = nn.uniform() < 0.5 and noise[j] or -noise[j]
             end
-            -- TODO? wrap/bound domain to [-1,1].
-            -- dunno if this will work with the learning rate stuff.
         else
             for j = 1, #base_params do
                 noise[j] = cfg.deviation * nn.normal()
@@ -334,7 +319,6 @@ local function learn_from_epoch()
         for i, v in ipairs(top_rewards) do top_rewards[i] = v / reward_dev end
     end
 
-    -- NOTE: step no longer directly incorporates learning_rate.
     for i = 1, cfg.epoch_trials do
         local ind = (i - 1) * 2 + 1
         local pos = top_rewards[ind + 0]