-- Configuration module: builds the `cfg` table and returns it.
-- Reading a key that was never declared raises an error (see the metatable
-- at the bottom), so a misspelled option name fails loudly instead of
-- silently evaluating to nil.

-- Evaluates (1.521 * dim - 0.521) ^ -0.5026.
-- Only referenced by the commented-out learning_rate alternatives below.
-- NOTE(review): judging by the name, this presumably approximates a cosine
-- similarity as a function of dimensionality -- confirm against its origin.
local function approx_cossim(dim)
    -- The ^ operator is used rather than math.pow, which is deprecated in
    -- Lua 5.3 and missing from default Lua 5.4 builds.
    return (1.521 * dim - 0.521) ^ (-0.5026)
end

local cfg = {
    defer_prints = true,

    playable_mode = false,
    start_big = false, --true
    starting_lives = 0, --1

    init_zeros = true, -- instead of he_normal noise or whatever.
    frameskip = 4,
    -- true greedy epsilon has both deterministic and det_epsilon set.
    deterministic = false, -- use argmax on outputs instead of random sampling.
    det_epsilon = false, -- take random actions with probability eps.

    graycode = false,
    epoch_trials = 5,
    epoch_top_trials = 2, -- new with ARS.
    unperturbed_trial = false, -- do a trial without any noise.
    negate_trials = true, -- try pairs of normal and negated noise directions.
    -- ^ note that this now doubles the effective trials.
    -- NOTE(review): the remark above used to sit below time_inputs; it
    -- presumably describes negate_trials (trials come in pairs) -- confirm.
    time_inputs = true, -- binary inputs of global frame count
    deviation = 0.32,
    --learning_rate = 0.01 / approx_cossim(7051)
    learning_rate = 0.32,
    --learning_rate = 0.0032 / approx_cossim(66573)
    --learning_rate = 0.0056 / approx_cossim(66573)
    weight_decay = 0.0032,

    cap_time = 200, --400
    timer_loser = 1/2,
    decrement_reward = false, -- bad idea, encourages mario to kill himself

    playback_mode = false,
}

-- TODO: so, uhh..
-- what happens when playback_mode is true but unperturbed_trial is false?

-- The number of top trials can never exceed the number of trials per epoch.
cfg.epoch_top_trials = math.min(cfg.epoch_trials, cfg.epoch_top_trials)

-- Epsilon settings derived from frameskip: eps_stop is a tenth of eps_start.
cfg.eps_start = 1.0 * cfg.frameskip / 64
cfg.eps_stop = 0.1 * cfg.eps_start
cfg.eps_frames = 1000000

-- The overlay and the network are mutually exclusive; both follow
-- playable_mode.
cfg.enable_overlay = cfg.playable_mode
cfg.enable_network = not cfg.playable_mode

-- __index only fires for keys absent from cfg, so options whose value is
-- `false` still read normally; anything undeclared raises an error that is
-- reported at the caller's position (level 2).
return setmetatable(cfg, {
    __index = function(_, n)
        error("cannot use undeclared config '" .. tostring(n) .. "'", 2)
    end
})