--- Evaluate the power-law expression (1.521 * dim - 0.521) ^ -0.5026.
-- NOTE(review): judging by the name, this is presumably an empirical fit of
-- cosine similarity against dimensionality -- confirm. Nothing in the visible
-- part of this file calls it.
-- @param dim number
-- @return number
local function approx_cossim(dim)
    -- the `^` operator is used instead of math.pow, which newer Lua
    -- versions deprecate; the result is the same.
    return (1.521 * dim - 0.521) ^ (-0.5026)
end

--- Map a (typically non-positive) integer step onto a half-decade scale.
-- Computes 10 ^ (x / 2), so every two steps is one power of ten:
--    0 -> 1.0
--   -1 -> 0.316
--   -2 -> 0.1
--   -3 -> 0.0316
--   -4 -> 0.01
--   etc.
-- @param x number
-- @return number
local function intmap(x)
    return 10 ^ (x / 2)
end

-- Default values shared by every configuration. `cfg` (below) falls back to
-- these through the __index metamethod installed later in this file.
local common_cfg = {
    playable_mode = false,
    playback_mode = false,
    start_big = false,
    starting_lives = 0,

    frameskip = 4,
    -- true greedy epsilon has both deterministic and det_epsilon set.
    deterministic = false, -- use argmax on outputs instead of random sampling.
    det_epsilon = false, -- take random actions with probability eps.

    layernorm = false,
    init_zeros = true, -- instead of he_normal noise or whatever.
    graycode = false,
    unperturbed_trial = true, -- do a trial without any noise.
    negate_trials = true, -- try pairs of normal and negated noise directions.
    -- AKA antithetic sampling. note that this doubles the number of trials.
    time_inputs = true, -- binary inputs of global frame count
    normalize_inputs = false,

    es = 'ars',
    ars_lips = false,
    adamant = false, -- run steps through AMSgrad.
    adam_b1 = 10 ^ (-1 / 1), -- fewer trials, more momentum!
    adam_b2 = 10 ^ (-1 / 50),
    adam_eps = intmap(-1), -- focus on b1 rather than b2.
    adam_debias = true,

    cap_time = 300,
    timer_loser = 1/2,
    decrement_reward = false, -- bad idea, encourages mario to run into goombas.
    score_multiplier = 1, -- how much the ingame score influences our rewards.

    starting_world = 1, -- set to 0 for random!
    starting_level = 1, -- set to 0 for random!
}

-- Settings for this particular setup. A key set here takes precedence over
-- the same key in common_cfg, because the fallback is only consulted for
-- keys that are absent from this table.
local cfg = {
    log_fn = 'logs-xnes.csv',
    params_fn = 'params-xnes.txt',

    decrement_reward = true,
    score_multiplier = 5,
    starting_world = 1,
    starting_level = 1,
    starting_lives = 1,
    cap_time = 300,

    deterministic = true,
    epoch_trials = 50,
    epoch_top_trials = 9999,
    negate_trials = false,
    es = 'xnes',
    learning_rate = 0.14,
    deviation = 1.0,
    weight_decay = 0.0,
}

-- TODO: so, uhh..
-- what happens when playback_mode is true but unperturbed_trial is false?
-- Keys that are allowed to be absent from both cfg and common_cfg:
-- reading one of them yields nil instead of raising.
local optional_keys = { log_fn = true, params_fn = true, stats_fn = true }

--- __index handler for cfg: resolve keys that cfg itself does not hold.
-- Returns the common_cfg default when one exists, nil for the optional
-- keys, and otherwise raises so a misspelled option name fails loudly.
-- The error is raised at level 2, i.e. attributed to the code that indexed cfg.
local function lookup_default(_, key)
    local default = common_cfg[key]
    if default ~= nil then
        return default
    end
    if optional_keys[key] then
        return nil
    end
    error("cannot use undeclared config '" .. tostring(key) .. "'", 2)
end

setmetatable(cfg, { __index = lookup_default })

-- Derived settings, computed once from the values resolved above.

-- never keep more top trials than there are trials in an epoch.
cfg.epoch_top_trials = math.min(cfg.epoch_trials, cfg.epoch_top_trials)

cfg.eps_start = 1.0 * cfg.frameskip / 64
cfg.eps_stop = 0.1 * cfg.eps_start
cfg.eps_frames = 1000000

-- overlay and network are mutually exclusive, both driven by playable_mode.
local playable = cfg.playable_mode
cfg.enable_overlay = playable
cfg.enable_network = not playable

-- Sanity checks on option combinations.
if cfg.ars_lips then
    assert(cfg.unperturbed_trial,
           "cfg.unperturbed_trial must be true to use cfg.ars_lips")
    assert(cfg.negate_trials,
           "cfg.negate_trials must be true to use cfg.ars_lips")
end
assert(not cfg.adamant, "cfg.adamant not yet re-implemented")

return cfg