-- smbot/config.lua

--- Evaluate the power-law curve (1.521 * dim - 0.521) ^ -0.5026.
-- NOTE(review): the name suggests this approximates a cosine similarity as a
-- function of dimensionality -- confirm against whoever uses it.
-- @tparam number dim  input to the curve. The base turns negative for dim
--   below roughly 0.3425, in which case the result is NaN.
-- @treturn number
local function approx_cossim(dim)
    -- `^` is used instead of math.pow: the latter was deprecated in Lua 5.3
    -- and removed in 5.4, while the operator works on every Lua version.
    return (1.521 * dim - 0.521) ^ (-0.5026)
end
--- Map an integer-like step onto a half-decade scale: 10 ^ (x / 2).
--  0 -> 1.0
-- -1 -> 0.316
-- -2 -> 0.1
-- -3 -> 0.0316
-- -4 -> 0.01
-- etc.
-- @tparam number x  exponent step; every 2 steps is one power of ten.
-- @treturn number
local function intmap(x)
    -- `^` is used instead of math.pow: the latter was deprecated in Lua 5.3
    -- and removed in 5.4, while the operator works on every Lua version.
    return 10 ^ (x / 2)
end
-- Default values for every setting. The active `cfg` table falls back to
-- these (through its __index metamethod) for any key it does not override.
local common_cfg = {
    playable_mode = false,
    playback_mode = false,
    start_big = false,
    starting_lives = 0,

    frameskip = 4,
    -- true greedy epsilon has both deterministic and det_epsilon set.
    deterministic = false, -- use argmax on outputs instead of random sampling.
    det_epsilon = false, -- take random actions with probability eps.

    layernorm = false,
    init_zeros = true, -- instead of he_normal noise or whatever.
    graycode = false,

    unperturbed_trial = true, -- do a trial without any noise.
    negate_trials = true, -- try pairs of normal and negated noise directions.
    -- AKA antithetic sampling. note that this doubles the number of trials.
    time_inputs = true, -- binary inputs of global frame count
    normalize_inputs = false,

    learning_rate = 1.0,
    mean_adapt = 1.0, -- for xNES
    weight_decay = 0.0,
    sigma_decay = 0.0,

    es = 'ars',
    ars_lips = false,
    adamant = false, -- run steps through AMSgrad.
    -- `^` is used instead of math.pow, which Lua 5.4 no longer provides.
    adam_b1 = 10 ^ (-1 / 1), -- fewer trials, more momentum!
    adam_b2 = 10 ^ (-1 / 50),
    adam_eps = intmap(-1), -- focus on b1 rather than b2.
    adam_debias = true,

    min_time = 100,
    max_time = 300,
    timer_loser = 1/2,

    decrement_reward = false, -- bad idea, encourages mario to run into goombas.
    score_multiplier = 1, -- how much the ingame score influences our rewards.
    starting_world = 1, -- set to 0 for random!
    starting_level = 1, -- set to 0 for random!
}
-- The active configuration: only the settings that differ from (or are not
-- present in) the defaults are listed here.
local cfg = {
    log_fn = 'logs-snes.csv',
    params_fn = 'params-snes.txt',

    decrement_reward = true,
    score_multiplier = 5,
    starting_world = 0,
    starting_level = 1,
    --starting_lives = 1,
    min_time = 300,
    max_time = 300,

    deterministic = false, --true,

    epoch_trials = 32,
    -- larger than epoch_trials on purpose; it is clamped to epoch_trials
    -- after the table is built.
    epoch_top_trials = 9999,
    negate_trials = true,

    es = 'snes',
    learning_rate = 0.5,
    mean_adapt = 0.5,
    deviation = 0.5,
    weight_decay = 0.025,
    sigma_decay = 0.001,
}
-- TODO: work out what happens when playback_mode is true
-- but unperturbed_trial is false.
-- Give cfg a fallback for keys it does not define itself:
--   1. a non-nil value in common_cfg is returned as the default;
--   2. the optional file-name keys below simply read as nil;
--   3. anything else is treated as a typo and raises an error that is
--      attributed to the code that indexed cfg (error level 2).
do
    local optional_keys = { log_fn = true, params_fn = true, stats_fn = true }

    setmetatable(cfg, {
        __index = function(_, name)
            local default = common_cfg[name]
            if default ~= nil then return default end
            if optional_keys[name] then return nil end
            error("cannot use undeclared config '" .. tostring(name) .. "'", 2)
        end
    })
end
-- Derived settings, computed once the fallback lookup is in place.

-- never keep more "top" trials than are actually run per epoch.
cfg.epoch_top_trials = math.min(cfg.epoch_trials, cfg.epoch_top_trials)

-- NOTE(review): presumably the epsilon schedule used with det_epsilon
-- (start value, stop value, number of frames) -- confirm in the consumer.
cfg.eps_start = 1.0 * cfg.frameskip / 64
cfg.eps_stop = 0.1 * cfg.eps_start
cfg.eps_frames = 1000000

-- the overlay and the network are mutually exclusive, keyed off playable_mode.
cfg.enable_overlay = cfg.playable_mode
cfg.enable_network = not cfg.playable_mode

-- Sanity checks: fail at load time on unsupported combinations.
assert(not cfg.ars_lips or cfg.unperturbed_trial,
    "cfg.unperturbed_trial must be true to use cfg.ars_lips")
assert(not cfg.ars_lips or cfg.negate_trials,
    "cfg.negate_trials must be true to use cfg.ars_lips")
assert(not cfg.adamant,
    "cfg.adamant not yet re-implemented")

return cfg