2018-04-02 06:21:55 -07:00
|
|
|
-- Evaluates the fitted power law (1.521 * dim - 0.521) ^ -0.5026.
-- NOTE(review): judging by the name, this presumably approximates a cosine
-- similarity as a function of dimensionality -- confirm against the caller.
-- dim: number; the result is exactly 1 when dim == 1.
-- Returns a number.
local function approx_cossim(dim)
    -- the `^` operator is used instead of math.pow, which is deprecated in
    -- Lua 5.3 and missing from Lua 5.4.
    return (1.521 * dim - 0.521) ^ -0.5026
end
|
|
|
|
|
2018-05-03 06:33:17 -07:00
|
|
|
-- Maps a number onto a logarithmic scale where every two steps
-- correspond to one power of ten:
--  0 -> 1.0
-- -1 -> 0.316
-- -2 -> 0.1
-- -3 -> 0.0316
-- -4 -> 0.01
-- etc.
-- x: number (the examples above use integers).
-- Returns 10 raised to the power x / 2.
local function intmap(x)
    -- the `^` operator is used instead of math.pow, which is deprecated in
    -- Lua 5.3 and missing from Lua 5.4.
    return 10 ^ (x / 2)
end
|
|
|
|
|
2018-05-06 20:55:58 -07:00
|
|
|
-- Default values for every declared option. Lookups on `cfg` that miss fall
-- back to this table through the __index metamethod installed on `cfg`.
local common_cfg = {
    defer_prints = true,

    playable_mode = false,
    playback_mode = false,
    start_big = false,
    starting_lives = 0,

    frameskip = 4,
    -- true greedy epsilon has both deterministic and det_epsilon set.
    deterministic = false, -- use argmax on outputs instead of random sampling.
    det_epsilon = false, -- take random actions with probability eps.

    layernorm = false,

    init_zeros = true, -- instead of he_normal noise or whatever.
    graycode = false,
    unperturbed_trial = true, -- do a trial without any noise.
    negate_trials = true, -- try pairs of normal and negated noise directions.
    -- ^ note that this now doubles the effective trials.
    time_inputs = true, -- binary inputs of global frame count
    normalize_inputs = false,

    ars_lips = false,
    adamant = false, -- run steps through AMSgrad.
    -- `^` is used rather than math.pow, which is deprecated in Lua 5.3 and
    -- missing from Lua 5.4.
    adam_b1 = 10 ^ (-1 / 1), -- fewer trials, more momentum!
    adam_b2 = 10 ^ (-1 / 50),
    adam_eps = intmap(-1), -- focus on b1 rather than b2.
    adam_debias = true,

    cap_time = 300,
    timer_loser = 1/2,
    decrement_reward = false, -- bad idea, encourages mario to run into goombas.
}
|
2018-04-03 09:13:11 -07:00
|
|
|
|
2018-05-06 20:55:58 -07:00
|
|
|
-- Active configuration: the best-performing settings found so far, although
-- training with them is somewhat slow. Where a value is followed by a
-- commented-out number, that number is the original value.
local cfg = {
    -- output files
    log_fn = 'log.csv', -- set to nil to turn logging off.
    params_fn = nil, -- nil means the name is generated from the param count.

    -- trial counts
    epoch_trials = 24, --20,
    epoch_top_trials = 18, --10,

    -- optimizer settings
    learning_rate = 2.0, --1.0
    weight_decay = 0.0002, --0.0004,
    deviation = 0.05, --0.1,

    -- overrides of the common defaults
    starting_lives = 1,
    deterministic = true,
    layernorm = true,
    ars_lips = true,
}
|
|
|
|
|
2018-05-02 04:06:28 -07:00
|
|
|
-- TODO: decide what should happen when playback_mode is true
-- but unperturbed_trial is false.
|
|
|
|
|
2018-05-06 20:55:58 -07:00
|
|
|
-- Missing keys on cfg resolve in this order:
--   1. the value from common_cfg, when it is not nil;
--   2. nil for the optional file-name options;
--   3. otherwise an error, so that misspelled option names fail loudly.
setmetatable(cfg, {
    __index = function(t, n)
        local default = common_cfg[n]
        if default ~= nil then
            return default
        end

        -- these options are allowed to be absent.
        if n == 'log_fn' or n == 'params_fn' or n == 'stats_fn' then
            return nil
        end

        error("cannot use undeclared config '" .. tostring(n) .. "'", 2)
    end
})
|
|
|
|
|
2018-04-02 06:21:55 -07:00
|
|
|
-- Derived settings, computed once from the values resolved above.
do
    -- the number of top trials can never exceed the number of trials.
    local trials, top_trials = cfg.epoch_trials, cfg.epoch_top_trials
    cfg.epoch_top_trials = math.min(trials, top_trials)

    -- epsilon schedule: the start value scales with frameskip and the stop
    -- value is a tenth of it.
    local eps_start = 1.0 * cfg.frameskip / 64
    cfg.eps_start = eps_start
    cfg.eps_stop = 0.1 * eps_start
    cfg.eps_frames = 1000000

    -- the overlay and the network are mutually exclusive; playable_mode
    -- selects the overlay.
    local playable = cfg.playable_mode
    cfg.enable_overlay = playable
    cfg.enable_network = not playable
end

-- sanity check: ars_lips requires an unperturbed trial.
local lips_ok = not cfg.ars_lips or cfg.unperturbed_trial
assert(lips_ok, "cfg.unperturbed_trial must be true to use cfg.ars_lips")

return cfg
|