-- 49 lines, 1.6 KiB, Lua
--- Evaluate the power-law fit (1.521 * dim - 0.521) ^ -0.5026.
-- Only referenced by the commented-out learning_rate alternatives in the
-- settings table, where it is used as a divisor.
-- NOTE(review): judging by the name, this presumably approximates a cosine
-- similarity as a function of dimensionality; confirm with the author.
-- @tparam number dim input to the fit; dim = 1 yields approximately 1.
-- @treturn number value of the fit at dim.
local function approx_cossim(dim)
    -- The `^` operator is used instead of math.pow, which is deprecated in
    -- Lua 5.3 and missing from Lua 5.4 unless built with compatibility on.
    return (1.521 * dim - 0.521) ^ (-0.5026)
end

-- Settings table for this module.
-- A value in a trailing `--` comment (e.g. `--400`) is an alternative left by
-- the author, presumably a previously tried setting.
local cfg = {
    defer_prints = true,

    -- playable_mode also drives enable_overlay / enable_network, which are
    -- derived from it further down in this file.
    playable_mode = false,
    start_big = false, --true
    starting_lives = 0, --1

    init_zeros = true, -- instead of he_normal noise or whatever.
    frameskip = 4,
    -- true greedy epsilon has both deterministic and det_epsilon set.
    deterministic = true, -- use argmax on outputs instead of random sampling.
    det_epsilon = false, -- take random actions with probability eps.

    epoch_trials = 50,
    epoch_top_trials = 25, -- new with ARS.
    unperturbed_trial = true, -- do a trial without any noise.
    -- try pairs of normal and negated noise directions.
    -- note that this now doubles the effective trials.
    negate_trials = true,
    time_inputs = true, -- binary inputs of global frame count
    deviation = 0.05, --0.075 --0.1
    --learning_rate = 0.01 / approx_cossim(7051)
    learning_rate = 1.0,
    --learning_rate = 0.0032 / approx_cossim(66573)
    --learning_rate = 0.0056 / approx_cossim(66573)
    weight_decay = 0.00032, --0.001 --0.0023

    cap_time = 200, --400
    timer_loser = 1/2,
    decrement_reward = false, -- bad idea, encourages mario to kill himself
}

-- Derived settings: values computed from the literals declared in cfg.

-- Clamp so the number of top trials never exceeds the number of trials.
cfg.epoch_top_trials = math.min(cfg.epoch_trials, cfg.epoch_top_trials)

-- eps_start scales with frameskip; eps_stop is one tenth of eps_start.
-- NOTE(review): presumably the start/stop values and length (in frames) of
-- the epsilon schedule used with det_epsilon; confirm against the consumer.
cfg.eps_start = 1.0 * cfg.frameskip / 64
cfg.eps_stop = 0.1 * cfg.eps_start
cfg.eps_frames = 1000000

-- The overlay is enabled exactly when playable_mode is set, and the network
-- exactly when it is not.
cfg.enable_overlay = cfg.playable_mode
cfg.enable_network = not cfg.playable_mode

-- Export cfg with a strict lookup guard: __index only fires for keys that are
-- absent from the table, so reading an undeclared setting raises an error
-- instead of silently yielding nil. Level 2 attributes the error to the code
-- that performed the lookup rather than to this handler.
return setmetatable(cfg, {
    __index = function(_, key)
        error("cannot use undeclared config '" .. tostring(key) .. "'", 2)
    end
})