smbot/config.lua

--- Evaluate the power-law curve (1.521 * dim - 0.521) ^ -0.5026.
-- NOTE(review): judging by the name, this is presumably an empirical fit
-- for a cosine similarity as a function of dimensionality -- confirm.
-- Nothing in the visible part of this file calls it.
-- Uses the `^` operator rather than math.pow, which was deprecated in
-- Lua 5.3 and is missing from default Lua 5.4 builds.
-- @tparam number dim input to the curve; 1 yields (approximately) 1.0.
-- @treturn number value of the curve at `dim`.
local function approx_cossim(dim)
    return (1.521 * dim - 0.521) ^ -0.5026
end

--- Map a number onto a half-decade logarithmic scale: 10 ^ (x / 2).
-- Each step of 1 in `x` scales the result by sqrt(10); each step of 2
-- scales it by exactly 10:
--    0 -> 1.0
--   -1 -> 0.316
--   -2 -> 0.1
--   -3 -> 0.0316
--   -4 -> 0.01
--   etc.
-- Uses the `^` operator rather than math.pow, which was deprecated in
-- Lua 5.3 and is missing from default Lua 5.4 builds.
-- @tparam number x exponent in half-decades.
-- @treturn number 10 raised to the power x / 2.
local function intmap(x)
    return 10 ^ (x / 2)
end

-- Settings table returned by this module.
-- Values written as intmap(x) evaluate to 10 ^ (x / 2); the resulting
-- numbers are spelled out in the trailing comments.
-- Powers are written with the `^` operator instead of math.pow, which was
-- deprecated in Lua 5.3 and is missing from default Lua 5.4 builds.
local cfg = {
    log_fn = 'log.csv', -- can be nil to disable logging.

    defer_prints = true,

    playable_mode = false,
    start_big = false, --true
    starting_lives = 0, --1

    init_zeros = true, -- instead of he_normal noise or whatever.
    frameskip = 4,
    -- true greedy epsilon has both deterministic and det_epsilon set.
    deterministic = false, -- use argmax on outputs instead of random sampling.
    det_epsilon = false, -- take random actions with probability eps.

    graycode = false,
    epoch_trials = 64 * (7/8), -- = 56
    epoch_top_trials = 40 * (7/8), -- = 35. new with ARS.
    unperturbed_trial = true, -- do a trial without any noise.
    negate_trials = true, -- try pairs of normal and negated noise directions.
    time_inputs = true, -- binary inputs of global frame count
    -- ^ note that this now doubles the effective trials.
    -- NOTE(review): the remark above presumably refers to negate_trials
    -- rather than time_inputs -- confirm.
    deviation = intmap(-3), -- ~0.0316
    learning_rate = intmap(-4), -- 0.01
    weight_decay = intmap(-6), -- 0.001

    adamant = true,  -- run steps through AMSgrad.
    adam_b1 = 10 ^ (-1 / 15), -- ~0.858
    adam_b2 = 10 ^ (-1 / 100), -- ~0.977
    adam_eps = intmap(-8), -- 0.0001
    adam_debias = false,

    cap_time = 222, --400
    timer_loser = 1/2,
    decrement_reward = false, -- bad idea, encourages mario to kill himself

    playback_mode = false,
}

-- TODO: work out what happens when playback_mode is true
-- but unperturbed_trial is false.

-- Derived settings: everything below is computed from the fields that were
-- filled in by the table constructor.

-- the number of top trials can never exceed the number of trials.
cfg.epoch_top_trials = math.min(cfg.epoch_trials, cfg.epoch_top_trials)

do
    -- eps_start scales with frameskip; eps_stop is one tenth of it.
    local eps_initial = 1.0 * cfg.frameskip / 64
    cfg.eps_start  = eps_initial
    cfg.eps_stop   = 0.1 * eps_initial
    cfg.eps_frames = 1000000
end

do
    -- the overlay and the network are mutually exclusive,
    -- selected by playable_mode.
    local playable = cfg.playable_mode
    cfg.enable_overlay = playable
    cfg.enable_network = not playable
end

-- Settings that are declared but may legitimately be nil.
-- A nil-valued field does not exist in the table, so reading it reaches
-- __index below; without this whitelist, setting log_fn to nil (which its
-- comment documents as the way to disable logging) would raise an error
-- instead of reading back as nil.
local optional_keys = {
    log_fn = true,
}

-- Return the config with strict lookups: reading any key that is neither
-- present in the table nor listed in optional_keys raises an error that is
-- attributed to the caller (level 2), so typos in config names fail loudly.
return setmetatable(cfg, {
    __index = function(t, n)
        if optional_keys[n] then
            return nil
        end
        error("cannot use undeclared config '" .. tostring(n) .. "'", 2)
    end
})
refactor config vars to their own files 2018-04-02 06:21:55 -07:00			`local function approx_cossim(dim)`
			`return math.pow(1.521 * dim - 0.521, -0.5026)`
			`end`

add AMSgrad optimizer and logging 2018-05-03 06:33:17 -07:00			`local function intmap(x)`
			`-- 0 -> 1.0`
			`-- -1 -> 0.316`
			`-- -2 -> 0.1`
			`-- -3 -> 0.0316`
			`-- -4 -> 0.01`
			`-- etc.`
			`return math.pow(10, x / 2)`
			`end`

refactor config vars to their own files 2018-04-02 06:21:55 -07:00			`local cfg = {`
add AMSgrad optimizer and logging 2018-05-03 06:33:17 -07:00			`log_fn = 'log.csv', -- can be nil to disable logging.`

refactor config vars to their own files 2018-04-02 06:21:55 -07:00			`defer_prints = true,`

			`playable_mode = false,`
			`start_big = false, --true`
			`starting_lives = 0, --1`
add graycode-like distribution option 2018-04-02 07:29:12 -07:00
refactor config vars to their own files 2018-04-02 06:21:55 -07:00			`init_zeros = true, -- instead of he_normal noise or whatever.`
			`frameskip = 4,`
			`-- true greedy epsilon has both deterministic and det_epsilon set.`
tweaks 2018-05-02 04:06:28 -07:00			`deterministic = false, -- use argmax on outputs instead of random sampling.`
refactor config vars to their own files 2018-04-02 06:21:55 -07:00			`det_epsilon = false, -- take random actions with probability eps.`
add graycode-like distribution option 2018-04-02 07:29:12 -07:00
add playback_mode 2018-04-03 09:13:11 -07:00			`graycode = false,`
add AMSgrad optimizer and logging 2018-05-03 06:33:17 -07:00			`epoch_trials = 64 * (7/8),`
			`epoch_top_trials = 40 * (7/8), -- new with ARS.`
			`unperturbed_trial = true, -- do a trial without any noise.`
refactor config vars to their own files 2018-04-02 06:21:55 -07:00			`negate_trials = true, -- try pairs of normal and negated noise directions.`
			`time_inputs = true, -- binary inputs of global frame count`
			`-- ^ note that this now doubles the effective trials.`
add AMSgrad optimizer and logging 2018-05-03 06:33:17 -07:00			`deviation = intmap(-3),`
			`learning_rate = intmap(-4),`
			`weight_decay = intmap(-6),`

			`adamant = true, -- run steps through AMSgrad.`
			`adam_b1 = math.pow(10, -1 / 15),`
			`adam_b2 = math.pow(10, -1 / 100),`
			`adam_eps = intmap(-8),`
			`adam_debias = false,`

			`cap_time = 222, --400`
refactor config vars to their own files 2018-04-02 06:21:55 -07:00			`timer_loser = 1/2,`
			`decrement_reward = false, -- bad idea, encourages mario to kill himself`
add playback_mode 2018-04-03 09:13:11 -07:00
			`playback_mode = false,`
refactor config vars to their own files 2018-04-02 06:21:55 -07:00			`}`

tweaks 2018-05-02 04:06:28 -07:00			`-- TODO: so, uhh..`
			`-- what happens when playback_mode is true but unperturbed_trial is false?`

refactor config vars to their own files 2018-04-02 06:21:55 -07:00			`cfg.epoch_top_trials = math.min(cfg.epoch_trials, cfg.epoch_top_trials)`

			`cfg.eps_start = 1.0 * cfg.frameskip / 64`
			`cfg.eps_stop = 0.1 * cfg.eps_start`
			`cfg.eps_frames = 1000000`
			`cfg.enable_overlay = cfg.playable_mode`
			`cfg.enable_network = not cfg.playable_mode`

			`return setmetatable(cfg, {`
			`__index = function(t, n)`
			`error("cannot use undeclared config '" .. tostring(n) .. "'", 2)`
			`end`
			`})`