smbot/config.lua

--- Evaluate the power-law curve (1.521 * dim - 0.521) ^ -0.5026.
-- NOTE(review): judging by the name, this looks like an empirical fit of
-- cosine similarity as a function of dimensionality -- confirm the origin of
-- the constants. Nothing else in this file calls it.
-- Uses the `^` operator rather than math.pow, which was deprecated in Lua 5.3
-- and is absent from Lua 5.4.
-- @param dim number the dimension to evaluate the curve at.
-- @return number the value of the fitted curve.
local function approx_cossim(dim)
    return (1.521 * dim - 0.521) ^ (-0.5026)
end

--- Map a step count onto a half-decade (powers of sqrt(10)) scale.
-- Every two steps change the result by a factor of ten:
--    0 -> 1.0
--   -1 -> 0.316
--   -2 -> 0.1
--   -3 -> 0.0316
--   -4 -> 0.01
--   etc.
-- Uses the `^` operator rather than math.pow, which was deprecated in Lua 5.3
-- and is absent from Lua 5.4.
-- @param x number the step count (the examples above use integers <= 0).
-- @return number 10 raised to the power x / 2.
local function intmap(x)
    return 10 ^ (x / 2)
end

-- Default values for every declared setting. Keys that are missing from `cfg`
-- are looked up here through the __index metamethod attached to `cfg` below.
-- Exponents are written with the `^` operator instead of math.pow, because
-- math.pow was deprecated in Lua 5.3 and is absent from Lua 5.4.
local common_cfg = {
    playable_mode = false,
    playback_mode = false,
    start_big = false,
    starting_lives = 0,

    frameskip = 4,
    -- true greedy epsilon has both deterministic and det_epsilon set.
    deterministic = false, -- use argmax on outputs instead of random sampling.
    det_epsilon = false, -- take random actions with probability eps.
    layernorm = false,

    init_zeros = true, -- instead of he_normal noise or whatever.
    graycode = false,
    unperturbed_trial = true, -- do a trial without any noise.
    negate_trials = true, -- try pairs of normal and negated noise directions.
    -- AKA antithetic sampling. note that this doubles the number of trials.
    time_inputs = true, -- binary inputs of global frame count
    normalize_inputs = false,

    learning_rate = 1.0,
    mean_adapt = 1.0, -- for xNES

    es = 'ars',
    ars_lips = false,
    adamant = false, -- run steps through AMSgrad.
    adam_b1 = 10 ^ (-1 / 1), -- fewer trials, more momentum!
    adam_b2 = 10 ^ (-1 / 50),
    adam_eps = intmap(-1), -- focus on b1 rather than b2.
    adam_debias = true,

    cap_time = 300,
    timer_loser = 1/2,
    decrement_reward = false, -- bad idea, encourages mario to run into goombas.
    score_multiplier = 1, -- how much the ingame score influences our rewards.

    starting_world = 1, -- set to 0 for random!
    starting_level = 1, -- set to 0 for random!
}

-- Settings for this particular run. Any key that is not listed here is
-- resolved against common_cfg by the metatable attached to this table below.
local cfg = {
    -- output files.
    log_fn = 'logs-xnes.csv',
    params_fn = 'params-xnes.txt',

    -- evolution strategy and its hyperparameters.
    es = 'xnes',
    learning_rate = 0.14,
    deviation = 1.0,
    weight_decay = 0.0,

    -- trials per epoch.
    epoch_trials = 50,
    epoch_top_trials = 9999, -- clamped to epoch_trials further down.
    negate_trials = false,

    -- action selection.
    deterministic = true,

    -- game setup.
    starting_world = 1,
    starting_level = 1,
    starting_lives = 1,
    cap_time = 300,

    -- reward shaping.
    decrement_reward = true,
    score_multiplier = 5,
}

-- TODO: decide (and enforce) what should happen when playback_mode is true
-- but unperturbed_trial is false.

-- Fall back to common_cfg for keys that cfg does not define itself, and raise
-- an error on any key that was never declared so typos are caught early.
do
    -- keys that may legitimately be absent; reading them yields nil.
    local optional_keys = {
        log_fn = true,
        params_fn = true,
        stats_fn = true,
    }

    setmetatable(cfg, {
        __index = function(_, key)
            local default = common_cfg[key]
            if default ~= nil then
                return default
            end
            if optional_keys[key] then
                return nil
            end
            -- level 2 attributes the error to the code that read the key.
            local name = tostring(key)
            error("cannot use undeclared config '" .. name .. "'", 2)
        end
    })
end

-- Settings derived from the values chosen above.
do
    -- the number of top trials can never exceed the number of trials.
    local trials, top_trials = cfg.epoch_trials, cfg.epoch_top_trials
    cfg.epoch_top_trials = math.min(trials, top_trials)

    -- epsilon values scale with frameskip; eps_stop is a tenth of eps_start.
    -- NOTE(review): presumably eps is annealed from eps_start to eps_stop
    -- over eps_frames frames -- confirm against the code that consumes these.
    local eps_start = 1.0 * cfg.frameskip / 64
    cfg.eps_start = eps_start
    cfg.eps_stop = 0.1 * eps_start
    cfg.eps_frames = 1000000

    -- the overlay and the network are mutually exclusive, keyed on playable_mode.
    local playable = cfg.playable_mode
    cfg.enable_overlay = playable
    cfg.enable_network = not playable
end

-- Sanity checks: refuse to load with an inconsistent combination of options.
if cfg.ars_lips then
    -- ars_lips needs both the unperturbed trial and the negated trials.
    assert(cfg.unperturbed_trial,
           "cfg.unperturbed_trial must be true to use cfg.ars_lips")
    assert(cfg.negate_trials,
           "cfg.negate_trials must be true to use cfg.ars_lips")
end

assert(not cfg.adamant, "cfg.adamant not yet re-implemented")

-- the finished config table is this module's return value.
return cfg