diff --git a/ars.lua b/ars.lua
index 52e5bd1..9398afb 100644
--- a/ars.lua
+++ b/ars.lua
@@ -55,6 +55,7 @@ local function collect_best_indices(scored, top, antithetic)
 end
 
 local function kinda_lipschitz(dir, pos, neg, mid)
+    --[[
     -- based on the local lipschitz constant of a quadratic curve
     -- drawn through the 3 sampled points: positive, negative, and unperturbed.
     -- it kinda helps? there's probably a better function to base it around.
@@ -64,6 +65,10 @@ local function kinda_lipschitz(dir, pos, neg, mid)
     local l0 = abs(3 * c1 + c0)
     local l1 = abs(c1 + 3 * c0)
     return max(l0, l1) / (2 * dev)
+    --]]
+    -- based on a piece-wise linear function of the 3 sampled points.
+    local _, dev = calc_mean_dev(dir)
+    return max(abs(pos - mid), abs(neg - mid)) / dev
 end
 
 function Ars:init(dims, popsize, poptop, base_rate, sigma, antithetic,
diff --git a/config.lua b/config.lua
index 63746f6..ee338f3 100644
--- a/config.lua
+++ b/config.lua
@@ -34,6 +34,7 @@ local defaults = {
 
     -- network evaluation (sampling joypad):
     frameskip = 4,
+    prob_frameskip = 0.0, -- probability of randomly skipping a decision frame.
     -- true greedy epsilon has both deterministic and det_epsilon set.
     deterministic = false, -- use argmax on outputs instead of random sampling.
     det_epsilon = false, -- take random actions with probability eps.
diff --git a/main.lua b/main.lua
index 5b9078d..61df8eb 100644
--- a/main.lua
+++ b/main.lua
@@ -24,6 +24,7 @@ local trial_frames = 0
 local total_frames = 0
 local lagless_count = 0
 local decisions_made = 0
+local last_decision_frame = -1
 
 local force_start = false
 local force_start_old = false
@@ -676,6 +677,7 @@ while true do
     if reset then
         do_reset()
         lagless_count = 0
+        last_decision_frame = -1
     end
 
     if not cfg.enable_network then
@@ -692,8 +694,12 @@ while true do
         game.W(0x75A, 1)
     end
 
-    local doot = jp == nil or lagless_count % cfg.frameskip == 0
+    local delta = lagless_count - last_decision_frame
+    -- a nil jp always forces a decision; only due decisions may be randomly skipped.
+    local due = delta >= cfg.frameskip and random() >= cfg.prob_frameskip
+    local doot = jp == nil or due
     doit(not doot)
+    if doot then last_decision_frame = lagless_count end
 
     -- jp might still be nil if we're not ingame or we're not playing.
     if jp ~= nil then joypad.write(1, jp) end
diff --git a/presets.lua b/presets.lua
index 37d9965..bb87166 100644
--- a/presets.lua
+++ b/presets.lua
@@ -142,6 +142,68 @@ make_preset{
     momentum = 0.5,
 }
 
+make_preset{
+    name = 'ars-vanilla',
+    parent = 'ars',
+}
+
+make_preset{
+    name = 'ars-skip',
+    parent = 'ars',
+
+    frameskip = 1,
+    prob_frameskip = 0.25,
+}
+
+make_preset{
+    name = 'ars-lips',
+    parent = 'ars',
+
+    ars_lips = true,
+    momentum = 0.5,
+    param_rate = 1.0,
+}
+
+make_preset{
+    name = 'ars-big',
+    parent = 'ars',
+
+    epoch_top_trials = 75,
+    epoch_trials = 100,
+    momentum = 0.5,
+    param_rate = 1.0,
+
+    --graycode = true,
+}
+
+make_preset{
+    name = 'ars-huge',
+    parent = 'big-scroll-hidden',
+
+    deterministic = true,
+    deviation = 0.01,
+    epoch_top_trials = 75,
+    epoch_trials = 100,
+    es = 'ars',
+    momentum = 0.5,
+    param_decay = 0.0138,
+    param_rate = 0.5,
+}
+
+make_preset{
+    name = 'ars-stupid',
+    parent = 'big-scroll-reduced',
+
+    es = 'ars',
+    epoch_top_trials = 4,
+    deterministic = false,
+    deviation = 0.2,
+    epoch_trials = 4,
+    param_rate = 0.1,
+    param_decay = 0.003,
+    momentum = 0.99,
+}
+
 make_preset{
     name = 'play',