diff --git a/main.lua b/main.lua
index 3d377cb..1ace9bf 100644
--- a/main.lua
+++ b/main.lua
@@ -29,40 +29,42 @@ local function globalize(t) for k, v in pairs(t) do rawset(_G, k, v) end end
 
 local defer_prints = true
 local playable_mode = false
-local start_big = true
-local starting_lives = 1
+local start_big = false --true
+local starting_lives = 0 --1
 --
-local init_zeros = false -- instead of he_normal noise or whatever.
+local init_zeros = true -- instead of he_normal noise or whatever.
 local frameskip = 4
 -- true greedy epsilon has both deterministic and det_epsilon set.
-local deterministic = false -- use argmax on outputs instead of random sampling.
+local deterministic = true -- use argmax on outputs instead of random sampling.
 local det_epsilon = false -- take random actions with probability eps.
 local eps_start = 1.0 * frameskip / 64
 local eps_stop = 0.1 * eps_start
-local eps_frames = 4000000
+local eps_frames = 1000000
 --
-local epoch_trials = 15 --18
-local epoch_top_trials = 10 --6 -- new with ARS.
+local epoch_trials = 50
+local epoch_top_trials = 25 -- new with ARS.
 local unperturbed_trial = true -- do a trial without any noise.
 local negate_trials = true -- try pairs of normal and negated noise directions.
 -- ^ note that this now doubles the effective trials.
+local time_inputs = true -- binary inputs of global frame count (bits 2..5; 4 extra inputs).
-local deviation = 0.025 --0.03
+local deviation = 0.05 --0.075 --0.1
 local function approx_cossim(dim)
     return math.pow(1.521 * dim - 0.521, -0.5026)
 end
 --local learning_rate = 0.01 / approx_cossim(7051)
+local learning_rate = 1.0
 --local learning_rate = 0.0032 / approx_cossim(66573)
-local learning_rate = 0.0056 / approx_cossim(66573)
-local weight_decay = 1 - 0.9977
+--local learning_rate = 0.0056 / approx_cossim(66573)
+local weight_decay = 0.00032 --0.001 --0.0023
 --
 local cap_time = 200 --400
-local timer_loser = 0 --1/3
+local timer_loser = 1/2
 local decrement_reward = false -- bad idea, encourages mario to kill himself
 --
 local enable_overlay = playable_mode
 local enable_network = not playable_mode
 
-local input_size = 60 -- TODO: let the script figure this out for us.
+local input_size = 60 + 4 -- TODO: let the script figure this out for us.
 local tile_count = 17 * 13
 
 local ok_routines = {
@@ -321,7 +323,7 @@ end
 
 -- https://gist.githubusercontent.com/1wErt3r/4048722/raw/59e88c0028a58c6d7b9156749230ccac647bc7d4/SMBDIS.ASM
 local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
-    0, -40, -- 0
+    0, -40, -- 0x00
     6, -38,
     15, -37,
     22, -32,
@@ -329,7 +331,7 @@ local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
     32, -22,
     37, -14,
     39, -6,
-    40, 0, -- 8
+    40, 0, -- 0x08
     38, 7,
     37, 15,
     33, 23,
@@ -337,7 +339,7 @@ local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
     22, 33,
     14, 37,
     6, 39,
-    0, 41, -- 10
+    0, 41, -- 0x10
     -7, 40,
     -16, 38,
     -22, 34,
@@ -345,7 +347,7 @@ local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
     -34, 23,
     -38, 16,
     -40, 8,
-    -40, -0, -- 18
+    -40, -0, -- 0x18
     -40, -6,
     -38, -14,
     -34, -22,
@@ -759,6 +761,22 @@ local function learn_from_epoch()
     print()
 end
 
+local function joypad_mash(button) -- hold `button` on odd frames only; every other button is written as false.
+    local jp_mash = {
+        up = false,
+        down = false,
+        left = false,
+        right = false,
+        A = false,
+        B = false,
+        select = false,
+        start = false,
+    }
+    if jp_mash[button] ~= false then error("invalid button: "..tostring(button), 2) end -- level 2: blame the caller.
+    jp_mash[button] = emu.framecount() % 2 == 1 -- true on odd frame counts, so the button toggles every frame.
+    joypad.write(1, jp_mash)
+end
+
 local function do_reset()
     local state = get_state()
     -- be a little more descriptive.
@@ -853,6 +871,12 @@ local function init()
     emu.unpause()
     emu.speedmode("turbo")
 
+    while emu.framecount() < 195 do -- FIXME: don't hardcode this.
+        joypad_mash('start')
+        emu.frameadvance()
+    end
+    print(emu.framecount())
+
     local res, err = pcall(network.load, network)
     if res == false then print(err) end
 end
@@ -900,6 +924,12 @@ local function doit(dummy)
     insert(extra_input, vx)
     insert(extra_input, vy)
 
+    if time_inputs then
+        for i=2,5 do
+            insert(extra_input, band(total_frames, lshift(1, i))) -- NOTE(review): if band/lshift are the usual bit ops, this is 0 or 2^i rather than 0/1 -- confirm.
+        end
+    end
+
     handle_enemies()
     handle_fireballs()
     -- blocks being hit. not interactable; we don't care!
@@ -998,18 +1028,7 @@ while true do
 
     while bad_states[get_state()] do
         -- mash the start button until we have control.
-        local jp_mash = {
-            up = false,
-            down = false,
-            left = false,
-            right = false,
-            A = false,
-            B = false,
-            select = false,
-            start = emu.framecount() % 2 == 1,
-        }
-        joypad.write(1, jp_mash)
-
+        joypad_mash('start')
         reset = true
         advance()
 