diff --git a/main.lua b/main.lua index c6aa93e..f6db737 100644 --- a/main.lua +++ b/main.lua @@ -265,7 +265,7 @@ local function normalize_wrt(x, s, out) return out end --- game-agnostic stuff (i.e. the network itself) +-- network parameters. package.loaded['nn'] = nil -- DEBUG local nn = require("nn") @@ -302,9 +302,8 @@ end -- and here we go with the game stuff. ---[[ -https://gist.githubusercontent.com/1wErt3r/4048722/raw/59e88c0028a58c6d7b9156749230ccac647bc7d4/SMBDIS.ASM ---]] +-- disassembly used for reference: +-- https://gist.githubusercontent.com/1wErt3r/4048722/raw/59e88c0028a58c6d7b9156749230ccac647bc7d4/SMBDIS.ASM local rotation_offsets = { -- FIXME: not all of these are pixel-perfect. 0, -40, -- 0 @@ -553,6 +552,8 @@ local function handle_tiles() end end +-- learning and evaluation. + local function prepare_epoch() print('preparing epoch '..tostring(epoch_i)..'.') base_params = network:collect() @@ -708,7 +709,6 @@ local function do_reset() screen_scroll_delta = 0 emu.frameadvance() -- prevents emulator from quirking up. - --print() load_next_trial() reset = false @@ -728,16 +728,13 @@ local function init() if res == false then print(err) end end -init() - -local dummy_softmax_values = {0, 0} - local function doit(dummy) local ingame_paused = get_state() == "paused" -- every few frames mario stands still, forcibly decrease the timer. -- this includes having the game paused. -- TODO: more robust. doesn't detect moonwalking against a wall. + -- well, that shouldn't happen anymore now that i've disabled left+right. local timer = get_timer() if ingame_paused or random() > 1 - timer_loser and R(0x1D) == 0 and R(0x57) == 0 then timer = timer - 1 @@ -764,8 +761,7 @@ local function doit(dummy) empty(tile_input) empty(extra_input) - -- player - -- TODO: check if mario is playable. + -- TODO: check if mario is in a playable state. local x, y = getxy(0, 0x86, 0xCE, 0x6D, 0xB5) local powerup = R(0x754) local status = R(0x756) @@ -800,10 +796,7 @@ local function doit(dummy) if not ingame_paused then reward = reward + reward_delta end - --gui.text(4, 12, ("%02X"):format(#sprite_input), '#FFFFFF', '#0000003F') - --gui.text(4, 22, ("%02X"):format(#tile_input), '#FFFFFF', '#0000003F') --gui.text(72, 12, ("%+4i"):format(reward_delta), '#FFFFFF', '#0000003F') - --gui.text(112, 12, ("%+4i"):format(reward), '#FFFFFF', '#0000003F') gui.text(96, 16, ("%+4i"):format(reward), '#FFFFFF', '#0000003F') if get_state() == 'dead' and state_old ~= 'dead' then @@ -811,6 +804,7 @@ local function doit(dummy) if R(0x75A, 0) == 0 then reset = true end end if get_state() == 'lose' then + -- this shouldn't happen if we catch the deaths as above. print("ran out of lives.") reset = true end @@ -867,6 +861,8 @@ local function doit(dummy) score_old = get_score() end +init() + while true do gui.text(4, 12, get_state(), '#FFFFFF', '#0000003F')