reduce time waiting at world screen, tweak config

2018-04-02 14:54:53 +02:00 · 2018-04-02 14:54:53 +02:00 · 66bf689e04
commit 66bf689e04
parent 5636c7b2ed
1 changed files with 47 additions and 28 deletions
--- a/main.lua
+++ b/main.lua
@ -29,40 +29,42 @@ local function globalize(t) for k, v in pairs(t) do rawset(_G, k, v) end end
 local defer_prints = true
 local playable_mode = false
-local start_big = true
+local start_big = false --true
-local starting_lives = 1
+local starting_lives = 0 --1
 --
-local init_zeros = false -- instead of he_normal noise or whatever.
+local init_zeros = true -- instead of he_normal noise or whatever.
 local frameskip = 4
 -- true greedy epsilon has both deterministic and det_epsilon set.
-local deterministic = false -- use argmax on outputs instead of random sampling.
+local deterministic = true -- use argmax on outputs instead of random sampling.
 local det_epsilon = false -- take random actions with probability eps.
 local eps_start = 1.0 * frameskip / 64
 local eps_stop  = 0.1 * eps_start
-local eps_frames = 4000000
+local eps_frames = 1000000
 --
-local epoch_trials = 15 --18
+local epoch_trials = 50
-local epoch_top_trials = 10 --6 -- new with ARS.
+local epoch_top_trials = 25 -- new with ARS.
 local unperturbed_trial = true -- do a trial without any noise.
 local negate_trials = true -- try pairs of normal and negated noise directions.
 local time_inputs = true -- binary inputs of global frame count
 -- ^ note that this now doubles the effective trials.
-local deviation = 0.025 --0.03
+local deviation = 0.05 --0.075 --0.1
 -- Evaluates the power law (1.521 * dim - 0.521) ^ -0.5026 for a given
 -- dimension count `dim` and returns the result as a number.
 -- NOTE(review): judging by the name, this presumably approximates an
 -- expected cosine similarity in `dim` dimensions -- confirm the derivation.
 local function approx_cossim(dim)
    local base = 1.521 * dim - 0.521
    local exponent = -0.5026
    return math.pow(base, exponent)
 end
 --local learning_rate = 0.01 / approx_cossim(7051)
 local learning_rate = 1.0
 --local learning_rate = 0.0032 / approx_cossim(66573)
-local learning_rate = 0.0056 / approx_cossim(66573)
+--local learning_rate = 0.0056 / approx_cossim(66573)
-local weight_decay = 1 - 0.9977
+local weight_decay = 0.00032 --0.001 --0.0023
 --
 local cap_time = 200 --400
-local timer_loser = 0 --1/3
+local timer_loser = 1/2
 local decrement_reward = false -- bad idea, encourages mario to kill himself
 --
 local enable_overlay = playable_mode
 local enable_network = not playable_mode
-local input_size = 60 -- TODO: let the script figure this out for us.
+local input_size = 60 + 4 -- TODO: let the script figure this out for us.
 local tile_count = 17 * 13
 local ok_routines = {
@ -321,7 +323,7 @@ end
 -- https://gist.githubusercontent.com/1wErt3r/4048722/raw/59e88c0028a58c6d7b9156749230ccac647bc7d4/SMBDIS.ASM
 local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
-    0, -40, -- 0
+    0, -40, -- 0x00
    6, -38,
    15, -37,
    22, -32,
@ -329,7 +331,7 @@ local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
    32, -22,
    37, -14,
    39, -6,
-    40, 0, -- 8
+    40, 0, -- 0x08
    38, 7,
    37, 15,
    33, 23,
@ -337,7 +339,7 @@ local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
    22, 33,
    14, 37,
    6, 39,
-    0, 41, -- 10
+    0, 41, -- 0x10
    -7, 40,
    -16, 38,
    -22, 34,
@ -345,7 +347,7 @@ local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
    -34, 23,
    -38, 16,
    -40, 8,
-    -40, -0, -- 18
+    -40, -0, -- 0x18
    -40, -6,
    -38, -14,
    -34, -22,
@ -759,6 +761,22 @@ local function learn_from_epoch()
    print()
 end
 -- Builds a joypad state with every button released except `button`, which
 -- is set pressed on odd values of emu.framecount() and released on even
 -- ones, then hands that state to joypad.write(1, ...).
 -- button: one of "up", "down", "left", "right", "A", "B", "select", "start".
 -- Raises an "invalid button: ..." error for any other value (including nil).
 local function joypad_mash(button)
    local jp_mash = {
        up = false,
        down = false,
        left = false,
        right = false,
        A = false,
        B = false,
        select = false,
        start = false,
    }
    -- every valid name is pre-seeded with false, so any other key reads nil.
    if jp_mash[button] ~= false then
        -- assert() ignores arguments past the message, so the level that was
        -- passed to it never took effect; error() honours it. level 2 blames
        -- the caller, which is where the bad button name came from.
        error("invalid button: "..tostring(button), 2)
    end
    jp_mash[button] = emu.framecount() % 2 == 1
    joypad.write(1, jp_mash)
 end
 local function do_reset()
    local state = get_state()
    -- be a little more descriptive.
@ -853,6 +871,12 @@ local function init()
    emu.unpause()
    emu.speedmode("turbo")
    while emu.framecount() < 195 do  -- FIXME: don't hardcode this.
        joypad_mash('start')
        emu.frameadvance()
    end
    print(emu.framecount())
    local res, err = pcall(network.load, network)
    if res == false then print(err) end
 end
@ -900,6 +924,12 @@ local function doit(dummy)
    insert(extra_input, vx)
    insert(extra_input, vy)
    if time_inputs then
        for i=2,5 do
            insert(extra_input, band(total_frames, lshift(1, i)))
        end
    end
    handle_enemies()
    handle_fireballs()
    -- blocks being hit. not interactable; we don't care!
@ -998,18 +1028,7 @@ while true do
    while bad_states[get_state()] do
        -- mash the start button until we have control.
-        local jp_mash = {
+        joypad_mash('start')
            up = false,
            down = false,
            left = false,
            right = false,
            A = false,
            B = false,
            select = false,
            start = emu.framecount() % 2 == 1,
        }
        joypad.write(1, jp_mash)
        reset = true
        advance()