reduce time waiting at world screen, tweak config

This commit is contained in:
Connor Olding 2018-04-02 14:54:53 +02:00
parent 5636c7b2ed
commit 66bf689e04

View file

@ -29,40 +29,42 @@ local function globalize(t) for k, v in pairs(t) do rawset(_G, k, v) end end
local defer_prints = true local defer_prints = true
local playable_mode = false local playable_mode = false
local start_big = true local start_big = false --true
local starting_lives = 1 local starting_lives = 0 --1
-- --
local init_zeros = false -- instead of he_normal noise or whatever. local init_zeros = true -- instead of he_normal noise or whatever.
local frameskip = 4 local frameskip = 4
-- true greedy epsilon has both deterministic and det_epsilon set. -- true greedy epsilon has both deterministic and det_epsilon set.
local deterministic = false -- use argmax on outputs instead of random sampling. local deterministic = true -- use argmax on outputs instead of random sampling.
local det_epsilon = false -- take random actions with probability eps. local det_epsilon = false -- take random actions with probability eps.
local eps_start = 1.0 * frameskip / 64 local eps_start = 1.0 * frameskip / 64
local eps_stop = 0.1 * eps_start local eps_stop = 0.1 * eps_start
local eps_frames = 4000000 local eps_frames = 1000000
-- --
local epoch_trials = 15 --18 local epoch_trials = 50
local epoch_top_trials = 10 --6 -- new with ARS. local epoch_top_trials = 25 -- new with ARS.
local unperturbed_trial = true -- do a trial without any noise. local unperturbed_trial = true -- do a trial without any noise.
local negate_trials = true -- try pairs of normal and negated noise directions. local negate_trials = true -- try pairs of normal and negated noise directions.
local time_inputs = true -- binary inputs of global frame count
-- ^ note that this now doubles the effective trials. -- ^ note that this now doubles the effective trials.
local deviation = 0.025 --0.03 local deviation = 0.05 --0.075 --0.1
local function approx_cossim(dim) local function approx_cossim(dim)
return math.pow(1.521 * dim - 0.521, -0.5026) return math.pow(1.521 * dim - 0.521, -0.5026)
end end
--local learning_rate = 0.01 / approx_cossim(7051) --local learning_rate = 0.01 / approx_cossim(7051)
local learning_rate = 1.0
--local learning_rate = 0.0032 / approx_cossim(66573) --local learning_rate = 0.0032 / approx_cossim(66573)
local learning_rate = 0.0056 / approx_cossim(66573) --local learning_rate = 0.0056 / approx_cossim(66573)
local weight_decay = 1 - 0.9977 local weight_decay = 0.00032 --0.001 --0.0023
-- --
local cap_time = 200 --400 local cap_time = 200 --400
local timer_loser = 0 --1/3 local timer_loser = 1/2
local decrement_reward = false -- bad idea, encourages mario to kill himself local decrement_reward = false -- bad idea, encourages mario to kill himself
-- --
local enable_overlay = playable_mode local enable_overlay = playable_mode
local enable_network = not playable_mode local enable_network = not playable_mode
local input_size = 60 -- TODO: let the script figure this out for us. local input_size = 60 + 4 -- TODO: let the script figure this out for us.
local tile_count = 17 * 13 local tile_count = 17 * 13
local ok_routines = { local ok_routines = {
@ -321,7 +323,7 @@ end
-- https://gist.githubusercontent.com/1wErt3r/4048722/raw/59e88c0028a58c6d7b9156749230ccac647bc7d4/SMBDIS.ASM -- https://gist.githubusercontent.com/1wErt3r/4048722/raw/59e88c0028a58c6d7b9156749230ccac647bc7d4/SMBDIS.ASM
local rotation_offsets = { -- FIXME: not all of these are pixel-perfect. local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
0, -40, -- 0 0, -40, -- 0x00
6, -38, 6, -38,
15, -37, 15, -37,
22, -32, 22, -32,
@ -329,7 +331,7 @@ local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
32, -22, 32, -22,
37, -14, 37, -14,
39, -6, 39, -6,
40, 0, -- 8 40, 0, -- 0x08
38, 7, 38, 7,
37, 15, 37, 15,
33, 23, 33, 23,
@ -337,7 +339,7 @@ local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
22, 33, 22, 33,
14, 37, 14, 37,
6, 39, 6, 39,
0, 41, -- 10 0, 41, -- 0x10
-7, 40, -7, 40,
-16, 38, -16, 38,
-22, 34, -22, 34,
@ -345,7 +347,7 @@ local rotation_offsets = { -- FIXME: not all of these are pixel-perfect.
-34, 23, -34, 23,
-38, 16, -38, 16,
-40, 8, -40, 8,
-40, -0, -- 18 -40, -0, -- 0x18
-40, -6, -40, -6,
-38, -14, -38, -14,
-34, -22, -34, -22,
@ -759,6 +761,22 @@ local function learn_from_epoch()
print() print()
end end
--- Mash a single button on controller 1.
-- Writes a full button table to joypad 1 in which every button is released
-- except `button`, which is pressed only when emu.framecount() is odd.
-- Calling this once per frame therefore alternates press/release.
-- @param button one of "up", "down", "left", "right", "A", "B", "select",
--               "start". Any other value raises an error.
local function joypad_mash(button)
    local jp_mash = {
        up = false,
        down = false,
        left = false,
        right = false,
        A = false,
        B = false,
        select = false,
        start = false,
    }
    -- explicit check instead of assert(): assert() has no level argument and
    -- would build the message string on every call, even for valid buttons.
    -- level 2 attributes the error to whoever passed the bad button name.
    if jp_mash[button] ~= false then
        error("invalid button: "..tostring(button), 2)
    end
    -- pressed on odd frames, released on even frames.
    jp_mash[button] = emu.framecount() % 2 == 1
    joypad.write(1, jp_mash)
end
local function do_reset() local function do_reset()
local state = get_state() local state = get_state()
-- be a little more descriptive. -- be a little more descriptive.
@ -853,6 +871,12 @@ local function init()
emu.unpause() emu.unpause()
emu.speedmode("turbo") emu.speedmode("turbo")
while emu.framecount() < 195 do -- FIXME: don't hardcode this.
joypad_mash('start')
emu.frameadvance()
end
print(emu.framecount())
local res, err = pcall(network.load, network) local res, err = pcall(network.load, network)
if res == false then print(err) end if res == false then print(err) end
end end
@ -900,6 +924,12 @@ local function doit(dummy)
insert(extra_input, vx) insert(extra_input, vx)
insert(extra_input, vy) insert(extra_input, vy)
if time_inputs then
for i=2,5 do
insert(extra_input, band(total_frames, lshift(1, i)))
end
end
handle_enemies() handle_enemies()
handle_fireballs() handle_fireballs()
-- blocks being hit. not interactable; we don't care! -- blocks being hit. not interactable; we don't care!
@ -998,18 +1028,7 @@ while true do
while bad_states[get_state()] do while bad_states[get_state()] do
-- mash the start button until we have control. -- mash the start button until we have control.
local jp_mash = { joypad_mash('start')
up = false,
down = false,
left = false,
right = false,
A = false,
B = false,
select = false,
start = emu.framecount() % 2 == 1,
}
joypad.write(1, jp_mash)
reset = true reset = true
advance() advance()