add graycode-like distribution option
This commit is contained in:
parent
6a01f609a9
commit
b453438055
4 changed files with 37 additions and 6 deletions
|
@ -8,13 +8,14 @@ local cfg = {
|
||||||
playable_mode = false,
|
playable_mode = false,
|
||||||
start_big = false, --true
|
start_big = false, --true
|
||||||
starting_lives = 0, --1
|
starting_lives = 0, --1
|
||||||
--
|
|
||||||
init_zeros = true, -- instead of he_normal noise or whatever.
|
init_zeros = true, -- instead of he_normal noise or whatever.
|
||||||
frameskip = 4,
|
frameskip = 4,
|
||||||
-- true greedy epsilon has both deterministic and det_epsilon set.
|
-- true greedy epsilon has both deterministic and det_epsilon set.
|
||||||
deterministic = true, -- use argmax on outputs instead of random sampling.
|
deterministic = true, -- use argmax on outputs instead of random sampling.
|
||||||
det_epsilon = false, -- take random actions with probability eps.
|
det_epsilon = false, -- take random actions with probability eps.
|
||||||
--
|
|
||||||
|
graycode = true,
|
||||||
epoch_trials = 50,
|
epoch_trials = 50,
|
||||||
epoch_top_trials = 25, -- new with ARS.
|
epoch_top_trials = 25, -- new with ARS.
|
||||||
unperturbed_trial = true, -- do a trial without any noise.
|
unperturbed_trial = true, -- do a trial without any noise.
|
||||||
|
@ -27,7 +28,7 @@ local cfg = {
|
||||||
--learning_rate = 0.0032 / approx_cossim(66573)
|
--learning_rate = 0.0032 / approx_cossim(66573)
|
||||||
--learning_rate = 0.0056 / approx_cossim(66573)
|
--learning_rate = 0.0056 / approx_cossim(66573)
|
||||||
weight_decay = 0.00032, --0.001 --0.0023
|
weight_decay = 0.00032, --0.001 --0.0023
|
||||||
--
|
|
||||||
cap_time = 200, --400
|
cap_time = 200, --400
|
||||||
timer_loser = 1/2,
|
timer_loser = 1/2,
|
||||||
decrement_reward = false, -- bad idea, encourages mario to kill himself
|
decrement_reward = false, -- bad idea, encourages mario to kill himself
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
-- "gameconfig" is kind of a misnomer, to be honest.
|
||||||
|
-- it's more like things the end user shouldn't have to change.
|
||||||
|
|
||||||
local gcfg = {
|
local gcfg = {
|
||||||
input_size = 60 + 4, -- TODO: let the script figure this out for us.
|
input_size = 60 + 4, -- TODO: let the script figure this out for us.
|
||||||
tile_count = 17 * 13,
|
tile_count = 17 * 13,
|
||||||
|
|
32
main.lua
32
main.lua
|
@ -61,6 +61,7 @@ local ceil = math.ceil
|
||||||
local min = math.min
|
local min = math.min
|
||||||
local max = math.max
|
local max = math.max
|
||||||
local exp = math.exp
|
local exp = math.exp
|
||||||
|
local pow = math.pow
|
||||||
local log = math.log
|
local log = math.log
|
||||||
local sqrt = math.sqrt
|
local sqrt = math.sqrt
|
||||||
local random = math.random
|
local random = math.random
|
||||||
|
@ -83,6 +84,8 @@ local arshift = bit.arshift
|
||||||
local rol = bit.rol
|
local rol = bit.rol
|
||||||
local ror = bit.ror
|
local ror = bit.ror
|
||||||
|
|
||||||
|
local gui = gui
|
||||||
|
|
||||||
-- utilities.
|
-- utilities.
|
||||||
|
|
||||||
local function boolean_xor(a, b)
|
local function boolean_xor(a, b)
|
||||||
|
@ -423,13 +426,36 @@ local function prepare_epoch()
|
||||||
base_params = network:collect()
|
base_params = network:collect()
|
||||||
empty(trial_noise)
|
empty(trial_noise)
|
||||||
empty(trial_rewards)
|
empty(trial_rewards)
|
||||||
-- TODO: save memory. generate noise as needed by saving the seed
|
|
||||||
|
-- TODO: (optionally) save memory.
|
||||||
|
-- generate noise as needed by saving the seed
|
||||||
-- (the os.time() as of here) and calling nn.normal() each trial.
|
-- (the os.time() as of here) and calling nn.normal() each trial.
|
||||||
|
-- of course this doubles the amount of time we spend generating noise,
|
||||||
|
-- but that's a fair tradeoff for dividing memory used by noise by `epoch_trials`.
|
||||||
|
|
||||||
|
local precision = (pow(cfg.deviation, 1/-0.51175585) - 8.68297257) / 1.66484392
|
||||||
|
print(("chosen precision: %.2f"):format(precision))
|
||||||
|
|
||||||
for i = 1, cfg.epoch_trials do
|
for i = 1, cfg.epoch_trials do
|
||||||
local noise = nn.zeros(#base_params)
|
local noise = nn.zeros(#base_params)
|
||||||
-- NOTE: change in implementation: deviation is multiplied here
|
-- NOTE: change in implementation: deviation is multiplied here
|
||||||
-- and ONLY here now.
|
-- and ONLY here now.
|
||||||
for j = 1, #base_params do noise[j] = cfg.deviation * nn.normal() end
|
if cfg.graycode then
|
||||||
|
--local precision = 1 / cfg.deviation
|
||||||
|
--print(cfg.deviation, precision)
|
||||||
|
for j = 1, #base_params do
|
||||||
|
noise[j] = exp(-precision * nn.uniform())
|
||||||
|
end
|
||||||
|
for j = 1, #base_params do
|
||||||
|
noise[j] = nn.uniform() < 0.5 and noise[j] or -noise[j]
|
||||||
|
end
|
||||||
|
-- TODO? wrap/bound domain to [-1,1].
|
||||||
|
-- dunno if this will work with the learning rate stuff.
|
||||||
|
else
|
||||||
|
for j = 1, #base_params do
|
||||||
|
noise[j] = cfg.deviation * nn.normal()
|
||||||
|
end
|
||||||
|
end
|
||||||
trial_noise[i] = noise
|
trial_noise[i] = noise
|
||||||
end
|
end
|
||||||
trial_i = -1
|
trial_i = -1
|
||||||
|
@ -722,7 +748,7 @@ local function init()
|
||||||
joypad_mash('start')
|
joypad_mash('start')
|
||||||
emu.frameadvance()
|
emu.frameadvance()
|
||||||
end
|
end
|
||||||
print(emu.framecount())
|
--print(emu.framecount())
|
||||||
|
|
||||||
local res, err = pcall(network.load, network)
|
local res, err = pcall(network.load, network)
|
||||||
if res == false then print(err) end
|
if res == false then print(err) end
|
||||||
|
|
1
nn.lua
1
nn.lua
|
@ -747,6 +747,7 @@ return {
|
||||||
indexof = indexof,
|
indexof = indexof,
|
||||||
contains = contains,
|
contains = contains,
|
||||||
prod = prod,
|
prod = prod,
|
||||||
|
uniform = uniform,
|
||||||
normal = normal,
|
normal = normal,
|
||||||
zeros = zeros,
|
zeros = zeros,
|
||||||
arange = arange,
|
arange = arange,
|
||||||
|
|
Loading…
Reference in a new issue