add graycode-like distribution option

2018-04-02 16:29:12 +02:00 · 2018-04-02 16:29:12 +02:00 · b453438055
commit b453438055
parent 6a01f609a9
4 changed files with 37 additions and 6 deletions
--- a/config.lua
+++ b/config.lua
@ -8,13 +8,14 @@ local cfg = {
    playable_mode = false,
    start_big = false, --true
    starting_lives = 0, --1
-    --
+
    init_zeros = true, -- instead of he_normal noise or whatever.
    frameskip = 4,
    -- true epsilon-greedy requires both deterministic and det_epsilon to be set.
    deterministic = true, -- use argmax on outputs instead of random sampling.
    det_epsilon = false, -- take random actions with probability eps.
-    --
+
    graycode = true,
    epoch_trials = 50,
    epoch_top_trials = 25, -- new with ARS.
    unperturbed_trial = true, -- do a trial without any noise.
@ -27,7 +28,7 @@ local cfg = {
    --learning_rate = 0.0032 / approx_cossim(66573)
    --learning_rate = 0.0056 / approx_cossim(66573)
    weight_decay = 0.00032, --0.001 --0.0023
-    --
+
    cap_time = 200, --400
    timer_loser = 1/2,
    decrement_reward = false, -- bad idea, encourages mario to kill himself
--- a/gameconfig.lua
+++ b/gameconfig.lua
@ -1,3 +1,6 @@
 -- "gameconfig" is kind of a misnomer, to be honest.
 -- it's more like things the end user shouldn't have to change.
 local gcfg = {
    input_size = 60 + 4, -- TODO: let the script figure this out for us.
    tile_count = 17 * 13,
--- a/main.lua
+++ b/main.lua
@ -61,6 +61,7 @@ local ceil = math.ceil
 local min = math.min
 local max = math.max
 local exp = math.exp
 local pow = math.pow
 local log = math.log
 local sqrt = math.sqrt
 local random = math.random
@ -83,6 +84,8 @@ local arshift = bit.arshift
 local rol = bit.rol
 local ror = bit.ror
 local gui = gui
 -- utilities.
 local function boolean_xor(a, b)
@ -423,13 +426,36 @@ local function prepare_epoch()
    base_params = network:collect()
    empty(trial_noise)
    empty(trial_rewards)
-    -- TODO: save memory. generate noise as needed by saving the seed
+
    -- TODO: (optionally) save memory.
    --       generate noise as needed by saving the seed (the os.time() as of here)
    --       and re-running the noise generator (nn.normal() or nn.uniform()) each trial.
    -- of course this doubles the amount of time we spend generating noise,
    -- but that's a fair tradeoff for dividing the memory used by noise by `epoch_trials`.
    local precision = (pow(cfg.deviation, 1/-0.51175585) - 8.68297257) / 1.66484392
    print(("chosen precision: %.2f"):format(precision))
    for i = 1, cfg.epoch_trials do
        local noise = nn.zeros(#base_params)
        -- NOTE: change in implementation: deviation is multiplied here
        --       and ONLY here now.
-        for j = 1, #base_params do noise[j] = cfg.deviation * nn.normal() end
+        if cfg.graycode then
            --local precision = 1 / cfg.deviation
            --print(cfg.deviation, precision)
            for j = 1, #base_params do
                noise[j] = exp(-precision * nn.uniform())
            end
            for j = 1, #base_params do
                noise[j] = nn.uniform() < 0.5 and noise[j] or -noise[j]
            end
            -- TODO? wrap/bound domain to [-1,1].
            --       dunno if this will work with the learning rate stuff.
        else
            for j = 1, #base_params do
                noise[j] = cfg.deviation * nn.normal()
            end
        end
        trial_noise[i] = noise
    end
    trial_i = -1
@ -722,7 +748,7 @@ local function init()
        joypad_mash('start')
        emu.frameadvance()
    end
-    print(emu.framecount())
+    --print(emu.framecount())
    local res, err = pcall(network.load, network)
    if res == false then print(err) end
--- a/nn.lua
+++ b/nn.lua
@ -747,6 +747,7 @@ return {
    indexof = indexof,
    contains = contains,
    prod = prod,
    uniform = uniform,
    normal = normal,
    zeros = zeros,
    arange = arange,