From b45343805576000d315e18894a412cf400613f14 Mon Sep 17 00:00:00 2001
From: Connor Olding <cloningdonor@gmail.com>
Date: Mon, 2 Apr 2018 16:29:12 +0200
Subject: [PATCH] add graycode-like distribution option

---
 config.lua     |  7 ++++---
 gameconfig.lua |  3 +++
 main.lua       | 32 +++++++++++++++++++++++++++++---
 nn.lua         |  1 +
 4 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/config.lua b/config.lua
index a619aa8..e8cb96d 100644
--- a/config.lua
+++ b/config.lua
@@ -8,13 +8,14 @@ local cfg = {
     playable_mode = false,
     start_big = false, --true
     starting_lives = 0, --1
-    --
+
     init_zeros = true, -- instead of he_normal noise or whatever.
     frameskip = 4,
     -- true greedy epsilon has both deterministic and det_epsilon set.
     deterministic = true, -- use argmax on outputs instead of random sampling.
     det_epsilon = false, -- take random actions with probability eps.
-    --
+
+    graycode = true,
     epoch_trials = 50,
     epoch_top_trials = 25, -- new with ARS.
     unperturbed_trial = true, -- do a trial without any noise.
@@ -27,7 +28,7 @@ local cfg = {
     --learning_rate = 0.0032 / approx_cossim(66573)
     --learning_rate = 0.0056 / approx_cossim(66573)
     weight_decay = 0.00032, --0.001 --0.0023
-    --
+
     cap_time = 200, --400
     timer_loser = 1/2,
     decrement_reward = false, -- bad idea, encourages mario to kill himself
diff --git a/gameconfig.lua b/gameconfig.lua
index 3ec69ac..34e1b5a 100644
--- a/gameconfig.lua
+++ b/gameconfig.lua
@@ -1,3 +1,6 @@
+-- "gameconfig" is kind of a misnomer, to be honest.
+-- it's more like things the end user shouldn't have to change.
+
 local gcfg = {
     input_size = 60 + 4, -- TODO: let the script figure this out for us.
     tile_count = 17 * 13,
diff --git a/main.lua b/main.lua
index ac09e90..4ab813f 100644
--- a/main.lua
+++ b/main.lua
@@ -61,6 +61,7 @@ local ceil = math.ceil
 local min = math.min
 local max = math.max
 local exp = math.exp
+local pow = math.pow
 local log = math.log
 local sqrt = math.sqrt
 local random = math.random
@@ -83,6 +84,8 @@ local arshift = bit.arshift
 local rol = bit.rol
 local ror = bit.ror
 
+local gui = gui
+
 -- utilities.
 
 local function boolean_xor(a, b)
@@ -423,13 +426,36 @@ local function prepare_epoch()
     base_params = network:collect()
     empty(trial_noise)
     empty(trial_rewards)
-    -- TODO: save memory. generate noise as needed by saving the seed
+
+    -- TODO: (optionally) save memory.
+    --       generate noise as needed by saving the seed
     --       (the os.time() as of here) and calling nn.normal() each trial.
+    -- of course this doubles the time we spend generating noise,
+    -- but that's a fair tradeoff for cutting the memory used by noise by a factor of `epoch_trials`.
+
+    local precision = (pow(cfg.deviation, 1/-0.51175585) - 8.68297257) / 1.66484392
+    print(("chosen precision: %.2f"):format(precision))
+
     for i = 1, cfg.epoch_trials do
         local noise = nn.zeros(#base_params)
         -- NOTE: change in implementation: deviation is multiplied here
         --       and ONLY here now.
-        for j = 1, #base_params do noise[j] = cfg.deviation * nn.normal() end
+        if cfg.graycode then
+            --local precision = 1 / cfg.deviation
+            --print(cfg.deviation, precision)
+            for j = 1, #base_params do
+                noise[j] = exp(-precision * nn.uniform())
+            end
+            for j = 1, #base_params do
+                noise[j] = nn.uniform() < 0.5 and noise[j] or -noise[j]
+            end
+            -- TODO? wrap/bound domain to [-1,1].
+            --       dunno if this will work with the learning rate stuff.
+        else
+            for j = 1, #base_params do
+                noise[j] = cfg.deviation * nn.normal()
+            end
+        end
         trial_noise[i] = noise
     end
     trial_i = -1
@@ -722,7 +748,7 @@ local function init()
         joypad_mash('start')
         emu.frameadvance()
     end
-    print(emu.framecount())
+    --print(emu.framecount())
 
     local res, err = pcall(network.load, network)
     if res == false then print(err) end
diff --git a/nn.lua b/nn.lua
index 8fcb092..85b9a10 100644
--- a/nn.lua
+++ b/nn.lua
@@ -747,6 +747,7 @@ return {
     indexof = indexof,
     contains = contains,
     prod = prod,
+    uniform = uniform,
     normal = normal,
     zeros = zeros,
     arange = arange,