add xNES preset, add options, allow preset specified by argument

2018-06-13 03:00:42 +02:00 · 2018-06-13 03:00:42 +02:00 · 7800510d1f
commit 7800510d1f
parent 403127bd66
2 changed files with 51 additions and 10 deletions
--- a/config.lua
+++ b/config.lua
@@ -1,4 +1,4 @@
-local preset = 'snes'
+preset = rawget(_G, 'preset') or 'ars'

 local common_cfg = {
    -- read-only modes:
@@ -26,6 +26,8 @@ local common_cfg = {

    -- network layers:
    layernorm = false,
+    reduce_tiles = false,
+    bias_out = true,

    -- network evaluation (sampling joypad):
    frameskip = 4,
@@ -39,6 +41,7 @@ local common_cfg = {
    epoch_top_trials = 9999, -- for ARS.

    -- sampling:
+    deviation = 1.0,
    unperturbed_trial = true, -- perform an extra trial without any noise.
    graycode = false, -- for ARS.
    negate_trials = true, -- try pairs of normal and negated noise directions.
@@ -47,7 +50,7 @@ local common_cfg = {

    -- epoch-related rates:
    learning_rate = 1.0,
-    mean_adapt = 1.0, -- for xNES.
+    mean_adapt = 1.0, -- for SNES, xNES.
    weight_decay = 0.0,
    sigma_decay = 0.0, -- for SNES.
 }
@@ -68,15 +71,49 @@ if preset == 'snes' then
        epoch_trials = 100,
        negate_trials = false,

+        deviation = 1.0,
        min_refresh = 0.2,

        learning_rate = 0.1, -- TODO: rename to learn_primary or something.
        mean_adapt = 0.5, -- TODO: rename to learn_secondary or something.
-        deviation = 1.0,
-        weight_decay = 0.01, -- note: multiplied by its std, and mean_adapt.
+        weight_decay = 0.02, -- note: multiplied by its std, and mean_adapt.
        sigma_decay = 0.01, -- note: multiplied by learning_rate.
    }

+elseif preset == 'xnes' then
+
+    cfg = {
+        es = 'xnes',
+
+        log_fn = 'logs-xnes.csv',
+        params_fn = 'params-xnes.txt',
+
+        start_big = true,
+        min_time = 300,
+        timer_loser = 1.0,
+
+        decrement_reward = false,
+        score_multiplier = 0,
+
+        init_zeros = true,
+
+        reduce_tiles = true,
+        bias_out = false,
+
+        deterministic = false,
+
+        deviation = 0.5,
+        negate_trials = false,
+        min_refresh = 0.1,
+
+        epoch_trials = 50,
+
+        learning_rate = 0.01,
+        mean_adapt = 1.0,
+        weight_decay = 0.0,
+        sigma_decay = 0.0,
+    }
+
 elseif preset == 'ars' then

    cfg = {
@@ -90,10 +127,11 @@ elseif preset == 'ars' then
        min_time = 300,
        timer_loser = 1.0,

+        deviation = 0.1,
+
        epoch_trials = 25,

        learning_rate = 1.0,
-        deviation = 0.1,
        weight_decay = 0.0025,
    }

--- a/main.lua
+++ b/main.lua
@@ -2,6 +2,7 @@ local globalize = require("strict")

 -- configuration.

+globalize{preset = arg}
 local cfg = require("config")
 local gcfg = require("gameconfig")

@@ -151,10 +152,12 @@ local function make_network(input_size)
    nn_ty = nn_tx:feed(nn.Embed(#game.valid_tiles, 2))

    nn_tz = nn_ty
-    --nn_tz = nn_tz:feed(nn.Reshape{13, 17 * 2})
-    --nn_tz = nn_tz:feed(nn.DenseBroadcast(5))
-    --nn_tz = nn_tz:feed(nn.Relu())
+    if cfg.reduce_tiles then
+        nn_tz = nn_tz:feed(nn.Reshape{13, 17 * 2})
+        nn_tz = nn_tz:feed(nn.DenseBroadcast(5))
+        nn_tz = nn_tz:feed(nn.Relu())
        -- note: due to a quirk in Merge, we don't need to flatten nn_tz.
+    end

    nn_y = nn.Merge()
    nn_x:feed(nn_y)
@@ -171,7 +174,7 @@ local function make_network(input_size)
    --]]

    nn_z = nn_y
-    nn_z = nn_z:feed(nn.Dense(#gcfg.jp_lut), true)
+    nn_z = nn_z:feed(nn.Dense(#gcfg.jp_lut), true, cfg.bias_out)
    nn_z = nn_z:feed(nn.Softmax())
    return nn.Model({nn_x, nn_tx}, {nn_z})
 end