diff --git a/config.lua b/config.lua
index b2aca37..c7ce9b3 100644
--- a/config.lua
+++ b/config.lua
@@ -12,22 +12,22 @@ local cfg = {
     init_zeros = true, -- instead of he_normal noise or whatever.
     frameskip = 4,
     -- true greedy epsilon has both deterministic and det_epsilon set.
-    deterministic = true, -- use argmax on outputs instead of random sampling.
+    deterministic = false, -- use argmax on outputs instead of random sampling.
     det_epsilon = false, -- take random actions with probability eps.
 
     graycode = false,
-    epoch_trials = 50,
-    epoch_top_trials = 25, -- new with ARS.
-    unperturbed_trial = true, -- do a trial without any noise.
+    epoch_trials = 5,
+    epoch_top_trials = 2, -- new with ARS.
+    unperturbed_trial = false, -- do a trial without any noise.
     negate_trials = true, -- try pairs of normal and negated noise directions.
     time_inputs = true, -- binary inputs of global frame count
     -- ^ note that this now doubles the effective trials.
-    deviation = 0.05, --0.075 --0.1
+    deviation = 0.32,
     --learning_rate = 0.01 / approx_cossim(7051)
-    learning_rate = 1.0,
+    learning_rate = 0.32,
     --learning_rate = 0.0032 / approx_cossim(66573)
     --learning_rate = 0.0056 / approx_cossim(66573)
-    weight_decay = 0.00032, --0.001 --0.0023
+    weight_decay = 0.0032,
 
     cap_time = 200, --400
     timer_loser = 1/2,
@@ -36,6 +36,9 @@ local cfg = {
     playback_mode = false,
 }
 
+-- TODO: decide what should happen when playback_mode is true
+-- but unperturbed_trial is false.
+
 cfg.epoch_top_trials = math.min(cfg.epoch_trials, cfg.epoch_top_trials)
 
 cfg.eps_start = 1.0 * cfg.frameskip / 64
diff --git a/main.lua b/main.lua
index f52b5c7..0c71453 100644
--- a/main.lua
+++ b/main.lua
@@ -436,14 +436,16 @@ local function prepare_epoch()
     -- but that's a fair tradeoff for dividing memory used by noise by `epoch_trials`.
 
     local precision = (pow(cfg.deviation, 1/-0.51175585) - 8.68297257) / 1.66484392
-    print(("chosen precision: %.2f"):format(precision))
+    if cfg.graycode then
+        print(("chosen precision: %.2f"):format(precision))
+    end
 
     for i = 1, cfg.epoch_trials do
         local noise = nn.zeros(#base_params)
         -- NOTE: change in implementation: deviation is multiplied here
         --       and ONLY here now.
-        if i % 2 == 0 then -- FIXME: just messing around.
-        --if cfg.graycode then
+        --if i % 2 == 0 then -- FIXME: just messing around.
+        if cfg.graycode then
             --local precision = 1 / cfg.deviation
             --print(cfg.deviation, precision)
             for j = 1, #base_params do
@@ -598,7 +600,14 @@ local function learn_from_epoch()
         top_rewards[sind + 0] = trial_rewards[sind + 0]
         top_rewards[sind + 1] = trial_rewards[sind + 1]
     end
-    print("top:", top_rewards)
+    --print("top:", top_rewards)
+
+    local delta_rewards = {} -- only used for printing.
+    for i, ind in ipairs(indices) do
+        local sind = (ind - 1) * 2 + 1
+        delta_rewards[i] = abs(top_rewards[sind + 0] - top_rewards[sind + 1])
+    end
+    print("best deltas:", delta_rewards)
 
     local _, reward_dev = calc_mean_dev(top_rewards)
     --print("mean, dev:", _, reward_dev)