tweaks
This commit is contained in:
parent
2bdd67b721
commit
7831f534c9
2 changed files with 23 additions and 11 deletions
17
config.lua
17
config.lua
|
@ -12,22 +12,22 @@ local cfg = {
|
||||||
init_zeros = true, -- instead of he_normal noise or whatever.
|
init_zeros = true, -- instead of he_normal noise or whatever.
|
||||||
frameskip = 4,
|
frameskip = 4,
|
||||||
-- true epsilon-greedy requires both deterministic and det_epsilon to be set.
|
-- true epsilon-greedy requires both deterministic and det_epsilon to be set.
|
||||||
deterministic = true, -- use argmax on outputs instead of random sampling.
|
deterministic = false, -- use argmax on outputs instead of random sampling.
|
||||||
det_epsilon = false, -- take random actions with probability eps.
|
det_epsilon = false, -- take random actions with probability eps.
|
||||||
|
|
||||||
graycode = false,
|
graycode = false,
|
||||||
epoch_trials = 50,
|
epoch_trials = 5,
|
||||||
epoch_top_trials = 25, -- new with ARS.
|
epoch_top_trials = 2, -- new with ARS.
|
||||||
unperturbed_trial = true, -- do a trial without any noise.
|
unperturbed_trial = false, -- do a trial without any noise.
|
||||||
negate_trials = true, -- try pairs of normal and negated noise directions.
|
negate_trials = true, -- try pairs of normal and negated noise directions.
|
||||||
time_inputs = true, -- binary inputs of global frame count
|
time_inputs = true, -- binary inputs of global frame count
|
||||||
-- note that negate_trials now doubles the effective trials.
|
-- note that negate_trials now doubles the effective trials.
|
||||||
deviation = 0.05, --0.075 --0.1
|
deviation = 0.32,
|
||||||
--learning_rate = 0.01 / approx_cossim(7051)
|
--learning_rate = 0.01 / approx_cossim(7051)
|
||||||
learning_rate = 1.0,
|
learning_rate = 0.32,
|
||||||
--learning_rate = 0.0032 / approx_cossim(66573)
|
--learning_rate = 0.0032 / approx_cossim(66573)
|
||||||
--learning_rate = 0.0056 / approx_cossim(66573)
|
--learning_rate = 0.0056 / approx_cossim(66573)
|
||||||
weight_decay = 0.00032, --0.001 --0.0023
|
weight_decay = 0.0032,
|
||||||
|
|
||||||
cap_time = 200, --400
|
cap_time = 200, --400
|
||||||
timer_loser = 1/2,
|
timer_loser = 1/2,
|
||||||
|
@ -36,6 +36,9 @@ local cfg = {
|
||||||
playback_mode = false,
|
playback_mode = false,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- TODO: unresolved interaction between config options:
|
||||||
|
-- what happens when playback_mode is true but unperturbed_trial is false?
|
||||||
|
|
||||||
cfg.epoch_top_trials = math.min(cfg.epoch_trials, cfg.epoch_top_trials)
|
cfg.epoch_top_trials = math.min(cfg.epoch_trials, cfg.epoch_top_trials)
|
||||||
|
|
||||||
cfg.eps_start = 1.0 * cfg.frameskip / 64
|
cfg.eps_start = 1.0 * cfg.frameskip / 64
|
||||||
|
|
17
main.lua
17
main.lua
|
@ -436,14 +436,16 @@ local function prepare_epoch()
|
||||||
-- but that's a fair tradeoff for dividing memory used by noise by `epoch_trials`.
|
-- but that's a fair tradeoff for dividing memory used by noise by `epoch_trials`.
|
||||||
|
|
||||||
local precision = (pow(cfg.deviation, 1/-0.51175585) - 8.68297257) / 1.66484392
|
local precision = (pow(cfg.deviation, 1/-0.51175585) - 8.68297257) / 1.66484392
|
||||||
print(("chosen precision: %.2f"):format(precision))
|
if cfg.graycode then
|
||||||
|
print(("chosen precision: %.2f"):format(precision))
|
||||||
|
end
|
||||||
|
|
||||||
for i = 1, cfg.epoch_trials do
|
for i = 1, cfg.epoch_trials do
|
||||||
local noise = nn.zeros(#base_params)
|
local noise = nn.zeros(#base_params)
|
||||||
-- NOTE: change in implementation: deviation is multiplied here
|
-- NOTE: change in implementation: deviation is multiplied here
|
||||||
-- and ONLY here now.
|
-- and ONLY here now.
|
||||||
if i % 2 == 0 then -- FIXME: just messing around.
|
--if i % 2 == 0 then -- FIXME: just messing around.
|
||||||
--if cfg.graycode then
|
if cfg.graycode then
|
||||||
--local precision = 1 / cfg.deviation
|
--local precision = 1 / cfg.deviation
|
||||||
--print(cfg.deviation, precision)
|
--print(cfg.deviation, precision)
|
||||||
for j = 1, #base_params do
|
for j = 1, #base_params do
|
||||||
|
@ -598,7 +600,14 @@ local function learn_from_epoch()
|
||||||
top_rewards[sind + 0] = trial_rewards[sind + 0]
|
top_rewards[sind + 0] = trial_rewards[sind + 0]
|
||||||
top_rewards[sind + 1] = trial_rewards[sind + 1]
|
top_rewards[sind + 1] = trial_rewards[sind + 1]
|
||||||
end
|
end
|
||||||
print("top:", top_rewards)
|
--print("top:", top_rewards)
|
||||||
|
|
||||||
|
local delta_rewards = {} -- only used for printing.
|
||||||
|
for i, ind in ipairs(indices) do
|
||||||
|
local sind = (ind - 1) * 2 + 1
|
||||||
|
delta_rewards[i] = abs(top_rewards[sind + 0] - top_rewards[sind + 1])
|
||||||
|
end
|
||||||
|
print("best deltas:", delta_rewards)
|
||||||
|
|
||||||
local _, reward_dev = calc_mean_dev(top_rewards)
|
local _, reward_dev = calc_mean_dev(top_rewards)
|
||||||
--print("mean, dev:", _, reward_dev)
|
--print("mean, dev:", _, reward_dev)
|
||||||
|
|
Loading…
Add table
Reference in a new issue