add Lipschitz heuristic/approximation
This commit is contained in:
parent
ee066154b2
commit
5201b75509
2 changed files with 28 additions and 8 deletions
|
@ -33,6 +33,7 @@ local common_cfg = {
|
||||||
-- ^ note that this now doubles the effective trials.
|
-- ^ note that this now doubles the effective trials.
|
||||||
time_inputs = true, -- binary inputs of global frame count
|
time_inputs = true, -- binary inputs of global frame count
|
||||||
|
|
||||||
|
ars_lips = false,
|
||||||
adamant = false, -- run steps through AMSgrad.
|
adamant = false, -- run steps through AMSgrad.
|
||||||
|
|
||||||
cap_time = 300,
|
cap_time = 300,
|
||||||
|
@ -50,6 +51,7 @@ local cfg = {
|
||||||
epoch_top_trials = 10,
|
epoch_top_trials = 10,
|
||||||
learning_rate = 1.0,
|
learning_rate = 1.0,
|
||||||
|
|
||||||
|
ars_lips = true,
|
||||||
deviation = 0.1,
|
deviation = 0.1,
|
||||||
weight_decay = 0.004,
|
weight_decay = 0.004,
|
||||||
|
|
||||||
|
|
20
main.lua
20
main.lua
|
@ -656,11 +656,12 @@ local function learn_from_epoch()
|
||||||
end
|
end
|
||||||
print("best deltas:", top_delta_rewards)
|
print("best deltas:", top_delta_rewards)
|
||||||
|
|
||||||
|
if not cfg.ars_lips then
|
||||||
local _, reward_dev = calc_mean_dev(top_rewards)
|
local _, reward_dev = calc_mean_dev(top_rewards)
|
||||||
--print("mean, dev:", _, reward_dev)
|
--print("mean, dev:", _, reward_dev)
|
||||||
if reward_dev == 0 then reward_dev = 1 end
|
if reward_dev == 0 then reward_dev = 1 end
|
||||||
|
|
||||||
for i, v in ipairs(top_rewards) do top_rewards[i] = v / reward_dev end
|
for i, v in ipairs(top_rewards) do top_rewards[i] = v / reward_dev end
|
||||||
|
end
|
||||||
|
|
||||||
-- NOTE: step no longer directly incorporates learning_rate.
|
-- NOTE: step no longer directly incorporates learning_rate.
|
||||||
for i = 1, cfg.epoch_trials do
|
for i = 1, cfg.epoch_trials do
|
||||||
|
@ -668,11 +669,28 @@ local function learn_from_epoch()
|
||||||
local pos = top_rewards[ind + 0]
|
local pos = top_rewards[ind + 0]
|
||||||
local neg = top_rewards[ind + 1]
|
local neg = top_rewards[ind + 1]
|
||||||
local reward = pos - neg
|
local reward = pos - neg
|
||||||
|
if reward ~= 0 then
|
||||||
local noise = trial_noise[i]
|
local noise = trial_noise[i]
|
||||||
|
|
||||||
|
if cfg.ars_lips then
|
||||||
|
local _, dev = calc_mean_dev(noise)
|
||||||
|
local c0 = neg - current_cost
|
||||||
|
local c1 = pos - current_cost
|
||||||
|
local l0 = abs(3 * c1 + c0)
|
||||||
|
local l1 = abs(c1 + 3 * c0)
|
||||||
|
local lips = max(l0, l1) / (2 * dev)
|
||||||
|
--reward = pos / lips - neg / lips
|
||||||
|
local old_reward = reward
|
||||||
|
reward = reward / lips
|
||||||
|
reward = reward / cfg.deviation -- FIXME: hack?
|
||||||
|
--print(("trial %i reward: %.0f -> %.5f"):format(i, old_reward, reward))
|
||||||
|
end
|
||||||
|
|
||||||
for j, v in ipairs(noise) do
|
for j, v in ipairs(noise) do
|
||||||
step[j] = step[j] + reward * v / cfg.epoch_top_trials
|
step[j] = step[j] + reward * v / cfg.epoch_top_trials
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
local step_mean, step_dev = calc_mean_dev(step)
|
local step_mean, step_dev = calc_mean_dev(step)
|
||||||
print("step mean:", step_mean)
|
print("step mean:", step_mean)
|
||||||
|
|
Loading…
Add table
Reference in a new issue