add evaluation counting to ARS, cleanup

2018-06-13 21:54:04 +02:00 · 2018-06-13 21:54:04 +02:00 · 601d78bfda
commit 601d78bfda
parent 6498b4143f
1 changed files with 9 additions and 12 deletions
--- a/ars.lua
+++ b/ars.lua
@ -63,6 +63,8 @@ function Ars:init(dims, popsize, poptop, learning_rate, sigma, antithetic)
    if self.antithetic then self.popsize = self.popsize * 2 end

    self._params = nn.zeros(self.dims)
+
+    self.evals = 0
 end

 function Ars:params(new_params)
@ -113,8 +115,13 @@ function Ars:ask(graycode)
 end

 function Ars:tell(scored, unperturbed_score)
+    local use_lips = unperturbed_score ~= nil and self.antithetic
+    self.evals = self.evals + #scored
+    if use_lips then self.evals = self.evals + 1 end
+
+    -- FIXME: the non-antithetic case seems to be broken.
+
    local indices = collect_best_indices(scored, self.poptop, self.antithetic)
-    --print("best trials:", indices)

    local top_rewards = {}
    for i = 1, #scored do top_rewards[i] = 0 end
@ -123,16 +130,6 @@ function Ars:tell(scored, unperturbed_score)
        top_rewards[sind + 0] = scored[sind + 0]
        top_rewards[sind + 1] = scored[sind + 1]
    end
-    --print("top:", top_rewards)
-
-    if self.antithetic then
-        local top_delta_rewards = {} -- only used for printing.
-        for i, ind in ipairs(indices) do
-            local sind = (ind - 1) * 2 + 1
-            top_delta_rewards[i] = abs(top_rewards[sind + 0] - top_rewards[sind + 1])
-        end
-        --print("best deltas:", top_delta_rewards)
-    end

    local step = nn.zeros(self.dims)
    local _, reward_dev = calc_mean_dev(top_rewards)
@ -147,7 +144,7 @@ function Ars:tell(scored, unperturbed_score)
            if reward ~= 0 then
                local noisy = self.noise[i]

-                if unperturbed_score ~= nil then
+                if use_lips then
                    local lips = kinda_lipschitz(noisy, pos, neg, unperturbed_score)
                    reward = reward / lips / self.sigma
                else