-- Exponential Natural Evolution Strategies -- http://people.idsia.ch/~juergen/xNES2010gecco.pdf -- not to be confused with the Nintendo Entertainment System. local assert = assert local exp = math.exp local floor = math.floor local ipairs = ipairs local log = math.log local max = math.max local pairs = pairs local pow = math.pow local sqrt = math.sqrt local unpack = table.unpack or unpack local Base = require "Base" local nn = require "nn" local dot_mv = nn.dot_mv local normal = nn.normal local zeros = nn.zeros local util = require "util" local argsort = util.argsort local Xnes = Base:extend() local function make_utility(popsize, out) local utility = out or {} local temp = log(popsize / 2 + 1) for i=1, popsize do utility[i] = max(0, temp - log(i)) end local sum = 0 for _, v in ipairs(utility) do sum = sum + v end for i, v in ipairs(utility) do utility[i] = v / sum - 1 / popsize end return utility end local function make_covars(dims, sigma, out) local covars = out or zeros{dims, dims} local c = sigma / dims -- simplified form of the determinant of the matrix we're going to create: local det = pow(1 - c, dims - 1) * (c * (dims - 1) + 1) -- multiplying by this constant makes the determinant 1: local m = pow(1 / det, 1 / dims) local filler = c * m for i=1, #covars do covars[i] = filler end -- diagonals: for i=1, dims do covars[i + dims * (i - 1)] = m end return covars end function Xnes:init(dims, popsize, base_rate, sigma, antithetic) -- heuristic borrowed from CMA-ES: self.dims = dims self.popsize = popsize or 4 + (3 * floor(log(dims))) base_rate = base_rate or 3/5 * (3 + log(dims)) / (dims * sqrt(dims)) self.param_rate = 1.0 self.sigma_rate = base_rate self.covar_rate = base_rate self.sigma = sigma or 1 self.antithetic = antithetic and true or false if self.antithetic then self.popsize = self.popsize * 2 end self.utility = make_utility(self.popsize) self.mean = zeros{dims} -- note: this is technically the co-standard-deviation. -- you can imagine the "s" standing for "sqrt" if you like. self.covars = make_covars(self.dims, self.sigma, self.covars) end function Xnes:params(new_mean) if new_mean ~= nil then assert(#self.mean == #new_mean, "new parameters have the wrong size") for i, v in ipairs(new_mean) do self.mean[i] = v end end return self.mean end function Xnes:decay(param_decay, sigma_decay) if sigma_decay > 0 then self.sigma = self.sigma * (1 - sigma_decay) end if param_decay > 0 then for i, v in ipairs(self.mean) do self.mean[i] = v * (1 - self.param_rate * param_decay * self.sigma) end end end function Xnes:ask_once(asked, noise) asked = asked or zeros(self.dims) noise = noise or {} for i=1, self.dims do noise[i] = normal() end noise.shape = {#noise} dot_mv(self.covars, noise, asked) for i, v in ipairs(asked) do asked[i] = self.mean[i] + self.sigma * v end return asked, noise end function Xnes:ask_twice(asked0, asked1, noise0, noise1) asked0 = asked0 or zeros(self.dims) asked1 = asked1 or zeros(self.dims) noise0 = noise0 or {} noise1 = noise1 or {} for i=1, self.dims do noise0[i] = normal() end noise0.shape = {#noise0} dot_mv(self.covars, noise0, asked0) for i, v in ipairs(asked0) do asked0[i] = self.mean[i] + self.sigma * v asked1[i] = self.mean[i] - self.sigma * v end for i, v in ipairs(noise0) do noise1[i] = -v end return asked0, asked1, noise0, noise1 end function Xnes:ask(asked, noise) -- return a list of parameters for the user to score, -- and later pass to :tell(). if asked == nil then asked = {} for i=1, self.popsize do asked[i] = zeros(self.dims) end end if noise == nil then noise = {} for i=1, self.popsize do noise[i] = zeros(self.dims) end end if self.antithetic then for i=1, self.popsize do self:ask_twice(asked[i+0], asked[i+1], noise[i+0], noise[i+1]) end else for i=1, self.popsize do self:ask_once(asked[i], noise[i]) end end self.noise = noise return asked, noise end function Xnes:tell(scored, noise) local noise = noise or self.noise assert(noise, "missing noise argument") local arg = argsort(scored, function(a, b) return a > b end) local g_delta = zeros{self.dims} for p=1, self.popsize do local noise_p = noise[arg[p]] for i=1, self.dims do g_delta[i] = g_delta[i] + self.utility[p] * noise_p[i] end end local g_covars = zeros{self.dims, self.dims} local traced = 0 for p=1, self.popsize do local noise_p = noise[arg[p]] for i=1, self.dims do for j=1, self.dims do local ind = (i - 1) * self.dims + j local zzt = noise_p[i] * noise_p[j] - (i == j and 1 or 0) local temp = self.utility[p] * zzt g_covars[ind] = g_covars[ind] + temp traced = traced + temp end end end local g_sigma = traced / self.dims for i=1, self.dims do local ind = (i - 1) * self.dims + i g_covars[ind] = g_covars[ind] - g_sigma end -- finally, update according to the gradients. local dotted = dot_mv(self.covars, g_delta) local step = {} for i, v in ipairs(dotted) do step[i] = self.sigma * v end for i, v in ipairs(self.mean) do self.mean[i] = v + self.param_rate * step[i] end self.sigma = self.sigma * exp(self.sigma_rate * 0.5 * g_sigma) for i, v in ipairs(self.covars) do self.covars[i] = v * exp(self.covar_rate * 0.5 * g_covars[i]) end -- bookkeeping: self.noise = nil return step end return { make_utility = make_utility, make_covars = make_covars, Xnes = Xnes, }