-- Exponential Natural Evolution Strategies -- http://people.idsia.ch/~juergen/xNES2010gecco.pdf -- not to be confused with the Nintendo Entertainment System. local assert = assert local exp = math.exp local floor = math.floor local ipairs = ipairs local log = math.log local max = math.max local pairs = pairs local pow = math.pow local sqrt = math.sqrt local unpack = table.unpack or unpack local Base = require "Base" local nn = require "nn" local normal = nn.normal local zeros = nn.zeros local util = require "util" local argsort = util.argsort local Xnes = Base:extend() local function dot_mv(mat, vec, out) -- treats matrix as a matrix. -- treats vec as a column vector, flattened. assert(#mat.shape == 2) local d0, d1 = unpack(mat.shape) assert(d1 == #vec) local out_shape = {d0} if out == nil then out = zeros(out_shape) else assert(d0 == #out, "given output is the wrong size") end for i=1, d0 do local sum = 0 for j=1, d1 do sum = sum + mat[(i - 1) * d1 + j] * vec[j] end out[i] = sum end return out end local function make_utility(popsize, out) local utility = out or {} local temp = log(popsize / 2 + 1) for i=1, popsize do utility[i] = max(0, temp - log(i)) end local sum = 0 for _, v in ipairs(utility) do sum = sum + v end for i, v in ipairs(utility) do utility[i] = v / sum - 1 / popsize end return utility end local function make_covars(dims, sigma, out) local covars = out or zeros{dims, dims} local c = sigma / dims -- simplified form of the determinant of the matrix we're going to create: local det = pow(1 - c, dims - 1) * (c * (dims - 1) + 1) -- multiplying by this constant makes the determinant 1: local m = pow(1 / det, 1 / dims) local filler = c * m for i=1, #covars do covars[i] = filler end -- diagonals: for i=1, dims do covars[i + dims * (i - 1)] = m end return covars end function Xnes:init(dims, popsize, learning_rate, sigma, antithetic) -- heuristic borrowed from CMA-ES: self.dims = dims self.popsize = popsize or 4 + (3 * floor(log(dims))) self.learning_rate = learning_rate or 3/5 * (3 + log(dims)) / (dims * sqrt(dims)) self.sigma = sigma or 1 self.antithetic = antithetic and true or false if self.antithetic then self.popsize = self.popsize * 2 end self.utility = make_utility(self.popsize) self.mean = zeros{dims} -- note: this is technically the co-standard-deviation. -- you can imagine the "s" standing for "sqrt" if you like. self.covars = make_covars(self.dims, self.sigma, self.covars) --self.log_sigma = log(self.sigma) --self.log_covars = zeros{dims, dims} --for i, v in ipairs(self.covars) do self.log_covars[i] = log(v) end end function Xnes:params(new_mean, new_covars) if new_mean ~= nil then assert(#self.mean == #new_mean, "new parameters have the wrong size") for i, v in ipairs(new_mean) do self.mean[i] = v end end if new_covars ~= nil then -- TODO: assert determinant of new_covars is 1. error("TODO") end return self.mean end function Xnes:ask_once(asked, noise) asked = asked or zeros(self.dims) noise = noise or {} for i=1, self.dims do noise[i] = normal() end noise.shape = {#noise} dot_mv(self.covars, noise, asked) for i, v in ipairs(asked) do asked[i] = self.mean[i] + self.sigma * v end return asked, noise end function Xnes:ask_twice(asked0, asked1, noise0, noise1) asked0 = asked0 or zeros(self.dims) asked1 = asked1 or zeros(self.dims) noise0 = noise0 or {} noise1 = noise1 or {} for i=1, self.dims do noise0[i] = normal() end noise0.shape = {#noise0} dot_mv(self.covars, noise0, asked0) for i, v in ipairs(asked0) do asked0[i] = self.mean[i] + self.sigma * v asked1[i] = self.mean[i] - self.sigma * v end for i, v in ipairs(noise0) do noise1[i] = -v end return asked0, asked1, noise0, noise1 end function Xnes:ask(asked, noise) -- return a list of parameters for the user to score, -- and later pass to :tell(). if asked == nil then asked = {} for i=1, self.popsize do asked[i] = zeros(self.dims) end end if noise == nil then noise = {} for i=1, self.popsize do noise[i] = zeros(self.dims) end end if self.antithetic then for i=1, self.popsize do self:ask_twice(asked[i+0], asked[i+1], noise[i+0], noise[i+1]) end else for i=1, self.popsize do self:ask_once(asked[i], noise[i]) end end self.noise = noise return asked, noise end function Xnes:tell(scored, noise) local noise = noise or self.noise assert(noise, "missing noise argument") local arg = argsort(scored, function(a, b) return a > b end) local g_delta = zeros{self.dims} for p=1, self.popsize do local noise_p = noise[arg[p]] for i=1, self.dims do g_delta[i] = g_delta[i] + self.utility[p] * noise_p[i] end end local g_covars = zeros{self.dims, self.dims} local traced = 0 for p=1, self.popsize do local noise_p = noise[arg[p]] for i=1, self.dims do for j=1, self.dims do local ind = (i - 1) * self.dims + j local zzt = noise_p[i] * noise_p[j] - (i == j and 1 or 0) local temp = self.utility[p] * zzt g_covars[ind] = g_covars[ind] + temp traced = traced + temp end end end local g_sigma = traced / self.dims for i=1, self.dims do local ind = (i - 1) * self.dims + i g_covars[ind] = g_covars[ind] - g_sigma end -- finally, update according to the gradients. local dotted = dot_mv(self.covars, g_delta) for i, v in ipairs(self.mean) do self.mean[i] = v + self.sigma * dotted[i] end --[[ --self.log_sigma = self.log_sigma + self.learning_rate / 2 * g_sigma for i, v in ipairs(self.log_covars) do self.log_covars[i] = v + lr * g_covars[i] end --]] local lr = self.learning_rate * 0.5 self.sigma = self.sigma * exp(lr * g_sigma) for i, v in ipairs(self.covars) do self.covars[i] = v * exp(lr * g_covars[i]) end -- bookkeeping: --self.sigma = exp(self.log_sigma) --for i, v in ipairs(self.log_covars) do self.covars[i] = exp(v) end self.noise = nil end return { dot_mv = dot_mv, make_utility = make_utility, make_covars = make_covars, Xnes = Xnes, }