-- smbot/xnes.lua
-- Exponential Natural Evolution Strategies
-- http://people.idsia.ch/~juergen/xNES2010gecco.pdf
-- not to be confused with the Nintendo Entertainment System.
local assert = assert
local exp = math.exp
local floor = math.floor
local ipairs = ipairs
local log = math.log
local max = math.max
local pairs = pairs
local pow = math.pow
local sqrt = math.sqrt
local unpack = table.unpack or unpack
local Base = require "Base"
local nn = require "nn"
local normal = nn.normal
local zeros = nn.zeros
local util = require "util"
local argsort = util.argsort
local Xnes = Base:extend()
-- Matrix-vector product: out = mat * vec.
-- `mat` is a flattened row-major matrix carrying a `shape` field {rows, cols};
-- `vec` is treated as a flattened column vector of length `cols`.
-- Allocates a fresh output vector when `out` is nil, otherwise fills `out`
-- in place; either way the result is returned.
local function dot_mv(mat, vec, out)
    assert(#mat.shape == 2)
    local rows, cols = mat.shape[1], mat.shape[2]
    assert(cols == #vec)
    if out == nil then
        out = zeros{rows}
    else
        assert(rows == #out, "given output is the wrong size")
    end
    for r = 1, rows do
        local base = (r - 1) * cols
        local acc = 0
        for c = 1, cols do
            acc = acc + mat[base + c] * vec[c]
        end
        out[r] = acc
    end
    return out
end
-- Rank-based fitness shaping ("utility") weights as used by NES methods.
-- Produces `popsize` weights that sum to zero: the best-ranked individuals
-- get logarithmically decreasing positive raw weights, the worse half gets
-- zero, and the uniform baseline 1/popsize is subtracted from all of them.
local function make_utility(popsize, out)
    local utility = out or {}
    local offset = math.log(popsize / 2 + 1)
    for rank = 1, popsize do
        local w = offset - math.log(rank)
        utility[rank] = w > 0 and w or 0
    end
    local total = 0
    for _, w in ipairs(utility) do total = total + w end
    local baseline = 1 / popsize
    for rank, w in ipairs(utility) do
        utility[rank] = w / total - baseline
    end
    return utility
end
-- Build the initial co-standard-deviation matrix (flattened dims x dims):
-- a uniform off-diagonal value sigma/dims with a heavier diagonal, scaled
-- so that the matrix determinant is exactly 1.
local function make_covars(dims, sigma, out)
    local covars = out or zeros{dims, dims}
    local offdiag = sigma / dims
    -- closed form for the determinant of the matrix constructed below:
    local det = (1 - offdiag)^(dims - 1) * (offdiag * (dims - 1) + 1)
    -- scaling every entry by this factor normalizes the determinant to 1:
    local scale = (1 / det)^(1 / dims)
    local filler = offdiag * scale
    for i = 1, #covars do covars[i] = filler end
    -- overwrite the diagonal entries:
    for i = 1, dims do covars[i + dims * (i - 1)] = scale end
    return covars
end
-- Initialize the xNES optimizer state.
-- dims: dimensionality of the search space.
-- popsize: candidates per generation (doubled when antithetic sampling is on).
-- learning_rate: step size for the sigma/covariance updates.
-- sigma: initial global step size.
-- antithetic: when truthy, sample mirrored pairs of perturbations.
function Xnes:init(dims, popsize, learning_rate, sigma, antithetic)
    self.dims = dims
    -- population-size and learning-rate heuristics borrowed from CMA-ES:
    self.popsize = popsize or 4 + (3 * floor(log(dims)))
    self.learning_rate = learning_rate
        or 3/5 * (3 + log(dims)) / (dims * sqrt(dims))
    self.sigma = sigma or 1
    self.antithetic = not not antithetic
    if self.antithetic then self.popsize = self.popsize * 2 end
    self.utility = make_utility(self.popsize)
    self.mean = zeros{dims}
    -- note: this is technically the co-standard-deviation.
    -- you can imagine the "s" standing for "sqrt" if you like.
    self.covars = make_covars(self.dims, self.sigma, self.covars)
    --self.log_sigma = log(self.sigma)
    --self.log_covars = zeros{dims, dims}
    --for i, v in ipairs(self.covars) do self.log_covars[i] = log(v) end
end
-- Access (and optionally overwrite) the distribution parameters.
-- new_mean: optional replacement mean; its values are copied in place.
-- new_covars: not yet supported (would need a determinant-1 check).
-- Returns the current mean vector.
function Xnes:params(new_mean, new_covars)
    if new_mean ~= nil then
        assert(#self.mean == #new_mean, "new parameters have the wrong size")
        for i = 1, #new_mean do self.mean[i] = new_mean[i] end
    end
    if new_covars ~= nil then
        -- TODO: assert determinant of new_covars is 1.
        error("TODO")
    end
    return self.mean
end
-- Draw one candidate from the current search distribution.
-- Returns the candidate parameters and the raw noise that produced them;
-- both output tables may be supplied by the caller for reuse.
function Xnes:ask_once(asked, noise)
    -- NOTE(review): zeros is called with a plain number here but with a
    -- shape table (zeros{dims}) elsewhere — presumably nn.zeros accepts
    -- both; confirm against nn.lua.
    asked = asked or zeros(self.dims)
    noise = noise or {}
    for i = 1, self.dims do noise[i] = normal() end
    noise.shape = {#noise}
    -- candidate = mean + sigma * (covars * noise)
    dot_mv(self.covars, noise, asked)
    for i = 1, self.dims do
        asked[i] = self.mean[i] + self.sigma * asked[i]
    end
    return asked, noise
end
-- Draw an antithetic pair of candidates: one sampled normally and one
-- mirrored through the mean (its noise is the negation of the first).
-- Returns both candidates and both noise vectors; all four output tables
-- may be supplied by the caller for reuse.
function Xnes:ask_twice(asked0, asked1, noise0, noise1)
    asked0 = asked0 or zeros(self.dims)
    asked1 = asked1 or zeros(self.dims)
    noise0 = noise0 or {}
    noise1 = noise1 or {}
    for i = 1, self.dims do noise0[i] = normal() end
    noise0.shape = {#noise0}
    dot_mv(self.covars, noise0, asked0)
    for i = 1, self.dims do
        local step = self.sigma * asked0[i]
        asked0[i] = self.mean[i] + step
        asked1[i] = self.mean[i] - step
        noise1[i] = -noise0[i]
    end
    return asked0, asked1, noise0, noise1
end
-- Produce a full population of candidates for the user to score and later
-- pass to :tell().
-- asked/noise: optional preallocated lists of popsize vectors to reuse.
-- Returns the candidate list and the noise list; the noise is also stashed
-- on self so :tell() can default to it.
function Xnes:ask(asked, noise)
    if asked == nil then
        asked = {}
        for i = 1, self.popsize do asked[i] = zeros(self.dims) end
    end
    if noise == nil then
        noise = {}
        for i = 1, self.popsize do noise[i] = zeros(self.dims) end
    end
    if self.antithetic then
        -- fill the population in mirrored pairs; popsize is always even
        -- here because :init doubles it when antithetic is enabled.
        -- BUGFIX: step by 2 — the previous stride of 1 paired each index
        -- with its successor, overwriting half the mirrored samples and
        -- overrunning to asked[popsize + 1].
        for i = 1, self.popsize, 2 do
            self:ask_twice(asked[i], asked[i + 1], noise[i], noise[i + 1])
        end
    else
        for i = 1, self.popsize do
            self:ask_once(asked[i], noise[i])
        end
    end
    self.noise = noise
    return asked, noise
end
-- Update the search distribution from the scores of the last population.
-- scored: one fitness value per candidate from :ask(); higher is better
-- (the sort below is descending, and utilities are assigned by rank).
-- noise: the noise vectors from :ask(); defaults to the stashed copy.
function Xnes:tell(scored, noise)
    local noise = noise or self.noise
    assert(noise, "missing noise argument")
    -- rank candidates from best to worst score.
    local arg = argsort(scored, function(a, b) return a > b end)
    -- natural gradient for the mean: utility-weighted sum of the noise.
    local g_delta = zeros{self.dims}
    for p=1, self.popsize do
        local noise_p = noise[arg[p]]
        for i=1, self.dims do
            g_delta[i] = g_delta[i] + self.utility[p] * noise_p[i]
        end
    end
    -- natural gradient for the covariance: utility-weighted sum of
    -- (z z^T - I) outer products. The trace is accumulated alongside so
    -- the scalar step-size component can be split off below.
    local g_covars = zeros{self.dims, self.dims}
    local traced = 0
    for p=1, self.popsize do
        local noise_p = noise[arg[p]]
        for i=1, self.dims do
            for j=1, self.dims do
                local ind = (i - 1) * self.dims + j
                local zzt = noise_p[i] * noise_p[j] - (i == j and 1 or 0)
                local temp = self.utility[p] * zzt
                g_covars[ind] = g_covars[ind] + temp
                traced = traced + temp
            end
        end
    end
    -- split the gradient into a scale part (g_sigma) and a trace-free
    -- shape part (g_sigma removed from the diagonal of g_covars).
    local g_sigma = traced / self.dims
    for i=1, self.dims do
        local ind = (i - 1) * self.dims + i
        g_covars[ind] = g_covars[ind] - g_sigma
    end
    -- finally, update according to the gradients.
    -- the mean moves by sigma * covars * g_delta (no learning rate here).
    local dotted = dot_mv(self.covars, g_delta)
    for i, v in ipairs(self.mean) do
        self.mean[i] = v + self.sigma * dotted[i]
    end
    --[[
    --self.log_sigma = self.log_sigma + self.learning_rate / 2 * g_sigma
    for i, v in ipairs(self.log_covars) do
    self.log_covars[i] = v + lr * g_covars[i]
    end
    --]]
    -- multiplicative (exponential-map) updates for sigma and the
    -- co-standard-deviation matrix.
    -- NOTE(review): applying exp() element-wise approximates the matrix
    -- exponential used in the xNES paper — presumably a deliberate
    -- simplification; confirm against the reference implementation.
    local lr = self.learning_rate * 0.5
    self.sigma = self.sigma * exp(lr * g_sigma)
    for i, v in ipairs(self.covars) do
        self.covars[i] = v * exp(lr * g_covars[i])
    end
    -- bookkeeping:
    --self.sigma = exp(self.log_sigma)
    --for i, v in ipairs(self.log_covars) do self.covars[i] = exp(v) end
    -- noise is single-use; force a fresh :ask() before the next :tell().
    self.noise = nil
end
-- module exports.
return {
    dot_mv = dot_mv,
    make_utility = make_utility,
    make_covars = make_covars,
    Xnes = Xnes,
}