Connor Olding 2017-02-15 20:18:53 -08:00
parent d232d81f5a
commit 389bde3cdb
2 changed files with 73 additions and 34 deletions

View file

@@ -20,14 +20,14 @@ class Dummy:
 
 # Loss functions {{{1
 
-class SquaredHalved(Loss):
+class SquaredHalved(ResidualLoss):
     def f(self, r):
         return np.square(r) / 2
 
     def df(self, r):
         return r
 
-class SomethingElse(Loss):
+class SomethingElse(ResidualLoss):
     # generalizes Absolute and SquaredHalved
     # plot: https://www.desmos.com/calculator/fagjg9vuz7
     def __init__(self, a=4/3):
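For orientation: this commit replaces the residual-only mean/dmean interface on Loss with F(p, y)/dF(p, y), and reparents losses that really are functions of the residual onto the new ResidualLoss base defined in the second file below. A minimal sketch of the resulting interface, using only class bodies that appear in this diff:

import numpy as np

class Loss:
    pass

class ResidualLoss(Loss):
    # r = p - y; subclasses only define f/df on the residual
    def F(self, p, y):   # mean loss over the batch
        return np.mean(self.f(p - y))

    def dF(self, p, y):  # gradient of the mean
        return self.df(p - y) / y.shape[-1]

class SquaredHalved(ResidualLoss):
    def f(self, r):
        return np.square(r) / 2

    def df(self, r):
        return r

loss = SquaredHalved()
p, y = np.array([[0.2, 0.9]]), np.array([[0.0, 1.0]])
print(loss.F(p, y))   # scalar: mean of (p - y)**2 / 2
print(loss.dF(p, y))  # per-element gradient, scaled by 1/features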
@@ -42,6 +42,8 @@ class SomethingElse(Loss):
     def df(self, r):
         return np.sign(r) * np.abs(r)**self.c
 
 # Nonparametric Layers {{{1
 
+# Parametric Layers {{{1
+
 class LayerNorm(Layer):
@@ -238,7 +240,7 @@ def multiresnet(x, width, depth, block=2, multi=1,
 
     return y
 
-# etc. {{{1
+# Toy Data {{{1
 
 inits = dict(he_normal=init_he_normal, he_uniform=init_he_uniform)
 activations = dict(sigmoid=Sigmoid, tanh=Tanh, relu=Relu, elu=Elu, gelu=GeluApprox)
@@ -317,6 +319,8 @@ def toy_data(train_samples, valid_samples, problem=2):
 
     return (inputs, outputs), (valid_inputs, valid_outputs)
 
+# Model Creation {{{1
+
 def model_from_config(config, input_features, output_features, callbacks):
     # Our Test Model
 
@@ -337,6 +341,7 @@ def model_from_config(config, input_features, output_features, callbacks):
 
     #
 
+    # FIXME: unused variable
     training = config.epochs > 0 and config.restarts >= 0
 
     if config.fn_load is not None:
@@ -427,7 +432,7 @@ def model_from_config(config, input_features, output_features, callbacks):
 
     return model, learner, ritual, (loss, mloss)
 
-# main {{{1
+# main program {{{1
 
 def run(program, args=[]):
 
@@ -527,8 +532,7 @@ def run(program, args=[]):
     def measure_error():
         def print_error(name, inputs, outputs, comparison=None):
             predicted = model.forward(inputs)
-            residual = predicted - outputs
-            err = ritual.measure(residual)
+            err = ritual.measure(predicted, outputs)
             log(name + " loss", "{:12.6e}".format(err))
             # TODO: print logarithmic difference as it might be more meaningful
             # (fewer results stuck around -99%)
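The two-argument measure matters because a precomputed residual p - y loses information that non-residual losses need; crossentropy, added to the core file below, is the motivating case. A sketch of the distinction:

import numpy as np

# residual losses only ever see p - y ...
def squared_mean(p, y):
    return np.mean(np.square(p - y))

# ... but crossentropy needs p and y separately, so measure(p, y) is
# the more general signature (eps clipping as in the diff below)
def crossentropy_mean(p, y, eps=1e-8):
    p = np.clip(p, eps, 1 - eps)
    f = np.sum(-y * np.log(p) - (1 - y) * np.log(1 - p), axis=-1)
    return np.mean(f, axis=-1)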
@@ -549,8 +553,6 @@ def run(program, args=[]):
     measure_error()
 
-    assert inputs.shape[0] % config.batch_size == 0, \
-        "inputs is not evenly divisible by batch_size" # TODO: lift this restriction
 
     ritual.prepare(model)
 
     while learner.next():
         indices = np.arange(inputs.shape[0])
@@ -587,7 +589,7 @@ def run(program, args=[]):
 
     return 0
 
-# do main {{{1
+# run main program {{{1
 
 if __name__ == '__main__':
     import sys

View file

@@ -36,23 +36,40 @@ def init_he_uniform(size, ins, outs):
 
 # Loss functions {{{1
 
 class Loss:
-    per_batch = False
-
-    def mean(self, r):
-        return np.average(self.f(r))
-
-    def dmean(self, r):
-        d = self.df(r)
-        return d / len(d)
+    pass
 
-class Squared(Loss):
+class CategoricalCrossentropy(Loss):
+    # lifted from theano
+
+    def __init__(self, eps=1e-8):
+        self.eps = _f(eps)
+
+    def F(self, p, y):
+        # TODO: assert dimensionality and p > 0 (if not self.unsafe?)
+        p = np.clip(p, self.eps, 1 - self.eps)
+        f = np.sum(-y * np.log(p) - (1 - y) * np.log(1 - p), axis=-1)
+        return np.mean(f, axis=-1)
+
+    def dF(self, p, y):
+        p = np.clip(p, self.eps, 1 - self.eps)
+        df = (p - y) / (p * (1 - p))
+        return df / y.shape[-1]
+
+class ResidualLoss(Loss):
+    def F(self, p, y): # mean
+        return np.mean(self.f(p - y))
+
+    def dF(self, p, y): # dmean
+        return self.df(p - y) / y.shape[-1]
+
+class Squared(ResidualLoss):
     def f(self, r):
         return np.square(r)
 
     def df(self, r):
         return 2 * r
 
-class Absolute(Loss):
+class Absolute(ResidualLoss):
     def f(self, r):
         return np.abs(r)
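One detail worth spelling out: the eps clipping in CategoricalCrossentropy guards against log(0) when predictions saturate. A small standalone check (np.float32 stands in for the library's _f cast):

import numpy as np

eps = np.float32(1e-8)
p = np.array([[0.0, 1.0]])  # fully saturated predictions
y = np.array([[0.0, 1.0]])

# unclipped, -y*log(p) - (1-y)*log(1-p) evaluates 0 * log(0) -> nan
p_safe = np.clip(p, eps, 1 - eps)
f = np.sum(-y * np.log(p_safe) - (1 - y) * np.log(1 - p_safe), axis=-1)
print(f)  # finite and tiny (~2e-8) instead of nan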
@@ -301,14 +318,6 @@ class Layer:
 
 # Nonparametric Layers {{{1
 
-class Sum(Layer):
-    def multi(self, B):
-        return np.sum(B, axis=0)
-
-    def dmulti(self, dB):
-        #assert len(dB) == 1, "unimplemented"
-        return dB[0] # TODO: does this always work?
-
 class Input(Layer):
     def __init__(self, shape):
         assert shape is not None
@@ -336,6 +345,14 @@ class Affine(Layer):
     def dF(self, dY):
         return dY * self.a
 
+class Sum(Layer):
+    def multi(self, B):
+        return np.sum(B, axis=0)
+
+    def dmulti(self, dB):
+        #assert len(dB) == 1, "unimplemented"
+        return dB[0] # TODO: does this always work?
+
 class Sigmoid(Layer): # aka Logistic
     def F(self, X):
         self.sig = sigmoid(X)
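On the TODO in Sum.dmulti: the derivative of an elementwise sum with respect to each addend is the identity, so whatever upstream gradient arrives should pass through unchanged, and several upstream gradients should add; the commented-out assert suggests dB[0] is relied on only in the single-element case. A framework-independent sketch of both directions:

import numpy as np

def sum_forward(B):                 # B: list of equal-shape arrays
    return np.sum(B, axis=0)

def sum_backward(dY, n_inputs):
    # gradient of a sum w.r.t. each addend is the upstream gradient
    return [dY] * n_inputs

def merge_upstream(dB):
    # several consumers: gradients add; with one consumer this is dB[0]
    return np.sum(dB, axis=0)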
@@ -387,6 +404,25 @@ class GeluApprox(Layer):
     def dF(self, dY):
         return dY * self.sig * (1 + self.a * (1 - self.sig))
 
+class Softmax(Layer):
+    # lifted from theano
+
+    def __init__(self, axis=-1):
+        super().__init__()
+        self.axis = int(axis)
+
+    def F(self, X):
+        alpha = np.max(X, axis=-1, keepdims=True)
+        num = np.exp(X - alpha)
+        den = np.sum(num, axis=-1, keepdims=True)
+        self.sm = num / den
+        return self.sm
+
+    def dF(self, dY):
+        dYsm = dY * self.sm
+        dX = dYsm - np.sum(dYsm, axis=-1, keepdims=True) * self.sm
+        return dX
+
 # Parametric Layers {{{1
 
 class Dense(Layer):
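The max subtraction in F is the standard overflow guard: exp(X - alpha) keeps the largest exponent at zero, and alpha cancels in the ratio. The dF line is the softmax Jacobian product dX = sm * dY - sum(sm * dY) * sm, which this standalone check confirms against the explicit Jacobian:

import numpy as np

x = np.random.randn(5)
sm = np.exp(x - x.max())
sm /= sm.sum()

# explicit Jacobian of softmax: J[i, j] = sm[i] * (delta_ij - sm[j])
J = np.diag(sm) - np.outer(sm, sm)

dY = np.random.randn(5)
dX_layer = dY * sm - np.sum(dY * sm) * sm  # the dF above, rearranged
print(np.allclose(J @ dY, dX_layer))       # True: identical vectors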
@@ -560,17 +596,16 @@ class Ritual: # i'm just making up names at this point
     def reset(self):
         self.learner.reset(optim=True)
 
-    def measure(self, residual):
-        return self.mloss.mean(residual)
+    def measure(self, p, y):
+        return self.mloss.F(p, y)
 
-    def derive(self, residual):
-        return self.loss.dmean(residual)
+    def derive(self, p, y):
+        return self.loss.dF(p, y)
 
     def learn(self, inputs, outputs):
         predicted = self.model.forward(inputs)
-        residual = predicted - outputs
-        self.model.backward(self.derive(residual))
-        return residual
+        self.model.backward(self.derive(predicted, outputs))
+        return predicted
 
     def update(self):
         self.learner.optim.update(self.model.dW, self.model.W)
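Note the asymmetry the refactor preserves: derive backs through self.loss (the training objective) while measure reports self.mloss (the metric), so the two can differ. A hedged usage sketch; the Ritual constructor arguments here are assumed for illustration, not taken from the diff:

# assumed constructor: train on a smooth loss, report a robust metric
ritual = Ritual(learner, loss=SquaredHalved(), mloss=Absolute())

predicted = ritual.learn(batch_inputs, batch_outputs)  # forward + backward
ritual.update()                                        # optimizer step
print(ritual.measure(predicted, batch_outputs))        # mean absolute error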
@@ -585,6 +620,8 @@ class Ritual: # i'm just making up names at this point
         cumsum_loss = _0
         batch_count = inputs.shape[0] // batch_size
         losses = []
+        assert inputs.shape[0] % batch_size == 0, \
+            "inputs is not evenly divisible by batch_size" # TODO: lift this restriction
         for b in range(batch_count):
             self.bn += 1
             bi = b * batch_size
@@ -594,10 +631,10 @@ class Ritual: # i'm just making up names at this point
             if self.learner.per_batch:
                 self.learner.batch(b / batch_count)
 
-            residual = self.learn(batch_inputs, batch_outputs)
+            predicted = self.learn(batch_inputs, batch_outputs)
             self.update()
 
-            batch_loss = self.measure(residual)
+            batch_loss = self.measure(predicted, batch_outputs)
             if np.isnan(batch_loss):
                 raise Exception("nan")
             cumsum_loss += batch_loss
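Until the divisibility TODO is lifted, callers have to size their data to a multiple of batch_size; a simple hedged workaround is to trim (or pad) the ragged tail before training:

import numpy as np

def trim_to_batches(inputs, outputs, batch_size):
    # drop the tail so inputs.shape[0] % batch_size == 0
    n = inputs.shape[0] - inputs.shape[0] % batch_size
    return inputs[:n], outputs[:n]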