This commit is contained in:
parent d232d81f5a
commit 389bde3cdb

2 changed files with 73 additions and 34 deletions

optim_nn.py
@@ -20,14 +20,14 @@ class Dummy:
 
 # Loss functions {{{1
 
-class SquaredHalved(Loss):
+class SquaredHalved(ResidualLoss):
     def f(self, r):
         return np.square(r) / 2
 
     def df(self, r):
         return r
 
-class SomethingElse(Loss):
+class SomethingElse(ResidualLoss):
     # generalizes Absolute and SquaredHalved
     # plot: https://www.desmos.com/calculator/fagjg9vuz7
     def __init__(self, a=4/3):
@@ -42,6 +42,8 @@ class SomethingElse(Loss):
     def df(self, r):
         return np.sign(r) * np.abs(r)**self.c
 
+# Nonparametric Layers {{{1
+
 # Parametric Layers {{{1
 
 class LayerNorm(Layer):
@@ -238,7 +240,7 @@ def multiresnet(x, width, depth, block=2, multi=1,
 
     return y
 
-# etc. {{{1
+# Toy Data {{{1
 
 inits = dict(he_normal=init_he_normal, he_uniform=init_he_uniform)
 activations = dict(sigmoid=Sigmoid, tanh=Tanh, relu=Relu, elu=Elu, gelu=GeluApprox)
@@ -317,6 +319,8 @@ def toy_data(train_samples, valid_samples, problem=2):
 
     return (inputs, outputs), (valid_inputs, valid_outputs)
 
+# Model Creation {{{1
+
 def model_from_config(config, input_features, output_features, callbacks):
     # Our Test Model
 
@@ -337,6 +341,7 @@ def model_from_config(config, input_features, output_features, callbacks):
 
     #
 
+    # FIXME: unused variable
     training = config.epochs > 0 and config.restarts >= 0
 
     if config.fn_load is not None:
@@ -427,7 +432,7 @@ def model_from_config(config, input_features, output_features, callbacks):
 
     return model, learner, ritual, (loss, mloss)
 
-# main {{{1
+# main program {{{1
 
 def run(program, args=[]):
 
@@ -527,8 +532,7 @@ def run(program, args=[]):
     def measure_error():
         def print_error(name, inputs, outputs, comparison=None):
             predicted = model.forward(inputs)
-            residual = predicted - outputs
-            err = ritual.measure(residual)
+            err = ritual.measure(predicted, outputs)
             log(name + " loss", "{:12.6e}".format(err))
             # TODO: print logarithmic difference as it might be more meaningful
             #       (fewer results stuck around -99%)
@@ -549,8 +553,6 @@ def run(program, args=[]):
 
     measure_error()
 
-    assert inputs.shape[0] % config.batch_size == 0, \
-        "inputs is not evenly divisible by batch_size" # TODO: lift this restriction
     ritual.prepare(model)
     while learner.next():
         indices = np.arange(inputs.shape[0])
@@ -587,7 +589,7 @@ def run(program, args=[]):
 
     return 0
 
-# do main {{{1
+# run main program {{{1
 
 if __name__ == '__main__':
     import sys
@@ -36,23 +36,40 @@ def init_he_uniform(size, ins, outs):
 # Loss functions {{{1
 
 class Loss:
-    per_batch = False
-
-    def mean(self, r):
-        return np.average(self.f(r))
-
-    def dmean(self, r):
-        d = self.df(r)
-        return d / len(d)
-
-class Squared(Loss):
+    pass
+
+class CategoricalCrossentropy(Loss):
+    # lifted from theano
+
+    def __init__(self, eps=1e-8):
+        self.eps = _f(eps)
+
+    def F(self, p, y):
+        # TODO: assert dimensionality and p > 0 (if not self.unsafe?)
+        p = np.clip(p, self.eps, 1 - self.eps)
+        f = np.sum(-y * np.log(p) - (1 - y) * np.log(1 - p), axis=-1)
+        return np.mean(f, axis=-1)
+
+    def dF(self, p, y):
+        p = np.clip(p, self.eps, 1 - self.eps)
+        df = (p - y) / (p * (1 - p))
+        return df / y.shape[-1]
+
+class ResidualLoss(Loss):
+    def F(self, p, y): # mean
+        return np.mean(self.f(p - y))
+
+    def dF(self, p, y): # dmean
+        return self.df(p - y) / y.shape[-1]
+
+class Squared(ResidualLoss):
     def f(self, r):
         return np.square(r)
 
     def df(self, r):
         return 2 * r
 
-class Absolute(Loss):
+class Absolute(ResidualLoss):
     def f(self, r):
         return np.abs(r)
 
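Note: the residual-style losses keep their `f`/`df` bodies; `ResidualLoss` adapts them to the new two-argument `F(p, y)`/`dF(p, y)` interface, which also lets `CategoricalCrossentropy` share the same API even though it is not a function of `p - y`. A minimal standalone sanity check, assuming only numpy (the class bodies mirror the hunk above, with `_f(eps)` swapped for a plain float so it runs on its own):

    import numpy as np

    class ResidualLoss:
        def F(self, p, y):   # mean loss over the residual
            return np.mean(self.f(p - y))

        def dF(self, p, y):  # backward signal, scaled by feature count
            return self.df(p - y) / y.shape[-1]

    class Squared(ResidualLoss):
        def f(self, r):  return np.square(r)
        def df(self, r): return 2 * r

    class CategoricalCrossentropy:
        def __init__(self, eps=1e-8):
            self.eps = float(eps)  # the module uses _f(eps); plain float here

        def F(self, p, y):
            p = np.clip(p, self.eps, 1 - self.eps)
            f = np.sum(-y * np.log(p) - (1 - y) * np.log(1 - p), axis=-1)
            return np.mean(f, axis=-1)

    p = np.array([[0.8, 0.1, 0.1],
                  [0.2, 0.7, 0.1]])
    y = np.array([[1.0, 0.0, 0.0],
                  [0.0, 1.0, 0.0]])

    print(np.isclose(Squared().F(p, y), np.mean(np.square(p - y))))  # True
    print(Squared().dF(p, y).shape)                                  # (2, 3), same shape as p
    print(CategoricalCrossentropy().F(p, y))                         # scalar, small when p tracks y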
@@ -301,14 +318,6 @@ class Layer:
 
 # Nonparametric Layers {{{1
 
-class Sum(Layer):
-    def multi(self, B):
-        return np.sum(B, axis=0)
-
-    def dmulti(self, dB):
-        #assert len(dB) == 1, "unimplemented"
-        return dB[0] # TODO: does this always work?
-
 class Input(Layer):
     def __init__(self, shape):
         assert shape is not None
@@ -336,6 +345,14 @@ class Affine(Layer):
     def dF(self, dY):
         return dY * self.a
 
+class Sum(Layer):
+    def multi(self, B):
+        return np.sum(B, axis=0)
+
+    def dmulti(self, dB):
+        #assert len(dB) == 1, "unimplemented"
+        return dB[0] # TODO: does this always work?
+
 class Sigmoid(Layer): # aka Logistic
     def F(self, X):
         self.sig = sigmoid(X)
@@ -387,6 +404,25 @@ class GeluApprox(Layer):
     def dF(self, dY):
         return dY * self.sig * (1 + self.a * (1 - self.sig))
 
+class Softmax(Layer):
+    # lifted from theano
+
+    def __init__(self, axis=-1):
+        super().__init__()
+        self.axis = int(axis)
+
+    def F(self, X):
+        alpha = np.max(X, axis=-1, keepdims=True)
+        num = np.exp(X - alpha)
+        den = np.sum(num, axis=-1, keepdims=True)
+        self.sm = num / den
+        return self.sm
+
+    def dF(self, dY):
+        dYsm = dY * self.sm
+        dX = dYsm - np.sum(dYsm, axis=-1, keepdims=True) * self.sm
+        return dX
+
 # Parametric Layers {{{1
 
 class Dense(Layer):
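Note: `Softmax.dF` above is the usual softmax vector-Jacobian product, dX = sm * (dY - sum(dY * sm, axis=-1)). A standalone finite-difference check, assuming only numpy (free functions stand in for the layer methods):

    import numpy as np

    def softmax(X):
        alpha = np.max(X, axis=-1, keepdims=True)   # subtract max for stability
        num = np.exp(X - alpha)
        return num / np.sum(num, axis=-1, keepdims=True)

    def softmax_dF(sm, dY):
        dYsm = dY * sm
        return dYsm - np.sum(dYsm, axis=-1, keepdims=True) * sm

    rng = np.random.RandomState(42)
    X = rng.randn(3, 5)
    w = rng.randn(3, 5)              # arbitrary upstream gradient dY
    sm = softmax(X)
    analytic = softmax_dF(sm, w)

    # numerical gradient of g(X) = sum(w * softmax(X))
    h = 1e-6
    numeric = np.zeros_like(X)
    for i in np.ndindex(X.shape):
        Xp, Xm = X.copy(), X.copy()
        Xp[i] += h
        Xm[i] -= h
        numeric[i] = (np.sum(w * softmax(Xp)) - np.sum(w * softmax(Xm))) / (2 * h)

    print(np.allclose(sm.sum(axis=-1), 1.0))        # rows sum to one
    print(np.allclose(analytic, numeric, atol=1e-5))  # backward matches finite differences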
@@ -560,17 +596,16 @@ class Ritual: # i'm just making up names at this point
     def reset(self):
         self.learner.reset(optim=True)
 
-    def measure(self, residual):
-        return self.mloss.mean(residual)
+    def measure(self, p, y):
+        return self.mloss.F(p, y)
 
-    def derive(self, residual):
-        return self.loss.dmean(residual)
+    def derive(self, p, y):
+        return self.loss.dF(p, y)
 
     def learn(self, inputs, outputs):
         predicted = self.model.forward(inputs)
-        residual = predicted - outputs
-        self.model.backward(self.derive(residual))
-        return residual
+        self.model.backward(self.derive(predicted, outputs))
+        return predicted
 
     def update(self):
         self.learner.optim.update(self.model.dW, self.model.W)
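Note: with losses taking (predictions, targets), `learn` now returns predictions instead of residuals, and `measure`/`derive` forward straight to `Loss.F`/`Loss.dF`. A rough sketch of the new call flow; `StubModel` and `StubLoss` are illustrative stand-ins, not part of the codebase:

    import numpy as np

    class StubLoss:  # same F/dF shape as ResidualLoss above
        def F(self, p, y):  return np.mean(np.square(p - y))
        def dF(self, p, y): return 2 * (p - y) / y.shape[-1]

    class StubModel:
        def forward(self, x):   return 0.5 * x
        def backward(self, dY): self.dW = dY  # stand-in for real backprop

    loss, model = StubLoss(), StubModel()
    x, y = np.ones((4, 2)), np.zeros((4, 2))

    # old flow: residual = forward(x) - y; backward(loss.df(residual) / len(residual))
    # new flow: everything is phrased in terms of (predicted, targets)
    predicted = model.forward(x)
    model.backward(loss.dF(predicted, y))
    batch_loss = loss.F(predicted, y)
    print(batch_loss)  # 0.25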
@@ -585,6 +620,8 @@ class Ritual: # i'm just making up names at this point
         cumsum_loss = _0
         batch_count = inputs.shape[0] // batch_size
         losses = []
+        assert inputs.shape[0] % batch_size == 0, \
+            "inputs is not evenly divisible by batch_size" # TODO: lift this restriction
         for b in range(batch_count):
             self.bn += 1
             bi = b * batch_size
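Note: this commit moves the divisibility check from run() into Ritual's batched training loop, so until the TODO is lifted, callers must supply a sample count that is a multiple of batch_size. A hypothetical helper a caller could use to satisfy it (`trim_to_batches` is not part of this commit):

    import numpy as np

    def trim_to_batches(inputs, outputs, batch_size):
        # drop the trailing remainder so the divisibility assert holds
        n = (inputs.shape[0] // batch_size) * batch_size
        return inputs[:n], outputs[:n]

    x = np.zeros((103, 8))
    y = np.zeros((103, 1))
    x, y = trim_to_batches(x, y, batch_size=20)
    print(x.shape)               # (100, 8)
    print(x.shape[0] % 20 == 0)  # True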
@@ -594,10 +631,10 @@ class Ritual: # i'm just making up names at this point
             if self.learner.per_batch:
                 self.learner.batch(b / batch_count)
 
-            residual = self.learn(batch_inputs, batch_outputs)
+            predicted = self.learn(batch_inputs, batch_outputs)
             self.update()
 
-            batch_loss = self.measure(residual)
+            batch_loss = self.measure(predicted, batch_outputs)
             if np.isnan(batch_loss):
                 raise Exception("nan")
             cumsum_loss += batch_loss