Connor Olding 2017-02-15 20:18:53 -08:00
parent d232d81f5a
commit 389bde3cdb
2 changed files with 73 additions and 34 deletions

View file

@@ -20,14 +20,14 @@ class Dummy:
 
 # Loss functions {{{1
 
-class SquaredHalved(Loss):
+class SquaredHalved(ResidualLoss):
     def f(self, r):
         return np.square(r) / 2
 
     def df(self, r):
         return r
 
-class SomethingElse(Loss):
+class SomethingElse(ResidualLoss):
     # generalizes Absolute and SquaredHalved
     # plot: https://www.desmos.com/calculator/fagjg9vuz7
     def __init__(self, a=4/3):
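For orientation: this commit replaces the residual-only mean/dmean interface on Loss with F(p, y)/dF(p, y), and reparents losses that really are functions of the residual onto the new ResidualLoss base defined in the second file below. A minimal sketch of the resulting interface, using only class bodies that appear in this diff:

import numpy as np

class Loss:
    pass

class ResidualLoss(Loss):
    # r = p - y; subclasses only define f/df on the residual
    def F(self, p, y):   # mean loss over the batch
        return np.mean(self.f(p - y))

    def dF(self, p, y):  # gradient of the mean
        return self.df(p - y) / y.shape[-1]

class SquaredHalved(ResidualLoss):
    def f(self, r):
        return np.square(r) / 2

    def df(self, r):
        return r

loss = SquaredHalved()
p, y = np.array([[0.2, 0.9]]), np.array([[0.0, 1.0]])
print(loss.F(p, y))   # scalar: mean of (p - y)**2 / 2
print(loss.dF(p, y))  # per-element gradient, scaled by 1/features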
@@ -42,6 +42,8 @@ class SomethingElse(Loss):
     def df(self, r):
         return np.sign(r) * np.abs(r)**self.c
 
 # Nonparametric Layers {{{1
 
+# Parametric Layers {{{1
+
 class LayerNorm(Layer):
@@ -238,7 +240,7 @@ def multiresnet(x, width, depth, block=2, multi=1,
 
     return y
 
-# etc. {{{1
+# Toy Data {{{1
 
 inits = dict(he_normal=init_he_normal, he_uniform=init_he_uniform)
 activations = dict(sigmoid=Sigmoid, tanh=Tanh, relu=Relu, elu=Elu, gelu=GeluApprox)
@@ -317,6 +319,8 @@ def toy_data(train_samples, valid_samples, problem=2):
 
     return (inputs, outputs), (valid_inputs, valid_outputs)
 
+# Model Creation {{{1
+
 def model_from_config(config, input_features, output_features, callbacks):
     # Our Test Model
 
@@ -337,6 +341,7 @@ def model_from_config(config, input_features, output_features, callbacks):
 
     #
 
+    # FIXME: unused variable
     training = config.epochs > 0 and config.restarts >= 0
 
     if config.fn_load is not None:
@@ -427,7 +432,7 @@ def model_from_config(config, input_features, output_features, callbacks):
 
     return model, learner, ritual, (loss, mloss)
 
-# main {{{1
+# main program {{{1
 
 def run(program, args=[]):
 
@@ -527,8 +532,7 @@ def run(program, args=[]):
     def measure_error():
         def print_error(name, inputs, outputs, comparison=None):
             predicted = model.forward(inputs)
-            residual = predicted - outputs
-            err = ritual.measure(residual)
+            err = ritual.measure(predicted, outputs)
             log(name + " loss", "{:12.6e}".format(err))
             # TODO: print logarithmic difference as it might be more meaningful
             # (fewer results stuck around -99%)
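The two-argument measure matters because a precomputed residual p - y loses information that non-residual losses need; crossentropy, added to the core file below, is the motivating case. A sketch of the distinction:

import numpy as np

# residual losses only ever see p - y ...
def squared_mean(p, y):
    return np.mean(np.square(p - y))

# ... but crossentropy needs p and y separately, so measure(p, y) is
# the more general signature (eps clipping as in the diff below)
def crossentropy_mean(p, y, eps=1e-8):
    p = np.clip(p, eps, 1 - eps)
    f = np.sum(-y * np.log(p) - (1 - y) * np.log(1 - p), axis=-1)
    return np.mean(f, axis=-1)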
@@ -549,8 +553,6 @@ def run(program, args=[]):
     measure_error()
 
-    assert inputs.shape[0] % config.batch_size == 0, \
-        "inputs is not evenly divisible by batch_size" # TODO: lift this restriction
 
     ritual.prepare(model)
 
     while learner.next():
         indices = np.arange(inputs.shape[0])
@@ -587,7 +589,7 @@ def run(program, args=[]):
 
     return 0
 
-# do main {{{1
+# run main program {{{1
 
 if __name__ == '__main__':
     import sys

View file

@@ -36,23 +36,40 @@ def init_he_uniform(size, ins, outs):
 
 # Loss functions {{{1
 
 class Loss:
-    per_batch = False
-
-    def mean(self, r):
-        return np.average(self.f(r))
-
-    def dmean(self, r):
-        d = self.df(r)
-        return d / len(d)
+    pass
 
-class Squared(Loss):
+class CategoricalCrossentropy(Loss):
+    # lifted from theano
+
+    def __init__(self, eps=1e-8):
+        self.eps = _f(eps)
+
+    def F(self, p, y):
+        # TODO: assert dimensionality and p > 0 (if not self.unsafe?)
+        p = np.clip(p, self.eps, 1 - self.eps)
+        f = np.sum(-y * np.log(p) - (1 - y) * np.log(1 - p), axis=-1)
+        return np.mean(f, axis=-1)
+
+    def dF(self, p, y):
+        p = np.clip(p, self.eps, 1 - self.eps)
+        df = (p - y) / (p * (1 - p))
+        return df / y.shape[-1]
+
+class ResidualLoss(Loss):
+    def F(self, p, y): # mean
+        return np.mean(self.f(p - y))
+
+    def dF(self, p, y): # dmean
+        return self.df(p - y) / y.shape[-1]
+
+class Squared(ResidualLoss):
     def f(self, r):
         return np.square(r)
 
     def df(self, r):
         return 2 * r
 
-class Absolute(Loss):
+class Absolute(ResidualLoss):
     def f(self, r):
         return np.abs(r)
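One detail worth spelling out: the eps clipping in CategoricalCrossentropy guards against log(0) when predictions saturate. A small standalone check (np.float32 stands in for the library's _f cast):

import numpy as np

eps = np.float32(1e-8)
p = np.array([[0.0, 1.0]])  # fully saturated predictions
y = np.array([[0.0, 1.0]])

# unclipped, -y*log(p) - (1-y)*log(1-p) evaluates 0 * log(0) -> nan
p_safe = np.clip(p, eps, 1 - eps)
f = np.sum(-y * np.log(p_safe) - (1 - y) * np.log(1 - p_safe), axis=-1)
print(f)  # finite and tiny (~2e-8) instead of nan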
@@ -301,14 +318,6 @@ class Layer:
 
 # Nonparametric Layers {{{1
 
-class Sum(Layer):
-    def multi(self, B):
-        return np.sum(B, axis=0)
-
-    def dmulti(self, dB):
-        #assert len(dB) == 1, "unimplemented"
-        return dB[0] # TODO: does this always work?
-
 class Input(Layer):
     def __init__(self, shape):
         assert shape is not None
@@ -336,6 +345,14 @@ class Affine(Layer):
     def dF(self, dY):
         return dY * self.a
 
+class Sum(Layer):
+    def multi(self, B):
+        return np.sum(B, axis=0)
+
+    def dmulti(self, dB):
+        #assert len(dB) == 1, "unimplemented"
+        return dB[0] # TODO: does this always work?
+
 class Sigmoid(Layer): # aka Logistic
     def F(self, X):
         self.sig = sigmoid(X)
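On the TODO in Sum.dmulti: the derivative of an elementwise sum with respect to each addend is the identity, so whatever upstream gradient arrives should pass through unchanged, and several upstream gradients should add; the commented-out assert suggests dB[0] is relied on only in the single-element case. A framework-independent sketch of both directions:

import numpy as np

def sum_forward(B):                 # B: list of equal-shape arrays
    return np.sum(B, axis=0)

def sum_backward(dY, n_inputs):
    # gradient of a sum w.r.t. each addend is the upstream gradient
    return [dY] * n_inputs

def merge_upstream(dB):
    # several consumers: gradients add; with one consumer this is dB[0]
    return np.sum(dB, axis=0)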
@@ -387,6 +404,25 @@ class GeluApprox(Layer):
     def dF(self, dY):
         return dY * self.sig * (1 + self.a * (1 - self.sig))
 
+class Softmax(Layer):
+    # lifted from theano
+
+    def __init__(self, axis=-1):
+        super().__init__()
+        self.axis = int(axis)
+
+    def F(self, X):
+        alpha = np.max(X, axis=-1, keepdims=True)
+        num = np.exp(X - alpha)
+        den = np.sum(num, axis=-1, keepdims=True)
+        self.sm = num / den
+        return self.sm
+
+    def dF(self, dY):
+        dYsm = dY * self.sm
+        dX = dYsm - np.sum(dYsm, axis=-1, keepdims=True) * self.sm
+        return dX
+
 # Parametric Layers {{{1
 
 class Dense(Layer):
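The max subtraction in F is the standard overflow guard: exp(X - alpha) keeps the largest exponent at zero, and alpha cancels in the ratio. The dF line is the softmax Jacobian product dX = sm * dY - sum(sm * dY) * sm, which this standalone check confirms against the explicit Jacobian:

import numpy as np

x = np.random.randn(5)
sm = np.exp(x - x.max())
sm /= sm.sum()

# explicit Jacobian of softmax: J[i, j] = sm[i] * (delta_ij - sm[j])
J = np.diag(sm) - np.outer(sm, sm)

dY = np.random.randn(5)
dX_layer = dY * sm - np.sum(dY * sm) * sm  # the dF above, rearranged
print(np.allclose(J @ dY, dX_layer))       # True: identical vectors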
@@ -560,17 +596,16 @@ class Ritual: # i'm just making up names at this point
     def reset(self):
         self.learner.reset(optim=True)
 
-    def measure(self, residual):
-        return self.mloss.mean(residual)
+    def measure(self, p, y):
+        return self.mloss.F(p, y)
 
-    def derive(self, residual):
-        return self.loss.dmean(residual)
+    def derive(self, p, y):
+        return self.loss.dF(p, y)
 
     def learn(self, inputs, outputs):
         predicted = self.model.forward(inputs)
-        residual = predicted - outputs
-        self.model.backward(self.derive(residual))
-        return residual
+        self.model.backward(self.derive(predicted, outputs))
+        return predicted
 
     def update(self):
         self.learner.optim.update(self.model.dW, self.model.W)
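Note the asymmetry the refactor preserves: derive backs through self.loss (the training objective) while measure reports self.mloss (the metric), so the two can differ. A hedged usage sketch; the Ritual constructor arguments here are assumed for illustration, not taken from the diff:

# assumed constructor: train on a smooth loss, report a robust metric
ritual = Ritual(learner, loss=SquaredHalved(), mloss=Absolute())

predicted = ritual.learn(batch_inputs, batch_outputs)  # forward + backward
ritual.update()                                        # optimizer step
print(ritual.measure(predicted, batch_outputs))        # mean absolute error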
@@ -585,6 +620,8 @@ class Ritual: # i'm just making up names at this point
         cumsum_loss = _0
         batch_count = inputs.shape[0] // batch_size
         losses = []
+        assert inputs.shape[0] % batch_size == 0, \
+            "inputs is not evenly divisible by batch_size" # TODO: lift this restriction
         for b in range(batch_count):
             self.bn += 1
             bi = b * batch_size
@@ -594,10 +631,10 @@ class Ritual: # i'm just making up names at this point
             if self.learner.per_batch:
                 self.learner.batch(b / batch_count)
 
-            residual = self.learn(batch_inputs, batch_outputs)
+            predicted = self.learn(batch_inputs, batch_outputs)
             self.update()
 
-            batch_loss = self.measure(residual)
+            batch_loss = self.measure(predicted, batch_outputs)
             if np.isnan(batch_loss):
                 raise Exception("nan")
             cumsum_loss += batch_loss
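Until the divisibility TODO is lifted, callers have to size their data to a multiple of batch_size; a simple hedged workaround is to trim (or pad) the ragged tail before training:

import numpy as np

def trim_to_batches(inputs, outputs, batch_size):
    # drop the tail so inputs.shape[0] % batch_size == 0
    n = inputs.shape[0] - inputs.shape[0] % batch_size
    return inputs[:n], outputs[:n]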