Connor Olding 2017-01-10 14:33:12 -08:00
parent 5b2a293b8c
commit cf9010d25f


@@ -195,12 +195,10 @@ class Layer:
     def forward(self, lut):
         assert len(self.parents) > 0, self
-        #print(" forwarding", self)
         B = []
         for parent in self.parents:
             # TODO: skip over irrelevant nodes (if any)
             X = lut[parent]
-            #print("collected parent", parent)
             self.validate_input(X)
             B.append(X)
         Y = self.multi(B)
@@ -209,12 +207,10 @@ class Layer:
     def backward(self, lut):
         assert len(self.children) > 0, self
-        #print(" backwarding", self)
         dB = []
         for child in self.children:
             # TODO: skip over irrelevant nodes (if any)
             dY = lut[child]
-            #print(" collected child", child)
             self.validate_output(dY)
             dB.append(dY)
         dX = self.dmulti(dB)
@ -346,11 +342,7 @@ class Model:
assert isinstance(y, Layer), y assert isinstance(y, Layer), y
self.x = x self.x = x
self.y = y self.y = y
self.ordered_nodes = self.traverse([], self.y) self.ordered_nodes = self.traverse([], self.y)
node_names = ' '.join([str(node) for node in self.ordered_nodes])
print('{} nodes: {}'.format(len(self.ordered_nodes), node_names))
self.make_weights() self.make_weights()
def make_weights(self): def make_weights(self):
@@ -358,7 +350,6 @@ class Model:
         for node in self.ordered_nodes:
             if node.size is not None:
                 self.param_count += node.size
-        print(self.param_count)
         self.W = np.zeros(self.param_count, dtype=nf)
         self.dW = np.zeros(self.param_count, dtype=nf)
@@ -369,8 +360,6 @@ class Model:
             node.init(self.W[offset:end], self.dW[offset:end])
             offset += node.size
-        #print(self.W, self.dW)

     def traverse(self, nodes, node):
         if node == x:
             return [node]
@@ -425,6 +414,11 @@ class Model:
         raise NotImplementedError("unimplemented", self)

 if __name__ == '__main__':
+    import sys
+    lament = lambda *args, **kwargs: print(*args, file=sys.stderr, **kwargs)
+    def log(left, right):
+        lament("{:>20}: {}".format(left, right))
+
     # Config

     from dotmap import DotMap
@ -446,7 +440,7 @@ if __name__ == '__main__':
nesterov = False, # only used with SGD or Adam nesterov = False, # only used with SGD or Adam
momentum = 0.33, # only used with SGD momentum = 0.33, # only used with SGD
# learning parameters: SGD with restarts # learning parameters: SGD with restarts (kinda)
LR = 1e-2, LR = 1e-2,
epochs = 6, epochs = 6,
LR_halve_every = 2, LR_halve_every = 2,
@@ -510,6 +504,10 @@ if __name__ == '__main__':
     model = Model(x, y)

+    node_names = ' '.join([str(node) for node in model.ordered_nodes])
+    log('{} nodes'.format(len(model.ordered_nodes)), node_names)
+    log('parameters', model.param_count)

     training = config.epochs > 0 and config.restarts >= 0
     if not training:
@@ -536,26 +534,31 @@ if __name__ == '__main__':
     LR = config.LR
     LRprod = 0.5**(1/config.LR_halve_every)
+    LRE = LR * (LRprod**config.LR_restart_advance)**config.restarts * LRprod**(config.epochs - 1)
+    log("final learning rate", "{:10.8f}".format(LRE))

     # Training

     def measure_loss():
         predicted = model.forward(inputs / x_scale)
         residual = predicted - outputs / y_scale
         err = loss.mean(residual)
-        print("train loss: {:11.7f}".format(err))
-        print("improvement: {:+7.2f}%".format((0.0007031 / err - 1) * 100))
+        log("train loss", "{:11.7f}".format(err))
+        log("improvement", "{:+7.2f}%".format((0.0007031 / err - 1) * 100))

         predicted = model.forward(valid_inputs / x_scale)
         residual = predicted - valid_outputs / y_scale
         err = loss.mean(residual)
-        print("valid loss: {:11.7f}".format(err))
-        print("improvement: {:+7.2f}%".format((0.0007159 / err - 1) * 100))
+        log("valid loss", "{:11.7f}".format(err))
+        log("improvement", "{:+7.2f}%".format((0.0007159 / err - 1) * 100))

     for i in range(config.restarts + 1):
         measure_loss()

         if i > 0:
-            print("restarting")
+            log("restarting", i)
+            LR *= LRprod**config.LR_restart_advance
+            #optim.reset()

         assert inputs.shape[0] % config.batch_size == 0, \
             "inputs is not evenly divisible by batch_size" # TODO: lift this restriction
@@ -567,6 +570,7 @@ if __name__ == '__main__':
             shuffled_outputs = outputs[indices] / y_scale

             optim.alpha = LR * LRprod**e
+            #log("learning rate", "{:10.8f}".format(optim.alpha))

             cumsum_loss = 0
             for b in range(batch_count):
@ -581,9 +585,7 @@ if __name__ == '__main__':
# note: we don't actually need this for training, only monitoring. # note: we don't actually need this for training, only monitoring.
cumsum_loss += loss.mean(residual) cumsum_loss += loss.mean(residual)
print("avg loss: {:10.6f}".format(cumsum_loss / batch_count)) log("average loss", "{:11.7f}".format(cumsum_loss / batch_count))
LR *= LRprod**config.LR_restart_advance
measure_loss() measure_loss()
@@ -596,5 +598,5 @@ if __name__ == '__main__':
     b = (64, 128, 192)
     X = np.expand_dims(np.hstack((a, b)), 0) / x_scale
     P = model.forward(X) * y_scale
-    print("truth:", rgbcompare(a, b))
-    print("network:", np.squeeze(P))
+    log("truth", rgbcompare(a, b))
+    log("network", np.squeeze(P))