From 9138f73141d5daf01bfcfad2477faa1fac4f6f19 Mon Sep 17 00:00:00 2001
From: Connor Olding
Date: Thu, 3 Aug 2017 03:35:02 +0000
Subject: [PATCH] update mnist training

crank up the learning rate on emnist and use momentum with gradient
clipping. add a simple restart callback. remove batch size adaptation
crap. remove confidence measures.
---
 onn_mnist.py | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/onn_mnist.py b/onn_mnist.py
index 15e094e..0aeac1b 100755
--- a/onn_mnist.py
+++ b/onn_mnist.py
@@ -7,16 +7,16 @@ from dotmap import DotMap
 lower_priority()
 np.random.seed(42069)
 
-use_emnist = False
-
 measure_every_epoch = True
+target_boost = lambda y: y
+
+use_emnist = False
 
 if use_emnist:
-    lr = 0.005
+    lr = 1.0
     epochs = 48
     starts = 2
     bs = 400
-    lr *= np.sqrt(bs)
 
     learner_class = SGDR
     restart_decay = 0.5
@@ -28,7 +28,8 @@ if use_emnist:
     output_activation = Softmax
     normalize = True
 
-    optim = Adam()
+    optim = MomentumClip(mu=0.7, nesterov=True)
+    restart_optim = False
 
     reg = None # L1L2(2.0e-5, 1.0e-4)
     final_reg = None # L1L2(2.0e-5, 1.0e-4)
@@ -44,11 +45,10 @@ if use_emnist:
 
     mnist_classes = 47
 else:
-    lr = 0.0005
+    lr = 0.01
     epochs = 60
     starts = 3
     bs = 500
-    lr *= np.sqrt(bs)
 
     learner_class = SGDR
     restart_decay = 0.5
@@ -61,6 +61,7 @@ else:
     normalize = True
 
     optim = MomentumClip(0.8, 0.8)
+    restart_optim = False
 
     reg = None # L1L2(1e-6, 1e-5) # L1L2(3.2e-5, 3.2e-4)
     final_reg = None # L1L2(1e-6, 1e-5) # L1L2(3.2e-5, 1e-3)
@@ -146,20 +147,28 @@ y = y.feed(output_activation())
 
 model = Model(x, y, unsafe=True)
 
+def rscb(restart):
+    log("restarting", restart)
+    if restart_optim:
+        optim.reset()
+
 if learner_class == SGDR:
     learner = learner_class(optim, epochs=epochs//starts, rate=lr,
                             restarts=starts-1, restart_decay=restart_decay,
-                            expando=lambda i:0)
+                            expando=lambda i:0,
+                            callback=rscb)
 elif learner_class in (TriangularCLR, SineCLR, WaveCLR):
     learner = learner_class(optim, epochs=epochs, lower_rate=0, upper_rate=lr,
-                            frequency=epochs//starts)
+                            frequency=epochs//starts,
+                            callback=rscb)
 elif learner_class is AnnealingLearner:
     learner = learner_class(optim, epochs=epochs, rate=lr,
                             halve_every=epochs//starts)
 elif learner_class is DumbLearner:
     learner = learner_class(self, optim, epochs=epochs//starts, rate=lr,
                             halve_every=epochs//(2*starts),
-                            restarts=starts-1, restart_advance=epochs//starts)
+                            restarts=starts-1, restart_advance=epochs//starts,
+                            callback=rscb)
 elif learner_class is Learner:
     learner = Learner(optim, epochs=epochs, rate=lr)
 else:
@@ -186,8 +195,6 @@ logs = DotMap(
     train_mlosses = [],
     valid_losses = [],
     valid_mlosses = [],
-    #train_confid = [],
-    #valid_confid = [],
     learning_rate = [],
     momentum = [],
 )
@@ -196,25 +203,18 @@ def measure_error(quiet=False):
     def print_error(name, inputs, outputs, comparison=None):
         loss, mloss, _, _ = ritual.test_batched(inputs, outputs, bs, return_losses='both')
 
-        c = Confidence()
-        predicted = ritual.model.forward(inputs, deterministic=True)
-        confid = c.forward(predicted)
-
         if not quiet:
             log(name + " loss", "{:12.6e}".format(loss))
             log(name + " accuracy", "{:6.2f}%".format(mloss * 100))
-            log(name + " confidence", "{:6.2f}%".format(confid * 100))
-        return loss, mloss, confid
+        return loss, mloss
 
-    loss, mloss, confid = print_error("train", inputs, outputs)
+    loss, mloss = print_error("train", inputs, outputs)
     logs.train_losses.append(loss)
    logs.train_mlosses.append(mloss)
-    #logs.train_confid.append(confid)
 
-    loss, mloss, confid = print_error("valid", valid_inputs, valid_outputs)
+    loss, mloss = print_error("valid", valid_inputs, valid_outputs)
     logs.valid_losses.append(loss)
     logs.valid_mlosses.append(mloss)
-    #logs.valid_confid.append(confid)
 
 
 measure_error()
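
Note: the MomentumClip optimizer that this patch switches both branches to is
defined elsewhere in the onn library and is not part of this diff. The sketch
below is only a rough illustration of the idea named in the commit message,
Nesterov momentum with the gradient clipped by its global L2 norm; the class
name, the meaning of mu, and the clipping rule are assumptions, not onn's
actual API.

import numpy as np

def clip_by_norm(grad, limit=1.0):
    # scale the whole gradient down if its L2 norm exceeds the limit
    norm = np.linalg.norm(grad)
    if norm > limit:
        grad = grad * (limit / norm)
    return grad

class MomentumClipSketch:
    # hypothetical stand-in for MomentumClip(mu=0.7, nesterov=True);
    # the real class's interface and defaults are not shown in this patch.
    def __init__(self, lr=0.01, mu=0.7, limit=1.0, nesterov=True):
        self.lr = lr
        self.mu = mu
        self.limit = limit
        self.nesterov = nesterov
        self.velocity = None

    def reset(self):
        # drop the accumulated momentum, e.g. from a restart callback
        self.velocity = None

    def compute_step(self, grad):
        grad = clip_by_norm(np.asarray(grad, dtype=float), self.limit)
        if self.velocity is None:
            self.velocity = np.zeros_like(grad)
        self.velocity = self.mu * self.velocity - self.lr * grad
        if self.nesterov:
            # look ahead along the freshly updated velocity
            return self.mu * self.velocity - self.lr * grad
        return self.velocity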
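
The new rscb callback only logs each restart and, when restart_optim is set,
resets the optimizer state; the SGDR learner that invokes it is likewise
outside this diff. Below is a minimal, self-contained sketch of the scheme it
plugs into: warm restarts on a cosine schedule whose peak rate decays by
restart_decay at every restart, firing a callback each time. It reuses the
non-emnist values from this patch (lr=0.01, epochs=60, starts=3,
restart_decay=0.5), but the function name and exact schedule are illustrative
assumptions rather than onn's implementation.

import numpy as np

def cosine_restart_schedule(epochs, starts, lr, restart_decay, callback=None):
    # SGDR-style schedule: cosine-anneal the rate from the current peak
    # towards zero, then restart at a decayed peak and fire the callback.
    period = epochs // starts
    peak = lr
    for restart in range(starts):
        if restart > 0:
            peak *= restart_decay
            if callback is not None:
                callback(restart)  # e.g. rscb: log, optionally optim.reset()
        for t in range(period):
            yield 0.5 * peak * (1.0 + np.cos(np.pi * t / period))

# values from the non-emnist branch of this patch
rates = list(cosine_restart_schedule(epochs=60, starts=3, lr=0.01,
                                     restart_decay=0.5,
                                     callback=lambda r: print("restarting", r)))
assert len(rates) == 60 and abs(rates[20] - 0.005) < 1e-9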