various parameter tweaks

2017-06-17 16:46:39 +00:00 · 2017-06-17 16:46:39 +00:00 · cf1b7c1c13
commit cf1b7c1c13
parent e5cea3f847
2 changed files with 18 additions and 15 deletions
--- a/optim_nn.py
+++ b/optim_nn.py
@ -764,7 +764,7 @@ def run(program, args=None):
        # style of resnet (order of layers, which layers, etc.)
        parallel_style = 'onelesssum',
-        activation = 'lecun',
+        activation = 'selu',
        optim = 'adam', # note: most features only implemented for Adam
        optim_decay1 = 24,  #  first momentum given in epochs (optional)
@ -774,20 +774,20 @@ def run(program, args=None):
        # learning parameters
        learner = 'sgdr',
-        learn = 1e-2,
+        learn = 0.00125,
        epochs = 24,
        learn_halve_every = 16, # only used with anneal/dumb
-        restarts = 5,
+        restarts = 4,
        restart_decay = 0.25, # only used with SGDR
        expando = lambda i: 24 * i,
        # misc
-        init = 'glorot_uniform',
+        init = 'gaussian_unit',
        loss = 'mse',
        mloss = 'mse',
        ritual = 'default',
        restart_optim = False, # restarts also reset internal state of optimizer
-        warmup = True, # train a couple epochs on gaussian noise and reset
+        warmup = False, # train a couple epochs on gaussian noise and reset
        # logging/output
        log10_loss = True, # personally, i'm sick of looking linear loss values!
@ -811,6 +811,8 @@ def run(program, args=None):
              'init', 'loss', 'mloss', 'ritual']:
        config[k] = config[k].lower()
    config.learn *= np.sqrt(config.batch_size)
    config.pprint()
    # Toy Data {{{2
--- a/optim_nn_mnist.py
+++ b/optim_nn_mnist.py
@ -5,27 +5,27 @@ from optim_nn_core import _f
 #np.random.seed(42069)
-use_emnist = False
+use_emnist = True
 measure_every_epoch = True
 if use_emnist:
-    lr = 0.01
+    lr = 0.0005
    epochs = 48
    starts = 2
-    bs = 200
+    bs = 400
    learner_class = SGDR
    restart_decay = 0.5
-    n_dense = 0
+    n_dense = 2
-    n_denses = 2
+    n_denses = 0
    new_dims = (28, 28)
    activation = GeluApprox
-    reg = None
+    reg = L1L2(3.2e-5, 3.2e-4)
-    final_reg = None
+    final_reg = L1L2(3.2e-5, 1e-3)
-    dropout = None
+    dropout = 0.05
    actreg_lamb = None
    load_fn = None
@ -37,7 +37,7 @@ if use_emnist:
    mnist_classes = 47
 else:
-    lr = 0.01
+    lr = 0.0005
    epochs = 60
    starts = 3
    bs = 500
@ -129,6 +129,8 @@ y = y.feed(Softmax())
 model = Model(x, y, unsafe=True)
 lr *= np.sqrt(bs)
 optim = Adam()
 if learner_class == SGDR:
    learner = learner_class(optim, epochs=epochs//starts, rate=lr,
@ -176,7 +178,6 @@ def measure_error(quiet=False):
        return loss, mloss, confid
    #if not quiet:
    loss, mloss, confid = print_error("train", inputs, outputs)
    train_losses.append(loss)
    train_mlosses.append(mloss)