various parameter tweaks
This commit is contained in:
parent
e5cea3f847
commit
cf1b7c1c13
2 changed files with 18 additions and 15 deletions
12
optim_nn.py
12
optim_nn.py
|
@ -764,7 +764,7 @@ def run(program, args=None):
|
||||||
|
|
||||||
# style of resnet (order of layers, which layers, etc.)
|
# style of resnet (order of layers, which layers, etc.)
|
||||||
parallel_style = 'onelesssum',
|
parallel_style = 'onelesssum',
|
||||||
activation = 'lecun',
|
activation = 'selu',
|
||||||
|
|
||||||
optim = 'adam', # note: most features only implemented for Adam
|
optim = 'adam', # note: most features only implemented for Adam
|
||||||
optim_decay1 = 24, # first momentum given in epochs (optional)
|
optim_decay1 = 24, # first momentum given in epochs (optional)
|
||||||
|
@ -774,20 +774,20 @@ def run(program, args=None):
|
||||||
|
|
||||||
# learning parameters
|
# learning parameters
|
||||||
learner = 'sgdr',
|
learner = 'sgdr',
|
||||||
learn = 1e-2,
|
learn = 0.00125,
|
||||||
epochs = 24,
|
epochs = 24,
|
||||||
learn_halve_every = 16, # only used with anneal/dumb
|
learn_halve_every = 16, # only used with anneal/dumb
|
||||||
restarts = 5,
|
restarts = 4,
|
||||||
restart_decay = 0.25, # only used with SGDR
|
restart_decay = 0.25, # only used with SGDR
|
||||||
expando = lambda i: 24 * i,
|
expando = lambda i: 24 * i,
|
||||||
|
|
||||||
# misc
|
# misc
|
||||||
init = 'glorot_uniform',
|
init = 'gaussian_unit',
|
||||||
loss = 'mse',
|
loss = 'mse',
|
||||||
mloss = 'mse',
|
mloss = 'mse',
|
||||||
ritual = 'default',
|
ritual = 'default',
|
||||||
restart_optim = False, # restarts also reset internal state of optimizer
|
restart_optim = False, # restarts also reset internal state of optimizer
|
||||||
warmup = True, # train a couple epochs on gaussian noise and reset
|
warmup = False, # train a couple epochs on gaussian noise and reset
|
||||||
|
|
||||||
# logging/output
|
# logging/output
|
||||||
log10_loss = True, # personally, i'm sick of looking at linear loss values!
|
log10_loss = True, # personally, i'm sick of looking at linear loss values!
|
||||||
|
@ -811,6 +811,8 @@ def run(program, args=None):
|
||||||
'init', 'loss', 'mloss', 'ritual']:
|
'init', 'loss', 'mloss', 'ritual']:
|
||||||
config[k] = config[k].lower()
|
config[k] = config[k].lower()
|
||||||
|
|
||||||
|
config.learn *= np.sqrt(config.batch_size)
|
||||||
|
|
||||||
config.pprint()
|
config.pprint()
|
||||||
|
|
||||||
# Toy Data {{{2
|
# Toy Data {{{2
|
||||||
|
|
|
@ -5,27 +5,27 @@ from optim_nn_core import _f
|
||||||
|
|
||||||
#np.random.seed(42069)
|
#np.random.seed(42069)
|
||||||
|
|
||||||
use_emnist = False
|
use_emnist = True
|
||||||
|
|
||||||
measure_every_epoch = True
|
measure_every_epoch = True
|
||||||
|
|
||||||
if use_emnist:
|
if use_emnist:
|
||||||
lr = 0.01
|
lr = 0.0005
|
||||||
epochs = 48
|
epochs = 48
|
||||||
starts = 2
|
starts = 2
|
||||||
bs = 200
|
bs = 400
|
||||||
|
|
||||||
learner_class = SGDR
|
learner_class = SGDR
|
||||||
restart_decay = 0.5
|
restart_decay = 0.5
|
||||||
|
|
||||||
n_dense = 0
|
n_dense = 2
|
||||||
n_denses = 2
|
n_denses = 0
|
||||||
new_dims = (28, 28)
|
new_dims = (28, 28)
|
||||||
activation = GeluApprox
|
activation = GeluApprox
|
||||||
|
|
||||||
reg = None
|
reg = L1L2(3.2e-5, 3.2e-4)
|
||||||
final_reg = None
|
final_reg = L1L2(3.2e-5, 1e-3)
|
||||||
dropout = None
|
dropout = 0.05
|
||||||
actreg_lamb = None
|
actreg_lamb = None
|
||||||
|
|
||||||
load_fn = None
|
load_fn = None
|
||||||
|
@ -37,7 +37,7 @@ if use_emnist:
|
||||||
mnist_classes = 47
|
mnist_classes = 47
|
||||||
|
|
||||||
else:
|
else:
|
||||||
lr = 0.01
|
lr = 0.0005
|
||||||
epochs = 60
|
epochs = 60
|
||||||
starts = 3
|
starts = 3
|
||||||
bs = 500
|
bs = 500
|
||||||
|
@ -129,6 +129,8 @@ y = y.feed(Softmax())
|
||||||
|
|
||||||
model = Model(x, y, unsafe=True)
|
model = Model(x, y, unsafe=True)
|
||||||
|
|
||||||
|
lr *= np.sqrt(bs)
|
||||||
|
|
||||||
optim = Adam()
|
optim = Adam()
|
||||||
if learner_class == SGDR:
|
if learner_class == SGDR:
|
||||||
learner = learner_class(optim, epochs=epochs//starts, rate=lr,
|
learner = learner_class(optim, epochs=epochs//starts, rate=lr,
|
||||||
|
@ -176,7 +178,6 @@ def measure_error(quiet=False):
|
||||||
|
|
||||||
return loss, mloss, confid
|
return loss, mloss, confid
|
||||||
|
|
||||||
#if not quiet:
|
|
||||||
loss, mloss, confid = print_error("train", inputs, outputs)
|
loss, mloss, confid = print_error("train", inputs, outputs)
|
||||||
train_losses.append(loss)
|
train_losses.append(loss)
|
||||||
train_mlosses.append(mloss)
|
train_mlosses.append(mloss)
|
||||||
|
|
Loading…
Reference in a new issue