#!/usr/bin/env python3

# BIG TODO: ensure numpy isn't upcasting to float64 *anywhere*.
# this is gonna take some work.

# external packages required for full functionality:
# numpy scipy h5py sklearn dotmap

from optim_nn_core import *
from optim_nn_core import _check, _f

import sys
lament = lambda *args, **kwargs: print(*args, file=sys.stderr, **kwargs)

def log(left, right):
    lament("{:>20}: {}".format(left, right))

class Dummy:
    pass

# Loss functions {{{1

class SquaredHalved(ResidualLoss):
    def f(self, r):
        return np.square(r) / 2
    def df(self, r):
        return r

class SomethingElse(ResidualLoss):
    # generalizes Absolute and SquaredHalved
    # plot: https://www.desmos.com/calculator/fagjg9vuz7
    def __init__(self, a=4/3):
        assert 1 <= a <= 2, "parameter out of range"
        self.a = _f(a / 2)
        self.b = _f(2 / a)
        self.c = _f(2 / a - 1)
    def f(self, r):
        return self.a * np.abs(r)**self.b
    def df(self, r):
        return np.sign(r) * np.abs(r)**self.c
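
# note: a=1 recovers SquaredHalved and a=2 recovers Absolute;
# the default a=4/3 interpolates between the two.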

# Nonparametric Layers {{{1

# Parametric Layers {{{1

class LayerNorm(Layer):
    # paper: https://arxiv.org/abs/1607.06450
    # my implementation may be incorrect.
    def __init__(self, eps=1e-3, axis=-1):
        super().__init__()
        self.eps = _f(eps)
        self.axis = int(axis)
    def F(self, X):
        self.center = X - np.mean(X, axis=self.axis, keepdims=True)
        #self.var = np.var(X, axis=self.axis, keepdims=True) + self.eps
        self.var = np.mean(np.square(self.center), axis=self.axis, keepdims=True) + self.eps
        self.std = np.sqrt(self.var) + self.eps
        Y = self.center / self.std
        return Y
    def dF(self, dY):
        # backprop through Y = center / std:
        # std is shared across the normalized axis, so its gradient is summed
        # over that axis; the centering step means the mean of the gradient
        # gets subtracted at the end.
        length = self.input_shape[self.axis]
        dstd = np.sum(dY * -self.center / np.square(self.std),
                      axis=self.axis, keepdims=True)
        dvar = dstd * 0.5 / np.sqrt(self.var)
        dcenter = dY / self.std + dvar * (2 * self.center / length)
        dX = dcenter - np.mean(dcenter, axis=self.axis, keepdims=True)
        return dX
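
# a quick numerical check of LayerNorm.dF (a sketch, not used by the model;
# it assumes input_shape can simply be assigned, which may not match the
# actual Layer API in optim_nn_core):
#   ln = LayerNorm()
#   ln.input_shape = (8,)
#   X, dY = np.random.randn(4, 8), np.random.randn(4, 8)
#   base = np.sum(ln.F(X) * dY)
#   analytic = ln.dF(dY)
#   h = 1e-4
#   for index in np.ndindex(*X.shape):
#       Xp = X.copy(); Xp[index] += h
#       numeric = (np.sum(ln.F(Xp) * dY) - base) / h
#       assert abs(numeric - analytic[index]) < 1e-2, (index, numeric)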

# Rituals {{{1

def stochastic_multiply(W, gamma=0.5, allow_negation=True):
    # paper: https://arxiv.org/abs/1606.01981

    assert W.ndim == 1, W.ndim
    assert 0 < gamma < 1, gamma
    size = len(W)
    alpha = np.max(np.abs(W))
    # NOTE: numpy gives [low, high) but the paper advocates [low, high]
    mult = np.random.uniform(gamma, 1/gamma, size=size)
    if allow_negation: # TODO: verify this is correct. seems to wreak havoc.
        prob = (W / alpha + 1) / 2
        samples = np.random.random_sample(size=size)
        mult *= np.where(samples < prob, 1, -1)
    np.multiply(W, mult, out=W)
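
# stochastic_multiply scales its argument in-place; StochMRitual below calls
# it on layer.coeffs.ravel() before every batch. a tiny example:
#   W = np.array([0.5, -0.25, 1.0], dtype=_f)
#   stochastic_multiply(W, gamma=0.5, allow_negation=False)
#   # each weight is now scaled by an independent factor drawn from [0.5, 2).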

class StochMRitual(Ritual):
    # paper: https://arxiv.org/abs/1606.01981
    # this probably doesn't make sense for regression problems,
    # let alone small models, but here it is anyway!
    def __init__(self, learner=None, loss=None, mloss=None, gamma=0.5):
        super().__init__(learner, loss, mloss)
        self.gamma = _f(gamma)
    def prepare(self, model):
        self.W = np.copy(model.W)
        super().prepare(model)
    def learn(self, inputs, outputs):
        # an experiment:
        #assert self.learner.rate < 10, self.learner.rate
        #self.gamma = 1 - 1/2**(1 - np.log10(self.learner.rate))
        self.W[:] = self.model.W
        for layer in self.model.ordered_nodes:
            if isinstance(layer, Dense):
                stochastic_multiply(layer.coeffs.ravel(), gamma=self.gamma,
                                    allow_negation=True)
        residual = super().learn(inputs, outputs)
        self.model.W[:] = self.W
        return residual
    def update(self):
        super().update()
        f = 0.5
        for layer in self.model.ordered_nodes:
            if isinstance(layer, Dense):
                np.clip(layer.W, -layer.std * f, layer.std * f, out=layer.W)
                #np.clip(layer.W, -1, 1, out=layer.W)

class NoisyRitual(Ritual):
    def __init__(self, learner=None, loss=None, mloss=None,
                 input_noise=0, output_noise=0, gradient_noise=0):
        self.input_noise = _f(input_noise)
        self.output_noise = _f(output_noise)
        self.gradient_noise = _f(gradient_noise)
        super().__init__(learner, loss, mloss)
    def learn(self, inputs, outputs):
        # this is pretty crude
        s = self.input_noise
        noisy_inputs = inputs + np.random.normal(0, s, size=inputs.shape)
        s = self.output_noise
        noisy_outputs = outputs + np.random.normal(0, s, size=outputs.shape)
        return super().learn(noisy_inputs, noisy_outputs)
    def update(self):
        # gradient noise paper: https://arxiv.org/abs/1511.06807
        if self.gradient_noise > 0:
            size = len(self.model.dW)
            gamma = 0.55
            s = self.gradient_noise / (1 + self.bn) ** gamma
            # experiments:
            #s = np.sqrt(self.learner.rate)
            #s = np.square(self.learner.rate)
            #s = self.learner.rate / self.en
            self.model.dW += np.random.normal(0, s, size=size)
        super().update()
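
# the gradient noise in NoisyRitual.update follows the annealed schedule from
# the paper: sigma = eta / (1 + t)**0.55, where t is self.bn (presumably the
# batch counter) and eta is gradient_noise. e.g. the 'noisy' ritual below uses
# eta = 2e-7, which decays to roughly 1.6e-8 after 100 batches.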

# Learners {{{1

class DumbLearner(AnnealingLearner):
    # this is my own awful contraption. it's not really "SGD with restarts".
    def __init__(self, optim, epochs=100, rate=None, halve_every=10,
                 restarts=0, restart_advance=20, callback=None):
        self.restart_epochs = int(epochs)
        self.restarts = int(restarts)
        self.restart_advance = float(restart_advance)
        self.restart_callback = callback
        epochs = self.restart_epochs * (self.restarts + 1)
        super().__init__(optim, epochs, rate, halve_every)
    def rate_at(self, epoch):
        sub_epoch = epoch % self.restart_epochs
        restart = epoch // self.restart_epochs
        return super().rate_at(sub_epoch) * (self.anneal**self.restart_advance)**restart
    def next(self):
        if not super().next():
            return False
        sub_epoch = self.epoch % self.restart_epochs
        restart = self.epoch // self.restart_epochs
        if restart > 0 and sub_epoch == 0:
            if self.restart_callback is not None:
                self.restart_callback(restart)
        return True
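
# in other words: DumbLearner runs `epochs` epochs per cycle for
# (restarts + 1) cycles, and each later cycle starts its annealing schedule
# roughly `restart_advance` epochs further along than the previous one.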

def multiresnet(x, width, depth, block=2, multi=1,
                activation=Relu, style='batchless',
                init=init_he_normal):
    y = x
    last_size = x.output_shape[0]

    FC = lambda size: Dense(size, init)
    #FC = lambda size: DenseOneLess(size, init)

    for d in range(depth):
        size = width

        if last_size != size:
            y = y.feed(Dense(size, init))

        if style == 'batchless':
            skip = y
            merger = Sum()
            skip.feed(merger)
            z_start = skip.feed(activation())
            for _ in range(multi):
                z = z_start
                for i in range(block):
                    if i > 0:
                        z = z.feed(activation())
                    z = z.feed(FC(size))
                z.feed(merger)
            y = merger
        elif style == 'onelesssum':
            # this is my own awful contraption.
            is_last = d + 1 == depth
            needs_sum = not is_last or multi > 1
            skip = y
            if needs_sum:
                merger = Sum()
            if not is_last:
                skip.feed(merger)
            z_start = skip.feed(activation())
            for _ in range(multi):
                z = z_start
                for i in range(block):
                    if i > 0:
                        z = z.feed(activation())
                    z = z.feed(FC(size))
                if needs_sum:
                    z.feed(merger)
            if needs_sum:
                y = merger
            else:
                y = z
        else:
            raise Exception('unknown resnet style', style)

        last_size = size

    return y
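
# usage sketch (mirroring model_from_config below; the exact Input/Model
# signatures come from optim_nn_core):
#   x = Input(shape=(4,))
#   y = multiresnet(x, width=28, depth=2, block=3, multi=2,
#                   activation=GeluApprox, style='onelesssum')
#   y = y.feed(Dense(1, init_he_normal))
#   model = Model(x, y, unsafe=False)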

# Toy Data {{{1

inits = dict(he_normal=init_he_normal, he_uniform=init_he_uniform)
activations = dict(sigmoid=Sigmoid, tanh=Tanh, relu=Relu, elu=Elu, gelu=GeluApprox)

def normalize_data(data, mean=None, std=None):
    # in-place normalization.
    # if mean/std aren't given, print them and bail out so they can be
    # hard-coded at the call site (see the calls in toy_data below).
    if mean is None or std is None:
        mean = np.mean(data, axis=0)
        std = np.std(data, axis=0)
        # TODO: construct function call string for copy-paste convenience
        lament('mean:', mean)
        lament('std: ', std)
        sys.exit(1)
    data -= _f(mean)
    data /= _f(std)

def toy_data(train_samples, valid_samples, problem=2):
    total_samples = train_samples + valid_samples

    if problem == 0:
        from ml.cie_mlp_data import rgbcompare, input_samples, output_samples, \
            inputs, outputs, valid_inputs, valid_outputs, \
            x_scale, y_scale
        inputs, outputs = _f(inputs), _f(outputs)
        valid_inputs, valid_outputs = _f(valid_inputs), _f(valid_outputs)
        normalize_data(inputs, 127.5, 73.9)
        normalize_data(outputs, 44.8, 21.7)
        normalize_data(valid_inputs, 127.5, 73.9)
        normalize_data(valid_outputs, 44.8, 21.7)
    elif problem == 1:
        from sklearn.datasets import make_friedman1
        inputs, outputs = make_friedman1(total_samples)
        inputs, outputs = _f(inputs), _f(outputs)
        outputs = np.expand_dims(outputs, -1)

        normalize_data(inputs, 0.5, 1/np.sqrt(12))
        normalize_data(outputs, 14.4, 4.9)
    elif problem == 2:
        from sklearn.datasets import make_friedman2
        inputs, outputs = make_friedman2(total_samples)
        inputs, outputs = _f(inputs), _f(outputs)
        outputs = np.expand_dims(outputs, -1)

        normalize_data(inputs,
                       [5.00e+01, 9.45e+02, 5.01e-01, 5.98e+00],
                       [2.89e+01, 4.72e+02, 2.89e-01, 2.87e+00])
        normalize_data(outputs, [482], [380])
    elif problem == 3:
        from sklearn.datasets import make_friedman3
        inputs, outputs = make_friedman3(total_samples)
        inputs, outputs = _f(inputs), _f(outputs)
        outputs = np.expand_dims(outputs, -1)

        normalize_data(inputs,
                       [4.98e+01, 9.45e+02, 4.99e-01, 6.02e+00],
                       [2.88e+01, 4.73e+02, 2.90e-01, 2.87e+00])
        normalize_data(outputs, [1.32327931], [0.31776295])
    else:
        raise Exception("unknown toy data set", problem)

    if problem != 0:
        # split off a validation set
        indices = np.arange(inputs.shape[0])
        np.random.shuffle(indices)
        valid_inputs = inputs[indices][-valid_samples:]
        valid_outputs = outputs[indices][-valid_samples:]
        inputs = inputs[indices][:-valid_samples]
        outputs = outputs[indices][:-valid_samples]

    return (inputs, outputs), (valid_inputs, valid_outputs)
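
# toy_data returns ((inputs, outputs), (valid_inputs, valid_outputs)).
# for problems 1..3 these are (train_samples, features) and
# (valid_samples, features) arrays with a single output column, already
# normalized to roughly zero mean and unit variance.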

# Model Creation {{{1

def model_from_config(config, input_features, output_features, callbacks):
    # Our Test Model
    init = inits[config.init]
    activation = activations[config.activation]

    x = Input(shape=(input_features,))
    y = x
    y = multiresnet(y,
                    config.res_width, config.res_depth,
                    config.res_block, config.res_multi,
                    activation=activation, init=init,
                    style=config.parallel_style)
    if y.output_shape[0] != output_features:
        y = y.feed(Dense(output_features, init))

    model = Model(x, y, unsafe=config.unsafe)

    #
    # FIXME: unused variable
    training = config.epochs > 0 and config.restarts >= 0

    if config.fn_load is not None:
        log('loading weights', config.fn_load)
        model.load_weights(config.fn_load)

    #
    if config.optim == 'adam':
        assert not config.nesterov, "unimplemented"
        d1 = config.optim_decay1 if 'optim_decay1' in config else 9.5
        d2 = config.optim_decay2 if 'optim_decay2' in config else 999.5
        b1 = np.exp(-1/d1)
        b2 = np.exp(-1/d2)
        optim = Adam(b1=b1, b1_t=b1, b2=b2, b2_t=b2)
    elif config.optim in ('rms', 'rmsprop'):
        d2 = config.optim_decay2 if 'optim_decay2' in config else 99.5
        mu = np.exp(-1/d2)
        optim = RMSprop(mu=mu)
    elif config.optim == 'sgd':
        if config.momentum != 0:
            optim = Momentum(mu=config.momentum, nesterov=config.nesterov)
        else:
            optim = Optimizer()
    else:
        raise Exception('unknown optimizer', config.optim)
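
    # note: the decay constants above are given in epochs and mapped to
    # exponential smoothing coefficients via b = exp(-1/d); e.g. the default
    # d1 = 9.5 gives b1 ~= 0.90 and d2 = 999.5 gives b2 ~= 0.999, close to
    # the usual Adam defaults.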

    def rscb(restart):
        callbacks.restart()
        log("restarting", restart)
        if config.restart_optim:
            optim.reset()

    #
    if config.learner == 'sgdr':
        expando = config.expando if 'expando' in config else None
        learner = SGDR(optim, epochs=config.epochs, rate=config.learn,
                       restart_decay=config.restart_decay, restarts=config.restarts,
                       callback=rscb, expando=expando)
        # final learning rate isn't of interest here; it's gonna be close to 0.
        log('total epochs', learner.epochs)
    elif config.learner == 'anneal':
        learner = AnnealingLearner(optim, epochs=config.epochs, rate=config.learn,
                                   halve_every=config.learn_halve_every)
    elif config.learner == 'dumb':
        learner = DumbLearner(optim, epochs=config.epochs, rate=config.learn,
                              halve_every=config.learn_halve_every,
                              restarts=config.restarts,
                              restart_advance=config.learn_restart_advance,
                              callback=rscb)
        log("final learning rate", "{:10.8f}".format(learner.final_rate))
    elif config.learner == 'sgd':
        learner = Learner(optim, epochs=config.epochs, rate=config.learn)
        log("final learning rate", "{:10.8f}".format(learner.final_rate))
    else:
        raise Exception('unknown learner', config.learner)

    #
    def lookup_loss(maybe_name):
        if isinstance(maybe_name, Loss):
            return maybe_name
        elif maybe_name == 'mse':
            return Squared()
        elif maybe_name == 'mshe': # mushy
            return SquaredHalved()
        elif maybe_name == 'mae':
            return Absolute()
        elif maybe_name == 'msee':
            return SomethingElse()
        raise Exception('unknown objective', maybe_name)

    loss = lookup_loss(config.loss)
    mloss = lookup_loss(config.mloss) if config.mloss else loss

    if config.ritual == 'default':
        ritual = Ritual(learner=learner, loss=loss, mloss=mloss)
    elif config.ritual == 'stochm':
        ritual = StochMRitual(learner=learner, loss=loss, mloss=mloss)
    elif config.ritual == 'noisy':
        ritual = NoisyRitual(learner=learner, loss=loss, mloss=mloss,
                             input_noise=1e-1, output_noise=1e-2,
                             gradient_noise=2e-7)
    else:
        raise Exception('unknown ritual', config.ritual)

    #
    return model, learner, ritual, (loss, mloss)

# main program {{{1

def run(program, args=[]):
    np.random.seed(42069)

    # Config {{{2

    from dotmap import DotMap
    config = DotMap(
        fn_load = None,
        fn_save = 'optim_nn.h5',
        log_fn = 'losses.npz',

        # multi-residual network parameters
        res_width = 28,
        res_depth = 2,
        res_block = 3, # normally 2 for plain resnet
        res_multi = 2, # normally 1 for plain resnet

        # style of resnet (order of layers, which layers, etc.)
        parallel_style = 'onelesssum',
        activation = 'gelu',

        optim = 'adam',
        optim_decay1 = 2, # given in epochs (optional)
        optim_decay2 = 100, # given in epochs (optional)
        momentum = 0.50, # only used with SGD
        nesterov = False, # only used with SGD or Adam
        batch_size = 64,

        # learning parameters
        learner = 'sgdr',
        learn = 1e-2,
        epochs = 24,
        restarts = 2,
        restart_decay = 0.25, # only used with SGDR
        expando = lambda i: i + 1,

        # misc
        init = 'he_normal',
        loss = 'msee',
        mloss = 'mse',
        ritual = 'default',
        restart_optim = False, # whether a restart also resets the optimizer's internal state
        problem = 3,
        compare = (
            # best results for ~10,000 parameters
            # training/validation pairs for each problem (starting from problem 0):
            #(5.08e-05, 6.78e-05),
            (7.577717e-04, 1.255284e-03),
            (3.032806e-06, 3.963775e-06),
            (3.676451e-07, 4.495362e-07),
            (1.854613e-05, 1.623881e-05)
        ),

        unsafe = True, # aka gotta go fast mode
    )

    for k in ['parallel_style', 'activation', 'optim', 'learner', 'init', 'loss', 'mloss', 'ritual']:
        config[k] = config[k].lower()
    config.pprint()

    # Toy Data {{{2
    # (our model is probably complete overkill for this, so TODO: better data)

    (inputs, outputs), (valid_inputs, valid_outputs) = \
        toy_data(2**14, 2**11, problem=config.problem)
    input_features = inputs.shape[-1]
    output_features = outputs.shape[-1]

    callbacks = Dummy()

    model, learner, ritual, (loss, mloss) = \
        model_from_config(config, input_features, output_features, callbacks)

    # Model Information

    if 0: # flip to 1 for a one-line node summary instead of the graph dump below
        node_names = ' '.join([str(node) for node in model.ordered_nodes])
        log('{} nodes'.format(len(model.ordered_nodes)), node_names)
    else:
        for node in model.ordered_nodes:
            children = [str(n) for n in node.children]
            if len(children) > 0:
                sep = '->'
                print(str(node)+sep+('\n'+str(node)+sep).join(children))

    log('parameters', model.param_count)

    # Training {{{2

    batch_losses = []
    train_losses = []
    valid_losses = []

    def measure_error():
        def print_error(name, inputs, outputs, comparison=None):
            predicted = model.forward(inputs)
            err = ritual.measure(predicted, outputs)
            log(name + " loss", "{:12.6e}".format(err))
            # TODO: print logarithmic difference as it might be more meaningful
            #       (fewer results stuck around -99%)
            if comparison:
                log("improvement", "{:+7.2f}%".format((comparison / err - 1) * 100))
            return err

        train_err = print_error("train",
                                inputs, outputs,
                                config.compare[config.problem][0])
        valid_err = print_error("valid",
                                valid_inputs, valid_outputs,
                                config.compare[config.problem][1])
        train_losses.append(train_err)
        valid_losses.append(valid_err)

    callbacks.restart = measure_error

    measure_error()

    ritual.prepare(model)

    while learner.next():
        indices = np.arange(inputs.shape[0])
        np.random.shuffle(indices)
        shuffled_inputs = inputs[indices]
        shuffled_outputs = outputs[indices]

        avg_loss, losses = ritual.train_batched(
            shuffled_inputs, shuffled_outputs,
            config.batch_size,
            return_losses=True)
        batch_losses += losses

        #log("learning rate", "{:10.8f}".format(learner.rate))
        #log("average loss", "{:11.7f}".format(avg_loss))
        fmt = "epoch {:4.0f}, rate {:10.8f}, loss {:12.6e}"
        log("info", fmt.format(learner.epoch + 1, learner.rate, avg_loss))

    measure_error()

    if config.fn_save is not None:
        log('saving weights', config.fn_save)
        model.save_weights(config.fn_save, overwrite=True)

    if config.log_fn is not None:
        log('saving losses', config.log_fn)
        np.savez_compressed(config.log_fn,
                            batch_losses=np.array(batch_losses, dtype=_f),
                            train_losses=np.array(train_losses, dtype=_f),
                            valid_losses=np.array(valid_losses, dtype=_f))

    # Evaluation {{{2
    # TODO: write this portion again

    return 0

# run main program {{{1

if __name__ == '__main__':
    sys.exit(run(sys.argv[0], sys.argv[1:]))