Patch summary. This text precedes the first "diff --git" header and is
commentary only; it is not part of any hunk.

optim_nn_core.py
  * Confidence.forward / Confidence.backward: `y` gains a default of None.
  * Confidence.forward: the category count is read from p.shape[-1]
    instead of y.shape[-1].

optim_nn_mnist.py
  * Adds a `use_emnist` switch that selects one of two sets of settings
    (lr, epochs, starts, bs, sgdr, restart_decay, n_dense, n_denses,
    new_dims, activation, log_fn, fn, mnist_dim, mnist_classes).
  * get_mnist is called with `fn`; its keras branch is entered only when
    fn == 'mnist.npz' and that file does not exist.
  * The Denses/Dense layers are fed in loops driven by n_denses, n_dense,
    new_dims and activation instead of being written out once.
  * `if 0:` in front of the SGDR learner becomes `if sgdr:`.
  * measure_error computes a Confidence value from
    ritual.model.forward(inputs) for both the train and valid sets and
    appends it to train_confid / valid_confid. The `if not quiet:` guard
    around the train measurement is commented out, so the train set is
    measured on every call.
  * train_confid and valid_confid are added to the keyword arguments
    under `if log_fn:`.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Hunk line counts match the @@ headers, but
leading indentation, alignment spacing inside lines, and the position of
some blank context lines were reconstructed by reading the code. Verify
against the base files (or apply with whitespace tolerance) before use.

diff --git a/optim_nn_core.py b/optim_nn_core.py
index 6b1bd96..27df335 100644
--- a/optim_nn_core.py
+++ b/optim_nn_core.py
@@ -79,8 +79,8 @@ class Accuracy(Loss):
         raise NotImplementedError("cannot take the gradient of Accuracy")
 
 class Confidence(Loss):
-    def forward(self, p, y):
-        categories = y.shape[-1]
+    def forward(self, p, y=None):
+        categories = p.shape[-1]
         #confidence = (p - 1/categories) / (1 - categories)
         #confidence = 1 - np.min(p, axis=-1) * categories
         confidence = (np.max(p, axis=-1) - 1/categories) / (1 - 1/categories)
@@ -89,7 +89,7 @@ class Confidence(Loss):
         # but we don't compensate for that. keep it simple.
         return np.mean(confidence)
 
-    def backward(self, p, y):
+    def backward(self, p, y=None):
         raise NotImplementedError("this is probably a bad idea")
 
 class ResidualLoss(Loss):
diff --git a/optim_nn_mnist.py b/optim_nn_mnist.py
index 5d2b4b9..ce3e257 100644
--- a/optim_nn_mnist.py
+++ b/optim_nn_mnist.py
@@ -5,26 +5,52 @@ from optim_nn_core import _f
 
 #np.random.seed(42069)
 
-# train loss: 7.048363e-03
-# train accuracy: 99.96%
-# valid loss: 3.062232e-01
-# valid accuracy: 97.22%
-# TODO: add dropout or something to lessen overfitting
+use_emnist = False
 
-lr = 0.0032
-epochs = 125
-starts = 5
-restart_decay = 0.5
-bs = 100
-
-log_fn = 'mnist_losses.npz'
 measure_every_epoch = True
 
-mnist_dim = 28
-mnist_classes = 10
+if use_emnist:
+    lr = 0.01
+    epochs = 48
+    starts = 2
+    bs = 200
+
+    sgdr = True
+    restart_decay = 0.5
+
+    n_dense = 0
+    n_denses = 2
+    new_dims = (28, 28)
+    activation = GeluApprox
+
+    log_fn = 'emnist_losses.npz'
+    fn = 'emnist-balanced.npz'
+    mnist_dim = 28
+    mnist_classes = 47
+
+else:
+    lr = 0.0032
+    epochs = 125
+    starts = 5
+    bs = 200
+
+    activation = Relu
+
+    sgdr = False
+    restart_decay = 0.5
+
+    n_dense = 1
+    n_denses = 1
+    new_dims = (4, 12)
+
+    log_fn = 'mnist_losses.npz'
+    fn = 'mnist.npz'
+    mnist_dim = 28
+    mnist_classes = 10
+
 def get_mnist(fn='mnist.npz'):
     import os.path
-    if not os.path.exists(fn):
+    if fn == 'mnist.npz' and not os.path.exists(fn):
         from keras.datasets import mnist
         from keras.utils.np_utils import to_categorical
         (X_train, y_train), (X_test, y_test) = mnist.load_data()
@@ -46,17 +72,23 @@ def get_mnist(fn='mnist.npz'):
     with np.load(fn) as f:
         return f['X_train'], f['Y_train'], f['X_test'], f['Y_test']
 
-inputs, outputs, valid_inputs, valid_outputs = get_mnist()
+inputs, outputs, valid_inputs, valid_outputs = get_mnist(fn)
 
 x = Input(shape=inputs.shape[1:])
 y = x
 
-y = y.feed(Reshape(new_shape=(mnist_dim, mnist_dim,)))
-y = y.feed(Denses(4, axis=0, init=init_he_normal))
-y = y.feed(Denses(12, axis=1, init=init_he_normal))
+y = y.feed(Reshape(new_shape=(mnist_dim, mnist_dim)))
+for i in range(n_denses):
+    if i > 0:
+        y = y.feed(activation())
+    y = y.feed(Denses(new_dims[0], axis=0, init=init_he_normal))
+    y = y.feed(Denses(new_dims[1], axis=1, init=init_he_normal))
 y = y.feed(Flatten())
-y = y.feed(Dense(y.output_shape[0], init=init_he_normal))
-y = y.feed(Relu())
+for i in range(n_dense):
+    if i > 0:
+        y = y.feed(activation())
+    y = y.feed(Dense(y.output_shape[0], init=init_he_normal))
+y = y.feed(activation())
 y = y.feed(Dense(mnist_classes, init=init_glorot_uniform))
 y = y.feed(Softmax())
 
@@ -64,7 +96,7 @@ y = y.feed(Softmax())
 model = Model(x, y, unsafe=True)
 
 optim = Adam()
-if 0:
+if sgdr:
     learner = SGDR(optim, epochs=epochs//starts, rate=lr,
                    restarts=starts-1, restart_decay=restart_decay,
                    expando=lambda i:0)
@@ -78,6 +110,8 @@ loss = CategoricalCrossentropy()
 mloss = Accuracy()
 
 ritual = Ritual(learner=learner, loss=loss, mloss=mloss)
+#ritual = NoisyRitual(learner=learner, loss=loss, mloss=mloss,
+#                     input_noise=1e-1, output_noise=3.2e-2, gradient_noise=1e-1)
 
 log('parameters', model.param_count)
 
@@ -87,21 +121,32 @@ batch_losses, batch_mlosses = [], []
 train_losses, train_mlosses = [], []
 valid_losses, valid_mlosses = [], []
 
+train_confid, valid_confid = [], []
+
 def measure_error(quiet=False):
     def print_error(name, inputs, outputs, comparison=None):
         loss, mloss, _, _ = ritual.test_batched(inputs, outputs, bs, return_losses='both')
+
+        c = Confidence()
+        predicted = ritual.model.forward(inputs)
+        confid = c.forward(predicted)
+
         if not quiet:
             log(name + " loss", "{:12.6e}".format(loss))
             log(name + " accuracy", "{:6.2f}%".format(mloss * 100))
-        return loss, mloss
+            log(name + " confidence", "{:6.2f}%".format(confid * 100))
 
-    if not quiet:
-        loss, mloss = print_error("train", inputs, outputs)
-        train_losses.append(loss)
-        train_mlosses.append(mloss)
-    loss, mloss = print_error("valid", valid_inputs, valid_outputs)
+        return loss, mloss, confid
+
+    #if not quiet:
+    loss, mloss, confid = print_error("train", inputs, outputs)
+    train_losses.append(loss)
+    train_mlosses.append(mloss)
+    train_confid.append(confid)
+    loss, mloss, confid = print_error("valid", valid_inputs, valid_outputs)
     valid_losses.append(loss)
     valid_mlosses.append(mloss)
+    valid_confid.append(confid)
 
 measure_error()
 
@@ -137,4 +182,6 @@ if log_fn:
                         train_losses =np.array(train_losses, dtype=_f),
                         train_mlosses=np.array(train_mlosses, dtype=_f),
                         valid_losses =np.array(valid_losses, dtype=_f),
-                        valid_mlosses=np.array(valid_mlosses, dtype=_f))
+                        valid_mlosses=np.array(valid_mlosses, dtype=_f),
+                        train_confid =np.array(train_confid, dtype=_f),
+                        valid_confid =np.array(valid_confid, dtype=_f))