This commit is contained in:
Connor Olding 2017-03-12 17:41:18 -07:00
parent 0306b6f1e0
commit ec08ba7684
2 changed files with 79 additions and 32 deletions

View file

@ -79,8 +79,8 @@ class Accuracy(Loss):
raise NotImplementedError("cannot take the gradient of Accuracy")
class Confidence(Loss):
def forward(self, p, y):
categories = y.shape[-1]
def forward(self, p, y=None):
categories = p.shape[-1]
#confidence = (p - 1/categories) / (1 - categories)
#confidence = 1 - np.min(p, axis=-1) * categories
confidence = (np.max(p, axis=-1) - 1/categories) / (1 - 1/categories)
@ -89,7 +89,7 @@ class Confidence(Loss):
# but we don't compensate for that. keep it simple.
return np.mean(confidence)
def backward(self, p, y):
def backward(self, p, y=None):
raise NotImplementedError("this is probably a bad idea")
class ResidualLoss(Loss):

View file

@ -5,26 +5,52 @@ from optim_nn_core import _f
#np.random.seed(42069)
# train loss: 7.048363e-03
# train accuracy: 99.96%
# valid loss: 3.062232e-01
# valid accuracy: 97.22%
# TODO: add dropout or another regularizer to reduce overfitting (train accuracy exceeds valid accuracy)
use_emnist = False
lr = 0.0032
epochs = 125
starts = 5
restart_decay = 0.5
bs = 100
log_fn = 'mnist_losses.npz'
measure_every_epoch = True
mnist_dim = 28
mnist_classes = 10
if use_emnist:
lr = 0.01
epochs = 48
starts = 2
bs = 200
sgdr = True
restart_decay = 0.5
n_dense = 0
n_denses = 2
new_dims = (28, 28)
activation = GeluApprox
log_fn = 'emnist_losses.npz'
fn = 'emnist-balanced.npz'
mnist_dim = 28
mnist_classes = 47
else:
lr = 0.0032
epochs = 125
starts = 5
bs = 200
activation = Relu
sgdr = False
restart_decay = 0.5
n_dense = 1
n_denses = 1
new_dims = (4, 12)
log_fn = 'mnist_losses.npz'
fn = 'mnist.npz'
mnist_dim = 28
mnist_classes = 10
def get_mnist(fn='mnist.npz'):
import os.path
if not os.path.exists(fn):
if fn == 'mnist.npz' and not os.path.exists(fn):
from keras.datasets import mnist
from keras.utils.np_utils import to_categorical
(X_train, y_train), (X_test, y_test) = mnist.load_data()
@ -46,17 +72,23 @@ def get_mnist(fn='mnist.npz'):
with np.load(fn) as f:
return f['X_train'], f['Y_train'], f['X_test'], f['Y_test']
inputs, outputs, valid_inputs, valid_outputs = get_mnist()
inputs, outputs, valid_inputs, valid_outputs = get_mnist(fn)
x = Input(shape=inputs.shape[1:])
y = x
y = y.feed(Reshape(new_shape=(mnist_dim, mnist_dim,)))
y = y.feed(Denses(4, axis=0, init=init_he_normal))
y = y.feed(Denses(12, axis=1, init=init_he_normal))
y = y.feed(Reshape(new_shape=(mnist_dim, mnist_dim)))
for i in range(n_denses):
if i > 0:
y = y.feed(activation())
y = y.feed(Denses(new_dims[0], axis=0, init=init_he_normal))
y = y.feed(Denses(new_dims[1], axis=1, init=init_he_normal))
y = y.feed(Flatten())
y = y.feed(Dense(y.output_shape[0], init=init_he_normal))
y = y.feed(Relu())
for i in range(n_dense):
if i > 0:
y = y.feed(activation())
y = y.feed(Dense(y.output_shape[0], init=init_he_normal))
y = y.feed(activation())
y = y.feed(Dense(mnist_classes, init=init_glorot_uniform))
y = y.feed(Softmax())
@ -64,7 +96,7 @@ y = y.feed(Softmax())
model = Model(x, y, unsafe=True)
optim = Adam()
if 0:
if sgdr:
learner = SGDR(optim, epochs=epochs//starts, rate=lr,
restarts=starts-1, restart_decay=restart_decay,
expando=lambda i:0)
@ -78,6 +110,8 @@ loss = CategoricalCrossentropy()
mloss = Accuracy()
ritual = Ritual(learner=learner, loss=loss, mloss=mloss)
#ritual = NoisyRitual(learner=learner, loss=loss, mloss=mloss,
# input_noise=1e-1, output_noise=3.2e-2, gradient_noise=1e-1)
log('parameters', model.param_count)
@ -87,21 +121,32 @@ batch_losses, batch_mlosses = [], []
train_losses, train_mlosses = [], []
valid_losses, valid_mlosses = [], []
train_confid, valid_confid = [], []
def measure_error(quiet=False):
def print_error(name, inputs, outputs, comparison=None):
loss, mloss, _, _ = ritual.test_batched(inputs, outputs, bs, return_losses='both')
c = Confidence()
predicted = ritual.model.forward(inputs)
confid = c.forward(predicted)
if not quiet:
log(name + " loss", "{:12.6e}".format(loss))
log(name + " accuracy", "{:6.2f}%".format(mloss * 100))
return loss, mloss
log(name + " confidence", "{:6.2f}%".format(confid * 100))
if not quiet:
loss, mloss = print_error("train", inputs, outputs)
train_losses.append(loss)
train_mlosses.append(mloss)
loss, mloss = print_error("valid", valid_inputs, valid_outputs)
return loss, mloss, confid
#if not quiet:
loss, mloss, confid = print_error("train", inputs, outputs)
train_losses.append(loss)
train_mlosses.append(mloss)
train_confid.append(confid)
loss, mloss, confid = print_error("valid", valid_inputs, valid_outputs)
valid_losses.append(loss)
valid_mlosses.append(mloss)
valid_confid.append(confid)
measure_error()
@ -137,4 +182,6 @@ if log_fn:
train_losses =np.array(train_losses, dtype=_f),
train_mlosses=np.array(train_mlosses, dtype=_f),
valid_losses =np.array(valid_losses, dtype=_f),
valid_mlosses=np.array(valid_mlosses, dtype=_f))
valid_mlosses=np.array(valid_mlosses, dtype=_f),
train_confid =np.array(train_confid, dtype=_f),
valid_confid =np.array(valid_confid, dtype=_f))