.
This commit is contained in:
parent
0306b6f1e0
commit
ec08ba7684
2 changed files with 79 additions and 32 deletions
|
@ -79,8 +79,8 @@ class Accuracy(Loss):
|
|||
raise NotImplementedError("cannot take the gradient of Accuracy")
|
||||
|
||||
class Confidence(Loss):
|
||||
def forward(self, p, y):
|
||||
categories = y.shape[-1]
|
||||
def forward(self, p, y=None):
|
||||
categories = p.shape[-1]
|
||||
#confidence = (p - 1/categories) / (1 - categories)
|
||||
#confidence = 1 - np.min(p, axis=-1) * categories
|
||||
confidence = (np.max(p, axis=-1) - 1/categories) / (1 - 1/categories)
|
||||
|
@ -89,7 +89,7 @@ class Confidence(Loss):
|
|||
# but we don't compensate for that. keep it simple.
|
||||
return np.mean(confidence)
|
||||
|
||||
def backward(self, p, y):
|
||||
def backward(self, p, y=None):
|
||||
raise NotImplementedError("this is probably a bad idea")
|
||||
|
||||
class ResidualLoss(Loss):
|
||||
|
|
|
@ -5,26 +5,52 @@ from optim_nn_core import _f
|
|||
|
||||
#np.random.seed(42069)
|
||||
|
||||
# train loss: 7.048363e-03
|
||||
# train accuracy: 99.96%
|
||||
# valid loss: 3.062232e-01
|
||||
# valid accuracy: 97.22%
|
||||
# TODO: add dropout or something to lessen overfitting
|
||||
use_emnist = False
|
||||
|
||||
lr = 0.0032
|
||||
epochs = 125
|
||||
starts = 5
|
||||
restart_decay = 0.5
|
||||
bs = 100
|
||||
|
||||
log_fn = 'mnist_losses.npz'
|
||||
measure_every_epoch = True
|
||||
|
||||
mnist_dim = 28
|
||||
mnist_classes = 10
|
||||
if use_emnist:
|
||||
lr = 0.01
|
||||
epochs = 48
|
||||
starts = 2
|
||||
bs = 200
|
||||
|
||||
sgdr = True
|
||||
restart_decay = 0.5
|
||||
|
||||
n_dense = 0
|
||||
n_denses = 2
|
||||
new_dims = (28, 28)
|
||||
activation = GeluApprox
|
||||
|
||||
log_fn = 'emnist_losses.npz'
|
||||
fn = 'emnist-balanced.npz'
|
||||
mnist_dim = 28
|
||||
mnist_classes = 47
|
||||
|
||||
else:
|
||||
lr = 0.0032
|
||||
epochs = 125
|
||||
starts = 5
|
||||
bs = 200
|
||||
|
||||
activation = Relu
|
||||
|
||||
sgdr = False
|
||||
restart_decay = 0.5
|
||||
|
||||
n_dense = 1
|
||||
n_denses = 1
|
||||
new_dims = (4, 12)
|
||||
|
||||
log_fn = 'mnist_losses.npz'
|
||||
fn = 'mnist.npz'
|
||||
mnist_dim = 28
|
||||
mnist_classes = 10
|
||||
|
||||
def get_mnist(fn='mnist.npz'):
|
||||
import os.path
|
||||
if not os.path.exists(fn):
|
||||
if fn == 'mnist.npz' and not os.path.exists(fn):
|
||||
from keras.datasets import mnist
|
||||
from keras.utils.np_utils import to_categorical
|
||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
||||
|
@ -46,17 +72,23 @@ def get_mnist(fn='mnist.npz'):
|
|||
with np.load(fn) as f:
|
||||
return f['X_train'], f['Y_train'], f['X_test'], f['Y_test']
|
||||
|
||||
inputs, outputs, valid_inputs, valid_outputs = get_mnist()
|
||||
inputs, outputs, valid_inputs, valid_outputs = get_mnist(fn)
|
||||
|
||||
x = Input(shape=inputs.shape[1:])
|
||||
y = x
|
||||
|
||||
y = y.feed(Reshape(new_shape=(mnist_dim, mnist_dim,)))
|
||||
y = y.feed(Denses(4, axis=0, init=init_he_normal))
|
||||
y = y.feed(Denses(12, axis=1, init=init_he_normal))
|
||||
y = y.feed(Reshape(new_shape=(mnist_dim, mnist_dim)))
|
||||
for i in range(n_denses):
|
||||
if i > 0:
|
||||
y = y.feed(activation())
|
||||
y = y.feed(Denses(new_dims[0], axis=0, init=init_he_normal))
|
||||
y = y.feed(Denses(new_dims[1], axis=1, init=init_he_normal))
|
||||
y = y.feed(Flatten())
|
||||
y = y.feed(Dense(y.output_shape[0], init=init_he_normal))
|
||||
y = y.feed(Relu())
|
||||
for i in range(n_dense):
|
||||
if i > 0:
|
||||
y = y.feed(activation())
|
||||
y = y.feed(Dense(y.output_shape[0], init=init_he_normal))
|
||||
y = y.feed(activation())
|
||||
|
||||
y = y.feed(Dense(mnist_classes, init=init_glorot_uniform))
|
||||
y = y.feed(Softmax())
|
||||
|
@ -64,7 +96,7 @@ y = y.feed(Softmax())
|
|||
model = Model(x, y, unsafe=True)
|
||||
|
||||
optim = Adam()
|
||||
if 0:
|
||||
if sgdr:
|
||||
learner = SGDR(optim, epochs=epochs//starts, rate=lr,
|
||||
restarts=starts-1, restart_decay=restart_decay,
|
||||
expando=lambda i:0)
|
||||
|
@ -78,6 +110,8 @@ loss = CategoricalCrossentropy()
|
|||
mloss = Accuracy()
|
||||
|
||||
ritual = Ritual(learner=learner, loss=loss, mloss=mloss)
|
||||
#ritual = NoisyRitual(learner=learner, loss=loss, mloss=mloss,
|
||||
# input_noise=1e-1, output_noise=3.2e-2, gradient_noise=1e-1)
|
||||
|
||||
log('parameters', model.param_count)
|
||||
|
||||
|
@ -87,21 +121,32 @@ batch_losses, batch_mlosses = [], []
|
|||
train_losses, train_mlosses = [], []
|
||||
valid_losses, valid_mlosses = [], []
|
||||
|
||||
train_confid, valid_confid = [], []
|
||||
|
||||
def measure_error(quiet=False):
|
||||
def print_error(name, inputs, outputs, comparison=None):
|
||||
loss, mloss, _, _ = ritual.test_batched(inputs, outputs, bs, return_losses='both')
|
||||
|
||||
c = Confidence()
|
||||
predicted = ritual.model.forward(inputs)
|
||||
confid = c.forward(predicted)
|
||||
|
||||
if not quiet:
|
||||
log(name + " loss", "{:12.6e}".format(loss))
|
||||
log(name + " accuracy", "{:6.2f}%".format(mloss * 100))
|
||||
return loss, mloss
|
||||
log(name + " confidence", "{:6.2f}%".format(confid * 100))
|
||||
|
||||
if not quiet:
|
||||
loss, mloss = print_error("train", inputs, outputs)
|
||||
train_losses.append(loss)
|
||||
train_mlosses.append(mloss)
|
||||
loss, mloss = print_error("valid", valid_inputs, valid_outputs)
|
||||
return loss, mloss, confid
|
||||
|
||||
#if not quiet:
|
||||
loss, mloss, confid = print_error("train", inputs, outputs)
|
||||
train_losses.append(loss)
|
||||
train_mlosses.append(mloss)
|
||||
train_confid.append(confid)
|
||||
loss, mloss, confid = print_error("valid", valid_inputs, valid_outputs)
|
||||
valid_losses.append(loss)
|
||||
valid_mlosses.append(mloss)
|
||||
valid_confid.append(confid)
|
||||
|
||||
measure_error()
|
||||
|
||||
|
@ -137,4 +182,6 @@ if log_fn:
|
|||
train_losses =np.array(train_losses, dtype=_f),
|
||||
train_mlosses=np.array(train_mlosses, dtype=_f),
|
||||
valid_losses =np.array(valid_losses, dtype=_f),
|
||||
valid_mlosses=np.array(valid_mlosses, dtype=_f))
|
||||
valid_mlosses=np.array(valid_mlosses, dtype=_f),
|
||||
train_confid =np.array(train_confid, dtype=_f),
|
||||
valid_confid =np.array(valid_confid, dtype=_f))
|
||||
|
|
Loading…
Reference in a new issue