add dropout and deterministic predictions

Connor Olding 2017-04-11 10:32:48 +00:00
parent 9b730b0516
commit c49e498aa0
2 changed files with 52 additions and 23 deletions

View file

@@ -374,6 +374,9 @@ class Layer:
def forward(self, X):
raise NotImplementedError("unimplemented", self)
def forward_deterministic(self, X):
return self.forward(X)
def backward(self, dY):
raise NotImplementedError("unimplemented", self)
@@ -390,11 +393,13 @@ class Layer:
self.parents.append(parent)
# TODO: better names for these (still)
def _propagate(self, edges):
def _propagate(self, edges, deterministic):
if not self.unsafe:
assert len(edges) == 1, self
return self.forward(edges[0])
if deterministic:
return self.forward_deterministic(edges[0])
else:
return self.forward(edges[0])
def _backpropagate(self, edges):
if len(edges) == 1:
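The dispatch above is the whole mechanism: a layer only overrides forward_deterministic when its behaviour should differ between training and evaluation, and every other layer inherits the default that simply calls forward. A minimal standalone sketch of the same idea (the NoisyLayer class and the propagate helper are illustrative, not part of this commit):

import numpy as np

class NoisyLayer:
    def forward(self, X):
        # training-time pass: perturb the input with gaussian noise
        return X + np.random.normal(0.0, 0.1, size=X.shape)

    def forward_deterministic(self, X):
        # evaluation-time pass: no noise, so repeated calls agree
        return X

def propagate(layer, X, deterministic):
    # mirrors Layer._propagate: choose the pass based on the flag
    if deterministic:
        return layer.forward_deterministic(X)
    return layer.forward(X)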
@@ -437,7 +442,7 @@ class Layer:
for k, w in self.weights.items():
w.allocate(ins, outs, allocator=allocator)
def propagate(self, values):
def propagate(self, values, deterministic):
if not self.unsafe:
assert self.parents, self
edges = []
@@ -447,7 +452,7 @@ class Layer:
if not self.unsafe:
self.validate_input(X)
edges.append(X)
Y = self._propagate(edges)
Y = self._propagate(edges, deterministic)
if not self.unsafe:
self.validate_output(Y)
return Y
@@ -525,7 +530,7 @@ class ConstAffine(Layer):
return dY * self.a
class Sum(Layer):
def _propagate(self, edges):
def _propagate(self, edges, deterministic):
return np.sum(edges, axis=0)
def _backpropagate(self, edges):
@@ -546,6 +551,23 @@ class ActivityRegularizer(Layer):
def backward(self, dY):
return dY + self.reg.backward(self.X)
class Dropout(Layer):
def __init__(self, dropout=0.0):
super().__init__()
self.p = _f(1 - dropout)
assert 0 <= self.p <= 1
def forward(self, X):
self.mask = (np.random.rand(*X.shape) < self.p) / self.p
return X * self.mask
def forward_deterministic(self, X):
#self.mask = _1
return X
def backward(self, dY):
return dY * self.mask
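This is the usual "inverted dropout" formulation: the binary keep-mask is divided by the keep probability p during training, so activations already have the test-time scale and forward_deterministic can return X untouched. A quick, standalone numpy check of that scaling (not part of the library):

import numpy as np

p = 0.9                                    # keep probability, i.e. dropout = 0.10
X = np.ones((10000, 100))

mask = (np.random.rand(*X.shape) < p) / p  # same expression as Dropout.forward
Y = X * mask

print((Y == 0).mean())                     # ~0.10 of the units are zeroed
print(Y.mean())                            # ~1.00, the expected activation is preserved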
# Activation Layers {{{2
class Sigmoid(Layer): # aka Logistic
@@ -710,13 +732,13 @@ class Model:
nodes.append(node)
return nodes
def forward(self, X):
def forward(self, X, deterministic=False):
values = dict()
input_node = self.ordered_nodes[0]
output_node = self.ordered_nodes[-1]
values[input_node] = input_node._propagate(np.expand_dims(X, 0))
values[input_node] = input_node._propagate(np.expand_dims(X, 0), deterministic)
for node in self.ordered_nodes[1:]:
values[node] = node.propagate(values)
values[node] = node.propagate(values, deterministic)
return values[output_node]
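With the flag threaded through Model.forward, callers pick between the stochastic and the repeatable pass explicitly. A hedged usage sketch (model, X_train and X_test are placeholder names):

# training-style pass: Dropout resamples its mask on every call
y_noisy = model.forward(X_train)

# evaluation: deterministic=True routes each node through forward_deterministic,
# so Dropout acts as the identity and repeated calls give identical outputs
y_eval = model.forward(X_test, deterministic=True)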
def backward(self, error):
@@ -861,7 +883,7 @@ class Ritual: # i'm just making up names at this point
self.learner.batch(b / batch_count)
if test_only:
predicted = self.model.forward(batch_inputs)
predicted = self.model.forward(batch_inputs, deterministic=True)
else:
predicted = self.learn(batch_inputs, batch_outputs)
self.update()
@@ -873,6 +895,7 @@ class Ritual: # i'm just making up names at this point
losses.append(batch_loss)
cumsum_loss += batch_loss
# NOTE: this can use the non-deterministic predictions. fixme?
batch_mloss = self.measure(predicted, batch_outputs)
if np.isnan(batch_mloss):
raise Exception("nan")
@@ -915,7 +938,7 @@ class Ritual: # i'm just making up names at this point
self.learner.batch(b / batch_count)
if test_only:
predicted = self.model.forward(batch_inputs)
predicted = self.model.forward(batch_inputs, deterministic=True)
else:
predicted = self.learn(batch_inputs, batch_outputs)
self.update()
@@ -927,6 +950,7 @@ class Ritual: # i'm just making up names at this point
losses.append(batch_loss)
cumsum_loss += batch_loss
# NOTE: this can use the non-deterministic predictions. fixme?
batch_mloss = self.measure(predicted, batch_outputs)
if np.isnan(batch_mloss):
raise Exception("nan")

View file

@@ -26,6 +26,7 @@ if use_emnist:
reg = None
final_reg = None
dropout = None
actreg_lamb = None
load_fn = None
@@ -53,6 +54,7 @@ else:
reg = L1L2(3.2e-5, 3.2e-4)
final_reg = L1L2(3.2e-5, 1e-3)
dropout = 0.10
actreg_lamb = None # 1e-3
load_fn = None
@@ -89,14 +91,17 @@ def get_mnist(fn='mnist.npz'):
inputs, outputs, valid_inputs, valid_outputs = get_mnist(fn)
def actreg(y):
if not actreg_lamb:
return y
lamb = actreg_lamb # * np.prod(y.output_shape)
reg = SaturateRelu(lamb)
act = ActivityRegularizer(reg)
reg.lamb_orig = reg.lamb # HACK
return y.feed(act)
def regulate(y):
if actreg_lamb:
assert type(activation) == Relu, type(activation)
lamb = actreg_lamb # * np.prod(y.output_shape)
reg = SaturateRelu(lamb)
act = ActivityRegularizer(reg)
reg.lamb_orig = reg.lamb # HACK
y = y.feed(act)
if dropout:
y = y.feed(Dropout(dropout))
return y
x = Input(shape=inputs.shape[1:])
y = x
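regulate subsumes the old actreg helper: the ActivityRegularizer is still only inserted when actreg_lamb is set, and a Dropout layer is appended whenever dropout is non-zero. Under the MNIST settings above (dropout = 0.10, actreg_lamb = None) each call therefore reduces to a single Dropout feed; a condensed, hedged view of one hidden block as the loops below build it:

# one hidden block under dropout = 0.10, actreg_lamb = None
# (names come from the script; ordering follows the loops below)
y = y.feed(Dense(y.output_shape[0], init=init_he_normal, reg_w=reg, reg_b=reg))
y = y.feed(Dropout(0.10))   # what regulate(y) contributes here
y = y.feed(activation())    # nonlinearity applied after units are dropped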
@@ -104,7 +109,7 @@ y = x
y = y.feed(Reshape(new_shape=(mnist_dim, mnist_dim)))
for i in range(n_denses):
if i > 0:
y = actreg(y)
y = regulate(y)
y = y.feed(activation())
y = y.feed(Denses(new_dims[0], axis=0, init=init_he_normal,
reg_w=reg, reg_b=reg))
@@ -113,11 +118,11 @@ for i in range(n_denses):
y = y.feed(Flatten())
for i in range(n_dense):
if i > 0:
y = actreg(y)
y = regulate(y)
y = y.feed(activation())
y = y.feed(Dense(y.output_shape[0], init=init_he_normal,
reg_w=reg, reg_b=reg))
y = actreg(y)
y = regulate(y)
y = y.feed(activation())
y = y.feed(Dense(mnist_classes, init=init_glorot_uniform,
@@ -162,7 +167,7 @@ def measure_error(quiet=False):
loss, mloss, _, _ = ritual.test_batched(inputs, outputs, bs, return_losses='both')
c = Confidence()
predicted = ritual.model.forward(inputs)
predicted = ritual.model.forward(inputs, deterministic=True)
confid = c.forward(predicted)
if not quiet: