add dropout and deterministic predictions
This commit is contained in:
parent 9b730b0516
commit c49e498aa0
2 changed files with 52 additions and 23 deletions
@@ -374,6 +374,9 @@ class Layer:
     def forward(self, X):
         raise NotImplementedError("unimplemented", self)
 
+    def forward_deterministic(self, X):
+        return self.forward(X)
+
     def backward(self, dY):
         raise NotImplementedError("unimplemented", self)
 
@@ -390,11 +393,13 @@ class Layer:
         self.parents.append(parent)
 
     # TODO: better names for these (still)
 
-    def _propagate(self, edges):
+    def _propagate(self, edges, deterministic):
         if not self.unsafe:
             assert len(edges) == 1, self
-        return self.forward(edges[0])
+        if deterministic:
+            return self.forward_deterministic(edges[0])
+        else:
+            return self.forward(edges[0])
 
     def _backpropagate(self, edges):
         if len(edges) == 1:
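Note (illustration, not part of the diff): the dispatch above means a layer only overrides forward_deterministic when inference should differ from training; otherwise the base-class fallback (return self.forward(X)) applies. A minimal hypothetical layer showing the pattern:

    class GaussianNoise(Layer):  # hypothetical example, not in this commit
        def __init__(self, std=0.01):
            super().__init__()
            self.std = std

        def forward(self, X):
            # stochastic path: add noise during training
            return X + np.random.randn(*X.shape) * self.std

        def forward_deterministic(self, X):
            # deterministic path: noise is dropped at inference time
            return X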
@@ -437,7 +442,7 @@ class Layer:
         for k, w in self.weights.items():
             w.allocate(ins, outs, allocator=allocator)
 
-    def propagate(self, values):
+    def propagate(self, values, deterministic):
         if not self.unsafe:
             assert self.parents, self
         edges = []
@@ -447,7 +452,7 @@ class Layer:
             if not self.unsafe:
                 self.validate_input(X)
             edges.append(X)
-        Y = self._propagate(edges)
+        Y = self._propagate(edges, deterministic)
         if not self.unsafe:
             self.validate_output(Y)
         return Y
@@ -525,7 +530,7 @@ class ConstAffine(Layer):
         return dY * self.a
 
 class Sum(Layer):
-    def _propagate(self, edges):
+    def _propagate(self, edges, deterministic):
         return np.sum(edges, axis=0)
 
     def _backpropagate(self, edges):
@@ -546,6 +551,23 @@ class ActivityRegularizer(Layer):
     def backward(self, dY):
         return dY + self.reg.backward(self.X)
 
+class Dropout(Layer):
+    def __init__(self, dropout=0.0):
+        super().__init__()
+        self.p = _f(1 - dropout)
+        assert 0 <= self.p <= 1
+
+    def forward(self, X):
+        self.mask = (np.random.rand(*X.shape) < self.p) / self.p
+        return X * self.mask
+
+    def forward_deterministic(self, X):
+        #self.mask = _1
+        return X
+
+    def backward(self, dY):
+        return dY * self.mask
+
 # Activation Layers {{{2
 
 class Sigmoid(Layer): # aka Logistic
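Note (illustration, not part of the diff): this is the "inverted dropout" formulation: the binary keep-mask is divided by the keep-probability p during training, so activations keep the same expectation and the deterministic path can return X unscaled. A standalone numpy sketch of that property (assumed values, not library code):

    import numpy as np
    p = 0.9                                    # keep-probability, i.e. 1 - dropout
    X = np.ones(100000)
    mask = (np.random.rand(*X.shape) < p) / p  # same construction as Dropout.forward
    print((X * mask).mean())                   # ~1.0, matching forward_deterministic(X)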
@@ -710,13 +732,13 @@ class Model:
             nodes.append(node)
         return nodes
 
-    def forward(self, X):
+    def forward(self, X, deterministic=False):
         values = dict()
         input_node = self.ordered_nodes[0]
         output_node = self.ordered_nodes[-1]
-        values[input_node] = input_node._propagate(np.expand_dims(X, 0))
+        values[input_node] = input_node._propagate(np.expand_dims(X, 0), deterministic)
         for node in self.ordered_nodes[1:]:
-            values[node] = node.propagate(values)
+            values[node] = node.propagate(values, deterministic)
         return values[output_node]
 
     def backward(self, error):
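Note (illustration, not part of the diff): since the new keyword defaults to False, existing callers keep the stochastic behaviour (dropout masks resampled on every call), and evaluation code opts into the deterministic path explicitly. A hypothetical usage sketch, assuming a built model and an input batch:

    train_out = model.forward(batch_inputs)                      # dropout active
    eval_out  = model.forward(batch_inputs, deterministic=True)  # dropout bypassed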
@@ -861,7 +883,7 @@ class Ritual: # i'm just making up names at this point
                 self.learner.batch(b / batch_count)
 
             if test_only:
-                predicted = self.model.forward(batch_inputs)
+                predicted = self.model.forward(batch_inputs, deterministic=True)
             else:
                 predicted = self.learn(batch_inputs, batch_outputs)
                 self.update()
@@ -873,6 +895,7 @@ class Ritual: # i'm just making up names at this point
                 losses.append(batch_loss)
             cumsum_loss += batch_loss
 
+            # NOTE: this can use the non-deterministic predictions. fixme?
             batch_mloss = self.measure(predicted, batch_outputs)
             if np.isnan(batch_mloss):
                 raise Exception("nan")
@@ -915,7 +938,7 @@ class Ritual: # i'm just making up names at this point
                 self.learner.batch(b / batch_count)
 
             if test_only:
-                predicted = self.model.forward(batch_inputs)
+                predicted = self.model.forward(batch_inputs, deterministic=True)
             else:
                 predicted = self.learn(batch_inputs, batch_outputs)
                 self.update()
@@ -927,6 +950,7 @@ class Ritual: # i'm just making up names at this point
                 losses.append(batch_loss)
             cumsum_loss += batch_loss
 
+            # NOTE: this can use the non-deterministic predictions. fixme?
             batch_mloss = self.measure(predicted, batch_outputs)
             if np.isnan(batch_mloss):
                 raise Exception("nan")

@@ -26,6 +26,7 @@ if use_emnist:
 
     reg = None
     final_reg = None
+    dropout = None
     actreg_lamb = None
 
     load_fn = None
@@ -53,6 +54,7 @@ else:
 
     reg = L1L2(3.2e-5, 3.2e-4)
     final_reg = L1L2(3.2e-5, 1e-3)
+    dropout = 0.10
     actreg_lamb = None # 1e-3
 
     load_fn = None
@@ -89,14 +91,17 @@ def get_mnist(fn='mnist.npz'):
 
 inputs, outputs, valid_inputs, valid_outputs = get_mnist(fn)
 
-def actreg(y):
-    if not actreg_lamb:
-        return y
-    lamb = actreg_lamb # * np.prod(y.output_shape)
-    reg = SaturateRelu(lamb)
-    act = ActivityRegularizer(reg)
-    reg.lamb_orig = reg.lamb # HACK
-    return y.feed(act)
+def regulate(y):
+    if actreg_lamb:
+        assert type(activation) == Relu, type(activation)
+        lamb = actreg_lamb # * np.prod(y.output_shape)
+        reg = SaturateRelu(lamb)
+        act = ActivityRegularizer(reg)
+        reg.lamb_orig = reg.lamb # HACK
+        y = y.feed(act)
+    if dropout:
+        y = y.feed(Dropout(dropout))
+    return y
 
 x = Input(shape=inputs.shape[1:])
 y = x
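Note (illustration, not part of the diff): regulate replaces the old actreg helper and stacks both regularizers. With the settings assumed above (actreg_lamb = None, dropout = 0.10) it reduces to inserting a single Dropout layer; with actreg_lamb set it would first insert the SaturateRelu-driven ActivityRegularizer. Rough equivalent under the default configuration:

    y = regulate(y)
    # behaves like, when actreg_lamb is None and dropout is 0.10:
    y = y.feed(Dropout(0.10))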
@@ -104,7 +109,7 @@ y = x
 y = y.feed(Reshape(new_shape=(mnist_dim, mnist_dim)))
 for i in range(n_denses):
     if i > 0:
-        y = actreg(y)
+        y = regulate(y)
         y = y.feed(activation())
     y = y.feed(Denses(new_dims[0], axis=0, init=init_he_normal,
                       reg_w=reg, reg_b=reg))
@@ -113,11 +118,11 @@ for i in range(n_denses):
 y = y.feed(Flatten())
 for i in range(n_dense):
     if i > 0:
-        y = actreg(y)
+        y = regulate(y)
         y = y.feed(activation())
     y = y.feed(Dense(y.output_shape[0], init=init_he_normal,
                      reg_w=reg, reg_b=reg))
-y = actreg(y)
+y = regulate(y)
 y = y.feed(activation())
 
 y = y.feed(Dense(mnist_classes, init=init_glorot_uniform,
@@ -162,7 +167,7 @@ def measure_error(quiet=False):
     loss, mloss, _, _ = ritual.test_batched(inputs, outputs, bs, return_losses='both')
 
     c = Confidence()
-    predicted = ritual.model.forward(inputs)
+    predicted = ritual.model.forward(inputs, deterministic=True)
     confid = c.forward(predicted)
 
     if not quiet: