From 904423d631e9edd45cfb23a02a93af3f0381acc1 Mon Sep 17 00:00:00 2001
From: Connor Olding
Date: Tue, 11 Apr 2017 04:46:54 +0000
Subject: [PATCH] add saturating activity regularizer

---
 optim_nn.py      | 18 ++++++++++++++++++
 optim_nn_core.py | 19 ++++++++++++++++++-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/optim_nn.py b/optim_nn.py
index 346756d..562e63e 100755
--- a/optim_nn.py
+++ b/optim_nn.py
@@ -81,6 +81,24 @@ class NLL(Loss): # Negative Log Likelihood
     def backward(self, p, y):
         return -y / len(p)
 
+# Regularizers {{{1
+
+class SaturateRelu(Regularizer):
+    # paper: https://arxiv.org/abs/1703.09202
+    # TODO: test this (and ActivityRegularizer) more thoroughly.
+    #       i've looked at the histogram of the resulting weights.
+    #       it seems like only the layers after this are affected
+    #       the way they should be.
+
+    def __init__(self, lamb=0.0):
+        self.lamb = _f(lamb)
+
+    def forward(self, X):
+        return self.lamb * np.where(X >= 0, X, 0)
+
+    def backward(self, X):
+        return self.lamb * np.where(X >= 0, 1, 0)
+
 # Nonparametric Layers {{{1
 
 # Parametric Layers {{{1
diff --git a/optim_nn_core.py b/optim_nn_core.py
index 1238d59..1c6b427 100644
--- a/optim_nn_core.py
+++ b/optim_nn_core.py
@@ -116,7 +116,7 @@ class L1L2(Regularizer):
         self.l2 = _f(l2)
 
     def forward(self, X):
-        f = 0.0
+        f = _0
         if self.l1:
             f += np.sum(self.l1 * np.abs(X))
         if self.l2:
@@ -357,6 +357,7 @@ class Layer:
         self.parents = []
         self.children = []
         self.weights = OrderedDict()
+        self.loss = None # for activity regularizers
         self.input_shape = None
         self.output_shape = None
         kind = self.__class__.__name__
@@ -609,6 +610,20 @@ class LogSoftmax(Softmax):
     def backward(self, dY):
         return dY - np.sum(dY, axis=-1, keepdims=True) * self.sm
 
+class ActivityRegularizer(Layer):
+    def __init__(self, reg):
+        super().__init__()
+        assert isinstance(reg, Regularizer), reg
+        self.reg = reg
+
+    def forward(self, X):
+        self.X = X
+        self.loss = np.sum(self.reg.forward(X))
+        return X
+
+    def backward(self, dY):
+        return dY + self.reg.backward(self.X)
+
 # Parametric Layers {{{1
 
 class Dense(Layer):
@@ -713,6 +728,8 @@ class Model:
     def regulate_forward(self):
         loss = _0
         for node in self.ordered_nodes:
+            if node.loss is not None:
+                loss += node.loss
             for k, w in node.weights.items():
                 loss += w.forward()
         return loss
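
Note (not part of the patch): a minimal standalone NumPy sketch of the penalty
SaturateRelu contributes through ActivityRegularizer, useful for sanity-checking
the forward/backward pair above. The helper names and the finite-difference
check are illustrative only and are not part of optim_nn's API; the penalty is
lamb * sum(max(X, 0)) over the activations, with gradient lamb where X >= 0.

    import numpy as np

    def saturate_relu_penalty(X, lamb=1e-4):
        # what ActivityRegularizer.forward sums up: lamb * sum(max(X, 0))
        return lamb * np.sum(np.where(X >= 0, X, 0))

    def saturate_relu_grad(X, lamb=1e-4):
        # what SaturateRelu.backward adds to dY: lamb where X >= 0, else 0
        return lamb * np.where(X >= 0, 1.0, 0.0)

    X = np.random.randn(4, 3)
    analytic = saturate_relu_grad(X)

    # central finite differences on the penalty, element by element
    eps = 1e-6
    numeric = np.zeros_like(X)
    for i in np.ndindex(X.shape):
        Xp = X.copy(); Xp[i] += eps
        Xm = X.copy(); Xm[i] -= eps
        numeric[i] = (saturate_relu_penalty(Xp) - saturate_relu_penalty(Xm)) / (2 * eps)

    print(np.allclose(analytic, numeric, atol=1e-8))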