add saturating activity regularizer

parent b6597e8b6c
commit 904423d631

2 changed files with 36 additions and 1 deletion

18  optim_nn.py
18  optim_nn.py
@@ -81,6 +81,24 @@ class NLL(Loss): # Negative Log Likelihood
     def backward(self, p, y):
         return -y / len(p)
 
+# Regularizers {{{1
+
+class SaturateRelu(Regularizer):
+    # paper: https://arxiv.org/abs/1703.09202
+    # TODO: test this (and ActivityRegularizer) more thoroughly.
+    #       i've looked at the histogram of the resulting weights.
+    #       it seems like only the layers after this are affected
+    #       the way they should be.
+
+    def __init__(self, lamb=0.0):
+        self.lamb = _f(lamb)
+
+    def forward(self, X):
+        return self.lamb * np.where(X >= 0, X, 0)
+
+    def backward(self, X):
+        return self.lamb * np.where(X >= 0, 1, 0)
+
 # Nonparametric Layers {{{1
 
 # Parametric Layers {{{1
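A quick sanity check on the new regularizer's math (a standalone sketch, not repo code; `lamb` is folded in as a plain float32 here): the penalty is lamb * sum(max(X, 0)), so its gradient should be lamb * 1[X >= 0], which is exactly what the new backward returns. A finite-difference comparison:

import numpy as np

lamb = np.float32(0.01)
forward  = lambda X: lamb * np.where(X >= 0, X, 0)   # per-element penalty
backward = lambda X: lamb * np.where(X >= 0, 1, 0)   # its derivative

rng = np.random.default_rng(0)
X = rng.standard_normal((4, 8))

# numerical gradient of sum(forward(X)) w.r.t. each element of X
eps = 1e-6
num = np.zeros_like(X)
for i in np.ndindex(X.shape):
    Xp, Xm = X.copy(), X.copy()
    Xp[i] += eps
    Xm[i] -= eps
    num[i] = (forward(Xp).sum() - forward(Xm).sum()) / (2 * eps)

print(np.allclose(num, backward(X), atol=1e-4))  # True away from X == 0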
@@ -116,7 +116,7 @@ class L1L2(Regularizer):
         self.l2 = _f(l2)
 
     def forward(self, X):
-        f = 0.0
+        f = _0
         if self.l1:
             f += np.sum(self.l1 * np.abs(X))
         if self.l2:
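The one-line change above swaps the Python float literal for what is presumably the library's typed zero constant. A sketch of the likely motivation (assuming `_f` is the library's working dtype and `_0 = _f(0)`; their definitions aren't in this diff): accumulating onto a Python float can silently promote the penalty to float64 under NumPy's legacy scalar promotion rules, while a typed zero keeps it in the working dtype.

import numpy as np

_f = np.float32   # assumed: the library's working dtype
_0 = _f(0)        # assumed: its typed zero

penalty = np.sum(_f(1e-3) * np.abs(np.ones(4, dtype=_f)))  # float32 scalar
print((0.0 + penalty).dtype)  # float64 under legacy promotion (NumPy 1.x)
print((_0 + penalty).dtype)   # float32 either way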
@@ -357,6 +357,7 @@ class Layer:
         self.parents = []
         self.children = []
         self.weights = OrderedDict()
+        self.loss = None # for activity regularizers
         self.input_shape = None
         self.output_shape = None
         kind = self.__class__.__name__
@@ -609,6 +610,20 @@ class LogSoftmax(Softmax):
     def backward(self, dY):
         return dY - np.sum(dY, axis=-1, keepdims=True) * self.sm
 
+class ActivityRegularizer(Layer):
+    def __init__(self, reg):
+        super().__init__()
+        assert isinstance(reg, Regularizer), reg
+        self.reg = reg
+
+    def forward(self, X):
+        self.X = X
+        self.loss = np.sum(self.reg.forward(X))
+        return X
+
+    def backward(self, dY):
+        return dY + self.reg.backward(self.X)
+
 # Parametric Layers {{{1
 
 class Dense(Layer):
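To illustrate the new layer's contract (a self-contained sketch with the Layer/Regularizer base classes stubbed out and `_f` taken as np.float32, so the numbers can be checked by hand): forward is the identity but records the summed penalty in self.loss, and backward passes the incoming gradient through with the penalty's gradient added.

import numpy as np

class SaturateRelu:  # as in the diff, minus the Regularizer base class
    def __init__(self, lamb=0.0):
        self.lamb = np.float32(lamb)
    def forward(self, X):
        return self.lamb * np.where(X >= 0, X, 0)
    def backward(self, X):
        return self.lamb * np.where(X >= 0, 1, 0)

class ActivityRegularizer:  # as in the diff, minus the Layer base class
    def __init__(self, reg):
        self.reg = reg
    def forward(self, X):
        self.X = X
        self.loss = np.sum(self.reg.forward(X))
        return X
    def backward(self, dY):
        return dY + self.reg.backward(self.X)

X = np.array([[-1.0, 0.5, 2.0]])
layer = ActivityRegularizer(SaturateRelu(lamb=0.1))
print(layer.forward(X) is X)             # True: identity on the forward pass
print(layer.loss)                        # ~0.25, i.e. 0.1 * (0.5 + 2.0)
print(layer.backward(np.zeros_like(X)))  # [[0.  0.1 0.1]]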
@@ -713,6 +728,8 @@ class Model:
     def regulate_forward(self):
         loss = _0
         for node in self.ordered_nodes:
+            if node.loss is not None:
+                loss += node.loss
             for k, w in node.weights.items():
                 loss += w.forward()
         return loss
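Taken together: an ActivityRegularizer node stores its penalty in self.loss during the forward pass, and regulate_forward now folds those per-node losses in alongside the per-weight regularizer losses, so a training step can add its result to the data loss. A runnable stand-in for the accumulation loop (hypothetical stub objects with only the attributes the method reads; not repo code):

import numpy as np

class Node:
    def __init__(self, loss=None, weights=None):
        self.loss = loss              # set by ActivityRegularizer.forward
        self.weights = weights or {}  # name -> weight with a .forward() loss

class RegularizedWeight:
    def __init__(self, penalty):
        self.penalty = penalty
    def forward(self):
        return self.penalty           # the weight regularizer's loss

ordered_nodes = [
    Node(loss=np.float32(0.25)),      # e.g. an ActivityRegularizer node
    Node(weights={'W': RegularizedWeight(np.float32(0.1))}),
]

loss = np.float32(0)
for node in ordered_nodes:
    if node.loss is not None:
        loss += node.loss
    for k, w in node.weights.items():
        loss += w.forward()
print(loss)  # 0.35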