add saturating activity regularizer
commit 904423d631
parent b6597e8b6c

2 changed files with 36 additions and 1 deletion
optim_nn.py
@@ -81,6 +81,24 @@ class NLL(Loss): # Negative Log Likelihood
     def backward(self, p, y):
         return -y / len(p)
 
+# Regularizers {{{1
+
+class SaturateRelu(Regularizer):
+    # paper: https://arxiv.org/abs/1703.09202
+    # TODO: test this (and ActivityRegularizer) more thoroughly.
+    #       i've looked at the histogram of the resulting weights.
+    #       it seems like only the layers after this are affected
+    #       the way they should be.
+
+    def __init__(self, lamb=0.0):
+        self.lamb = _f(lamb)
+
+    def forward(self, X):
+        return self.lamb * np.where(X >= 0, X, 0)
+
+    def backward(self, X):
+        return self.lamb * np.where(X >= 0, 1, 0)
+
 # Nonparametric Layers {{{1
 
 # Parametric Layers {{{1
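A minimal standalone sketch (not part of the commit) of the penalty the new regularizer computes: nonnegative activations are charged at a rate of lamb per unit, so strongly positive (saturating) ReLU outputs get pushed back toward zero. The lamb value and the toy activations are assumptions for illustration; _f in the diff is the file's float-cast helper, replaced here by an explicit np.float32.

import numpy as np

lamb = np.float32(1e-4)                               # assumed strength; the class defaults to 0.0
X = np.array([[-1.0, 0.5, 2.0]], dtype=np.float32)    # toy post-activation values

penalty  = lamb * np.where(X >= 0, X, 0)              # what SaturateRelu.forward(X) returns
gradient = lamb * np.where(X >= 0, 1, 0)              # what SaturateRelu.backward(X) returns

print(np.sum(penalty))   # 0.00025 -- the scalar an activity regularizer would record
print(gradient)          # 0 for the negative entry, 1e-4 for the nonnegative ones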
@@ -116,7 +116,7 @@ class L1L2(Regularizer):
         self.l2 = _f(l2)
 
     def forward(self, X):
-        f = 0.0
+        f = _0
         if self.l1:
             f += np.sum(self.l1 * np.abs(X))
         if self.l2:
@@ -357,6 +357,7 @@ class Layer:
         self.parents = []
         self.children = []
         self.weights = OrderedDict()
+        self.loss = None # for activity regularizers
         self.input_shape = None
         self.output_shape = None
         kind = self.__class__.__name__
@@ -609,6 +610,20 @@ class LogSoftmax(Softmax):
     def backward(self, dY):
         return dY - np.sum(dY, axis=-1, keepdims=True) * self.sm
 
+class ActivityRegularizer(Layer):
+    def __init__(self, reg):
+        super().__init__()
+        assert isinstance(reg, Regularizer), reg
+        self.reg = reg
+
+    def forward(self, X):
+        self.X = X
+        self.loss = np.sum(self.reg.forward(X))
+        return X
+
+    def backward(self, dY):
+        return dY + self.reg.backward(self.X)
+
 # Parametric Layers {{{1
 
 class Dense(Layer):
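A sketch, under the same assumptions as above, of what passing a batch through an ActivityRegularizer wrapping SaturateRelu amounts to: the forward pass is an identity on the activations, with the summed penalty stashed on the node (which is why Layer now initializes self.loss = None), and the backward pass adds the regularizer's gradient to the incoming dY. Written out in plain numpy so it runs standalone.

import numpy as np

lamb = np.float32(1e-4)
X  = np.array([[-1.0, 0.5, 2.0]], dtype=np.float32)   # activations fed into the node
dY = np.ones_like(X)                                  # gradient arriving from the next layer

Y = X                                                 # forward(X) returns X untouched
node_loss = np.sum(lamb * np.where(X >= 0, X, 0))     # stored as the node's .loss
dX = dY + lamb * np.where(X >= 0, 1, 0)               # backward(dY): upstream grad plus penalty grad

print(node_loss)   # 0.00025
print(dX)          # roughly [[1.0, 1.0001, 1.0001]]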
@@ -713,6 +728,8 @@ class Model:
     def regulate_forward(self):
         loss = _0
         for node in self.ordered_nodes:
+            if node.loss is not None:
+                loss += node.loss
             for k, w in node.weights.items():
                 loss += w.forward()
         return loss
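Finally, a rough sketch of what the regulate_forward change accomplishes: besides the existing per-weight regularizer terms, any node that recorded an activity-regularization loss now contributes it to the total. The Node stand-in and its weights dict are assumptions standing in for the real Layer/Model machinery.

import numpy as np

class Node:                                    # stand-in for a layer in the graph
    def __init__(self, loss=None, weights=None):
        self.loss = loss                       # set by ActivityRegularizer.forward, else None
        self.weights = weights or {}           # name -> weight object exposing .forward()

nodes = [Node(), Node(loss=np.float32(2.5e-4)), Node()]

loss = np.float32(0)                           # plays the role of _0
for node in nodes:
    if node.loss is not None:                  # new in this commit
        loss += node.loss
    for k, w in node.weights.items():          # pre-existing weight-regularization terms
        loss += w.forward()
print(loss)                                    # 0.00025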