# optim/onn/regularizer.py

import numpy as np

from .float import _f, _0
from .layer import Layer


class Regularizer:
    """Marker base class for regularizers.

    Declares no attributes or methods of its own. In this module it is
    subclassed by the concrete regularizers and used by
    ``ActivityRegularizer`` as the type its ``reg`` argument must satisfy.
    """


class ActivityRegularizer(Layer):
    """Pass-through layer that applies a regularizer to its own input.

    ``forward`` returns its input untouched while recording the summed
    output of ``reg.forward`` in ``self.loss``. ``backward`` adds the
    output of ``reg.backward`` (evaluated at the cached input) to the
    incoming gradient.
    """

    def __init__(self, reg):
        """Keep a reference to ``reg``, which must be a ``Regularizer``."""
        super().__init__()
        assert isinstance(reg, Regularizer), reg
        self.reg = reg

    def forward(self, X):
        """Cache ``X``, store the summed penalty in ``self.loss``, return ``X``."""
        # The input is cached first so backward() can evaluate the
        # regularizer at the same point.
        self.X = X
        penalty = self.reg.forward(X)
        self.loss = np.sum(penalty)
        return X

    def backward(self, dY):
        """Return ``dY`` plus ``reg.backward`` evaluated at the cached input."""
        reg_term = self.reg.backward(self.X)
        return dY + reg_term


class L1L2(Regularizer):
    """Penalty combining absolute-value (L1) and squared (L2) terms.

    Each term is scaled by its own coefficient and is skipped entirely
    when that coefficient is falsy (e.g. zero).
    """

    def __init__(self, l1=0.0, l2=0.0):
        """Store both coefficients after passing them through ``_f``."""
        self.l1 = _f(l1)
        self.l2 = _f(l2)

    def forward(self, X):
        """Return the total penalty, summed over every element of ``X``."""
        penalty = _0
        if self.l1:
            l1_term = self.l1 * np.abs(X)
            penalty += np.sum(l1_term)
        if self.l2:
            l2_term = self.l2 * np.square(X)
            penalty += np.sum(l2_term)
        return penalty

    def backward(self, X):
        """Return the elementwise derivative of the penalty, shaped like ``X``."""
        grad = np.zeros_like(X)
        if self.l1:
            # d|x|/dx = sign(x)
            l1_grad = self.l1 * np.sign(X)
            grad += l1_grad
        if self.l2:
            # d(x^2)/dx = 2x
            l2_grad = self.l2 * 2 * X
            grad += l2_grad
        return grad


# more

class SaturateRelu(Regularizer):
    """Elementwise penalty of ``lamb * X`` where ``X >= 0`` and zero elsewhere."""

    # paper: https://arxiv.org/abs/1703.09202
    # TODO: test this (and ActivityRegularizer) more thoroughly.
    #       I've looked at the histogram of the resulting weights;
    #       it seems like only the layers after this one are affected
    #       the way they should be.

    def __init__(self, lamb=0.0):
        """Store the penalty coefficient after passing it through ``_f``."""
        self.lamb = _f(lamb)

    def forward(self, X):
        """Return the per-element penalty (not summed)."""
        nonneg = X >= 0
        kept = np.where(nonneg, X, 0)
        return self.lamb * kept

    def backward(self, X):
        """Return ``lamb`` where ``X >= 0`` and zero elsewhere."""
        nonneg = X >= 0
        indicator = np.where(nonneg, 1, 0)
        return self.lamb * indicator