import numpy as np

from .float import _f, _0
from .layer import Layer


class Regularizer:
    # interface: forward(X) returns the penalty for X (a scalar or an
    # elementwise array; ActivityRegularizer sums it), and backward(X)
    # returns the gradient of that penalty with respect to X.
    pass
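
# A hypothetical subclass, just to illustrate the expected interface
# (not part of the library):
#
#   class HalfL2(Regularizer):
#       def forward(self, X):
#           return 0.5 * np.sum(np.square(X))  # penalty
#       def backward(self, X):
#           return X                           # d(penalty)/dX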


class ActivityRegularizer(Layer):
    # pass-through layer that penalizes its input activations: forward()
    # returns X unchanged but records the penalty in self.loss, and
    # backward() adds the penalty's gradient to the incoming gradient.

    def __init__(self, reg):
        super().__init__()
        assert isinstance(reg, Regularizer), reg
        self.reg = reg

    def forward(self, X):
        self.X = X  # keep the input around for backward().
        self.loss = np.sum(self.reg.forward(X))
        return X

    def backward(self, dY):
        return dY + self.reg.backward(self.X)
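
# A minimal usage sketch: wire an ActivityRegularizer into a network like
# any other Layer, then fold its .loss into the total loss each step.
# Only forward/backward are defined in this file; task_loss and the way
# layers are stacked are placeholders, not part of this module.
#
#   act_reg = ActivityRegularizer(L1L2(l2=1e-4))
#   Y = act_reg.forward(X)      # Y is X, returned unchanged
#   total_loss = task_loss + act_reg.loss
#   dX = act_reg.backward(dY)   # upstream gradient plus penalty gradient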


class L1L2(Regularizer):
    # the classic penalties: L1 (absolute value) and L2 (squared),
    # individually weighted and summed over all elements of X.

    def __init__(self, l1=0.0, l2=0.0):
        self.l1 = _f(l1)
        self.l2 = _f(l2)

    def forward(self, X):
        f = _0
        if self.l1:
            f += np.sum(self.l1 * np.abs(X))
        if self.l2:
            f += np.sum(self.l2 * np.square(X))
        return f

    def backward(self, X):
        df = np.zeros_like(X)
        if self.l1:
            df += self.l1 * np.sign(X)
        if self.l2:
            df += self.l2 * 2 * X
        return df
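
# A small worked example (a sketch, assuming _f/_0 simply cast to the
# library's float type; values are exact up to that precision):
#
#   reg = L1L2(l1=0.1, l2=0.01)
#   X = np.array([-2.0, 0.0, 3.0])
#   reg.forward(X)   # 0.1*(2+0+3) + 0.01*(4+0+9) = 0.5 + 0.13 = 0.63
#   reg.backward(X)  # 0.1*np.sign(X) + 0.01*2*X = [-0.14, 0.0, 0.16]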


# more


class SaturateRelu(Regularizer):
    # paper: https://arxiv.org/abs/1703.09202
    # TODO: test this (and ActivityRegularizer) more thoroughly.
    # I've looked at the histogram of the resulting weights;
    # it seems like only the layers after this are affected
    # the way they should be.

    def __init__(self, lamb=0.0):
        self.lamb = _f(lamb)

    def forward(self, X):
        # elementwise penalty on nonnegative values; negative values are free.
        return self.lamb * np.where(X >= 0, X, 0)

    def backward(self, X):
        return self.lamb * np.where(X >= 0, 1, 0)
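
# A small worked example (same assumptions about _f as above); note that
# X = 0 falls on the X >= 0 branch:
#
#   reg = SaturateRelu(lamb=0.01)
#   X = np.array([-1.0, 0.0, 2.0])
#   reg.forward(X)   # 0.01 * [0, 0, 2] = [0.0, 0.0, 0.02]
#   reg.backward(X)  # 0.01 * [0, 1, 1] = [0.0, 0.01, 0.01]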