add saturating activity regularizer

Connor Olding 2017-04-11 04:46:54 +00:00
parent b6597e8b6c
commit 904423d631
2 changed files with 36 additions and 1 deletion

@@ -81,6 +81,24 @@ class NLL(Loss): # Negative Log Likelihood
     def backward(self, p, y):
         return -y / len(p)
+# Regularizers {{{1
+class SaturateRelu(Regularizer):
+    # paper: https://arxiv.org/abs/1703.09202
+    # TODO: test this (and ActivityRegularizer) more thoroughly.
+    # i've looked at the histogram of the resulting weights.
+    # it seems like only the layers after this are affected
+    # the way they should be.
+    def __init__(self, lamb=0.0):
+        self.lamb = _f(lamb)
+    def forward(self, X):
+        return self.lamb * np.where(X >= 0, X, 0)
+    def backward(self, X):
+        return self.lamb * np.where(X >= 0, 1, 0)
 # Nonparametric Layers {{{1
 # Parametric Layers {{{1
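As a sanity check on the new regularizer, its penalty and derivative reduce to plain numpy expressions. The sketch below is standalone and illustrative only: the lamb value and X are made up, and the library's Regularizer base class and _f float-cast helper are left out.

import numpy as np

lamb = 0.01
X = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])

# forward: lamb * relu(X), i.e. only non-negative activations are penalized
penalty = lamb * np.where(X >= 0, X, 0)   # -> [0. 0. 0. 0.005 0.02]

# backward: constant slope lamb wherever X is non-negative, zero elsewhere
grad = lamb * np.where(X >= 0, 1, 0)      # -> [0. 0. 0.01 0.01 0.01]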

@@ -116,7 +116,7 @@ class L1L2(Regularizer):
         self.l2 = _f(l2)
     def forward(self, X):
-        f = 0.0
+        f = _0
         if self.l1:
             f += np.sum(self.l1 * np.abs(X))
         if self.l2:
@@ -357,6 +357,7 @@ class Layer:
         self.parents = []
         self.children = []
         self.weights = OrderedDict()
+        self.loss = None # for activity regularizers
         self.input_shape = None
         self.output_shape = None
         kind = self.__class__.__name__
@@ -609,6 +610,20 @@ class LogSoftmax(Softmax):
     def backward(self, dY):
         return dY - np.sum(dY, axis=-1, keepdims=True) * self.sm
+class ActivityRegularizer(Layer):
+    def __init__(self, reg):
+        super().__init__()
+        assert isinstance(reg, Regularizer), reg
+        self.reg = reg
+    def forward(self, X):
+        self.X = X
+        self.loss = np.sum(self.reg.forward(X))
+        return X
+    def backward(self, dY):
+        return dY + self.reg.backward(self.X)
 # Parametric Layers {{{1
 class Dense(Layer):
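The new layer is an identity on the data path: forward() passes X through untouched and only records the summed penalty in self.loss, while backward() adds the regularizer's gradient on top of the incoming dY. A rough standalone illustration of that contract, with the Layer base class and the real SaturateRelu replaced by a hand-written stand-in:

import numpy as np

class FakeSaturateRelu:
    # stand-in for the real regularizer, same forward/backward math
    def __init__(self, lamb):
        self.lamb = lamb
    def forward(self, X):
        return self.lamb * np.where(X >= 0, X, 0)
    def backward(self, X):
        return self.lamb * np.where(X >= 0, 1, 0)

reg = FakeSaturateRelu(0.01)
X = np.array([[-1.0, 2.0, 3.0]])

loss = np.sum(reg.forward(X))   # what forward() would store in self.loss -> 0.05
dY = np.ones_like(X)            # pretend gradient arriving from the next layer
dX = dY + reg.backward(X)       # what backward() would return -> [[1. 1.01 1.01]]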
@@ -713,6 +728,8 @@ class Model:
     def regulate_forward(self):
         loss = _0
         for node in self.ordered_nodes:
+            if node.loss is not None:
+                loss += node.loss
             for k, w in node.weights.items():
                 loss += w.forward()
         return loss
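With that change, regulate_forward() folds two kinds of penalties into one scalar: per-node activity losses (set by ActivityRegularizer.forward) and the per-weight penalties returned by each weight's own forward(). A toy version of the accumulation, using made-up stand-in node and weight classes rather than the library's real ones:

class FakeWeight:
    def __init__(self, penalty):
        self.penalty = penalty
    def forward(self):
        return self.penalty   # weight regularization penalty

class FakeNode:
    def __init__(self, loss=None, weights=None):
        self.loss = loss              # set by an activity regularizer, else None
        self.weights = weights or {}  # name -> weight object

ordered_nodes = [
    FakeNode(weights={"W": FakeWeight(0.10)}),  # e.g. a Dense layer with L1L2 on W
    FakeNode(loss=0.05),                        # e.g. an ActivityRegularizer node
]

total = 0.0
for node in ordered_nodes:
    if node.loss is not None:
        total += node.loss
    for k, w in node.weights.items():
        total += w.forward()

print(total)  # 0.15 (0.10 + 0.05, up to float rounding)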