add saturating activity regularizer
commit 904423d631
parent b6597e8b6c

2 changed files with 36 additions and 1 deletion
optim_nn.py
@@ -81,6 +81,24 @@ class NLL(Loss): # Negative Log Likelihood
     def backward(self, p, y):
         return -y / len(p)
 
+# Regularizers {{{1
+
+class SaturateRelu(Regularizer):
+    # paper: https://arxiv.org/abs/1703.09202
+    # TODO: test this (and ActivityRegularizer) more thoroughly.
+    #       i've looked at the histogram of the resulting weights.
+    #       it seems like only the layers after this are affected
+    #       the way they should be.
+
+    def __init__(self, lamb=0.0):
+        self.lamb = _f(lamb)
+
+    def forward(self, X):
+        return self.lamb * np.where(X >= 0, X, 0)
+
+    def backward(self, X):
+        return self.lamb * np.where(X >= 0, 1, 0)
+
 # Nonparametric Layers {{{1
 
 # Parametric Layers {{{1
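A minimal standalone sketch (not part of the commit) of the penalty the new regularizer computes: nonnegative activations are charged at a rate of lamb per unit, so strongly positive (saturating) ReLU outputs get pushed back toward zero. The lamb value and the toy activations are assumptions for illustration; _f in the diff is the file's float-cast helper, replaced here by an explicit np.float32.

import numpy as np

lamb = np.float32(1e-4)                               # assumed strength; the class defaults to 0.0
X = np.array([[-1.0, 0.5, 2.0]], dtype=np.float32)    # toy post-activation values

penalty  = lamb * np.where(X >= 0, X, 0)              # what SaturateRelu.forward(X) returns
gradient = lamb * np.where(X >= 0, 1, 0)              # what SaturateRelu.backward(X) returns

print(np.sum(penalty))   # 0.00025 -- the scalar an activity regularizer would record
print(gradient)          # 0 for the negative entry, 1e-4 for the nonnegative ones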
@@ -116,7 +116,7 @@ class L1L2(Regularizer):
         self.l2 = _f(l2)
 
     def forward(self, X):
-        f = 0.0
+        f = _0
         if self.l1:
             f += np.sum(self.l1 * np.abs(X))
         if self.l2:
@@ -357,6 +357,7 @@ class Layer:
         self.parents = []
         self.children = []
         self.weights = OrderedDict()
+        self.loss = None # for activity regularizers
         self.input_shape = None
         self.output_shape = None
         kind = self.__class__.__name__
@@ -609,6 +610,20 @@ class LogSoftmax(Softmax):
     def backward(self, dY):
         return dY - np.sum(dY, axis=-1, keepdims=True) * self.sm
 
+class ActivityRegularizer(Layer):
+    def __init__(self, reg):
+        super().__init__()
+        assert isinstance(reg, Regularizer), reg
+        self.reg = reg
+
+    def forward(self, X):
+        self.X = X
+        self.loss = np.sum(self.reg.forward(X))
+        return X
+
+    def backward(self, dY):
+        return dY + self.reg.backward(self.X)
+
 # Parametric Layers {{{1
 
 class Dense(Layer):
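A sketch, under the same assumptions as above, of what passing a batch through an ActivityRegularizer wrapping SaturateRelu amounts to: the forward pass is an identity on the activations, with the summed penalty stashed on the node (which is why Layer now initializes self.loss = None), and the backward pass adds the regularizer's gradient to the incoming dY. Written out in plain numpy so it runs standalone.

import numpy as np

lamb = np.float32(1e-4)
X  = np.array([[-1.0, 0.5, 2.0]], dtype=np.float32)   # activations fed into the node
dY = np.ones_like(X)                                  # gradient arriving from the next layer

Y = X                                                 # forward(X) returns X untouched
node_loss = np.sum(lamb * np.where(X >= 0, X, 0))     # stored as the node's .loss
dX = dY + lamb * np.where(X >= 0, 1, 0)               # backward(dY): upstream grad plus penalty grad

print(node_loss)   # 0.00025
print(dX)          # roughly [[1.0, 1.0001, 1.0001]]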
@@ -713,6 +728,8 @@ class Model:
     def regulate_forward(self):
         loss = _0
         for node in self.ordered_nodes:
+            if node.loss is not None:
+                loss += node.loss
             for k, w in node.weights.items():
                 loss += w.forward()
         return loss
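Finally, a rough sketch of what the regulate_forward change accomplishes: besides the existing per-weight regularizer terms, any node that recorded an activity-regularization loss now contributes it to the total. The Node stand-in and its weights dict are assumptions standing in for the real Layer/Model machinery.

import numpy as np

class Node:                                    # stand-in for a layer in the graph
    def __init__(self, loss=None, weights=None):
        self.loss = loss                       # set by ActivityRegularizer.forward, else None
        self.weights = weights or {}           # name -> weight object exposing .forward()

nodes = [Node(), Node(loss=np.float32(2.5e-4)), Node()]

loss = np.float32(0)                           # plays the role of _0
for node in nodes:
    if node.loss is not None:                  # new in this commit
        loss += node.loss
    for k, w in node.weights.items():          # pre-existing weight-regularization terms
        loss += w.forward()
print(loss)                                    # 0.00025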