add SELU stuff
This commit is contained in:
parent
7e38f45a3f
commit
e5cea3f847
74
optim_nn.py
74
optim_nn.py
|
@ -27,6 +27,12 @@ def log(left, right, update=False):
|
||||||
class Dummy:
    # Placeholder type; carries no state or behavior.
    pass
|
||||||
|
|
||||||
|
# Initializations {{{1
|
||||||
|
|
||||||
|
def init_gaussian_unit(size, ins, outs):
    # Gaussian weight init with variance 1/ins (zero mean).
    # `outs` is unused but kept so all init_* functions share one signature.
    stddev = np.sqrt(1 / ins)
    return np.random.normal(0, stddev, size=size)
|
||||||
|
|
||||||
# Loss functions {{{1
|
# Loss functions {{{1
|
||||||
|
|
||||||
class SquaredHalved(ResidualLoss):
|
class SquaredHalved(ResidualLoss):
|
||||||
|
@ -101,6 +107,68 @@ class SaturateRelu(Regularizer):
|
||||||
|
|
||||||
# Nonparametric Layers {{{1
|
# Nonparametric Layers {{{1
|
||||||
|
|
||||||
|
class AlphaDropout(Layer):
    # Dropout variant that preserves self-normalization: dropped units are
    # set to the Selu saturation value (-lamb * alpha) instead of zero, and
    # the output is affinely corrected (a, b) to keep mean and variance.
    # to be used alongside Selu activations.
    # paper: https://arxiv.org/abs/1706.02515

    def __init__(self, dropout=0.0, alpha=1.67326324, lamb=1.05070099):
        # alpha/lamb defaults are the standard Selu constants from the paper.
        super().__init__()
        self.alpha = _f(alpha)
        self.lamb = _f(lamb)
        # saturation value alpha' = -lamb * alpha.  NOTE: must be assigned
        # before the line below — the dropout setter reads self.saturated.
        self.saturated = -self.lamb * self.alpha
        self.dropout = _f(dropout)

    @property
    def dropout(self):
        # dropout probability (probability of a unit being saturated).
        return self._dropout

    @dropout.setter
    def dropout(self, x):
        self._dropout = _f(x)
        # q is the keep probability.
        self.q = 1 - self._dropout
        assert 0 <= self.q <= 1

        sat = self.saturated

        # affine correction restoring zero mean / unit variance after
        # saturating dropout (per the paper's alpha-dropout derivation).
        self.a = 1 / np.sqrt(self.q + sat * sat * self.q * self._dropout)
        self.b = -self.a * (self._dropout * sat)

    def forward(self, X):
        # keep each unit with probability q; dropped units saturate.
        self.mask = np.random.rand(*X.shape) < self.q
        return self.a * np.where(self.mask, X, self.saturated) + self.b

    def forward_deterministic(self, X):
        # inference pass is the identity (no test-time rescaling needed,
        # since a and b already normalize the training-time output).
        return X

    def backward(self, dY):
        # gradient flows only through kept units, scaled by a.
        return dY * self.a * self.mask
|
||||||
|
|
||||||
|
# Activations {{{2
|
||||||
|
|
||||||
|
class Selu(Layer):
    # Scaled exponential linear unit: lamb * X for X >= 0,
    # lamb * alpha * (exp(X) - 1) otherwise.  With the default constants
    # the activation is self-normalizing.
    # paper: https://arxiv.org/abs/1706.02515

    def __init__(self, alpha=1.67326324, lamb=1.05070099):
        super().__init__()
        self.alpha = _f(alpha)
        self.lamb = _f(lamb)

    def forward(self, X):
        self.cond = X >= 0
        # clamp the exponent at 0: exp() is only needed on the negative
        # branch, and exp(large positive X) would overflow to inf (with a
        # RuntimeWarning) even though np.where discards that branch.
        # values on the negative branch are unchanged.
        self.neg = self.alpha * np.exp(np.minimum(X, 0))
        return self.lamb * np.where(self.cond, X, self.neg - self.alpha)

    def backward(self, dY):
        # derivative: lamb on the positive branch;
        # lamb * alpha * exp(X) on the negative branch (cached in self.neg).
        return dY * self.lamb * np.where(self.cond, 1, self.neg)
|
||||||
|
|
||||||
|
class TanhTest(Layer):
    # Experimental scaled tanh activation: 2.4004 * tanh(X / 2).

    def forward(self, X):
        # cache tanh(X / 2) for reuse in the backward pass
        self.sig = np.tanh(X / 2)
        return 2.4004 * self.sig

    def backward(self, dY):
        # d/dX [2.4004 * tanh(X/2)] = (2.4004 / 2) * (1 - tanh(X/2)^2)
        grad = (0.5 * 2.4004) * (1 - np.square(self.sig))
        return dY * grad
|
||||||
|
|
||||||
# Parametric Layers {{{1
|
# Parametric Layers {{{1
|
||||||
|
|
||||||
class LayerNorm(Layer):
|
class LayerNorm(Layer):
|
||||||
|
@ -469,9 +537,11 @@ def multiresnet(x, width, depth, block=2, multi=1,
|
||||||
# Toy Data {{{1
|
# Toy Data {{{1
|
||||||
|
|
||||||
# Name -> constructor lookup tables; presumably used to resolve string
# config options into initializers/activation layers — TODO confirm caller.
inits = dict(he_normal=init_he_normal, he_uniform=init_he_uniform,
             glorot_normal=init_glorot_normal, glorot_uniform=init_glorot_uniform,
             gaussian_unit=init_gaussian_unit)
activations = dict(sigmoid=Sigmoid, tanh=Tanh, lecun=LeCunTanh,
                   relu=Relu, elu=Elu, gelu=GeluApprox, selu=Selu,
                   softplus=Softplus)
|
||||||
|
|
||||||
def prettyize(data):
|
def prettyize(data):
|
||||||
if isinstance(data, np.ndarray):
|
if isinstance(data, np.ndarray):
|
||||||
|
|
|
@ -623,7 +623,7 @@ class Elu(Layer):
|
||||||
|
|
||||||
def __init__(self, alpha=1):
|
def __init__(self, alpha=1):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.alpha = _f(alpha)
|
self.alpha = _f(alpha) # FIXME: unused
|
||||||
|
|
||||||
def forward(self, X):
|
def forward(self, X):
|
||||||
self.cond = X >= 0
|
self.cond = X >= 0
|
||||||
|
|
Loading…
Reference in New Issue
Block a user