add Softplus and LeCunTanh; use LeCunTanh

parent 0332c2662b
commit d3c23912c1

2 changed files with 34 additions and 8 deletions
16  optim_nn.py
16  optim_nn.py
@@ -468,8 +468,10 @@ def multiresnet(x, width, depth, block=2, multi=1,
 
 # Toy Data {{{1
 
-inits = dict(he_normal=init_he_normal, he_uniform=init_he_uniform)
-activations = dict(sigmoid=Sigmoid, tanh=Tanh, relu=Relu, elu=Elu, gelu=GeluApprox)
+inits = dict(he_normal=init_he_normal, he_uniform=init_he_uniform,
+             glorot_normal=init_glorot_normal, glorot_uniform=init_glorot_uniform)
+activations = dict(sigmoid=Sigmoid, tanh=Tanh, lecun=LeCunTanh,
+                   relu=Relu, elu=Elu, gelu=GeluApprox, softplus=Softplus)
 
 def prettyize(data):
     if isinstance(data, np.ndarray):
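For context on this hunk: inits and activations are plain name-to-class registries, so a config string such as 'lecun' or 'glorot_uniform' can be resolved to the corresponding class at run time. A minimal sketch of that lookup pattern, assuming the dicts above are in scope (resolve_activation is a hypothetical helper, not part of optim_nn.py):

def resolve_activation(name):
    # look up an activation Layer subclass by its config string and instantiate it.
    try:
        return activations[name]()
    except KeyError:
        raise ValueError('unknown activation: {}'.format(name))

act = resolve_activation('lecun')  # -> a LeCunTanh instance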
@@ -692,10 +694,10 @@ def run(program, args=None):
 
         # style of resnet (order of layers, which layers, etc.)
         parallel_style = 'onelesssum',
-        activation = 'gelu',
+        activation = 'lecun',
 
         optim = 'adam', # note: most features only implemented for Adam
-        optim_decay1 = 2, # first momentum given in epochs (optional)
+        optim_decay1 = 24, # first momentum given in epochs (optional)
         optim_decay2 = 100, # second momentum given in epochs (optional)
         nesterov = True,
         batch_size = 64,
@@ -705,13 +707,13 @@ def run(program, args=None):
         learn = 1e-2,
         epochs = 24,
         learn_halve_every = 16, # only used with anneal/dumb
-        restarts = 8,
+        restarts = 5,
         restart_decay = 0.25, # only used with SGDR
         expando = lambda i: 24 * i,
 
         # misc
-        init = 'he_normal',
-        loss = 'msee',
+        init = 'glorot_uniform',
+        loss = 'mse',
         mloss = 'mse',
         ritual = 'default',
         restart_optim = False, # restarts also reset internal state of optimizer
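A note on the optim_decay settings changed above: per the comments, the momenta are given in epochs rather than as per-step coefficients like Adam's usual beta1 = 0.9, so they must be converted using the number of batches per epoch. One plausible conversion, shown only as an illustration and not necessarily what optim_nn.py actually does, interprets the value as a half-life in epochs:

def beta_from_epochs(decay_epochs, steps_per_epoch):
    # hypothetical helper: pick beta so the running average's weight on
    # old gradients halves once every `decay_epochs` epochs.
    return 0.5 ** (1.0 / (decay_epochs * steps_per_epoch))

beta1 = beta_from_epochs(24, 100)  # optim_decay1 = 24 at 100 batches/epoch -> ~0.99971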
@@ -570,7 +570,7 @@ class Dropout(Layer):
 
 # Activation Layers {{{2
 
-class Sigmoid(Layer): # aka Logistic
+class Sigmoid(Layer): # aka Logistic, Expit (inverse of Logit)
     def forward(self, X):
         self.sig = sigmoid(X)
         return self.sig
@@ -578,6 +578,16 @@ class Sigmoid(Layer): # aka Logistic
     def backward(self, dY):
         return dY * self.sig * (1 - self.sig)
 
+class Softplus(Layer):
+    # integral of Sigmoid.
+
+    def forward(self, X):
+        self.X = X
+        return np.log(1 + np.exp(X))
+
+    def backward(self, dY):
+        return dY * sigmoid(self.X)
+
 class Tanh(Layer):
     def forward(self, X):
         self.sig = np.tanh(X)
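The comment on Softplus above is the whole story: softplus(x) = log(1 + exp(x)) is an antiderivative of the logistic sigmoid, so the backward pass only scales dY by sigmoid(X). A small standalone check of that gradient, independent of the Layer class (np.logaddexp(0, x) is used here because the naive log(1 + exp(x)) overflows for large positive x):

import numpy as np

def softplus(x):
    # numerically stable log(1 + exp(x))
    return np.logaddexp(0.0, x)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

x = np.linspace(-4.0, 4.0, 9)
eps = 1e-6
numerical = (softplus(x + eps) - softplus(x - eps)) / (2 * eps)  # central difference
assert np.allclose(numerical, sigmoid(x), atol=1e-5)             # matches backward()'s factor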
@@ -586,6 +596,20 @@ class Tanh(Layer):
     def backward(self, dY):
         return dY * (1 - self.sig * self.sig)
 
+class LeCunTanh(Layer):
+    # paper: http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
+    # paper: http://yann.lecun.com/exdb/publis/pdf/lecun-89.pdf
+    # scaled such that f([-1, 1]) = [-1, 1].
+    # helps preserve an input variance of 1.
+    # second derivative peaks around an input of ±1.
+
+    def forward(self, X):
+        self.sig = np.tanh(2 / 3 * X)
+        return 1.7159 * self.sig
+
+    def backward(self, dY):
+        return dY * (2 / 3 * 1.7159) * (1 - self.sig * self.sig)
+
 class Relu(Layer):
     def forward(self, X):
         self.cond = X >= 0
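The constants in LeCunTanh above come from LeCun's recommended squashing function f(x) = 1.7159 * tanh(2/3 * x): 1.7159 is approximately 1 / tanh(2/3), which is exactly what pins f(1) = 1 and f(-1) = -1 as the comment says. A quick standalone check of that constant and of the gain used in backward():

import numpy as np

print(1.0 / np.tanh(2.0 / 3.0))            # ~1.71593, the magic constant

def lecun_tanh(x):
    return 1.7159 * np.tanh(2.0 / 3.0 * x)

print(lecun_tanh(np.array([-1.0, 1.0])))   # ~[-1.  1.], so f([-1, 1]) = [-1, 1]
print(2.0 / 3.0 * 1.7159)                  # ~1.144, the slope at the origin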