add Softplus and LeCunTanh; use LeCunTanh

Connor Olding 2017-04-23 17:40:47 +00:00
parent 0332c2662b
commit d3c23912c1
2 changed files with 34 additions and 8 deletions

Changed file 1 of 2:

@@ -468,8 +468,10 @@ def multiresnet(x, width, depth, block=2, multi=1,

 # Toy Data {{{1

-inits = dict(he_normal=init_he_normal, he_uniform=init_he_uniform)
-activations = dict(sigmoid=Sigmoid, tanh=Tanh, relu=Relu, elu=Elu, gelu=GeluApprox)
+inits = dict(he_normal=init_he_normal, he_uniform=init_he_uniform,
+             glorot_normal=init_glorot_normal, glorot_uniform=init_glorot_uniform)
+activations = dict(sigmoid=Sigmoid, tanh=Tanh, lecun=LeCunTanh,
+                   relu=Relu, elu=Elu, gelu=GeluApprox, softplus=Softplus)

 def prettyize(data):
     if isinstance(data, np.ndarray):
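
The init_glorot_normal and init_glorot_uniform functions referenced by the new dict entries aren't shown in this diff; presumably they are the standard Glorot/Xavier initializers (Glorot & Bengio, 2010). A minimal sketch of what such initializers typically compute, with a made-up signature used purely for illustration:

import numpy as np

def glorot_uniform_sketch(fan_in, fan_out):
    # uniform in [-limit, +limit], limit = sqrt(6 / (fan_in + fan_out))
    limit = np.sqrt(6 / (fan_in + fan_out))
    return np.random.uniform(-limit, limit, size=(fan_in, fan_out))

def glorot_normal_sketch(fan_in, fan_out):
    # zero-mean normal, std = sqrt(2 / (fan_in + fan_out))
    std = np.sqrt(2 / (fan_in + fan_out))
    return np.random.normal(0.0, std, size=(fan_in, fan_out))
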
@@ -692,10 +694,10 @@ def run(program, args=None):

         # style of resnet (order of layers, which layers, etc.)
         parallel_style = 'onelesssum',
-        activation = 'gelu',
+        activation = 'lecun',
         optim = 'adam', # note: most features only implemented for Adam
-        optim_decay1 = 2, # first momentum given in epochs (optional)
+        optim_decay1 = 24, # first momentum given in epochs (optional)
         optim_decay2 = 100, # second momentum given in epochs (optional)
         nesterov = True,
         batch_size = 64,

@@ -705,13 +707,13 @@
         learn = 1e-2,
         epochs = 24,
         learn_halve_every = 16, # only used with anneal/dumb
-        restarts = 8,
+        restarts = 5,
         restart_decay = 0.25, # only used with SGDR
         expando = lambda i: 24 * i,

         # misc
-        init = 'he_normal',
-        loss = 'msee',
+        init = 'glorot_uniform',
+        loss = 'mse',
         mloss = 'mse',
         ritual = 'default',
         restart_optim = False, # restarts also reset internal state of optimizer
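
The string-valued settings above (activation = 'lecun', init = 'glorot_uniform') are presumably resolved through the activations and inits dicts added in the first hunk. A rough sketch of that lookup, assuming the activation classes can be constructed with no arguments, as their definitions in the second file suggest:

Act = activations['lecun']          # -> the LeCunTanh class
init_fn = inits['glorot_uniform']   # -> init_glorot_uniform
act_layer = Act()                   # assumes Layer's constructor needs no arguments here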

Changed file 2 of 2:

@@ -570,7 +570,7 @@ class Dropout(Layer):

 # Activation Layers {{{2

-class Sigmoid(Layer): # aka Logistic
+class Sigmoid(Layer): # aka Logistic, Expit (inverse of Logit)
     def forward(self, X):
         self.sig = sigmoid(X)
         return self.sig
@@ -578,6 +578,16 @@ class Sigmoid(Layer): # aka Logistic
     def backward(self, dY):
         return dY * self.sig * (1 - self.sig)
+
+class Softplus(Layer):
+    # integral of Sigmoid.
+
+    def forward(self, X):
+        self.X = X
+        return np.log(1 + np.exp(X))
+
+    def backward(self, dY):
+        return dY * sigmoid(self.X)

 class Tanh(Layer):
     def forward(self, X):
         self.sig = np.tanh(X)
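
Since softplus(x) = log(1 + exp(x)) is the antiderivative of the logistic sigmoid, its derivative is sigmoid(x), so the backward pass only scales the incoming gradient by sigmoid(X). One caveat: np.log(1 + np.exp(X)) overflows for large positive X; np.logaddexp(0, X) is an equivalent, overflow-safe form. A small standalone sketch (not part of this commit) that checks the gradient numerically:

import numpy as np

def softplus(x):
    return np.logaddexp(0.0, x)       # == log(1 + exp(x)), without overflow

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))   # standard logistic, as the Sigmoid layer suggests

x = np.linspace(-4.0, 4.0, 9)
h = 1e-6
numeric = (softplus(x + h) - softplus(x - h)) / (2 * h)   # central difference
assert np.allclose(numeric, sigmoid(x), atol=1e-5)        # d/dx softplus = sigmoid
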
@@ -586,6 +596,20 @@ class Tanh(Layer):
     def backward(self, dY):
         return dY * (1 - self.sig * self.sig)
+
+class LeCunTanh(Layer):
+    # paper: http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
+    # paper: http://yann.lecun.com/exdb/publis/pdf/lecun-89.pdf
+    # scaled such that f([-1, 1]) = [-1, 1].
+    # helps preserve an input variance of 1.
+    # second derivative peaks around an input of ±1.
+
+    def forward(self, X):
+        self.sig = np.tanh(2 / 3 * X)
+        return 1.7159 * self.sig
+
+    def backward(self, dY):
+        return dY * (2 / 3 * 1.7159) * (1 - self.sig * self.sig)

 class Relu(Layer):
     def forward(self, X):
         self.cond = X >= 0
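
The 1.7159 constant makes f(x) = 1.7159 * tanh(2x/3) map ±1 to roughly ±1, since 1.7159 * tanh(2/3) ≈ 1.0000; that is what the "f([-1, 1]) = [-1, 1]" comment refers to, and it is what helps keep roughly unit-variance inputs at unit variance. A quick standalone sketch (illustration only) verifying the scaling and the backward formula:

import numpy as np

def lecun_tanh(x):
    return 1.7159 * np.tanh(2.0 / 3.0 * x)

def lecun_tanh_grad(x):
    s = np.tanh(2.0 / 3.0 * x)
    return (2.0 / 3.0 * 1.7159) * (1.0 - s * s)

assert abs(lecun_tanh(1.0) - 1.0) < 1e-4   # f(1) is approximately 1

x = np.linspace(-3.0, 3.0, 13)
h = 1e-6
numeric = (lecun_tanh(x + h) - lecun_tanh(x - h)) / (2 * h)   # central difference
assert np.allclose(numeric, lecun_tanh_grad(x), atol=1e-5)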