add exact GELU activation

Connor Olding 2018-03-22 12:02:17 +01:00
parent 7161f983ab
commit a685db1489
2 changed files with 16 additions and 1 deletion


@@ -3,7 +3,10 @@ import numpy as np
 # just for speed, not strictly essential:
 from scipy.special import expit as sigmoid
-from .float import _f, _1
+# needed for GELU:
+from scipy.special import erf
+from .float import _f, _1, _inv2, _invsqrt2, _invsqrt2pi
 from .layer_base import *
@@ -120,6 +123,17 @@ class GeluApprox(Swish):
         super().__init__(_f(1.704))
+class Gelu(Activation):
+    def forward(self, X):
+        self.X = X
+        self.cdf = _inv2 * (_1 + erf(X * _invsqrt2))
+        return X * self.cdf
+    def backward(self, dY):
+        return dY * (self.cdf \
+            + np.exp(-_inv2 * np.square(self.X)) * self.X * _invsqrt2pi)
 class Softmax(Activation):
     def forward(self, X):
         alpha = np.max(X, axis=-1, keepdims=True)
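
For reference, a minimal standalone sketch of the same exact-GELU math outside the Layer framework: forward is x * Phi(x) via erf, and the backward formula Phi(x) + x * phi(x) is checked against a central finite difference. Function names here are illustrative, not part of the repository.

import numpy as np
from scipy.special import erf

def gelu(x):
    # exact GELU: x * Phi(x), where Phi is the standard normal CDF
    return x * 0.5 * (1.0 + erf(x / np.sqrt(2.0)))

def gelu_grad(x):
    # d/dx [x * Phi(x)] = Phi(x) + x * phi(x), phi = standard normal PDF
    cdf = 0.5 * (1.0 + erf(x / np.sqrt(2.0)))
    pdf = np.exp(-0.5 * np.square(x)) / np.sqrt(2.0 * np.pi)
    return cdf + x * pdf

x = np.linspace(-4.0, 4.0, 101)
eps = 1e-5
numeric = (gelu(x + eps) - gelu(x - eps)) / (2.0 * eps)
print(np.max(np.abs(numeric - gelu_grad(x))))  # ~1e-9 or smaller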


@@ -15,4 +15,5 @@ _2 = _f(2)
 _inv2 = _f(1/2)
 _sqrt2 = _f(np.sqrt(2))
 _invsqrt2 = _f(1/np.sqrt(2))
+_invsqrt2pi = _f(1/np.sqrt(2 * np.pi))
 _pi = _f(np.pi)
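
As a sanity check on how the new exact Gelu relates to the existing GeluApprox (a Swish with beta = 1.704, per the hunk above), a small sketch comparing the two formulas directly; this reproduces the math only, not the repository's classes.

import numpy as np
from scipy.special import erf, expit as sigmoid

x = np.linspace(-6.0, 6.0, 1201)
exact  = x * 0.5 * (1.0 + erf(x / np.sqrt(2.0)))  # x * Phi(x)
approx = x * sigmoid(1.704 * x)                   # GeluApprox: Swish with beta = 1.704
print(np.max(np.abs(exact - approx)))  # on the order of 0.02, worst near |x| ~ 2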