add exact GELU activation
This commit is contained in:
parent
7161f983ab
commit
a685db1489
2 changed files with 16 additions and 1 deletions
|
@ -3,7 +3,10 @@ import numpy as np
|
||||||
# just for speed, not strictly essential:
|
# just for speed, not strictly essential:
|
||||||
from scipy.special import expit as sigmoid
|
from scipy.special import expit as sigmoid
|
||||||
|
|
||||||
from .float import _f, _1
|
# needed for GELU:
|
||||||
|
from scipy.special import erf
|
||||||
|
|
||||||
|
from .float import _f, _1, _inv2, _invsqrt2, _invsqrt2pi
|
||||||
from .layer_base import *
|
from .layer_base import *
|
||||||
|
|
||||||
|
|
||||||
|
@ -120,6 +123,17 @@ class GeluApprox(Swish):
|
||||||
super().__init__(_f(1.704))
|
super().__init__(_f(1.704))
|
||||||
|
|
||||||
|
|
||||||
|
class Gelu(Activation):
    """Exact (erf-based) GELU activation: ``X * cdf(X)``.

    ``cdf`` is computed as ``_inv2 * (_1 + erf(X * _invsqrt2))``, i.e. the
    standard normal CDF written in terms of the error function
    (assuming ``_1`` is the project's float constant for 1 -- TODO confirm).
    """

    def forward(self, X):
        """Return ``X * cdf(X)``, storing ``X`` and ``cdf`` on the instance.

        Both stored values are read again by ``backward``.
        """
        self.X = X
        scaled = X * _invsqrt2
        self.cdf = _inv2 * (_1 + erf(scaled))
        return X * self.cdf

    def backward(self, dY):
        """Return ``dY`` times the derivative of ``X * cdf(X)``.

        Uses ``self.X`` and ``self.cdf`` as stored by ``forward``.
        """
        X = self.X
        # d/dX [X * cdf(X)] = cdf(X) + X * exp(-X^2 / 2) / sqrt(2 * pi)
        gauss = np.exp(-_inv2 * np.square(X))
        density_term = gauss * X * _invsqrt2pi
        return dY * (self.cdf + density_term)
|
||||||
|
|
||||||
|
|
||||||
class Softmax(Activation):
|
class Softmax(Activation):
|
||||||
def forward(self, X):
|
def forward(self, X):
|
||||||
alpha = np.max(X, axis=-1, keepdims=True)
|
alpha = np.max(X, axis=-1, keepdims=True)
|
||||||
|
|
|
@ -15,4 +15,5 @@ _2 = _f(2)
|
||||||
_inv2 = _f(1/2)
|
_inv2 = _f(1/2)
|
||||||
_sqrt2 = _f(np.sqrt(2))
|
_sqrt2 = _f(np.sqrt(2))
|
||||||
_invsqrt2 = _f(1/np.sqrt(2))
|
_invsqrt2 = _f(1/np.sqrt(2))
|
||||||
|
_invsqrt2pi = _f(1/np.sqrt(2 * np.pi))
|
||||||
_pi = _f(np.pi)
|
_pi = _f(np.pi)
|
||||||
|
|
Loading…
Reference in a new issue