diff --git a/onn/activation.py b/onn/activation.py
index 74e87be..826666a 100644
--- a/onn/activation.py
+++ b/onn/activation.py
@@ -3,7 +3,10 @@ import numpy as np
 # just for speed, not strictly essential:
 from scipy.special import expit as sigmoid
 
-from .float import _f, _1
+# needed for GELU:
+from scipy.special import erf
+
+from .float import _f, _1, _inv2, _invsqrt2, _invsqrt2pi
 from .layer_base import *
 
 
@@ -120,6 +123,17 @@ class GeluApprox(Swish):
         super().__init__(_f(1.704))
 
 
+class Gelu(Activation):
+    def forward(self, X):
+        self.X = X
+        self.cdf = _inv2 * (_1 + erf(X * _invsqrt2))
+        return X * self.cdf
+
+    def backward(self, dY):
+        return dY * (self.cdf
+                     + np.exp(-_inv2 * np.square(self.X)) * self.X * _invsqrt2pi)
+
+
 class Softmax(Activation):
     def forward(self, X):
         alpha = np.max(X, axis=-1, keepdims=True)
diff --git a/onn/float.py b/onn/float.py
index 079bcac..b59a8d2 100644
--- a/onn/float.py
+++ b/onn/float.py
@@ -15,4 +15,5 @@ _2 = _f(2)
 _inv2 = _f(1/2)
 _sqrt2 = _f(np.sqrt(2))
 _invsqrt2 = _f(1/np.sqrt(2))
+_invsqrt2pi = _f(1/np.sqrt(2 * np.pi))
 _pi = _f(np.pi)
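
A quick standalone sanity check for the new backward pass (not part of the
patch): the gelu_forward/gelu_backward helpers below are illustrative
stand-ins for Gelu.forward/Gelu.backward, written against plain numpy/scipy
so they run without the onn package.

    import numpy as np
    from scipy.special import erf

    def gelu_forward(x):
        # exact GELU: x * Phi(x), where Phi is the standard normal CDF
        cdf = 0.5 * (1.0 + erf(x / np.sqrt(2.0)))
        return x * cdf, cdf

    def gelu_backward(x, cdf):
        # d/dx [x * Phi(x)] = Phi(x) + x * phi(x),
        # where phi(x) = exp(-x**2 / 2) / sqrt(2 * pi) is the normal PDF
        pdf = np.exp(-0.5 * np.square(x)) * (1.0 / np.sqrt(2.0 * np.pi))
        return cdf + x * pdf

    # compare the analytic gradient against a central finite difference
    x = np.linspace(-4.0, 4.0, 101)
    eps = 1e-6
    _, cdf = gelu_forward(x)
    analytic = gelu_backward(x, cdf)
    numeric = (gelu_forward(x + eps)[0] - gelu_forward(x - eps)[0]) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-7)

This mirrors the structure of the patch: forward caches Phi(X) in self.cdf so
that backward only needs the extra X * phi(X) term instead of recomputing erf.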