Use the constant recommended by the paper (1.702 instead of 1.704) for the GELU approximation

This commit is contained in:
Connor Olding 2019-02-06 18:41:55 +01:00
parent 594483c6d4
commit 2cfcc9062e

View File

@@ -120,7 +120,7 @@ class GeluApprox(Swish):
# plot: https://www.desmos.com/calculator/ydzgtccsld
def __init__(self):
- super().__init__(_f(1.704))
+ super().__init__(_f(1.702))
class Gelu(Activation):