add experimental soft-clipped optimizers
parent 3aa3b70a9f
commit 713fd2adbe
1 changed file with 76 additions and 0 deletions
onn/experimental.py (Normal file, +76)

@@ -0,0 +1,76 @@
from .float import *
from .layer import Layer
from .loss import Loss
from .optimizer import Optimizer
from .ritual import Ritual
from .learner import Learner
from .parametric import Dense
from .regularizer import Regularizer

# note: `np` (numpy) and the float-cast helper `_f` are presumably provided
# by the star import of .float above.


class AddSignClip(Optimizer):
    # paper: https://arxiv.org/abs/1709.07417
    # with heavy-handed gradient clipping of my own concoction.

    def __init__(self, lr=0.01, mu=0.9, alpha=1.0, clip=1.0):
        self.mu = _f(mu)
        self.alpha = _f(alpha)
        self.clip = _f(clip)

        super().__init__(lr)

    def reset(self):
        self.accum = None

    def compute(self, dW, W):
        if self.accum is None:
            self.accum = np.zeros_like(dW)

        # momentum-style accumulator of past gradients.
        self.accum[:] = self.accum * self.mu + dW

        # +1 where the gradient agrees in sign with the accumulator, -1 otherwise.
        signed = np.sign(dW) * np.sign(self.accum)
        #signed *= decay

        inter = dW * (self.alpha + signed)

        total_norm = np.linalg.norm(inter)
        # soft clipping of the update norm, based on softplus.
        inter /= np.log(1 + np.exp(total_norm / self.clip - 1)) + 1

        return -self.lr * inter


class PowerSignClip(Optimizer):
    # paper: https://arxiv.org/abs/1709.07417
    # with heavy-handed gradient clipping of my own concoction.

    def __init__(self, lr=0.01, mu=0.9, alpha=np.e, clip=1.0):
        self.mu = _f(mu)
        self.alpha = _f(alpha)
        self.use_exp = np.isclose(self.alpha, _f(np.e))
        self.clip = _f(clip)

        super().__init__(lr)

    def reset(self):
        self.accum = None

    def compute(self, dW, W):
        if self.accum is None:
            self.accum = np.zeros_like(dW)

        # momentum-style accumulator of past gradients.
        self.accum[:] = self.accum * self.mu + dW

        # +1 where the gradient agrees in sign with the accumulator, -1 otherwise.
        signed = np.sign(dW) * np.sign(self.accum)
        #signed *= decay

        # scale by alpha**signed; use exp directly when alpha == e.
        if self.use_exp:
            inter = dW * np.exp(signed)
        else:
            inter = dW * np.power(self.alpha, signed)

        total_norm = np.linalg.norm(inter)
        # soft clipping of the update norm, based on softplus.
        inter /= np.log(1 + np.exp(total_norm / self.clip - 1)) + 1

        return -self.lr * inter
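
For illustration, here is a standalone NumPy sketch of the soft-clipping idea above, outside the library: the update is divided by log(1 + exp(norm/clip - 1)) + 1, which stays near log(1 + 1/e) + 1 ≈ 1.31 for small update norms and approaches norm/clip for large ones, so the scaled update's norm levels off around clip. The helper names (soft_clip_divisor, addsign_clip_update) are illustrative only, not part of onn.

import numpy as np

def soft_clip_divisor(total_norm, clip=1.0):
    # softplus-based divisor from the commit: log(1 + exp(norm/clip - 1)) + 1.
    # ~1.31 when total_norm << clip; ~total_norm/clip when total_norm >> clip.
    return np.log1p(np.exp(total_norm / clip - 1.0)) + 1.0

def addsign_clip_update(dW, accum, lr=0.01, mu=0.9, alpha=1.0, clip=1.0):
    # one AddSign step (arXiv:1709.07417) with the soft clipping applied on top.
    # PowerSign would instead use dW * alpha**signed (or np.exp(signed) when alpha == e).
    accum = accum * mu + dW                    # momentum-style accumulator
    signed = np.sign(dW) * np.sign(accum)      # +1 where gradient and accumulator agree
    inter = dW * (alpha + signed)              # AddSign scaling of the raw gradient
    inter = inter / soft_clip_divisor(np.linalg.norm(inter), clip)
    return -lr * inter, accum

# toy check: a small gradient is barely rescaled, a large one is tamed.
rng = np.random.default_rng(0)
for scale in (0.01, 10.0):
    g = rng.normal(scale=scale, size=10)
    step, _ = addsign_clip_update(g, np.zeros(10))
    print(f"grad norm {np.linalg.norm(g):8.3f} -> step norm {np.linalg.norm(step):.5f}")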