diff --git a/onn/experimental.py b/onn/experimental.py
new file mode 100644
index 0000000..27f1a5a
--- /dev/null
+++ b/onn/experimental.py
@@ -0,0 +1,76 @@
+from .float import *
+from .layer import Layer
+from .loss import Loss
+from .optimizer import Optimizer
+from .ritual import Ritual
+from .learner import Learner
+from .parametric import Dense
+from .regularizer import Regularizer
+
+
+class AddSignClip(Optimizer):
+    # paper: https://arxiv.org/abs/1709.07417
+    # with heavy-handed gradient clipping of my own concoction.
+
+    def __init__(self, lr=0.01, mu=0.9, alpha=1.0, clip=1.0):
+        self.mu = _f(mu)
+        self.alpha = _f(alpha)
+        self.clip = _f(clip)
+
+        super().__init__(lr)
+
+    def reset(self):
+        self.accum = None
+
+    def compute(self, dW, W):
+        if self.accum is None:
+            self.accum = np.zeros_like(dW)
+
+        self.accum[:] = self.accum * self.mu + dW
+
+        signed = np.sign(dW) * np.sign(self.accum)
+        #signed *= decay
+
+        inter = dW * (self.alpha + signed)
+
+        total_norm = np.linalg.norm(inter)
+        # based on softplus.
+        inter /= np.log(1 + np.exp(total_norm / self.clip - 1)) + 1
+
+        return -self.lr * inter
+
+
+class PowerSignClip(Optimizer):
+    # paper: https://arxiv.org/abs/1709.07417
+    # with heavy-handed gradient clipping of my own concoction.
+
+    def __init__(self, lr=0.01, mu=0.9, alpha=np.e, clip=1.0):
+        self.mu = _f(mu)
+        self.alpha = _f(alpha)
+        self.use_exp = np.isclose(self.alpha, _f(np.e))
+        self.clip = _f(clip)
+
+        super().__init__(lr)
+
+    def reset(self):
+        self.accum = None
+
+    def compute(self, dW, W):
+        if self.accum is None:
+            self.accum = np.zeros_like(dW)
+
+        self.accum[:] = self.accum * self.mu + dW
+
+        signed = np.sign(dW) * np.sign(self.accum)
+        #signed *= decay
+
+        if self.use_exp:
+            inter = dW * np.exp(signed)
+        else:
+            inter = dW * np.power(self.alpha, signed)
+
+        total_norm = np.linalg.norm(inter)
+        # based on softplus.
+        inter /= np.log(1 + np.exp(total_norm / self.clip - 1)) + 1
+
+        return -self.lr * inter
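
For reference, a minimal standalone sketch of the update rule these classes implement, written in plain numpy outside the onn Optimizer interface. The helper name, toy quadratic objective, and loop below are illustrative only and not part of the library; they just mirror what compute() does above.

    # Standalone sketch of the AddSign update with the softplus-style norm
    # clipping used above. All names here are hypothetical, not onn API.
    import numpy as np

    def addsign_clip_step(dW, accum, lr=0.01, mu=0.9, alpha=1.0, clip=1.0):
        # momentum accumulator, as in compute() above
        accum[:] = accum * mu + dW
        # +1 where gradient and momentum agree in sign, -1 where they disagree
        signed = np.sign(dW) * np.sign(accum)
        inter = dW * (alpha + signed)   # AddSign; PowerSign uses alpha**signed
        # soft norm clipping: divisor stays near 1 for small norms and grows
        # roughly like total_norm / clip for large ones
        total_norm = np.linalg.norm(inter)
        inter /= np.log(1 + np.exp(total_norm / clip - 1)) + 1
        return -lr * inter

    # toy quadratic: minimize 0.5 * ||W||^2, so the gradient is simply W
    W = np.array([3.0, -2.0])
    accum = np.zeros_like(W)
    for _ in range(100):
        W += addsign_clip_step(W, accum)
    print(W)  # W shrinks toward the origin each step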