diff --git a/onn.py b/onn.py
index 278a832..26e8c0c 100755
--- a/onn.py
+++ b/onn.py
@@ -1028,6 +1028,8 @@ def optim_from_config(config):
             raise Exception("yellowfin only uses one decay term.")
         beta = np.exp(-1/d1)
         optim = YellowFin(beta=beta)
+    elif config.optim in ('ag', 'adagrad'):
+        optim = Adagrad()
     elif config.optim in ('rms', 'rmsprop'):
         d2 = config.optim_decay2 if 'optim_decay2' in config else 99.5
         mu = np.exp(-1/d2)
diff --git a/onn_core.py b/onn_core.py
index 54fa015..27aa6a0 100644
--- a/onn_core.py
+++ b/onn_core.py
@@ -291,7 +291,7 @@ class Optimizer:
     def update(self, dW, W):
         W += self.compute(dW, W)
 
-# the following optimizers are blatantly lifted from tiny-dnn:
+# some of the following optimizers are blatantly lifted from tiny-dnn:
 # https://github.com/tiny-dnn/tiny-dnn/blob/master/tiny_dnn/optimizers/optimizer.h
 
 class Momentum(Optimizer):
@@ -315,10 +315,25 @@ class Momentum(Optimizer):
 
         return V
 
+class Adagrad(Optimizer):
+    def __init__(self, lr=0.01, eps=1e-8):
+        self.eps = _f(eps)
+
+        super().__init__(lr)
+
+    def reset(self):
+        self.g = None
+
+    def compute(self, dW, W):
+        if self.g is None:
+            self.g = np.zeros_like(dW)
+
+        self.g += np.square(dW)
+        return -self.lr * dW / (np.sqrt(self.g) + self.eps)
+
 class RMSprop(Optimizer):
     # RMSprop generalizes* Adagrad, etc.
-    # TODO: verify this is correct:
     # * RMSprop == Adagrad when
     #   RMSprop.mu == 1
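
For reference, a minimal standalone sketch of the update rule the new Adagrad class implements, written against plain numpy outside the onn class hierarchy (the function name and the driver values below are illustrative, not part of the patch):

    import numpy as np

    def adagrad_step(W, dW, g, lr=0.01, eps=1e-8):
        # accumulate the squared gradient for every parameter seen so far
        g += np.square(dW)
        # scale each parameter's step by the root of its accumulated squared gradient
        W -= lr * dW / (np.sqrt(g) + eps)
        return W, g

    # usage: the accumulator g starts as zeros shaped like the weights,
    # mirroring the lazy np.zeros_like(dW) allocation in Adagrad.compute()
    W = np.zeros(3)
    g = np.zeros_like(W)
    W, g = adagrad_step(W, np.array([0.5, -1.0, 2.0]), g)

The only persistent state is the per-parameter accumulator g, which the patch clears to None in reset() and allocates lazily on the first compute() call.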