rewrite momentum optimizer

the original version wasn't very useful, as it would typically diverge.
Connor Olding 2019-03-22 12:55:13 +01:00
parent 5389ae7fca
commit 2a4f92154d

@@ -26,26 +26,25 @@ def filter_gradients(accum, grads, param):
 class Momentum(Optimizer):
     def __init__(self, lr=0.01, mu=0.9, nesterov=False):
-        self.mu = _f(mu) # momentum
+        self.mu = _f(mu)
         self.nesterov = bool(nesterov)
         super().__init__(lr)
 
     def reset(self):
-        self.Vprev = None
+        self.accum = None
         super().reset()
 
     def compute(self, dW, W):
-        if self.Vprev is None:
-            self.Vprev = np.copy(dW)
-        V = self.mu * self.Vprev - self.lr * dW
-        self.Vprev[:] = V
+        if self.accum is None:
+            self.accum = np.zeros_like(dW)
+        self.accum[:] = self.accum * self.mu + dW
 
         if self.nesterov:
-            return self.mu * V - self.lr * dW
-        else:
-            return V
+            return -self.lr * (self.accum * self.mu + dW)
+        return -self.lr * self.accum
 
 class Adadelta(Optimizer):
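
For reference, a minimal, self-contained sketch of the rewritten update rule is shown below. The repo's Optimizer base class and its _f() cast helper are not part of this hunk, so plain-Python stand-ins are used; the MomentumSketch class name and the toy quadratic at the end are for illustration only, not from the repo.

# sketch of the rewritten momentum update, assuming plain floats in place of
# the repo's _f() cast and no Optimizer base class; MomentumSketch is a
# hypothetical stand-in name
import numpy as np

class MomentumSketch:
    def __init__(self, lr=0.01, mu=0.9, nesterov=False):
        self.lr = float(lr)            # stand-in for the repo's _f() cast
        self.mu = float(mu)            # momentum coefficient
        self.nesterov = bool(nesterov)
        self.accum = None              # accumulates raw (unscaled) gradients

    def compute(self, dW, W=None):
        if self.accum is None:
            self.accum = np.zeros_like(dW)
        # decay the accumulator and add the current gradient; lr is applied
        # only when producing the step, so it never leaks into stored state
        self.accum[:] = self.accum * self.mu + dW
        if self.nesterov:
            # look-ahead variant: one extra decay-plus-gradient before scaling
            return -self.lr * (self.accum * self.mu + dW)
        return -self.lr * self.accum

# toy check on f(w) = 0.5 * ||w||^2, whose gradient is w itself
opt = MomentumSketch(lr=0.1, mu=0.9)
w = np.array([3.0, -2.0])
for _ in range(200):
    w += opt.compute(w.copy(), w)      # compute() returns the additive update
print(w)                               # drifts toward [0, 0]

One consequence of keeping lr out of the accumulator is that the stored velocity stays in gradient units, so changing the learning rate mid-training rescales every step consistently rather than only the newest contribution.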