rewrite momentum optimizer
the original version wasn't very useful, as it would typically diverge.
parent 5389ae7fca
commit 2a4f92154d
1 changed file with 8 additions and 9 deletions
@@ -26,26 +26,25 @@ def filter_gradients(accum, grads, param):
 
 class Momentum(Optimizer):
     def __init__(self, lr=0.01, mu=0.9, nesterov=False):
-        self.mu = _f(mu) # momentum
+        self.mu = _f(mu)
         self.nesterov = bool(nesterov)
 
         super().__init__(lr)
 
     def reset(self):
-        self.Vprev = None
+        self.accum = None
 
         super().reset()
 
     def compute(self, dW, W):
-        if self.Vprev is None:
-            self.Vprev = np.copy(dW)
+        if self.accum is None:
+            self.accum = np.zeros_like(dW)
 
-        V = self.mu * self.Vprev - self.lr * dW
-        self.Vprev[:] = V
+        self.accum[:] = self.accum * self.mu + dW
         if self.nesterov:
-            return self.mu * V - self.lr * dW
-
-        return V
+            return -self.lr * (self.accum * self.mu + dW)
+        else:
+            return -self.lr * self.accum
 
 
 class Adadelta(Optimizer):
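For reference, the rewritten update is standard classical momentum: the accumulator tracks accum = mu*accum + dW and the returned step is -lr*accum, with the Nesterov branch looking one accumulation ahead as -lr*(mu*accum + dW). The sketch below restates the new compute() logic outside the repository so it can be run standalone; the Optimizer base class and the _f dtype helper are replaced with plain Python/NumPy stand-ins, and it assumes the value returned by compute() is applied as an additive weight delta.

import numpy as np

class Momentum:
    # standalone restatement of the rewritten optimizer (stand-in for the
    # repo's Optimizer subclass; _f is approximated with float()).
    def __init__(self, lr=0.01, mu=0.9, nesterov=False):
        self.lr = float(lr)
        self.mu = float(mu)
        self.nesterov = bool(nesterov)
        self.accum = None

    def reset(self):
        self.accum = None

    def compute(self, dW, W):
        if self.accum is None:
            self.accum = np.zeros_like(dW)
        # classical momentum accumulation: accum <- mu*accum + dW
        self.accum[:] = self.accum * self.mu + dW
        if self.nesterov:
            # look one accumulation ahead before stepping
            return -self.lr * (self.accum * self.mu + dW)
        else:
            return -self.lr * self.accum

# toy check on f(W) = 0.5*||W||^2, whose gradient is W itself; the returned
# value is treated as an additive step (an assumption for this sketch).
opt = Momentum(lr=0.1, mu=0.9, nesterov=True)
W = np.array([5.0, -3.0])
for _ in range(200):
    W += opt.compute(W, W)
print(W)  # drifts toward [0, 0] instead of diverging

Compared with the old code, the learning rate is now applied only when the step is taken rather than folded into the stored velocity, and the accumulator starts from zeros rather than from a copy of the first gradient.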