use MomentumClip in warmup for stability
This commit is contained in:
parent
7878f94f43
commit
dcbaef3032
1 changed file with 1 addition and 1 deletion
2
onn.py
2
onn.py
|
@@ -1273,7 +1273,7 @@ def run(program, args=None):
|
|||
# use plain SGD in warmup to prevent (or possibly cause?) numeric issues
|
||||
temp_optim = learner.optim
|
||||
temp_loss = model.loss
|
||||
learner.optim = Optimizer(lr=0.001)
|
||||
learner.optim = MomentumClip(lr=0.01, mu=0)
|
||||
ritual.loss = Absolute() # less likely to blow up; more general
|
||||
|
||||
# NOTE: experiment: trying const batches and batch_size
|
||||
|
|
Loading…
Reference in a new issue