add weight regularization
This commit is contained in:
parent
0619163447
commit
a448ff3e8a
|
@ -147,13 +147,13 @@ class Denses(Layer): # TODO: rename?
|
||||||
'b': 'biases',
|
'b': 'biases',
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, dim, init=init_he_uniform, axis=-1):
|
def __init__(self, dim, init=init_he_uniform, reg_w=None, reg_b=None, axis=-1):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.dim = int(dim)
|
self.dim = int(dim)
|
||||||
self.weight_init = init
|
self.weight_init = init
|
||||||
self.axis = int(axis)
|
self.axis = int(axis)
|
||||||
self.coeffs = self._new_weights('coeffs', init=init)
|
self.coeffs = self._new_weights('coeffs', init=init, regularizer=reg_w)
|
||||||
self.biases = self._new_weights('biases', init=init_zeros)
|
self.biases = self._new_weights('biases', init=init_zeros, regularizer=reg_b)
|
||||||
|
|
||||||
def make_shape(self, parent):
|
def make_shape(self, parent):
|
||||||
shape = parent.output_shape
|
shape = parent.output_shape
|
||||||
|
|
|
@ -105,6 +105,32 @@ class Absolute(ResidualLoss):
|
||||||
def df(self, r):
|
def df(self, r):
|
||||||
return np.sign(r)
|
return np.sign(r)
|
||||||
|
|
||||||
|
# Regularizers {{{1
|
||||||
|
|
||||||
|
class Regularizer:
    """Base class for weight regularizers.

    Defines no behaviour of its own; ``L1L2`` below derives from it.
    """


class L1L2(Regularizer):
    """Combined L1 (absolute-value) and L2 (squared) weight penalty.

    For a weight array ``X`` the penalty is
    ``sum(l1 * |X|) + sum(l2 * X**2)``. A coefficient that is falsy
    (zero) disables its term in both ``forward`` and ``backward``.
    """

    def __init__(self, l1=0.0, l2=0.0):
        # NOTE(review): `_f` is defined outside this section of the file;
        # presumably it converts to the module's float type -- confirm.
        self.l1 = _f(l1)
        self.l2 = _f(l2)

    def forward(self, X):
        """Return the scalar penalty for the weights ``X``."""
        f = 0.0
        if self.l1:
            f += np.sum(self.l1 * np.abs(X))
        if self.l2:
            f += np.sum(self.l2 * np.square(X))
        return f

    def backward(self, X):
        """Return the gradient of the penalty with respect to ``X``.

        The result has the same shape as ``X``. The L1 term contributes
        ``l1 * sign(X)`` and the L2 term contributes ``2 * l2 * X``.
        """
        df = np.zeros_like(X)
        if self.l1:
            df += self.l1 * np.sign(X)
        if self.l2:
            df += self.l2 * 2 * X
        return df
|
||||||
|
|
||||||
# Optimizers {{{1
|
# Optimizers {{{1
|
||||||
|
|
||||||
class Optimizer:
|
class Optimizer:
|
||||||
|
@ -281,6 +307,7 @@ class Weights:
|
||||||
self.shape = None
|
self.shape = None
|
||||||
self.init = None
|
self.init = None
|
||||||
self.allocator = None
|
self.allocator = None
|
||||||
|
self.regularizer = None
|
||||||
|
|
||||||
self.configure(**kwargs)
|
self.configure(**kwargs)
|
||||||
|
|
||||||
|
@ -308,6 +335,21 @@ class Weights:
|
||||||
self.f = f.reshape(self.shape)
|
self.f = f.reshape(self.shape)
|
||||||
self.g = g.reshape(self.shape)
|
self.g = g.reshape(self.shape)
|
||||||
|
|
||||||
|
def forward(self):
    """Return the regularization penalty for these weights.

    Returns 0.0 when no regularizer is attached; otherwise returns
    ``self.regularizer.forward(self.f)``.
    """
    if self.regularizer is None:
        return 0.0
    return self.regularizer.forward(self.f)
|
||||||
|
|
||||||
|
def backward(self):
    """Return the regularization gradient for these weights.

    Returns 0.0 when no regularizer is attached; otherwise returns
    ``self.regularizer.backward(self.f)``. Does not modify ``self.g``.
    """
    if self.regularizer is None:
        return 0.0
    return self.regularizer.backward(self.f)
|
||||||
|
|
||||||
|
def update(self):
    """Add the regularization gradient to ``self.g`` in place.

    Does nothing when no regularizer is attached; otherwise adds
    ``self.regularizer.backward(self.f)`` onto ``self.g``.
    """
    if self.regularizer is None:
        return
    self.g += self.regularizer.backward(self.f)
|
||||||
|
|
||||||
# Abstract Layers {{{1
|
# Abstract Layers {{{1
|
||||||
|
|
||||||
class Layer:
|
class Layer:
|
||||||
|
@ -575,12 +617,12 @@ class Dense(Layer):
|
||||||
'b': 'biases',
|
'b': 'biases',
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, dim, init=init_he_uniform):
|
def __init__(self, dim, init=init_he_uniform, reg_w=None, reg_b=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.dim = int(dim)
|
self.dim = int(dim)
|
||||||
self.output_shape = (dim,)
|
self.output_shape = (dim,)
|
||||||
self.coeffs = self._new_weights('coeffs', init=init)
|
self.coeffs = self._new_weights('coeffs', init=init, regularizer=reg_w)
|
||||||
self.biases = self._new_weights('biases', init=init_zeros)
|
self.biases = self._new_weights('biases', init=init_zeros, regularizer=reg_b)
|
||||||
|
|
||||||
def make_shape(self, parent):
|
def make_shape(self, parent):
|
||||||
shape = parent.output_shape
|
shape = parent.output_shape
|
||||||
|
@ -668,6 +710,18 @@ class Model:
|
||||||
values[node] = node.backpropagate(values)
|
values[node] = node.backpropagate(values)
|
||||||
return self.dW
|
return self.dW
|
||||||
|
|
||||||
|
def regulate_forward(self):
    """Return the total regularization penalty over the model's weights.

    Walks ``self.ordered_nodes`` and sums ``forward()`` of every entry in
    each node's ``weights`` mapping, starting from the module-level ``_0``.
    """
    # NOTE(review): `_0` is defined outside this section of the file;
    # presumably a zero in the module's float type -- confirm.
    loss = _0
    for node in self.ordered_nodes:
        # Only the weight objects are needed, not their names.
        for w in node.weights.values():
            loss += w.forward()
    return loss
|
||||||
|
|
||||||
|
def regulate(self):
    """Call ``update()`` on every weight of every node in the model.

    Walks ``self.ordered_nodes`` in order and, for each node, every entry
    of its ``weights`` mapping. Returns nothing.
    """
    for node in self.ordered_nodes:
        # Only the weight objects are needed, not their names.
        for w in node.weights.values():
            w.update()
|
||||||
|
|
||||||
def load_weights(self, fn):
|
def load_weights(self, fn):
|
||||||
# seemingly compatible with keras' Dense layers.
|
# seemingly compatible with keras' Dense layers.
|
||||||
import h5py
|
import h5py
|
||||||
|
@ -726,6 +780,7 @@ class Ritual: # i'm just making up names at this point
|
||||||
self.learner = learner if learner is not None else Learner(Optimizer())
|
self.learner = learner if learner is not None else Learner(Optimizer())
|
||||||
self.loss = loss if loss is not None else Squared()
|
self.loss = loss if loss is not None else Squared()
|
||||||
self.mloss = mloss if mloss is not None else loss
|
self.mloss = mloss if mloss is not None else loss
|
||||||
|
self.model = None
|
||||||
|
|
||||||
def reset(self):
|
def reset(self):
|
||||||
self.learner.reset(optim=True)
|
self.learner.reset(optim=True)
|
||||||
|
@ -735,12 +790,16 @@ class Ritual: # i'm just making up names at this point
|
||||||
def measure(self, p, y):
|
def measure(self, p, y):
|
||||||
return self.mloss.forward(p, y)
|
return self.mloss.forward(p, y)
|
||||||
|
|
||||||
def derive(self, p, y):
|
def forward(self, p, y):
    """Return the training loss for ``p`` against ``y``, penalty included.

    The result is ``self.loss.forward(p, y)`` plus the model's
    regularization penalty from ``self.model.regulate_forward()``.
    ``self.model`` must be set before this is called.
    """
    return self.loss.forward(p, y) + self.model.regulate_forward()
|
||||||
|
|
||||||
|
def backward(self, p, y):
|
||||||
return self.loss.backward(p, y)
|
return self.loss.backward(p, y)
|
||||||
|
|
||||||
def learn(self, inputs, outputs):
|
def learn(self, inputs, outputs):
|
||||||
predicted = self.model.forward(inputs)
|
predicted = self.model.forward(inputs)
|
||||||
self.model.backward(self.derive(predicted, outputs))
|
self.model.backward(self.backward(predicted, outputs))
|
||||||
|
self.model.regulate()
|
||||||
return predicted
|
return predicted
|
||||||
|
|
||||||
def update(self):
|
def update(self):
|
||||||
|
@ -789,7 +848,7 @@ class Ritual: # i'm just making up names at this point
|
||||||
self.update()
|
self.update()
|
||||||
|
|
||||||
if return_losses == 'both':
|
if return_losses == 'both':
|
||||||
batch_loss = self.loss.forward(predicted, batch_outputs)
|
batch_loss = self.forward(predicted, batch_outputs)
|
||||||
if np.isnan(batch_loss):
|
if np.isnan(batch_loss):
|
||||||
raise Exception("nan")
|
raise Exception("nan")
|
||||||
losses.append(batch_loss)
|
losses.append(batch_loss)
|
||||||
|
@ -843,7 +902,7 @@ class Ritual: # i'm just making up names at this point
|
||||||
self.update()
|
self.update()
|
||||||
|
|
||||||
if return_losses == 'both':
|
if return_losses == 'both':
|
||||||
batch_loss = self.loss.forward(predicted, batch_outputs)
|
batch_loss = self.forward(predicted, batch_outputs)
|
||||||
if np.isnan(batch_loss):
|
if np.isnan(batch_loss):
|
||||||
raise Exception("nan")
|
raise Exception("nan")
|
||||||
losses.append(batch_loss)
|
losses.append(batch_loss)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user