Connor Olding 2017-01-10 13:21:26 -08:00
parent e28eb0cf06
commit 5b2a293b8c


@@ -18,6 +18,13 @@ class Loss:
         d = self.df(r)
         return d / len(d)

+class Squared(Loss):
+    def f(self, r):
+        return np.square(r)
+
+    def df(self, r):
+        return 2 * r
+
 class SquaredHalved(Loss):
     def f(self, r):
         return np.square(r) / 2
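
A quick check of how the new Squared loss differs from SquaredHalved: with Loss.dmean() from the context above, the gradient fed back per sample is 2*r/N versus r/N. A minimal standalone sketch, not part of the commit; Loss.mean and SquaredHalved.df are reconstructed here as assumptions:

    import numpy as np

    class Loss:
        def mean(self, r):
            return np.average(self.f(r))   # assumed definition
        def dmean(self, r):
            d = self.df(r)
            return d / len(d)

    class Squared(Loss):
        def f(self, r):  return np.square(r)
        def df(self, r): return 2 * r

    class SquaredHalved(Loss):
        def f(self, r):  return np.square(r) / 2
        def df(self, r): return r          # assumed, consistent with f

    r = np.array([[1.0], [-2.0], [0.5]])
    print(Squared().dmean(r).ravel())        # ~[ 0.667 -1.333  0.333], i.e. 2*r/3
    print(SquaredHalved().dmean(r).ravel())  # ~[ 0.333 -0.667  0.167], i.e. r/3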
@@ -144,18 +151,18 @@ class Layer:
     def multi(self, B):
         assert len(B) == 1, self
-        return self.F(B[0].T).T
+        return self.F(B[0])

     def dmulti(self, dB):
         if len(dB) == 1:
-            return self.dF(dB[0].T).T
+            return self.dF(dB[0])
         else:
             dX = None
             for dY in dB:
                 if dX is None:
-                    dX = self.dF(dY.T).T
+                    dX = self.dF(dY)
                 else:
-                    dX += self.dF(dY.T).T
+                    dX += self.dF(dY)
             return dX

     # general utility methods:
@@ -300,10 +307,10 @@ class Dense(Layer):
         self.W = W
         self.dW = dW
-        self.coeffs = self.W[:self.nW].reshape(outs, ins)
-        self.biases = self.W[self.nW:].reshape(outs, 1)
-        self.dcoeffs = self.dW[:self.nW].reshape(outs, ins)
-        self.dbiases = self.dW[self.nW:].reshape(outs)
+        self.coeffs = self.W[:self.nW].reshape(ins, outs)
+        self.biases = self.W[self.nW:].reshape(1, outs)
+        self.dcoeffs = self.dW[:self.nW].reshape(ins, outs)
+        self.dbiases = self.dW[self.nW:].reshape(1, outs)

         # he_normal initialization
         s = np.sqrt(2 / ins)
@@ -321,14 +328,14 @@ class Dense(Layer):
     def F(self, X):
         self.X = X
-        Y = self.coeffs.dot(X) \
+        Y = X.dot(self.coeffs) \
          + self.biases
         return Y

     def dF(self, dY):
-        dX = self.coeffs.T.dot(dY)
-        self.dcoeffs[:] = dY.dot(self.X.T)
-        self.dbiases[:] = np.sum(dY, axis=1)
+        dX = dY.dot(self.coeffs.T)
+        self.dcoeffs[:] = self.X.T.dot(dY)
+        self.dbiases[:] = np.sum(dY, axis=0, keepdims=True)
         return dX

 # Model
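
Taken together with the reshape changes above, Dense now assumes a row-major batch layout: X is (batch, ins), coeffs is (ins, outs), and biases broadcast along axis 0. A small shape sketch with hypothetical sizes (not from the commit):

    import numpy as np

    batch, ins, outs = 32, 3, 8             # hypothetical sizes
    X = np.random.randn(batch, ins)
    W = np.random.randn(ins, outs)          # coeffs
    b = np.random.randn(1, outs)            # biases

    Y = X.dot(W) + b                        # forward pass: (batch, outs)

    dY = np.random.randn(batch, outs)       # gradient arriving from the layer above
    dX = dY.dot(W.T)                        # (batch, ins): passed to the layer below
    dW = X.T.dot(dY)                        # (ins, outs): matches coeffs
    db = np.sum(dY, axis=0, keepdims=True)  # (1, outs): matches biases

    assert dX.shape == X.shape and dW.shape == W.shape and db.shape == b.shape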
@@ -411,8 +418,8 @@ class Model:
         for i in range(len(denses)):
             a, b = i, i + 1
             b_name = "dense_{}".format(b)
-            denses[a].coeffs = weights[b_name+'_W'].T
-            denses[a].biases = np.expand_dims(weights[b_name+'_b'], -1)
+            denses[a].coeffs = weights[b_name+'_W']
+            denses[a].biases = np.expand_dims(weights[b_name+'_b'], 0)

     def save_weights(self, fn, overwrite=False):
         raise NotImplementedError("unimplemented", self)
@@ -433,7 +440,7 @@ if __name__ == '__main__':
         # style of resnet
         # only one is implemented so far
         parallel_style = 'batchless',
-        activation = 'gelu',
+        activation = 'relu',
         optim = 'adam',
         nesterov = False, # only used with SGD or Adam
@@ -465,8 +472,7 @@ if __name__ == '__main__':
         return inputs, outputs

     inputs, outputs = read_data("ml/cie_mlp_data.npz")
-    valid_data = read_data("ml/cie_mlp_vdata.npz")
-    valid_inputs, valid_outputs = valid_data
+    valid_inputs, valid_outputs = read_data("ml/cie_mlp_vdata.npz")

     # Our Test Model
@@ -520,8 +526,12 @@ if __name__ == '__main__':
     else:
         raise Exception('unknown optimizer', config.optim)

-    assert config.loss == 'mse', 'unknown loss function'
-    loss = SquaredHalved()
+    if config.loss == 'mse':
+        loss = Squared()
+    elif config.loss == 'mshe': # mushy
+        loss = SquaredHalved()
+    else:
+        raise Exception('unknown objective', config.loss)

     LR = config.LR
     LRprod = 0.5**(1/config.LR_halve_every)
@@ -532,12 +542,14 @@ if __name__ == '__main__':
         predicted = model.forward(inputs / x_scale)
         residual = predicted - outputs / y_scale
         err = loss.mean(residual)
-        print("train loss: {:10.6f}".format(err))
+        print("train loss: {:11.7f}".format(err))
+        print("improvement: {:+7.2f}%".format((0.0007031 / err - 1) * 100))

         predicted = model.forward(valid_inputs / x_scale)
         residual = predicted - valid_outputs / y_scale
         err = loss.mean(residual)
-        print("valid loss: {:10.6f}".format(err))
+        print("valid loss: {:11.7f}".format(err))
+        print("improvement: {:+7.2f}%".format((0.0007159 / err - 1) * 100))

     for i in range(config.restarts + 1):
         measure_loss()
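
The added "improvement" lines report the measured loss relative to a fixed constant; the constants look like reference losses from an earlier run (an assumption — the commit doesn't say). The formula prints a positive percentage when err is below the reference:

    reference = 0.0007031   # the train-loss constant used above
    err = 0.00065           # hypothetical measured loss
    print("improvement: {:+7.2f}%".format((reference / err - 1) * 100))
    # -> "improvement:   +8.17%"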
@@ -554,6 +566,8 @@ if __name__ == '__main__':
             shuffled_inputs = inputs[indices] / x_scale
             shuffled_outputs = outputs[indices] / y_scale

+            optim.alpha = LR * LRprod**e
+
             cumsum_loss = 0
             for b in range(batch_count):
                 bi = b * config.batch_size
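
Setting optim.alpha once per epoch here (the old per-batch assignment is removed in the next hunk) keeps the same decay schedule: with LRprod = 0.5**(1/LR_halve_every), the learning rate halves every LR_halve_every epochs. A tiny sketch with assumed values:

    LR = 0.01               # assumed base learning rate
    LR_halve_every = 10     # assumed halving period, in epochs
    LRprod = 0.5**(1/LR_halve_every)

    for e in (0, 10, 20, 30):
        print(e, LR * LRprod**e)   # ~0.01, 0.005, 0.0025, 0.00125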
@@ -561,16 +575,9 @@ if __name__ == '__main__':
                 batch_outputs = shuffled_outputs[bi:bi+config.batch_size]

                 predicted = model.forward(batch_inputs)
-                dW = model.backward(np.ones_like(predicted))
                 residual = predicted - batch_outputs
-
-                # TODO: try something like this instead?
-                #err_dW = np.dot(loss.dmean(residual), np.expand_dims(dW, 0))
-                err_dW = loss.df(residual) * dW / len(residual)
-                err_dW = np.sum(err_dW, axis=0)
-
-                optim.alpha = LR * LRprod**e
-                optim.update(err_dW, model.W)
+                dW = model.backward(loss.dmean(residual))
+                optim.update(dW, model.W)

                 # note: we don't actually need this for training, only monitoring.
                 cumsum_loss += loss.mean(residual)
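
With this hunk, backward() is seeded with loss.dmean(residual) instead of a tensor of ones, so the chain rule runs entirely inside the layers and the manual err_dW bookkeeping goes away. A standalone check that this seeding is the gradient of the batch-averaged squared error for a single linear layer (hypothetical sizes; the real Model/Dense classes are not used here):

    import numpy as np

    np.random.seed(0)
    N, ins, outs = 8, 3, 2                  # hypothetical sizes
    X = np.random.randn(N, ins)
    W = np.random.randn(ins, outs)
    T = np.random.randn(N, outs)            # targets

    def batch_loss(W):
        r = X.dot(W) - T                    # residual
        return np.sum(np.square(r)) / N     # Squared().f summed, averaged over the batch

    # analytic: seed the backward pass with loss.dmean(residual) = 2*r/N,
    # then Dense.dF accumulates dcoeffs = X.T.dot(seed)
    r = X.dot(W) - T
    dW = X.T.dot(2 * r / N)

    # numerical check by central differences
    num, eps = np.zeros_like(W), 1e-6
    for i in range(ins):
        for j in range(outs):
            Wp, Wm = W.copy(), W.copy()
            Wp[i, j] += eps
            Wm[i, j] -= eps
            num[i, j] = (batch_loss(Wp) - batch_loss(Wm)) / (2 * eps)

    print(np.allclose(dW, num, atol=1e-5))  # True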