parent 5f87c7a56b
commit 06eb28b23e
2 changed files with 77 additions and 82 deletions

optim_nn.py: 16 lines changed
@@ -85,7 +85,7 @@ class LayerNorm(Layer):
         self.gamma[:] = 1
         self.beta[:] = 0
 
-    def F(self, X):
+    def forward(self, X):
         self.mean = X.mean(0)
         self.center = X - self.mean
         self.var = self.center.var(0) + self.eps
@@ -96,7 +96,7 @@ class LayerNorm(Layer):
             return self.gamma * self.Xnorm + self.beta
         return self.Xnorm
 
-    def dF(self, dY):
+    def backward(self, dY):
        length = dY.shape[0]
 
        if self.affine:
@@ -161,14 +161,14 @@ class Denses(Layer): # TODO: rename?
 
         self.std = np.std(self.W)
 
-    def F(self, X):
+    def forward(self, X):
         self.X = X
         if self.axis == 0:
             return np.einsum('ixj,xjk->ikj', X, self.coeffs) + self.biases
         elif self.axis == 1:
             return np.einsum('ijx,jxk->ijk', X, self.coeffs) + self.biases
 
-    def dF(self, dY):
+    def backward(self, dY):
         self.dbiases[:] = dY.sum(0, keepdims=True)
         if self.axis == 0:
             self.dcoeffs[:] = np.einsum('ixj,ikj->xjk', self.X, dY)
@@ -183,12 +183,12 @@ class DenseOneLess(Dense):
         ins, outs = self.input_shape[0], self.output_shape[0]
         assert ins == outs, (ins, outs)
 
-    def F(self, X):
+    def forward(self, X):
         np.fill_diagonal(self.coeffs, 0)
         self.X = X
         return X.dot(self.coeffs) + self.biases
 
-    def dF(self, dY):
+    def backward(self, dY):
         self.dcoeffs[:] = self.X.T.dot(dY)
         self.dbiases[:] = dY.sum(0, keepdims=True)
         np.fill_diagonal(self.dcoeffs, 0)
@@ -203,7 +203,7 @@ class CosineDense(Dense):
 
     eps = 1e-4
 
-    def F(self, X):
+    def forward(self, X):
         self.X = X
         self.X_norm = np.sqrt(np.square(X).sum(-1, keepdims=True) \
           + 1 + self.eps)
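
The + 1 under the square root reads as if a constant 1 were appended to X before taking the norm, i.e. the bias is folded into the cosine as one more input. That reading is my inference, not stated in the source, but the identity itself is easy to verify standalone:

import numpy as np

X = np.random.randn(4, 3)
lhs = np.sqrt(np.square(X).sum(-1) + 1)  # as in forward(), minus the eps
rhs = np.linalg.norm(np.hstack([X, np.ones((4, 1))]), axis=1)  # norm of [X, 1]
print(np.allclose(lhs, rhs))  # expect True
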
@@ -213,7 +213,7 @@ class CosineDense(Dense):
         Y = self.dot / (self.X_norm * self.W_norm)
         return Y
 
-    def dF(self, dY):
+    def backward(self, dY):
         ddot = dY / self.X_norm / self.W_norm
         dX_norm = -(dY * self.dot / self.W_norm).sum(-1, keepdims=True) / self.X_norm**2
         dW_norm = -(dY * self.dot / self.X_norm).sum( 0, keepdims=True) / self.W_norm**2

optim_nn_core.py: 143 lines changed
@@ -21,6 +21,9 @@ _sqrt2 = _f(np.sqrt(2))
 _invsqrt2 = _f(1/np.sqrt(2))
 _pi = _f(np.pi)
 
+class LayerIncompatibility(Exception):
+    pass
+
 # Initializations {{{1
 
 # note: these are currently only implemented for 2D shapes.
@@ -52,13 +55,13 @@ class CategoricalCrossentropy(Loss):
     def __init__(self, eps=1e-6):
         self.eps = _f(eps)
 
-    def F(self, p, y):
+    def forward(self, p, y):
         # TODO: assert dimensionality and p > 0 (if not self.unsafe?)
         p = np.clip(p, self.eps, 1 - self.eps)
         f = np.sum(-y * np.log(p) - (1 - y) * np.log(1 - p), axis=-1)
         return np.mean(f)
 
-    def dF(self, p, y):
+    def backward(self, p, y):
         p = np.clip(p, self.eps, 1 - self.eps)
         df = (p - y) / (p * (1 - p))
         return df / len(y)
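
The gradient above checks out: differentiating f = -y*log(p) - (1-y)*log(1-p) gives df/dp = (p - y) / (p*(1 - p)), and the np.mean contributes the 1/len(y). A standalone finite-difference spot-check, with shapes and tolerances of my own choosing:

import numpy as np

eps = 1e-6
p = np.random.uniform(0.1, 0.9, (4, 3))
y = np.eye(3)[np.random.randint(0, 3, size=4)]

def F(p):
    q = np.clip(p, eps, 1 - eps)
    return np.mean(np.sum(-y * np.log(q) - (1 - y) * np.log(1 - q), axis=-1))

analytic = (p - y) / (p * (1 - p)) / len(y)  # the backward() formula
h = 1e-5
numeric = np.zeros_like(p)
for idx in np.ndindex(*p.shape):
    d = np.zeros_like(p); d[idx] = h
    numeric[idx] = (F(p + d) - F(p - d)) / (2 * h)
print(np.allclose(analytic, numeric, atol=1e-4))  # expect True
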
@@ -68,18 +71,18 @@ class Accuracy(Loss):
     # utilizes max(), so it cannot be used for gradient descent.
     # use CategoricalCrossentropy for that instead.
 
-    def F(self, p, y):
+    def forward(self, p, y):
         correct = np.argmax(p, axis=-1) == np.argmax(y, axis=-1)
         return np.mean(correct)
 
-    def dF(self, p, y):
+    def backward(self, p, y):
         raise NotImplementedError("cannot take the gradient of Accuracy")
 
 class ResidualLoss(Loss):
-    def F(self, p, y): # mean
+    def forward(self, p, y):
         return np.mean(self.f(p - y))
 
-    def dF(self, p, y): # dmean
+    def backward(self, p, y):
         ret = self.df(p - y) / len(y)
         return ret
 
@@ -218,7 +221,7 @@ class Adam(Optimizer):
 class Nadam(Optimizer):
     # paper: https://arxiv.org/abs/1412.6980
     # paper: http://cs229.stanford.edu/proj2015/054_report.pdf
-    # TODO; double-check this implementation. also actually read the damn paper.
+    # TODO: double-check this implementation. also read the damn paper.
     # lifted from https://github.com/fchollet/keras/blob/5d38b04/keras/optimizers.py#L530
     # lifted from https://github.com/jpilaul/IFT6266_project/blob/master/Models/Algo_Momentum.py
 
@@ -282,10 +285,10 @@ class Layer:
 
     # methods we might want to override:
 
-    def F(self, X):
+    def forward(self, X):
         raise NotImplementedError("unimplemented", self)
 
-    def dF(self, dY):
+    def backward(self, dY):
         raise NotImplementedError("unimplemented", self)
 
     def do_feed(self, child):
@@ -301,21 +304,21 @@ class Layer:
         self.output_shape = shape
         return shape
 
-    # TODO: rename this multi and B crap to something actually relevant.
+    # TODO: better names for these (still)
 
-    def multi(self, B):
+    def _propogate(self, edges):
         if not self.unsafe:
-            assert len(B) == 1, self
-        return self.F(B[0])
+            assert len(edges) == 1, self
+        return self.forward(edges[0])
 
-    def dmulti(self, dB):
-        if len(dB) == 1:
-            return self.dF(dB[0])
-        return sum((self.dF(dY) for dY in dB))
+    def _backpropogate(self, edges):
+        if len(edges) == 1:
+            return self.backward(edges[0])
+        return sum((self.backward(dY) for dY in edges))
 
     # general utility methods:
 
-    def compatible(self, parent):
+    def is_compatible(self, parent):
         if self.input_shape is None:
             # inherit shape from output
             shape = self.make_shape(parent.output_shape)
@@ -325,9 +328,9 @@ class Layer:
         return np.all(self.input_shape == parent.output_shape)
 
     def feed(self, child):
-        if not child.compatible(self):
+        if not child.is_compatible(self):
             fmt = "{} is incompatible with {}: shape mismatch: {} vs. {}"
-            raise Exception(fmt.format(self, child, self.output_shape, child.input_shape))
+            raise LayerIncompatibility(fmt.format(self, child, self.output_shape, child.input_shape))
         self.do_feed(child)
         child.be_fed(self)
         return child
@@ -344,32 +347,32 @@ class Layer:
         self.W = W
         self.dW = dW
 
-    def forward(self, lut):
+    def propagate(self, values):
         if not self.unsafe:
             assert self.parents, self
-        B = []
+        edges = []
         for parent in self.parents:
             # TODO: skip over irrelevant nodes (if any)
-            X = lut[parent]
+            X = values[parent]
             if not self.unsafe:
                 self.validate_input(X)
-            B.append(X)
-        Y = self.multi(B)
+            edges.append(X)
+        Y = self._propogate(edges)
         if not self.unsafe:
             self.validate_output(Y)
         return Y
 
-    def backward(self, lut):
+    def backpropagate(self, values):
         if not self.unsafe:
             assert self.children, self
-        dB = []
+        edges = []
         for child in self.children:
             # TODO: skip over irrelevant nodes (if any)
-            dY = lut[child]
+            dY = values[child]
             if not self.unsafe:
                 self.validate_output(dY)
-            dB.append(dY)
-        dX = self.dmulti(dB)
+            edges.append(dY)
+        dX = self._backpropogate(edges)
         if not self.unsafe:
             self.validate_input(dX)
         return dX
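
One step worth spelling out: _backpropogate sums self.backward over every incoming edge because a node that feeds several children receives one gradient per child, and the multivariable chain rule says those add. A standalone sketch of that fact, checked by finite differences:

import numpy as np

def L(Y):  # two "children" consume Y: sum(Y**2) and sum(3 * Y)
    return np.sum(Y**2) + np.sum(3 * Y)

Y = np.random.randn(5)
analytic = 2 * Y + 3  # the per-child gradients, added together
h = 1e-6
numeric = np.array([(L(Y + h * e) - L(Y - h * e)) / (2 * h) for e in np.eye(5)])
print(np.allclose(analytic, numeric, atol=1e-4))  # expect True
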
@@ -384,10 +387,10 @@ class Input(Layer):
         self.input_shape = self.shape
         self.output_shape = self.shape
 
-    def F(self, X):
+    def forward(self, X):
         return X
 
-    def dF(self, dY):
+    def backward(self, dY):
         #self.dY = dY
         return np.zeros_like(dY)
 
@@ -397,11 +400,11 @@ class Reshape(Layer):
         self.shape = tuple(new_shape)
         self.output_shape = self.shape
 
-    def F(self, X):
+    def forward(self, X):
         self.batch_size = X.shape[0]
         return X.reshape(self.batch_size, *self.output_shape)
 
-    def dF(self, dY):
+    def backward(self, dY):
         assert dY.shape[0] == self.batch_size
         return dY.reshape(self.batch_size, *self.input_shape)
 
@@ -411,11 +414,11 @@ class Flatten(Layer):
         self.output_shape = (np.prod(shape),)
         return shape
 
-    def F(self, X):
+    def forward(self, X):
         self.batch_size = X.shape[0]
         return X.reshape(self.batch_size, *self.output_shape)
 
-    def dF(self, dY):
+    def backward(self, dY):
         assert dY.shape[0] == self.batch_size
         return dY.reshape(self.batch_size, *self.input_shape)
 
@@ -425,42 +428,42 @@ class Affine(Layer):
         self.a = _f(a)
         self.b = _f(b)
 
-    def F(self, X):
+    def forward(self, X):
         return self.a * X + self.b
 
-    def dF(self, dY):
+    def backward(self, dY):
         return dY * self.a
 
 class Sum(Layer):
-    def multi(self, B):
-        return np.sum(B, axis=0)
+    def _propogate(self, edges):
+        return np.sum(edges, axis=0)
 
-    def dmulti(self, dB):
-        #assert len(dB) == 1, "unimplemented"
-        return dB[0] # TODO: does this always work?
+    def _backpropogate(self, edges):
+        #assert len(edges) == 1, "unimplemented"
+        return edges[0] # TODO: does this always work?
 
 class Sigmoid(Layer): # aka Logistic
-    def F(self, X):
+    def forward(self, X):
         self.sig = sigmoid(X)
         return self.sig
 
-    def dF(self, dY):
+    def backward(self, dY):
         return dY * self.sig * (1 - self.sig)
 
 class Tanh(Layer):
-    def F(self, X):
+    def forward(self, X):
         self.sig = np.tanh(X)
         return self.sig
 
-    def dF(self, dY):
+    def backward(self, dY):
         return dY * (1 - self.sig * self.sig)
 
 class Relu(Layer):
-    def F(self, X):
+    def forward(self, X):
         self.cond = X >= 0
         return np.where(self.cond, X, 0)
 
-    def dF(self, dY):
+    def backward(self, dY):
         return np.where(self.cond, dY, 0)
 
 class Elu(Layer):
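
Each of these activation pairs can be spot-checked the same way: backward should match a finite difference of forward. Tanh as a standalone example (the others check identically):

import numpy as np

X = np.random.randn(6)
dY = np.random.randn(6)
sig = np.tanh(X)
dX = dY * (1 - sig * sig)  # Tanh.backward
h = 1e-6
numeric = dY * (np.tanh(X + h) - np.tanh(X - h)) / (2 * h)
print(np.allclose(dX, numeric, atol=1e-6))  # expect True
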
@@ -470,24 +473,24 @@ class Elu(Layer):
         super().__init__()
         self.alpha = _f(alpha)
 
-    def F(self, X):
+    def forward(self, X):
         self.cond = X >= 0
         self.neg = np.exp(X) - 1
         return np.where(self.cond, X, self.neg)
 
-    def dF(self, dY):
+    def backward(self, dY):
         return dY * np.where(self.cond, 1, self.neg + 1)
 
 class GeluApprox(Layer):
     # paper: https://arxiv.org/abs/1606.08415
     # plot: https://www.desmos.com/calculator/ydzgtccsld
 
-    def F(self, X):
+    def forward(self, X):
         self.a = 1.704 * X
         self.sig = sigmoid(self.a)
         return X * self.sig
 
-    def dF(self, dY):
+    def backward(self, dY):
         return dY * self.sig * (1 + self.a * (1 - self.sig))
 
 class Softmax(Layer):
@@ -497,14 +500,14 @@ class Softmax(Layer):
         super().__init__()
         self.axis = int(axis)
 
-    def F(self, X):
+    def forward(self, X):
         alpha = np.max(X, axis=-1, keepdims=True)
         num = np.exp(X - alpha)
         den = np.sum(num, axis=-1, keepdims=True)
         self.sm = num / den
         return self.sm
 
-    def dF(self, dY):
+    def backward(self, dY):
         dYsm = dY * self.sm
         dX = dYsm - np.sum(dYsm, axis=-1, keepdims=True) * self.sm
         return dX
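
The two-line backward here is the full softmax Jacobian J[i,j] = sm[i] * ((i == j) - sm[j]) applied to dY, just without materializing J. A standalone check of that claim:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max(-1, keepdims=True))
    return e / e.sum(-1, keepdims=True)

x = np.random.randn(4)
dY = np.random.randn(4)
sm = softmax(x)
dYsm = dY * sm
dX = dYsm - np.sum(dYsm, axis=-1, keepdims=True) * sm  # backward() above
J = np.diag(sm) - np.outer(sm, sm)  # the (symmetric) softmax Jacobian
print(np.allclose(dX, J @ dY))  # expect True
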
@@ -543,19 +546,11 @@ class Dense(Layer):
 
         self.std = np.std(self.W)
 
-    def F(self, X):
+    def forward(self, X):
         self.X = X
         return X.dot(self.coeffs) + self.biases
 
-    def dF(self, dY):
-        #Y = np.einsum('ix,xj->ij', X, C)
-        #dX = np.einsum('ix,jx->ij', dY, C)
-        #dC = np.einsum('xi,xj->ij', X, dY)
-        # or rather
-        #Y = np.einsum('ix,xj->ij', X, C)
-        #dX = np.einsum('ij,xj->ix', dY, C)
-        #dC = np.einsum('ix,ij->xj', X, dY)
-        # that makes sense, just move the pairs around
+    def backward(self, dY):
         self.dcoeffs[:] = self.X.T.dot(dY)
         self.dbiases[:] = dY.sum(0, keepdims=True)
         return dY.dot(self.coeffs.T)
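
The deleted einsum comments were correct; the commit simply drops them in favor of the dot-product forms already in the code. A runnable restatement of what they said, for Y = X.dot(C): dX = dY.dot(C.T) and dC = X.T.dot(dY):

import numpy as np

X = np.random.randn(5, 3)
C = np.random.randn(3, 4)
dY = np.random.randn(5, 4)
# "move the pairs around": the contracted index just swaps sides
assert np.allclose(np.einsum('ij,xj->ix', dY, C), dY.dot(C.T))  # dX
assert np.allclose(np.einsum('ix,ij->xj', X, dY), X.T.dot(dY))  # dC
print("einsum identities hold")
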
@@ -602,20 +597,20 @@ class Model:
         return nodes
 
     def forward(self, X):
-        lut = dict()
+        values = dict()
         input_node = self.ordered_nodes[0]
         output_node = self.ordered_nodes[-1]
-        lut[input_node] = input_node.multi(np.expand_dims(X, 0))
+        values[input_node] = input_node._propogate(np.expand_dims(X, 0))
         for node in self.ordered_nodes[1:]:
-            lut[node] = node.forward(lut)
-        return lut[output_node]
+            values[node] = node.propagate(values)
+        return values[output_node]
 
     def backward(self, error):
-        lut = dict()
+        values = dict()
         output_node = self.ordered_nodes[-1]
-        lut[output_node] = output_node.dmulti(np.expand_dims(error, 0))
+        values[output_node] = output_node._backpropogate(np.expand_dims(error, 0))
         for node in reversed(self.ordered_nodes[:-1]):
-            lut[node] = node.backward(lut)
+            values[node] = node.backpropagate(values)
         return self.dW
 
     def load_weights(self, fn):
@@ -670,10 +665,10 @@ class Ritual: # i'm just making up names at this point
         self.bn = 0
 
     def measure(self, p, y):
-        return self.mloss.F(p, y)
+        return self.mloss.forward(p, y)
 
     def derive(self, p, y):
-        return self.loss.dF(p, y)
+        return self.loss.backward(p, y)
 
     def learn(self, inputs, outputs):
         predicted = self.model.forward(inputs)
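
All told, the renames leave one convention: layers implement forward/backward (formerly F/dF), the graph plumbing is propagate/backpropagate wrapping _propogate/_backpropogate (formerly forward/backward wrapping multi/dmulti), and feed() failures raise LayerIncompatibility. A minimal sketch of a custom layer against the new names; the Double layer is made up for illustration and assumes Layer's no-argument base __init__:

from optim_nn_core import Layer

class Double(Layer):
    # shape-preserving layer: y = 2x, so dL/dx = 2 * dL/dy
    def forward(self, X):
        return 2 * X

    def backward(self, dY):
        return 2 * dY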