parent e2b179b2e6
commit 53a7d92288
2 changed files with 52 additions and 36 deletions

30  optim_nn.py  (Normal file → Executable file)
@@ -51,6 +51,36 @@ class SomethingElse(ResidualLoss):
     def df(self, r):
         return np.sign(r) * np.abs(r)**self.c
 
+class Confidence(Loss):
+    # this isn't "confidence" in any meaningful way; (e.g. Bayesian)
+    # it's just a metric of how large the value is of the predicted class.
+    # when using it for loss, it acts like a crappy regularizer.
+    # it really just measures how much of a hot-shot the network thinks it is.
+
+    def forward(self, p, y=None):
+        categories = p.shape[-1]
+        confidence = (np.max(p, axis=-1) - 1/categories) / (1 - 1/categories)
+        # the exponent in softmax puts a maximum on confidence,
+        # but we don't compensate for that. if necessary,
+        # it'd be better to use an activation that doesn't have this limit.
+        return np.mean(confidence)
+
+    def backward(self, p, y=None):
+        # in order to agree with the forward pass,
+        # using this backwards pass as-is will minimize confidence.
+        categories = p.shape[-1]
+        detc = p / categories / (1 - 1/categories)
+        dmax = p == np.max(p, axis=-1, keepdims=True)
+        return detc * dmax
+
+class NLL(Loss): # Negative Log Likelihood
+    def forward(self, p, y):
+        correct = p * y
+        return np.mean(-correct)
+
+    def backward(self, p, y):
+        return -y / len(p)
+
 # Nonparametric Layers {{{1
 
 # Parametric Layers {{{1
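
(Quick sanity check of the two new losses above; a standalone sketch with made-up toy arrays, assuming plain NumPy and one-hot targets rather than the repo's Loss machinery.)

import numpy as np

# toy batch: 2 samples, 4 classes; rows behave like softmax outputs, targets are one-hot
p = np.array([[0.7, 0.1, 0.1, 0.1],
              [0.4, 0.3, 0.2, 0.1]])
y = np.array([[1., 0., 0., 0.],
              [0., 1., 0., 0.]])

categories = p.shape[-1]
# Confidence.forward: (max - 1/categories) / (1 - 1/categories), averaged over the batch
confidence = (np.max(p, axis=-1) - 1/categories) / (1 - 1/categories)
print(np.mean(confidence))   # 0.4 here; 0 for uniform rows, 1 for one-hot rows

# NLL.forward and NLL.backward as defined in the diff
print(np.mean(-(p * y)))     # -0.125: mean over all entries, not just the correct ones
print(-y / len(p))           # -0.5 at each correct class, 0 elsewhere
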
@@ -77,28 +77,6 @@ class Accuracy(Loss):
     def backward(self, p, y):
         raise NotImplementedError("cannot take the gradient of Accuracy")
 
-class Confidence(Loss):
-    # this isn't "confidence" in any meaningful way; (e.g. Bayesian)
-    # it's just a metric of how large the value is of the predicted class.
-    # when using it for loss, it acts like a crappy regularizer.
-    # it really just measures how much of a hot-shot the network thinks it is.
-
-    def forward(self, p, y=None):
-        categories = p.shape[-1]
-        confidence = (np.max(p, axis=-1) - 1/categories) / (1 - 1/categories)
-        # the exponent in softmax puts a maximum on confidence,
-        # but we don't compensate for that. if necessary,
-        # it'd be better to use an activation that doesn't have this limit.
-        return np.mean(confidence)
-
-    def backward(self, p, y=None):
-        # in order to agree with the forward pass,
-        # using this backwards pass as-is will minimize confidence.
-        categories = p.shape[-1]
-        detc = p / categories / (1 - 1/categories)
-        dmax = p == np.max(p, axis=-1, keepdims=True)
-        return detc * dmax
-
 class ResidualLoss(Loss):
     def forward(self, p, y):
         return np.mean(self.f(p - y))
@@ -327,12 +305,12 @@ class Layer:
 
     # TODO: better names for these (still)
 
-    def _propogate(self, edges):
+    def _propagate(self, edges):
         if not self.unsafe:
             assert len(edges) == 1, self
         return self.forward(edges[0])
 
-    def _backpropogate(self, edges):
+    def _backpropagate(self, edges):
         if len(edges) == 1:
             return self.backward(edges[0])
         return sum((self.backward(dY) for dY in edges))
@@ -378,7 +356,7 @@ class Layer:
         if not self.unsafe:
             self.validate_input(X)
         edges.append(X)
-        Y = self._propogate(edges)
+        Y = self._propagate(edges)
         if not self.unsafe:
             self.validate_output(Y)
         return Y
@@ -393,7 +371,7 @@ class Layer:
         if not self.unsafe:
             self.validate_output(dY)
         edges.append(dY)
-        dX = self._backpropogate(edges)
+        dX = self._backpropagate(edges)
         if not self.unsafe:
             self.validate_input(dX)
         return dX
@@ -443,7 +421,7 @@ class Flatten(Layer):
         assert dY.shape[0] == self.batch_size
         return dY.reshape(self.batch_size, *self.input_shape)
 
-class Affine(Layer):
+class ConstAffine(Layer):
     def __init__(self, a=1, b=0):
         super().__init__()
         self.a = _f(a)
@@ -456,10 +434,10 @@ class Affine(Layer):
         return dY * self.a
 
 class Sum(Layer):
-    def _propogate(self, edges):
+    def _propagate(self, edges):
         return np.sum(edges, axis=0)
 
-    def _backpropogate(self, edges):
+    def _backpropagate(self, edges):
         #assert len(edges) == 1, "unimplemented"
         return edges[0] # TODO: does this always work?
 
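
(Only the method names change in the Sum hunk above, but for context: np.sum(edges, axis=0) adds the parents' outputs elementwise, and because the derivative of a sum with respect to each input is the identity, the lone incoming gradient edge can be passed through unchanged. A rough illustration with made-up arrays and plain NumPy:)

import numpy as np

edges = [np.ones((2, 3)), 2 * np.ones((2, 3))]   # two parent outputs of equal shape
Y = np.sum(edges, axis=0)                        # elementwise sum -> every entry is 3.0

dY = np.full((2, 3), 0.5)                        # a single incoming gradient edge
dX = dY                                          # identity Jacobian: the same gradient flows to each parent
print(Y[0, 0], dX[0, 0])                         # 3.0 0.5
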
@@ -515,8 +493,6 @@ class GeluApprox(Layer):
         return dY * self.sig * (1 + self.a * (1 - self.sig))
 
 class Softmax(Layer):
-    # lifted from theano
-
     def __init__(self, axis=-1):
         super().__init__()
         self.axis = int(axis)
@@ -529,9 +505,19 @@ class Softmax(Layer):
         return self.sm
 
     def backward(self, dY):
-        dYsm = dY * self.sm
-        dX = dYsm - np.sum(dYsm, axis=-1, keepdims=True) * self.sm
-        return dX
+        return (dY - np.sum(dY * self.sm, axis=-1, keepdims=True)) * self.sm
+
+class LogSoftmax(Softmax):
+    def __init__(self, axis=-1, eps=1e-6):
+        super().__init__()
+        self.axis = int(axis)
+        self.eps = _f(eps)
+
+    def forward(self, X):
+        return np.log(super().forward(X) + self.eps)
+
+    def backward(self, dY):
+        return dY - np.sum(dY, axis=-1, keepdims=True) * self.sm
 
 # Parametric Layers {{{1
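
(The rewritten Softmax.backward is algebraically the same as the three-line version it replaces: both compute dY*sm - sum(dY*sm)*sm, just factored differently. LogSoftmax.backward matches the usual dY - sum(dY)*softmax(x) gradient of log-softmax, with eps only guarding the log against zeros. A quick numerical check of the equivalence; random inputs and plain NumPy are assumptions of this sketch:)

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(3, 5))
sm = np.exp(x - x.max(axis=-1, keepdims=True))
sm /= sm.sum(axis=-1, keepdims=True)             # softmax rows
dY = rng.normal(size=sm.shape)

# old three-line form
dYsm = dY * sm
dX_old = dYsm - np.sum(dYsm, axis=-1, keepdims=True) * sm

# new one-line form from the diff
dX_new = (dY - np.sum(dY * sm, axis=-1, keepdims=True)) * sm

print(np.allclose(dX_old, dX_new))               # True
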
|
@ -626,7 +612,7 @@ class Model:
|
||||||
values = dict()
|
values = dict()
|
||||||
input_node = self.ordered_nodes[0]
|
input_node = self.ordered_nodes[0]
|
||||||
output_node = self.ordered_nodes[-1]
|
output_node = self.ordered_nodes[-1]
|
||||||
values[input_node] = input_node._propogate(np.expand_dims(X, 0))
|
values[input_node] = input_node._propagate(np.expand_dims(X, 0))
|
||||||
for node in self.ordered_nodes[1:]:
|
for node in self.ordered_nodes[1:]:
|
||||||
values[node] = node.propagate(values)
|
values[node] = node.propagate(values)
|
||||||
return values[output_node]
|
return values[output_node]
|
||||||
|
@@ -634,7 +620,7 @@ class Model:
     def backward(self, error):
         values = dict()
         output_node = self.ordered_nodes[-1]
-        values[output_node] = output_node._backpropogate(np.expand_dims(error, 0))
+        values[output_node] = output_node._backpropagate(np.expand_dims(error, 0))
         for node in reversed(self.ordered_nodes[:-1]):
             values[node] = node.backpropagate(values)
         return self.dW