optim/onn/loss.py

130 lines
3.6 KiB
Python

import numpy as np
from .float import *
class Loss:
    """Base class for loss functions.

    Subclasses are expected to override both methods; the versions here
    only raise NotImplementedError.
    """

    def forward(self, p, y):
        """Return the loss for predictions ``p`` against targets ``y``."""
        reason = "unimplemented"
        raise NotImplementedError(reason, self)

    def backward(self, p, y):
        """Return the derivative of the loss with respect to ``p``."""
        reason = "unimplemented"
        raise NotImplementedError(reason, self)
class NLL(Loss):
    """Negative log likelihood loss."""

    def forward(self, p, y):
        """Return the mean of ``-(p * y)`` taken over every element."""
        # presumably p holds log-probabilities and y one-hot targets -- TODO confirm
        negated = -(p * y)
        return np.mean(negated)

    def backward(self, p, y):
        """Return ``-y`` divided by the length of ``p``'s first axis."""
        count = len(p)
        return -y / count
class CategoricalCrossentropy(Loss):
    """Cross-entropy loss with predictions clipped away from 0 and 1.

    The formulation is lifted from theano.
    """

    def __init__(self, eps=1e-6):
        """Store ``eps``; predictions are clipped to ``[eps, 1 - eps]``."""
        self.eps = _f(eps)

    def forward(self, p, y):
        """Sum the per-element entropy over the last axis, then average."""
        lower, upper = self.eps, 1 - self.eps
        q = np.clip(p, lower, upper)
        on_term = -y * np.log(q)
        off_term = (1 - y) * np.log(1 - q)
        per_sample = np.sum(on_term - off_term, axis=-1)
        return np.mean(per_sample)

    def backward(self, p, y):
        """Return the derivative w.r.t. the clipped predictions over ``len(y)``."""
        lower, upper = self.eps, 1 - self.eps
        q = np.clip(p, lower, upper)
        numerator = q - y
        denominator = q * (1 - q)
        grad = numerator / denominator
        return grad / len(y)
class Accuracy(Loss):
    """Fraction of samples whose predicted category matches the target.

    The value is a mean of booleans, so it lies in [0, 1].
    It relies on argmax(), so it cannot be used for gradient descent;
    use CategoricalCrossentropy or NLL for that instead.
    """

    def forward(self, p, y):
        """Compare the argmax of ``p`` and ``y`` along the last axis."""
        predicted = np.argmax(p, axis=-1)
        expected = np.argmax(y, axis=-1)
        hits = predicted == expected
        return np.mean(hits)

    def backward(self, p, y):
        """Always raises: accuracy has no usable gradient."""
        raise NotImplementedError("cannot take the gradient of Accuracy")
class ResidualLoss(Loss):
    """Loss defined purely in terms of the residual ``p - y``.

    Subclasses supply ``f(r)`` (the elementwise loss) and ``df(r)``
    (its elementwise derivative).
    """

    def forward(self, p, y):
        """Return the mean of ``f`` applied to the residual."""
        residual = p - y
        return np.mean(self.f(residual))

    def backward(self, p, y):
        """Return ``df`` of the residual divided by ``len(y)``."""
        residual = p - y
        return self.df(residual) / len(y)
class SquaredHalved(ResidualLoss):
    """Half of the squared residual, so the derivative is the residual itself."""

    def f(self, r):
        """Return ``r**2 / 2`` elementwise."""
        squared = np.square(r)
        return squared / 2

    def df(self, r):
        """Return ``r`` unchanged."""
        return r
class Squared(ResidualLoss):
    """Squared residual."""

    def f(self, r):
        """Return ``r**2`` elementwise."""
        squared = np.square(r)
        return squared

    def df(self, r):
        """Return ``2 * r``."""
        doubled = 2 * r
        return doubled
class Absolute(ResidualLoss):
    """Absolute value of the residual."""

    def f(self, r):
        """Return ``|r|`` elementwise."""
        magnitude = np.abs(r)
        return magnitude

    def df(self, r):
        """Return the sign of ``r`` (zero where ``r`` is zero)."""
        direction = np.sign(r)
        return direction
class Huber(ResidualLoss):
    """Huber loss: quadratic for small residuals, linear for large ones.

    ``delta`` is the residual magnitude at which the loss switches from
    the quadratic branch to the linear branch.
    """

    def __init__(self, delta=1.0):
        """Store the switch-over threshold ``delta``."""
        self.delta = _f(delta)

    def f(self, r):
        """Return the elementwise Huber loss of residual ``r``."""
        magnitude = np.abs(r)
        # The branch is selected on |r|, not r: a large negative residual
        # must take the linear branch just like a large positive one.
        return np.where(magnitude <= self.delta,
                        np.square(r) / 2,
                        self.delta * (magnitude - self.delta / 2))

    def df(self, r):
        """Return the elementwise derivative of the Huber loss."""
        # Same |r| test as in f, so the gradient is clipped to
        # [-delta, delta] on both sides.
        return np.where(np.abs(r) <= self.delta,
                        r,
                        self.delta * np.sign(r))
# more
class SomethingElse(ResidualLoss):
    """A family of losses that generalizes Absolute and SquaredHalved.

    With ``a == 1`` this is ``r**2 / 2``; with ``a == 2`` it is ``|r|``.
    plot: https://www.desmos.com/calculator/fagjg9vuz7
    """

    def __init__(self, a=4/3):
        """Precompute the scale and exponents derived from ``a`` in [1, 2]."""
        assert 1 <= a <= 2, "parameter out of range"
        exponent = 2 / a
        self.a = _f(a / 2)          # scale applied in f
        self.b = _f(exponent)       # exponent used in f
        self.c = _f(exponent - 1)   # exponent used in df

    def f(self, r):
        """Return ``(a / 2) * |r| ** (2 / a)`` elementwise."""
        magnitude = np.abs(r)
        return self.a * magnitude**self.b

    def df(self, r):
        """Return ``sign(r) * |r| ** (2 / a - 1)`` elementwise."""
        magnitude = np.abs(r)
        return np.sign(r) * magnitude**self.c
class Confidence(Loss):
    """Measure how large the value of the predicted class is.

    This isn't "confidence" in any meaningful (e.g. Bayesian) sense;
    it only reports the size of the largest prediction, rescaled.
    Used as a loss, it acts like a crappy regularizer.
    It really just measures how much of a hot-shot the network thinks it is.
    """

    def forward(self, p, y=None):
        """Return the mean rescaled maximum of ``p`` along the last axis.

        ``y`` is accepted for interface compatibility and ignored.
        """
        categories = p.shape[-1]
        chance = 1/categories
        peak = np.max(p, axis=-1)
        # maps a peak of 1/categories to 0 and a peak of 1 to 1.
        confidence = (peak - chance) / (1 - chance)
        # the exponent in softmax puts a maximum on confidence,
        # but that isn't compensated for here. if it matters,
        # an activation without this limit would be a better choice.
        return np.mean(confidence)

    def backward(self, p, y=None):
        """Return a gradient-like array that is nonzero only at each row's maximum.

        ``y`` is accepted for interface compatibility and ignored.
        """
        # in order to agree with the forward pass,
        # using this backwards pass as-is will minimize confidence.
        # NOTE(review): this scales by p / categories rather than by a
        # constant, so it does not look like the analytic derivative of
        # forward() -- confirm whether that is intentional.
        categories = p.shape[-1]
        chance = 1/categories
        detc = p / categories / (1 - chance)
        is_peak = p == np.max(p, axis=-1, keepdims=True)
        return detc * is_peak