import numpy as np

from .float import _f


class Loss:
    def forward(self, p, y):
        raise NotImplementedError("unimplemented", self)

    def backward(self, p, y):
        raise NotImplementedError("unimplemented", self)


class NLL(Loss):  # Negative Log Likelihood
    # NOTE: this is a misnomer -- the "log" part is not implemented here.
    # instead, you should use a Log activation at the end of your network,
    # e.g. LogSoftmax.
    # TODO: simplify the math that results from this.

    def forward(self, p, y):
        correct = p * y
        return np.mean(-correct)

    def backward(self, p, y):
        return -y / len(p)


class HingeWW(Loss):
    # multi-class hinge loss, Weston & Watkins variant.

    def forward(self, p, y):
        # TODO: rename `score`, since lower is better.
        score = p * (1 - y) - p * y
        return np.mean(np.sum(np.maximum(1 + score, 0), axis=-1))

    def backward(self, p, y):
        score = p * (1 - y) - p * y
        d_score = 1 - 2 * y  # derivative of score with respect to p.
        return (score >= -1) * d_score / len(y)


class HingeCS(Loss):
    # multi-class hinge loss, Crammer & Singer variant.
    # this has been loosely extended to support multiple true classes.
    # however, it should generally be used such that
    # p is a vector that sums to 1 with values in [0, 1],
    # and y is a one-hot encoding of the correct class.

    def forward(self, p, y):
        wrong = np.max((1 - y) * p, axis=-1)
        right = np.max(y * p, axis=-1)
        f = np.maximum(1 + wrong - right, 0)
        return np.mean(f)

    def backward(self, p, y):
        wrong_in = (1 - y) * p
        right_in = y * p
        wrong = np.max(wrong_in, axis=-1, keepdims=True)
        right = np.max(right_in, axis=-1, keepdims=True)
        # note: this could go haywire if the maximum is not unique.
        delta = (1 - y) * (wrong_in == wrong) - y * (right_in == right)
        return (wrong - right >= -1) * delta / len(y)


class CategoricalCrossentropy(Loss):
    # lifted from theano

    def __init__(self, eps=1e-6):
        self.eps = _f(eps)

    def forward(self, p, y):
        p = np.clip(p, self.eps, 1 - self.eps)
        f = np.sum(-y * np.log(p) - (1 - y) * np.log(1 - p), axis=-1)
        return np.mean(f)

    def backward(self, p, y):
        p = np.clip(p, self.eps, 1 - self.eps)
        df = (p - y) / (p * (1 - p))
        return df / len(y)


class Accuracy(Loss):
    # returns the fraction of samples whose class was correctly predicted.
    # utilizes argmax(), so it cannot be used for gradient descent.
    # use CategoricalCrossentropy or NLL for that instead.

    def forward(self, p, y):
        correct = np.argmax(p, axis=-1) == np.argmax(y, axis=-1)
        return np.mean(correct)

    def backward(self, p, y):
        raise NotImplementedError("cannot take the gradient of Accuracy")


class ResidualLoss(Loss):
    # a loss that depends only on the residual r = p - y.
    # subclasses implement f(r) and its derivative df(r).

    def forward(self, p, y):
        return np.mean(self.f(p - y))

    def backward(self, p, y):
        ret = self.df(p - y) / len(y)
        return ret


class SquaredHalved(ResidualLoss):
    def f(self, r):
        return np.square(r) / 2

    def df(self, r):
        return r


class Squared(ResidualLoss):
    def f(self, r):
        return np.square(r)

    def df(self, r):
        return 2 * r


class Absolute(ResidualLoss):
    def f(self, r):
        return np.abs(r)

    def df(self, r):
        return np.sign(r)


class Huber(ResidualLoss):
    # quadratic within delta of zero, linear outside.

    def __init__(self, delta=1.0):
        self.delta = _f(delta)

    def f(self, r):
        return np.where(np.abs(r) <= self.delta,
                        np.square(r) / 2,
                        self.delta * (np.abs(r) - self.delta / 2))

    def df(self, r):
        return np.where(np.abs(r) <= self.delta,
                        r,
                        self.delta * np.sign(r))


class LogCosh(ResidualLoss):
    # essentially a smooth version of Huber loss.

    def f(self, r):
        return np.log(np.cosh(r))

    def df(self, r):
        return np.tanh(r)


# more

class SomethingElse(ResidualLoss):
    # generalizes Absolute and SquaredHalved.
    # plot: https://www.desmos.com/calculator/fagjg9vuz7

    def __init__(self, a=4/3):
        assert 1 <= a <= 2, "parameter out of range"
        self.a = _f(a / 2)
        self.b = _f(2 / a)
        self.c = _f(2 / a - 1)

    def f(self, r):
        return self.a * np.abs(r)**self.b

    def df(self, r):
        return np.sign(r) * np.abs(r)**self.c
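
# The block below is a minimal usage sketch, not part of the library: it
# assumes p holds predictions and y holds one-hot targets, both shaped
# (batch, classes), and that this module is run as part of its package
# (e.g. `python -m <package>.loss`, package name hypothetical) so the
# relative import of _f resolves. forward() returns a scalar loss;
# backward() returns an array shaped like p carrying the gradient signal,
# scaled by 1 / len(y).
if __name__ == "__main__":
    p = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1]])
    y = np.array([[1.0, 0.0, 0.0],
                  [0.0, 0.0, 1.0]])
    for loss in (CategoricalCrossentropy(), HingeCS(), Huber(delta=0.5)):
        print(type(loss).__name__,
              "forward:", loss.forward(p, y),
              "backward shape:", loss.backward(p, y).shape)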