import numpy as np

from .float import _f
from .layer_base import *
from .initialization import *


class Bias(Layer):
    # TODO: support axes other than -1 and shapes other than 1D.

    serialized = {
        'b': 'biases',
    }

    def __init__(self, init=init_zeros, reg_b=None):
        super().__init__()
        self.biases = self._new_weights('biases', init=init, regularizer=reg_b)

    def make_shape(self, parent):
        shape = parent.output_shape
        self.input_shape = shape
        self.output_shape = shape
        self.biases.shape = (shape[-1],)

    def forward(self, X):
        return X + self.biases.f

    def backward(self, dY):
        self.biases.g += dY.sum(0)
        return dY
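
# e.g. with input_shape (n,), forward broadcasts the (n,) bias vector across
# the batch, which is why backward sums dY over axis 0 to match its gradient.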


class Dense(Layer):
    serialized = {
        'W': 'coeffs',
        'b': 'biases',
    }

    def __init__(self, dim, init=init_he_uniform, reg_w=None, reg_b=None):
        super().__init__()
        self.dim = int(dim)
        self.output_shape = (self.dim,)
        self.coeffs = self._new_weights('coeffs', init=init,
                                        regularizer=reg_w)
        self.biases = self._new_weights('biases', init=init_zeros,
                                        regularizer=reg_b)

    def make_shape(self, parent):
        shape = parent.output_shape
        self.input_shape = shape
        assert len(shape) == 1, shape
        self.coeffs.shape = (shape[0], self.dim)
        self.biases.shape = (1, self.dim)

    def forward(self, X):
        self.X = X
        return X @ self.coeffs.f + self.biases.f

    def backward(self, dY):
        self.coeffs.g += self.X.T @ dY
        self.biases.g += dY.sum(0, keepdims=True)
        return dY @ self.coeffs.f.T
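
# the backward pass above is the standard affine-map gradient: for
# Y = X @ W + b with X of shape (batch, in) and W of shape (in, out),
#   dW = X.T @ dY,  db = dY.sum(0),  dX = dY @ W.T.
# (a small numeric check of the dW identity lives at the bottom of this file.)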


class Conv1Dper(Layer):
    # periodic (circular) convolution.
    # currently only supports one channel I/O.

    # some notes:
    # we could use FFTs for larger convolutions.
    # i think storing the coefficients backwards would
    # eliminate reversal in the critical code.
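
    # for reference, a rough sketch of the FFT route (not used here): a
    # circular convolution is a pointwise product in frequency space,
    #   w_pad = np.zeros(length); w_pad[:kernel_size] = w
    #   Y = np.real(np.fft.ifft(np.fft.fft(X) * np.fft.fft(w_pad)))
    # up to a roll of the result to account for the kernel position (pos).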

    serialized = {
        'W': 'coeffs',
    }

    def __init__(self, kernel_size, pos=None,
                 init=init_glorot_uniform, reg_w=None):
        super().__init__()
        self.kernel_size = int(kernel_size)
        self.coeffs = self._new_weights('coeffs', init=init, regularizer=reg_w)
        if pos is None:
            self.wrap0 = (self.kernel_size - 0) // 2
            self.wrap1 = (self.kernel_size - 1) // 2
        elif pos == 'alt':
            self.wrap0 = (self.kernel_size - 1) // 2
            self.wrap1 = (self.kernel_size - 0) // 2
        elif pos == 'left':
            self.wrap0 = 0
            self.wrap1 = self.kernel_size - 1
        elif pos == 'right':
            self.wrap0 = self.kernel_size - 1
            self.wrap1 = 0
        else:
            raise ValueError("pos parameter not understood: {}".format(pos))
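
    # e.g. kernel_size=4: pos=None wraps two columns on the left and one on
    # the right, 'alt' swaps that, and 'left'/'right' put all three on a
    # single side; wrap0 + wrap1 == kernel_size - 1 in every case.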

    def make_shape(self, parent):
        shape = parent.output_shape
        self.input_shape = shape
        assert len(shape) == 1, shape
        self.output_shape = shape
        self.coeffs.shape = (1, self.kernel_size)

    def forward(self, X):
        if self.wrap0 == 0:
            Xper = np.hstack((X, X[:, :self.wrap1]))
        elif self.wrap1 == 0:
            Xper = np.hstack((X[:, -self.wrap0:], X))
        else:
            Xper = np.hstack((X[:, -self.wrap0:], X, X[:, :self.wrap1]))
        # rolling_batch is expected to come from the star imports above; it
        # yields the (batch, length, kernel_size) sliding windows of Xper.
        self.cols = rolling_batch(Xper, self.kernel_size)
        convolved = (self.cols * self.coeffs.f[:, ::-1]).sum(2)
        return convolved
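
    # e.g. kernel_size=3, pos=None on a row X = [a b c d]: Xper = [d a b c d a]
    # and output position t sees the window Xper[t:t+3], so the convolution
    # wraps around the ends of the signal.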

    def backward(self, dY):
        self.coeffs.g += (dY[:, :, None] * self.cols).sum(0)[:, ::-1].sum(
            0, keepdims=True)
        # gradient w.r.t. X: correlate dY with the un-reversed kernel,
        # wrapping on the opposite sides to undo the forward padding.
        length = dY.shape[1]
        dYp = np.hstack((dY[:, length - self.wrap1:], dY, dY[:, :self.wrap0]))
        dcols = rolling_batch(dYp, self.kernel_size)
        return (dcols * self.coeffs.f).sum(2)


class LayerNorm(Layer):
    # paper: https://arxiv.org/abs/1607.06450
    # note: nonparametric when affine == False
    # note: as written, the statistics are taken over axis 0 (the batch),
    #       not over the features of each sample as in the paper.

    def __init__(self, eps=1e-5, affine=True):
        super().__init__()
        self.eps = _f(eps)
        self.affine = bool(affine)

        if self.affine:
            self.gamma = self._new_weights('gamma', init=init_ones)
            self.beta = self._new_weights('beta', init=init_zeros)
            self.serialized = {
                'gamma': 'gamma',
                'beta': 'beta',
            }

    def make_shape(self, parent):
        shape = parent.output_shape
        self.input_shape = shape
        self.output_shape = shape
        assert len(shape) == 1, shape
        if self.affine:
            self.gamma.shape = (shape[0],)
            self.beta.shape = (shape[0],)

    def forward(self, X):
        self.mean = X.mean(0)
        self.center = X - self.mean
        self.var = self.center.var(0) + self.eps
        self.std = np.sqrt(self.var)

        self.Xnorm = self.center / self.std
        if self.affine:
            return self.gamma.f * self.Xnorm + self.beta.f
        return self.Xnorm

    def backward(self, dY):
        length = dY.shape[0]

        if self.affine:
            dXnorm = dY * self.gamma.f
            self.gamma.g += (dY * self.Xnorm).sum(0)
            self.beta.g += dY.sum(0)
        else:
            dXnorm = dY

        dstd = (dXnorm * self.center).sum(0) / -self.var
        dcenter = dXnorm / self.std + dstd / self.std * self.center / length
        dmean = -dcenter.sum(0)
        dX = dcenter + dmean / length

        return dX
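
# a sketch of where that backward comes from: with center = X - mean,
# std = sqrt(var(center) + eps), and Xnorm = center / std, the chain rule
# gives d std = -(dXnorm * center).sum(0) / var (from d(1/std) = -1/std**2),
# then d center = dXnorm / std + d std * (2 * center / length) / (2 * std),
# and finally d mean = -d center.sum(0), spread evenly back over the batch.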


class Denses(Layer):  # TODO: rename?
    # acts as a separate Dense for each row or column. only for 2D arrays.

    serialized = {
        'W': 'coeffs',
        'b': 'biases',
    }

    def __init__(self, dim, init=init_he_uniform,
                 reg_w=None, reg_b=None, axis=-1):
        super().__init__()
        self.dim = int(dim)
        self.weight_init = init
        self.axis = int(axis)
        self.coeffs = self._new_weights('coeffs', init=init,
                                        regularizer=reg_w)
        self.biases = self._new_weights('biases', init=init_zeros,
                                        regularizer=reg_b)

    def make_shape(self, parent):
        shape = parent.output_shape
        self.input_shape = shape
        assert len(shape) == 2, shape

        assert -len(shape) <= self.axis < len(shape)
        self.axis = self.axis % len(shape)

        self.output_shape = list(shape)
        self.output_shape[self.axis] = self.dim
        self.output_shape = tuple(self.output_shape)

        in_rows = self.input_shape[0]
        in_cols = self.input_shape[1]
        out_rows = self.output_shape[0]
        out_cols = self.output_shape[1]

        self.coeffs.shape = (in_rows, in_cols, self.dim)
        self.biases.shape = (1, out_rows, out_cols)

    def forward(self, X):
        self.X = X
        if self.axis == 0:
            return np.einsum('ixj,xjk->ikj', X, self.coeffs.f) + self.biases.f
        elif self.axis == 1:
            return np.einsum('ijx,jxk->ijk', X, self.coeffs.f) + self.biases.f

    def backward(self, dY):
        self.biases.g += dY.sum(0, keepdims=True)
        if self.axis == 0:
            self.coeffs.g += np.einsum('ixj,ikj->xjk', self.X, dY)
            return np.einsum('ikj,xjk->ixj', dY, self.coeffs.f)
        elif self.axis == 1:
            self.coeffs.g += np.einsum('ijx,ijk->jxk', self.X, dY)
            return np.einsum('ijk,jxk->ijx', dY, self.coeffs.f)
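
# to read the einsums above: i indexes the batch, x the contracted input
# slot, k the new output slot, and j the axis that keeps its own Dense.
# e.g. for axis == 1, each row j applies its own (in_cols, dim) matrix:
#   Y[i, j, k] = sum over x of X[i, j, x] * W[j, x, k], plus the bias.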


class CosineDense(Dense):
    # paper: https://arxiv.org/abs/1702.05870
    # another implementation:
    # https://github.com/farizrahman4u/keras-contrib/pull/36

    # the paper doesn't mention bias,
    # so we treat bias as an additional weight with a constant input of 1.
    # this is correct in Dense layers, so i hope it's correct here too.

    eps = 1e-4
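
    # what forward computes, with the implicit constant input of 1:
    #   Y = (X @ W + b) / (||(X, 1)|| * ||(W; b)||)
    # i.e. the cosine similarity between each augmented input row and each
    # augmented weight column; eps keeps both norms safely nonzero.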

    def forward(self, X):
        self.X = X
        self.X_norm = np.sqrt(np.square(X).sum(-1, keepdims=True)
                              + 1 + self.eps)
        self.W_norm = np.sqrt(np.square(self.coeffs.f).sum(0, keepdims=True)
                              + np.square(self.biases.f) + self.eps)
        self.dot = X @ self.coeffs.f + self.biases.f
        Y = self.dot / (self.X_norm * self.W_norm)
        return Y

    def backward(self, dY):
        ddot = dY / self.X_norm / self.W_norm
        dX_norm = -(dY * self.dot / self.W_norm).sum(-1, keepdims=True) \
            / self.X_norm**2
        dW_norm = -(dY * self.dot / self.X_norm).sum(0, keepdims=True) \
            / self.W_norm**2

        self.coeffs.g += self.X.T @ ddot \
            + dW_norm / self.W_norm * self.coeffs.f
        self.biases.g += ddot.sum(0, keepdims=True) \
            + dW_norm / self.W_norm * self.biases.f
        dX = ddot @ self.coeffs.f.T + dX_norm / self.X_norm * self.X

        return dX
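

if __name__ == '__main__':
    # a minimal finite-difference sanity check of the Dense gradient noted
    # earlier; it uses plain numpy only and assumes nothing about the Layer
    # API. run it with `python -m <package>.<this module>` so the relative
    # imports above resolve.
    rng = np.random.RandomState(0)
    X = rng.randn(8, 3)
    W = rng.randn(3, 4)
    dY = rng.randn(8, 4)
    eps = 1e-6
    dW_analytic = X.T @ dY
    dW_numeric = np.zeros_like(W)
    for i in range(W.shape[0]):
        for j in range(W.shape[1]):
            Wp = W.copy()
            Wp[i, j] += eps
            Wm = W.copy()
            Wm[i, j] -= eps
            # central difference of the scalar loss sum(Y * dY) w.r.t. W[i, j].
            dW_numeric[i, j] = (((X @ Wp) * dY).sum()
                                - ((X @ Wm) * dY).sum()) / (2 * eps)
    assert np.allclose(dW_analytic, dW_numeric, atol=1e-4)
    print('Dense coefficient gradient matches finite differences.')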