import sys

import numpy as np


def lament(*args, **kwargs):
    """print(), but to stderr."""
    print(*args, file=sys.stderr, **kwargs)


def lower_priority():
    """Set the priority of the process to below-normal."""
    # via https://stackoverflow.com/a/1023269
    if sys.platform == 'win32':
        try:
            import win32api
            import win32process
            import win32con
            pid = win32api.GetCurrentProcessId()
            handle = win32api.OpenProcess(
                win32con.PROCESS_ALL_ACCESS, True, pid)
            win32process.SetPriorityClass(
                handle, win32process.BELOW_NORMAL_PRIORITY_CLASS)
        except ImportError:
            lament("you do not have pywin32 installed.")
            lament("the process priority could not be lowered.")
            lament("consider: python -m pip install pypiwin32")
            lament("consider: conda install pywin32")
    else:
        import os
        # on POSIX, raise our niceness by 1 (i.e. lower our priority).
        os.nice(1)


def onehot(y):
    """One-hot encode an array of integer class labels."""
    # return_inverse maps each label onto a contiguous index in 0..n-1,
    # so even non-contiguous labels like [3, 7] index valid columns.
    unique, inverse = np.unique(y, return_inverse=True)
    Y = np.zeros((y.shape[0], len(unique)), dtype=np.int8)
    # flat indexing: row i gets a 1 in the column of its class.
    offsets = np.arange(len(y)) * len(unique)
    Y.flat[offsets + inverse.ravel()] = 1
    return Y
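
# a quick sketch of what onehot produces (illustrative values only):
#   onehot(np.array([0, 2, 1, 0]))
#   -> [[1, 0, 0],
#       [0, 0, 1],
#       [0, 1, 0],
#       [1, 0, 0]]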


# more

_log_was_update = False


def log(left, right, update=False):
    """Log a bold "left: right" line to stderr.

    When update is True and the previous call was also an update,
    the previous line is overwritten in place.
    """
    s = "\x1B[1m {:>20}:\x1B[0m {}".format(left, right)
    global _log_was_update
    if update and _log_was_update:
        # \x1B[F moves the cursor up one line, so this rewrites the last line.
        lament('\x1B[F' + s)
    else:
        lament(s)
    _log_was_update = update
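
# a sketch of the intended behavior (hypothetical calls): consecutive
# update=True calls rewrite a single status line instead of scrolling.
#   log("epoch", 1, update=True)
#   log("epoch", 2, update=True)  # overwrites the "epoch: 1" line
#   log("done", True)             # printed on a fresh line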


class Dummy:
    """An empty class, for use as a simple attribute container."""
    pass


class Folding:
    # NOTE: this class assumes classes are *exactly* evenly distributed.

    def __init__(self, inputs, outputs, folds):
        # outputs should be one-hot.
        self.folds = int(folds)

        # temporarily convert the one-hot encoding back to integer indices.
        classes = np.argmax(outputs, axis=-1)

        # we need to do stratified k-folds,
        # so let's put them in an order that's easy to split
        # without breaking the class distribution.
        # don't worry, they'll get shuffled again in train_batched.
        class_n = np.max(classes) + 1
        sorted_inputs = np.array([inputs[classes == n]
                                  for n in range(class_n)], inputs.dtype)
        sorted_outputs = np.arange(class_n) \
            .repeat(sorted_inputs.shape[1]).reshape(sorted_inputs.shape[:2])

        # now to interleave the classes instead of having them grouped:
        inputs = np.swapaxes(sorted_inputs, 0, 1) \
            .reshape(-1, *sorted_inputs.shape[2:])
        outputs = np.swapaxes(sorted_outputs, 0, 1) \
            .reshape(-1, *sorted_outputs.shape[2:])

        # one final thing: we need to make our outputs one-hot again.
        self.inputs = inputs
        self.outputs = onehot(outputs)

        # now we can do stratified folds simply by contiguous slices!
        self.foldstep = len(self.inputs) // self.folds
        assert len(self.inputs) % self.folds == 0, \
            "bad number of folds; cannot be stratified"

    def fold(self, i):
        # rotate the data so that a different contiguous block lands in
        # the validation slice for each i, then split off the last
        # foldstep elements for validation.
        roll = i * self.foldstep
        split = (self.folds - 1) * self.foldstep

        train_inputs = np.roll(self.inputs, roll, axis=0)[:split]
        valid_inputs = np.roll(self.inputs, roll, axis=0)[split:]

        train_outputs = np.roll(self.outputs, roll, axis=0)[:split]
        valid_outputs = np.roll(self.outputs, roll, axis=0)[split:]

        return train_inputs, train_outputs, valid_inputs, valid_outputs
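

# a minimal, hypothetical usage sketch (not part of the original module):
# build a perfectly balanced 3-class toy set, fold it, and check the shapes.
if __name__ == '__main__':
    X = np.random.randn(30, 4).astype(np.float32)
    Y = onehot(np.repeat(np.arange(3), 10))  # 30 labels, exactly 10 per class
    folding = Folding(X, Y, folds=5)
    for i in range(folding.folds):
        tx, ty, vx, vy = folding.fold(i)
        # each fold: 24 training rows, 6 validation rows, outputs one-hot.
        log("fold {}".format(i),
            "train={} valid={}".format(tx.shape, vx.shape))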