import sys

import numpy as np


def lament(*args, **kwargs):
    """Print to stderr; accepts the same arguments as print()."""
    print(*args, file=sys.stderr, **kwargs)


def lower_priority():
    """Set the priority of the process to below-normal."""
    # via https://stackoverflow.com/a/1023269
    if sys.platform == 'win32':
        try:
            import win32api
            import win32process
            import win32con
            pid = win32api.GetCurrentProcessId()
            handle = win32api.OpenProcess(
                win32con.PROCESS_ALL_ACCESS, True, pid)
            win32process.SetPriorityClass(
                handle, win32process.BELOW_NORMAL_PRIORITY_CLASS)
        except ImportError:
            # best-effort: missing pywin32 is not fatal, just advisory.
            lament("you do not have pywin32 installed.")
            lament("the process priority could not be lowered.")
            lament("consider: python -m pip install pypiwin32")
            lament("consider: conda install pywin32")
    else:
        import os
        os.nice(1)


def onehot(y):
    """Return an (N, k) int8 one-hot encoding of the labels in y.

    k is the number of distinct label values; columns are ordered by
    sorted label value. Assumes y holds one label per row of the output
    (1-D, or any shape whose first axis is N with one value per row).
    """
    unique = np.unique(y)
    Y = np.zeros((y.shape[0], len(unique)), dtype=np.int8)
    offsets = np.arange(len(y)) * len(unique)
    # map each label through its index in the sorted unique values, so
    # non-contiguous labels (e.g. [1, 3]) don't index out of bounds.
    Y.flat[offsets + np.searchsorted(unique, np.ravel(y))] = 1
    return Y


# more

_log_was_update = False


def log(left, right, update=False):
    """Log "left: right" to stderr with ANSI bolding.

    When update is True and the previous call was also an update,
    the cursor moves up one line first, overwriting the prior entry.
    """
    s = "\x1B[1m {:>20}:\x1B[0m {}".format(left, right)
    global _log_was_update
    if update and _log_was_update:
        # \x1B[F: move cursor to the start of the previous line.
        lament('\x1B[F' + s)
    else:
        lament(s)
    _log_was_update = update


class Dummy:
    pass


class Folding:
    # NOTE: this class assumes classes are *exactly* evenly distributed.
    def __init__(self, inputs, outputs, folds):
        """Prepare stratified k-fold splits of (inputs, outputs).

        outputs should be one-hot. Samples are reordered so the classes
        interleave, letting each fold be a contiguous slice while keeping
        the class distribution intact.
        """
        self.folds = int(folds)

        # this temporarily converts one-hot encoding back to integer indices.
        classes = np.argmax(outputs, axis=-1)

        # we need to do stratified k-folds,
        # so let's put them in an order that's easy to split
        # without breaking class distribution.
        # don't worry, they'll get shuffled again in train_batched.
        class_n = np.max(classes) + 1
        sorted_inputs = np.array([inputs[classes == n]
                                  for n in range(class_n)], inputs.dtype)
        sorted_outputs = np.arange(class_n
                                   ).repeat(sorted_inputs.shape[1]
                                            ).reshape(sorted_inputs.shape[:2])

        # now to interleave the classes instead of having them grouped:
        inputs = np.swapaxes(sorted_inputs, 0, 1
                             ).reshape(-1, *sorted_inputs.shape[2:])
        outputs = np.swapaxes(sorted_outputs, 0, 1
                              ).reshape(-1, *sorted_outputs.shape[2:])

        # one final thing: we need to make our outputs one-hot again.
        self.inputs = inputs
        self.outputs = onehot(outputs)

        # now we can do stratified folds simply by contiguous slices!
        self.foldstep = len(self.inputs) // self.folds
        assert len(self.inputs) % self.foldstep == 0, \
            "bad number of folds; cannot be stratified"

    def fold(self, i):
        """Return (train_inputs, train_outputs, valid_inputs, valid_outputs)
        for fold index i; the validation slice is the last foldstep rows
        after rolling the data by i * foldstep."""
        roll = i * self.foldstep
        split = (self.folds - 1) * self.foldstep
        # roll once per array instead of once per slice.
        inputs = np.roll(self.inputs, roll, axis=0)
        outputs = np.roll(self.outputs, roll, axis=0)
        return (inputs[:split], outputs[:split],
                inputs[split:], outputs[split:])