# optim/onn/utility.py

import sys
import numpy as np


def lament(*args, **kwargs):
    # print to stderr instead of stdout.
    print(*args, file=sys.stderr, **kwargs)


def lower_priority():
    """Set the priority of the process to below-normal."""
    # via https://stackoverflow.com/a/1023269
    if sys.platform == 'win32':
        try:
            import win32api
            import win32process
            import win32con
            pid = win32api.GetCurrentProcessId()
            handle = win32api.OpenProcess(
                win32con.PROCESS_ALL_ACCESS, True, pid)
            win32process.SetPriorityClass(
                handle, win32process.BELOW_NORMAL_PRIORITY_CLASS)
        except ImportError:
            lament("you do not have pywin32 installed.")
            lament("the process priority could not be lowered.")
            lament("consider: python -m pip install pypiwin32")
            lament("consider: conda install pywin32")
    else:
        import os
        os.nice(1)


def onehot(y):
    # convert a vector of integer class labels into a one-hot matrix.
    # assumes the labels are 0 through K-1 with every class present in y.
    unique = np.unique(y)
    Y = np.zeros((y.shape[0], len(unique)), dtype=np.int8)
    offsets = np.arange(len(y)) * len(unique)
    Y.flat[offsets + y.flat] = 1
    return Y
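
# A minimal usage sketch for onehot (added for illustration; not part of the original file):
#   onehot(np.array([0, 2, 1]))
#   -> array([[1, 0, 0],
#             [0, 0, 1],
#             [0, 1, 0]], dtype=int8)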


def batchize(inputs, outputs, batch_size, shuffle=True):
    # return a generator of (inputs, outputs) minibatches plus the batch count.
    # the final batch may be smaller than batch_size.
    batch_count = np.ceil(len(inputs) / batch_size).astype(int)
    if shuffle:
        def gen():
            indices = np.arange(len(inputs))
            np.random.shuffle(indices)
            for b in range(batch_count):
                bi = b * batch_size
                batch_indices = indices[bi:bi + batch_size]
                batch_inputs = inputs[batch_indices]
                batch_outputs = outputs[batch_indices]
                yield batch_inputs, batch_outputs
    else:
        def gen():
            for b in range(batch_count):
                bi = b * batch_size
                batch_inputs = inputs[bi:bi + batch_size]
                batch_outputs = outputs[bi:bi + batch_size]
                yield batch_inputs, batch_outputs
    return gen(), batch_count
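
# A minimal usage sketch for batchize (added for illustration; X and Y are hypothetical
# arrays with matching first dimensions):
#   batches, count = batchize(X, Y, batch_size=32)
#   for batch_x, batch_y in batches:
#       pass  # count batches total; the last one may be smaller than 32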

# more
_log_was_update = False


def log(left, right, update=False):
    # print a bold, right-aligned label and a value to stderr.
    # if update is True and the previous log call was also an update,
    # move the cursor up a line first so the previous line is overwritten.
    s = "\x1B[1m {:>20}:\x1B[0m {}".format(left, right)
    global _log_was_update
    if update and _log_was_update:
        lament('\x1B[F' + s)
    else:
        lament(s)
    _log_was_update = update
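
# A minimal usage sketch for log (added for illustration; not part of the original file):
#   log("epoch", 3)
#   log("loss", 0.1234, update=True)
#   log("loss", 0.1021, update=True)  # overwrites the previous loss line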


class Dummy:
    pass


class Folding:
    # NOTE: this class assumes classes are *exactly* evenly distributed.
    def __init__(self, inputs, outputs, folds):
        # outputs should be one-hot.
        self.folds = int(folds)
        # this temporarily converts one-hot encoding back to integer indices.
        classes = np.argmax(outputs, axis=-1)
        # we need to do stratified k-folds,
        # so let's put them in an order that's easy to split
        # without breaking class distribution.
        # don't worry, they'll get shuffled again in train_batched.
        class_n = np.max(classes) + 1
        sorted_inputs = np.array([inputs[classes == n]
                                  for n in range(class_n)], inputs.dtype)
        sorted_outputs = np.arange(class_n) \
            .repeat(sorted_inputs.shape[1]).reshape(sorted_inputs.shape[:2])
        # now to interleave the classes instead of having them grouped:
        inputs = np.swapaxes(sorted_inputs, 0, 1) \
            .reshape(-1, *sorted_inputs.shape[2:])
        outputs = np.swapaxes(sorted_outputs, 0, 1) \
            .reshape(-1, *sorted_outputs.shape[2:])
        # one final thing: we need to make our outputs one-hot again.
        self.inputs = inputs
        self.outputs = onehot(outputs)
        # now we can do stratified folds simply by contiguous slices!
        self.foldstep = len(self.inputs) // self.folds
        assert len(self.inputs) % self.foldstep == 0, \
            "bad number of folds; cannot be stratified"

    def fold(self, i):
        # rotate the data by i folds, then hold out the final fold for validation.
        roll = i * self.foldstep
        split = (self.folds - 1) * self.foldstep
        train_inputs = np.roll(self.inputs, roll, axis=0)[:split]
        valid_inputs = np.roll(self.inputs, roll, axis=0)[split:]
        train_outputs = np.roll(self.outputs, roll, axis=0)[:split]
        valid_outputs = np.roll(self.outputs, roll, axis=0)[split:]
        return train_inputs, train_outputs, valid_inputs, valid_outputs
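
# A minimal usage sketch for Folding (added for illustration; X and Y are hypothetical
# arrays with the same number of samples in every class, Y one-hot encoded):
#   folding = Folding(X, Y, folds=5)
#   for i in range(folding.folds):
#       train_x, train_y, valid_x, valid_y = folding.fold(i)
#       # fit on (train_x, train_y), evaluate on (valid_x, valid_y)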