This commit is contained in:
Connor Olding 2016-05-25 11:37:34 -07:00
parent e61a32c615
commit b028ee53d9
2 changed files with 16 additions and 34 deletions

View File

@ -60,7 +60,7 @@ class ATTTT():
class PatternBrain(Brain):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
super().__init__(*args, padding='~', **kwargs)
self.tokens = []
@ -147,11 +147,6 @@ class PatternBrain(Brain):
token_value = "".join(self.resolve_tokens(most_common[0]))
new_id = self.new_token(token_value)
if len("".join(self.tokens.values())) > len(all_items):
# this might not ever occur
lament('preventing token dictionary from growing larger than source')
break
# replace the most common two-token sequence
# with one token to represent both
found = np.all(sequences == most_common[0], axis=1)
@ -175,8 +170,6 @@ class PatternBrain(Brain):
lament("new token id {:5} occurs {:8} times: \"{}\"".format(
new_id, len(here[0]), self.tokens[new_id]))
# TODO: find unused tokens?
# reconstruct all_items out of the sequences
all_items = sequences.reshape(-1)[::2][1:].copy()
return all_items
@ -198,7 +191,6 @@ class PatternBrain(Brain):
all_items = self.merge_all(all_items, merges, min_count)
# begin the actual learning
self.padding = '~'
self.reset()
np_item = []
for i in all_items:

View File

@ -9,25 +9,24 @@ def normalize(counter):
s = float(sum(v))
m = float(max(v))
del v
d = {}
for c, cnt in counter.items():
d[c] = (cnt/s, cnt/m)
return d
# return [(c, cnt/s, cnt/m) for c, cnt in counter.items()]
return [(c, cnt/s, cnt/m) for c, cnt in counter.items()]
def normalize_sorted(counter):
    """Return the entries produced by normalize(counter), largest first.

    Entries are ordered by descending value of their element at index 1.
    """
    # The ordering matters beyond debugging: token selection works by
    # lazily subtracting each entry's weight from a random draw, so with
    # unsorted entries the temperature would map onto arbitrary tokens
    # rather than onto more/less common ones.
    entries = list(normalize(counter))
    entries.sort(key=lambda entry: entry[1], reverse=True)
    return entries
# http://nbviewer.jupyter.org/gist/yoavg/d76121dfde2618422139
class Brain:
def __init__(self, order=1, temperature=0.5):
def __init__(self, padding, order=1, temperature=0.5):
self.order = order
self.temperature = temperature
self.padding = None
self.padding = padding
self.reset()
@ -51,21 +50,13 @@ class Brain:
@temperature.setter
def temperature(self, value):
assert(0 < value < 1)
self._temperature = value
if value == 1:
# TODO: proper distribution stuff
self.random = lambda count: np.random.random(count)**2
elif value == 0:
self.random = np.random.random
else:
# +0.25 = -0.0
# +0.50 = +0.5
# +0.75 = +1.0
point75 = 1
const = (point75 * 2 - 1) / math.atanh(0.75 * 2 - 1)
unbound = (math.atanh((1 - value) * 2 - 1) * const + 1) / 2
self.random = easytruncnorm(0, 1, unbound, 0.25).rvs
a = 1 - value * 2
# http://www.mathopenref.com/graphfunctions.html?fx=(a*x-x)/(2*a*x-a-1)&sg=f&sh=f&xh=1&xl=0&yh=1&yl=0&ah=1&al=-1&a=0.5
tweak = lambda x: (a * x - x) / (2 * a * x - a - 1)
self.random = lambda n: 1 - tweak(np.random.random(n))
def learn_all(self, items):
@ -102,7 +93,7 @@ class Brain:
def update(self):
if self.dirty and self._machine:
self.machine = {hist:normalize(items)
self.machine = {hist: normalize_sorted(items)
for hist, items in self._machine.items()}
self.dirty = False
@ -116,9 +107,8 @@ class Brain:
return None
x = self.random(1)
for c, v in dist.items():
# if x <= v: # this is a bad idea
x = x - v[0]
for c, cs, cm in dist:
x = x - cs
if x <= 0:
return c