thursday/thursday/utilities/scoring.py


def prune_results(results, multiple, _check=False):
    # If there are more than `multiple` results for one optimizer+objective pair,
    # trim from the bottom and the top until only `multiple` are left.
    new_results = {}
    for obj_name, obj_res in results.items():
        new_res = {}
        for fopt, opt_name, extra in sorted(obj_res):
            l = new_res.setdefault(opt_name, [[], []])
            l[0].append(fopt)
            l[1].append(extra)
        slices = {}
        for opt_name, res in new_res.items():
            # In the event that an odd number of results needs to be trimmed,
            # prefer trimming from the bottom (i.e. worse solutions are removed first).
            fopts, extras = res
            down = (len(fopts) - multiple) // 2
            up = len(fopts) - (len(fopts) - multiple + 1) // 2
            slices[opt_name] = slice(down, up)
        for opt_name, res in new_res.items():
            fopts, extras = res
            s = slices[opt_name]
            fopts, extras = fopts[s], extras[s]
            if _check:
                assert len(fopts) == multiple, (len(fopts), multiple)
            if len(fopts) == multiple:
                for fopt, extra in zip(fopts, extras):
                    result = (fopt, opt_name, extra)
                    new_results.setdefault(obj_name, []).append(result)
    return new_results
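
# Illustrative sketch only (made-up objective/optimizer names, not from the repo):
# with five sorted results for one optimizer and multiple=2, one entry is trimmed
# from the low-fopt end and two from the high-fopt end, keeping the middle two.
#
#   results = {"sphere": [(0.1, "adam", None), (0.2, "adam", None), (0.3, "adam", None),
#                         (0.9, "adam", None), (1.5, "adam", None)]}
#   prune_results(results, multiple=2)
#   # -> {"sphere": [(0.2, "adam", None), (0.3, "adam", None)]}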


def perform_another_experimental_scoring_method(results):
    if len(results) and len(something := next(iter(results.values()))[0]) == 3:
        # The results still carry a per-iteration history as their third field:
        # score each history index separately via the flat 2-tuple path below,
        # then average the per-index scores for every optimizer.
        history_length = len(something[2])
        each = {}
        # for i in (history_length - 1,):
        for i in range(history_length):
            # for k, v in results.items(): for vi in v: assert len(vi) == 3, vi
            l = {k: [(res[2][i], res[1]) for res in v] for k, v in results.items()}
            for k, v in perform_another_experimental_scoring_method(l).items():
                each.setdefault(k, []).append(v)
        return {k: sum(v) / len(v) for k, v in each.items()}
    # Flat case: each result is a (fopt, opt_name) pair.  Group the optimizers
    # by final objective value, best (lowest) first.
    new_results = {}
    all_opt_names = set()
    for obj_name, obj_res in results.items():
        all_res = {}
        for fopt, opt_name in obj_res:
            all_res.setdefault(fopt, []).append(opt_name)
            all_opt_names.add(opt_name)
        new_results[obj_name] = dict(sorted(all_res.items()))

    # 53 is the float64 mantissa width: beyond that many halvings, extra terms
    # (and extra ranks) contribute essentially nothing to a score.
    limited_by_floating_point_precision = 53

    # Round r: each objective skips result groups whose optimizers already hold
    # a better rank, then credits the first group that either introduces a new
    # optimizer (it gets rank r) or repeats an optimizer already ranked r
    # (its count goes up by one).
    best_ranks_and_counts = {}
    for outer_rank in range(1, limited_by_floating_point_precision + 1):
        for obj_name, all_res in new_results.items():
            for fopt, opt_names in all_res.items():
                dirty = False
                for opt_name in set(opt_names):
                    if opt_name in best_ranks_and_counts:
                        rank, count = best_ranks_and_counts[opt_name]
                        if rank == outer_rank:
                            best_ranks_and_counts[opt_name] = (rank, count + 1)
                            dirty = True
                    else:
                        best_ranks_and_counts[opt_name] = (outer_rank, 1)
                        dirty = True
                if dirty:
                    break

    # Points halve with each rank step; repeated finishes at the same rank add a
    # geometrically decaying bonus, so a score never reaches twice the base points.
    scores = {k: 0.0 for k in all_opt_names}
    for opt_name, (rank, count) in best_ranks_and_counts.items():
        points = 2 ** (1 - rank)
        count = min(count, limited_by_floating_point_precision)
        scores[opt_name] = sum(points / 2**i for i in range(count))
    return scores
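
# Illustrative sketch only (made-up objective/optimizer names): scoring two
# optimizers on two objectives where "opt1" sits in the best group on both
# (rank 1, count 2) and "opt2" in the second group on both (rank 2, count 2):
#
#   perform_another_experimental_scoring_method({
#       "f1": [(0.1, "opt1"), (0.5, "opt2")],
#       "f2": [(0.2, "opt1"), (0.3, "opt2")],
#   })
#   # -> {"opt1": 1.5, "opt2": 0.75}   (1 + 1/2 and 1/2 + 1/4)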