implement a basic compressor
This commit is contained in:
parent
f4bad735d2
commit
2553855a4d
1 changed files with 228 additions and 22 deletions
250
fs.py
250
fs.py
|
@ -9,11 +9,146 @@ import sys
|
|||
|
||||
from util import *
|
||||
|
||||
DEBUG = False
|
||||
|
||||
lament = lambda *args, **kwargs: print(*args, file=sys.stderr, **kwargs)
|
||||
heresay = os.path.split(sys.argv[0])[0]
|
||||
|
||||
def create_rom(path):
|
||||
raise Exception("TODO")
|
||||
# TODO: don't hardcode this? look into how the game handles it.
|
||||
blocks = (
|
||||
(0x120000, 0x20000),
|
||||
(0x140000, 0x20000),
|
||||
(0x160000, 0x20000),
|
||||
(0x180000, 0x20000),
|
||||
(0x1A0000, 0x20000),
|
||||
(0x1C0000, 0x20000),
|
||||
(0x1E0000, 0x20000),
|
||||
(0x200000, 0x40000),
|
||||
(0x240000, 0x20000),
|
||||
(0x260000, 0x20000),
|
||||
(0x280000, 0x20000),
|
||||
(0x2A0000, 0x20000),
|
||||
(0x2C0000, 0x20000),
|
||||
(0x2E0000, 0x20000),
|
||||
(0x300000, 0x500000),
|
||||
)
|
||||
|
||||
def hexdump(data):
|
||||
# for debugging.
|
||||
for i in range((len(data) + 15) // 16):
|
||||
butts = data[i * 16:i * 16 + 16]
|
||||
print(("{:06X} |" + " {:02X}" * len(butts)).format(i * 16, *butts))
|
||||
|
||||
def compress(data, mode='greedy'):
|
||||
# TODO: 'best' and 'worst' modes.
|
||||
assert mode == 'greedy', f"unknown mode: {mode}"
|
||||
|
||||
comp = bytearray()
|
||||
comp.extend(W4(len(data)))
|
||||
if len(data) == 0:
|
||||
return comp
|
||||
|
||||
buf_len = 1024
|
||||
buf = bytearray(buf_len)
|
||||
buf_i = 0x3BE
|
||||
min_len = 0 + 3
|
||||
max_len = 0x3F + 3
|
||||
|
||||
def find_match(sub):
|
||||
if len(sub) < min_len:
|
||||
return None, None
|
||||
match_i, match_len = None, None
|
||||
|
||||
for i in range(buf_len):
|
||||
match_i, match_len = (buf_i - i) % buf_len, 0
|
||||
while buf[(match_i + match_len) % buf_len] == sub[match_len]:
|
||||
if (match_i + match_len) % buf_len == buf_i:
|
||||
# TODO: handle pseudo-writes to buffer.
|
||||
break
|
||||
match_len += 1
|
||||
if match_len == max_len:
|
||||
break
|
||||
if match_len == len(sub):
|
||||
break
|
||||
if match_len < min_len:
|
||||
match_i, match_len = None, None
|
||||
else:
|
||||
break
|
||||
|
||||
if match_len is not None:
|
||||
assert min_len <= match_len <= max_len
|
||||
return match_i, match_len
|
||||
|
||||
shift = 0
|
||||
shifted = 0
|
||||
last_shift_i = None
|
||||
|
||||
def push_shift():
|
||||
nonlocal comp, shift, shifted
|
||||
assert last_shift_i is not None
|
||||
comp[last_shift_i] = shift
|
||||
shift = 0
|
||||
shifted = 0
|
||||
|
||||
def shift_in(x):
|
||||
nonlocal shift, shifted
|
||||
assert 0 <= x <= 1
|
||||
assert shifted < 8
|
||||
shift >>= 1
|
||||
shift |= x << 7
|
||||
#assert 0 <= shift <= 0xFF
|
||||
shifted += 1
|
||||
|
||||
i = 0
|
||||
while i < len(data):
|
||||
sub = data[i:i + max_len]
|
||||
match_i, match_len = find_match(sub)
|
||||
|
||||
if DEBUG:
|
||||
if len(sub) < min_len:
|
||||
print("pos {:06X}: too short to match".format(i))
|
||||
else:
|
||||
match_str = "no match"
|
||||
if match_i is not None:
|
||||
match_str = "{:03X}:{}".format(match_i, match_len)
|
||||
fmt = "pos {:06X}: matching {:02X}{:02X}{:02X}: {}"
|
||||
print(fmt.format(i, sub[0], sub[1], sub[2], match_str))
|
||||
|
||||
if last_shift_i is None:
|
||||
last_shift_i = len(comp)
|
||||
comp.append(0)
|
||||
shift = 0
|
||||
shifted = 0
|
||||
|
||||
if match_i is None or match_len is None:
|
||||
shift_in(1)
|
||||
comp.append(sub[0])
|
||||
buf[buf_i] = sub[0]
|
||||
i += 1
|
||||
buf_i = (buf_i + 1) % buf_len
|
||||
else:
|
||||
shift_in(0)
|
||||
a = match_i & 0xFF
|
||||
b = ((match_i & 0x300) >> 2) | (match_len - 3)
|
||||
comp.append(a)
|
||||
comp.append(b)
|
||||
for j in range(match_len):
|
||||
buf[buf_i] = sub[j]
|
||||
buf_i = (buf_i + 1) % buf_len
|
||||
i += match_len
|
||||
|
||||
if shifted == 8:
|
||||
push_shift()
|
||||
last_shift_i = None
|
||||
|
||||
if last_shift_i is not None:
|
||||
comp[last_shift_i] = shift >> (8 - shifted)
|
||||
|
||||
if DEBUG:
|
||||
decompress(comp[4:], len(data))
|
||||
|
||||
assert i == len(data)
|
||||
return comp
|
||||
|
||||
def decompress(data, expected_size):
|
||||
decomp = bytearray()
|
||||
|
@ -56,35 +191,105 @@ def decompress(data, expected_size):
|
|||
write(read_buf(copy_offset + ci))
|
||||
i += 2
|
||||
|
||||
if DEBUG:
|
||||
hexdump(data)
|
||||
print('-' * (6 + 2 + 3 * 16))
|
||||
hexdump(decomp)
|
||||
|
||||
if len(decomp) > expected_size:
|
||||
raise Exception("decomp is larger than it said it would be.")
|
||||
|
||||
return decomp
|
||||
|
||||
def create_rom(d):
|
||||
root, _, files = next(os.walk(d))
|
||||
files.sort()
|
||||
|
||||
dirs = []
|
||||
for block_meta in blocks:
|
||||
dirs.append([])
|
||||
|
||||
rom_size = 8*1024*1024
|
||||
base_offset = 0x2008
|
||||
# TODO: don't hardcode?:
|
||||
skip_14 = [
|
||||
32, # sound or music bank?
|
||||
33, # sound or music bank?
|
||||
220, # ?
|
||||
221, # ?
|
||||
267, # ?
|
||||
]
|
||||
|
||||
with open(d+'.z64', 'w+b') as f:
|
||||
# initialize with zeros
|
||||
f.write(bytearray(rom_size))
|
||||
f.seek(0)
|
||||
|
||||
old_di = -1
|
||||
old_fi = -1
|
||||
for i, fn in enumerate(files):
|
||||
if fn == 'misc.bin':
|
||||
with open(os.path.join(d, fn), 'rb') as f2:
|
||||
data = f2.read()
|
||||
|
||||
f.seek(0)
|
||||
f.write(data)
|
||||
elif '-' in fn:
|
||||
extless = fn.split('.')[0]
|
||||
di, fi = extless.split('-')
|
||||
di, fi = int(di), int(fi)
|
||||
if di != old_di:
|
||||
old_fi = -1
|
||||
old_di = di
|
||||
if fi != old_fi + 1:
|
||||
raise Exception("file indices must be consecutive")
|
||||
with open(os.path.join(d, fn), 'rb') as f2:
|
||||
data = f2.read()
|
||||
dirs[di].append(data)
|
||||
old_fi = fi
|
||||
else:
|
||||
lament("skipping unknown file:", fn)
|
||||
|
||||
for di, files in enumerate(dirs):
|
||||
block_offset, block_size = blocks[di]
|
||||
f.seek(block_offset)
|
||||
f.write(W4(base_offset))
|
||||
f.write(W4(0x400))
|
||||
|
||||
offset = 0
|
||||
for fi, data in enumerate(files):
|
||||
f.write(W4(offset))
|
||||
if fi == 0 and di != 14 or di == 14 and fi in skip_14:
|
||||
new_data = data
|
||||
else:
|
||||
new_data = compress(data)
|
||||
fmt = "compressed {:02}-{:03}.bin from {} bytes into {} ({:.2%})"
|
||||
percent = len(new_data) / len(data) if len(data) > 0 else 1
|
||||
print(fmt.format(di, fi, len(data), len(new_data), percent))
|
||||
size = len(new_data)
|
||||
f.write(W4(size))
|
||||
offset += size
|
||||
files[fi] = new_data
|
||||
|
||||
#if fi != 0: break # DEBUG
|
||||
|
||||
while f.tell() & 0xFFFF < 0x2008:
|
||||
f.write(W4(0xFFFFFFFF))
|
||||
f.write(W4(0xFFFFFFFF))
|
||||
|
||||
assert f.tell() & 0xFFFF == 0x2008
|
||||
for data in files:
|
||||
f.write(data)
|
||||
|
||||
assert f.tell() - block_offset < block_size
|
||||
|
||||
#break # DEBUG
|
||||
|
||||
def dump_files(f):
|
||||
# TODO:
|
||||
misc = f.read(0x120000)
|
||||
dump_as(misc, "misc.bin")
|
||||
|
||||
# TODO: don't hardcode this? look into how the game handles it.
|
||||
blocks = (
|
||||
(0x120000, 0x20000),
|
||||
(0x140000, 0x20000),
|
||||
(0x160000, 0x20000),
|
||||
(0x180000, 0x20000),
|
||||
(0x1A0000, 0x20000),
|
||||
(0x1C0000, 0x20000),
|
||||
(0x1E0000, 0x20000),
|
||||
(0x200000, 0x40000),
|
||||
(0x240000, 0x20000),
|
||||
(0x260000, 0x20000),
|
||||
(0x280000, 0x20000),
|
||||
(0x2A0000, 0x20000),
|
||||
(0x2C0000, 0x20000),
|
||||
(0x2E0000, 0x20000),
|
||||
(0x300000, 0x4C0000),
|
||||
)
|
||||
|
||||
for dir_index, block_meta in enumerate(blocks):
|
||||
block_offset, block_size = block_meta
|
||||
f.seek(block_offset)
|
||||
|
@ -115,7 +320,8 @@ def dump_files(f):
|
|||
uncompressed_size = R4(b'\0' + f.read(3))
|
||||
data = decompress(f.read(size - 4), uncompressed_size)
|
||||
else:
|
||||
data = bytes(hint) + f.read(size - 1)
|
||||
print("hinted:", fn)
|
||||
data = bytes([hint]) + f.read(size - 1)
|
||||
dump_as(data, fn)
|
||||
|
||||
f.seek(header_resume)
|
||||
|
|
Loading…
Add table
Reference in a new issue