1
0
Fork 0
mirror of https://github.com/notwa/mm synced 2024-11-05 01:19:02 -08:00

rewrite functions in Cython for performance

on my machine:
* n64_fast.crc is 29 times faster
* Yaz0_fast.decode is 226 times faster
This commit is contained in:
Connor Olding 2015-03-04 07:17:50 -08:00
parent 1b3458361c
commit 43ce5f0d7e
5 changed files with 176 additions and 17 deletions

View file

@ -1,6 +1,4 @@
# decoder ripped from: http://www.amnoid.de/gc/yaz0.txt
# encoder ripped from:
# https://bitbucket.org/ottehr/z64-fm/src/9fdc704ca42ff15c8e01b1566d4692d986920c6a/yaz0.c
def decode(comp):
src = 16 # skip header
@ -51,6 +49,3 @@ def decode(comp):
valid -= 1
return uncomp
def encode(uncomp):
    """Placeholder for a Yaz0 encoder; always raises.

    Raises:
        NotImplementedError: unconditionally, since no encoder exists yet.
            This is a subclass of Exception, so callers that catch
            Exception keep working.
    """
    raise NotImplementedError('Yaz0_encode: unimplemented')

58
Yaz0_fast.pyx Normal file
View file

@ -0,0 +1,58 @@
# decoder ripped from: http://www.amnoid.de/gc/yaz0.txt
# Short aliases for the C integer types used throughout this module:
# `ulong` for offsets, sizes and counters, `uchar` for raw bytes.
ctypedef unsigned long ulong
ctypedef unsigned char uchar
cdef ulong get_size(uchar *comp):
    # Return the big-endian 32-bit word stored at bytes 4..7 of `comp`,
    # which the decoder uses as the size of the uncompressed output.
    # The caller must guarantee that at least 8 bytes are readable.
    #
    # Each byte is widened to ulong *before* shifting: the previous form,
    # `comp[4] * 0x1000000`, was evaluated as a signed C int and overflowed
    # whenever the top byte was >= 0x80.
    return (((<ulong>comp[4]) << 24) | ((<ulong>comp[5]) << 16) |
            ((<ulong>comp[6]) << 8) | (<ulong>comp[7]))
cdef void _decode(uchar *comp, uchar *uncomp):
    # Decompress the Yaz0 stream at `comp` into `uncomp`.
    #
    # `uncomp` must have room for get_size(comp) bytes. Each code byte
    # supplies 8 flag bits, consumed from the most significant bit down:
    # a set bit copies one literal byte from the input, a clear bit copies
    # a run of bytes from earlier in the output.
    #
    # Corrupt streams are handled defensively instead of touching memory
    # outside `uncomp`: a back-reference that points before the start of the
    # output stops decoding (the rest of `uncomp` is left untouched), and a
    # run that would overshoot the declared size is truncated.
    # NOTE: reads from `comp` cannot be bounds-checked here because the
    # compressed length is not passed in.
    cdef:
        ulong src = 16  # skip header
        ulong dst = 0
        uchar valid = 0  # bit count
        uchar curr = 0  # code byte
        ulong size = get_size(comp)
        uchar byte1, byte2
        ulong dist, copy, i, n

    while dst < size:
        if not valid:
            # All flag bits used up; fetch the next code byte.
            curr = comp[src]
            src += 1
            valid = 8

        if curr & 0x80:
            # Literal: copy one byte straight through.
            uncomp[dst] = comp[src]
            dst += 1
            src += 1
        else:
            # Back-reference: 12-bit distance plus a run length.
            byte1 = comp[src]
            byte2 = comp[src + 1]
            src += 2

            dist = ((byte1 & 0xF) << 8) | byte2
            if dist >= dst:
                # dst - (dist + 1) would wrap around below zero.
                return
            copy = dst - (dist + 1)

            n = byte1 >> 4
            if n:
                n += 2
            else:
                # A zero nibble means the length lives in a third byte.
                n = comp[src] + 0x12
                src += 1

            # Never write past the end of the output buffer.
            if n > size - dst:
                n = size - dst

            # Byte-by-byte on purpose: source and destination may overlap.
            for i in range(n):
                uncomp[dst] = uncomp[copy]
                copy += 1
                dst += 1

        curr <<= 1
        valid -= 1
def decode(comp):
    """Decompress a Yaz0 blob and return the result as a bytearray.

    `comp` is the complete compressed buffer, including its 16-byte header;
    the output length is read from that header.

    Raises:
        ValueError: if `comp` is too short to contain the header. Without
            this check the header would be read through a raw pointer past
            the end of the buffer.
    """
    if len(comp) < 16:
        raise ValueError('Yaz0_decode: input is shorter than the header')
    size = get_size(comp)
    # bytearray(size) is zero-filled and handed to C as a writable buffer.
    uncomp = bytearray(size)
    _decode(comp, uncomp)
    return uncomp

4
n64.py
View file

@ -47,9 +47,7 @@ def crc(f, bootcode=6105):
t3 ^= d
b = d & 0x1F
r = (d << b) | (d >> (32 - b))
r &= MAX32
r = ROL(d, d & 0x1F)
t5 += r
t5 &= MAX32

87
n64_fast.pyx Normal file
View file

@ -0,0 +1,87 @@
# Based on uCON64's N64 checksum algorithm by Andreas Sterbenz
from libc.stdint cimport uint32_t

from zlib import crc32

# The checksum arithmetic below relies on wrap-around at exactly 32 bits.
# `unsigned long` is 64 bits wide on LP64 platforms (e.g. 64-bit Linux and
# macOS), so use the fixed-width type; the alias keeps the name `ulong`
# because the rest of the module refers to it.
ctypedef uint32_t ulong
ctypedef unsigned char uchar

# Cheap guard in case the typedef above is ever changed.
assert sizeof(ulong) == 4

# Bootcode number -> starting value for the checksum accumulators.
crc_seeds = {
    6101: 0xF8CA4DDC,
    6102: 0xF8CA4DDC,
    6103: 0xA3886759,
    6105: 0xDF26F436,
    6106: 0x1FEA617A,
}

# CRC32 of the bootcode region -> bootcode number.
bootcode_crcs = {
    0x6170A4A1: 6101,
    0x90BB6CB5: 6102,
    0x0B050EE0: 6103,
    0x98BC2C86: 6105,
    0xACC8580A: 6106,
}
cdef ulong ROL(ulong i, ulong b):
    # Rotate the 32-bit value `i` left by `b` bits (`b` is taken modulo 32).
    #
    # The right-shift count is masked so that b == 0 shifts by 0 instead of
    # by 32: shifting a 32-bit operand by its full width is undefined in C
    # and only happened to work on CPUs that mask the shift count.
    b &= 31
    return (i << b) | (i >> ((32 - b) & 31))
cdef ulong R4(uchar *b):
    # Read the 4 bytes at `b` as one big-endian unsigned 32-bit word.
    #
    # Each byte is widened to ulong *before* shifting: the previous form,
    # `b[0] * 0x1000000`, was evaluated as a signed C int and overflowed
    # whenever the first byte was >= 0x80.
    return (((<ulong>b[0]) << 24) | ((<ulong>b[1]) << 16) |
            ((<ulong>b[2]) << 8) | (<ulong>b[3]))
cdef object _crc(uchar *data, ulong bootcode, uchar *lookup):
    # Compute the two checksum words over data[0x1000:0x101000].
    #
    # `bootcode` selects the seed (via crc_seeds) and the variant of the
    # final mixing step; `lookup` is a 0x100-byte table that is only read
    # for bootcode 6105. Returns the pair (crc1, crc2).
    # All arithmetic is expected to wrap at 32 bits.
    cdef:
        ulong start = crc_seeds[bootcode]
        ulong acc1, acc2, acc3, acc4, acc5, acc6
        ulong offset, word, rotated
        ulong first, second
        bint uses_lookup = bootcode == 6105

    acc1 = acc2 = acc3 = acc4 = acc5 = acc6 = start

    offset = 0x1000
    while offset < 0x101000:
        word = R4(data + offset)

        # acc6 is a running sum; acc4 counts how often it wrapped around.
        acc6 += word
        if acc6 < word:
            acc4 += 1

        acc3 ^= word

        rotated = ROL(word, word & 0x1F)
        acc5 += rotated

        if word < acc2:
            acc2 ^= rotated
        else:
            acc2 ^= acc6 ^ word

        if uses_lookup:
            acc1 += R4(lookup + (offset & 0xFF)) ^ word
        else:
            acc1 += acc5

        offset += 4

    # Fold the six accumulators into two words; the rule depends on bootcode.
    if bootcode == 6103:
        first = (acc6 ^ acc4) + acc3
        second = (acc5 ^ acc2) + acc1
    elif bootcode == 6106:
        first = acc6*acc4 + acc3
        second = acc5*acc2 + acc1
    else:
        first = acc6 ^ acc4 ^ acc3
        second = acc5 ^ acc2 ^ acc1

    return first, second
def crc(f, bootcode=6105):
    """Compute the checksum pair for the ROM image in file object `f`.

    The whole file is read from offset 0. `bootcode` must be one of the
    keys of crc_seeds. Returns the tuple (crc1, crc2).

    Raises:
        ValueError: if the image is too short to cover the checksummed
            range 0x1000..0x101000. The checksum loop reads through a raw
            pointer, so a short buffer must be rejected here.
        KeyError: if `bootcode` is not a key of crc_seeds.
    """
    f.seek(0)
    data = f.read()
    if len(data) < 0x101000:
        raise ValueError('crc: ROM image is shorter than 0x101000 bytes')
    # Only consulted for bootcode 6105.
    lookup = data[0x750:0x850]
    return _crc(data, bootcode, lookup)
def bootcode_version(f):
    """Identify the bootcode of the ROM image in file object `f`.

    Takes the CRC32 of bytes 0x40..0x1000 and looks it up in bootcode_crcs.
    Raises KeyError when the checksum is not in that table.
    """
    f.seek(0x40)
    region = f.read(0x1000 - 0x40)
    checksum = crc32(region) & 0xFFFFFFFF
    return bootcode_crcs[checksum]

View file

@ -1,18 +1,32 @@
#!/bin/python
# shoutouts to spinout182
import sys
import os, os.path
from io import BytesIO
from hashlib import sha1
from util import *
from heuristics import *
import n64
# check for cython
try:
import pyximport
except ImportError:
fast = False
else:
pyximport.install()
fast = True
if fast:
import Yaz0_fast as Yaz0
import n64_fast as n64
else:
import Yaz0
import n64
from util import *
from heuristics import detect_format
def lament(*args, **kwargs):
    """Print to standard error; takes the same arguments as print()."""
    print(*args, file=sys.stderr, **kwargs)
# shoutouts to spinout182
# assume first entry is makerom (0x1060), and second entry begins from makerom
dma_sig = b"\x00\x00\x00\x00\x00\x00\x10\x60\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x60"
@ -22,7 +36,7 @@ def dump_wrap(data, fn, size):
fn += '.' + kind
dump_as(data, fn, size)
def z_dump_file(f, i=0, name=None):
def z_dump_file(f, i=0, name=None, uncompress=True):
vs = R4(f.read(4)) # virtual start
ve = R4(f.read(4)) # virtual end
ps = R4(f.read(4)) # physical start
@ -54,11 +68,18 @@ def z_dump_file(f, i=0, name=None):
f.seek(ps)
compressed = f.read(pe - ps)
if compressed[:4] == b'Yaz0':
if uncompress:
data = Yaz0.decode(compressed)
dump_wrap(data, fn, size)
else:
dump_wrap(compressed, fn+'.Yaz0', len(compressed))
else:
if uncompress:
lament('unknown compression; skipping:', fn)
lament(compressed[:4])
else:
lament('unknown compression:', fn)
dump_wrap(compressed, fn, len(compressed))
f.seek(here)
return True