1
0
Fork 0
mirror of https://github.com/notwa/mm synced 2025-01-04 18:08:04 -08:00

rewrite functions in pyrex for performance

On my machine:
* n64_fast.crc is 29 times faster
* Yaz0_fast.decode is 226 times faster
This commit is contained in:
Connor Olding 2015-03-04 07:17:50 -08:00
parent 1b3458361c
commit 43ce5f0d7e
5 changed files with 176 additions and 17 deletions

View file

@ -1,6 +1,4 @@
# decoder ripped from: http://www.amnoid.de/gc/yaz0.txt # decoder ripped from: http://www.amnoid.de/gc/yaz0.txt
# encoder ripped from:
# https://bitbucket.org/ottehr/z64-fm/src/9fdc704ca42ff15c8e01b1566d4692d986920c6a/yaz0.c
def decode(comp): def decode(comp):
src = 16 # skip header src = 16 # skip header
@ -51,6 +49,3 @@ def decode(comp):
valid -= 1 valid -= 1
return uncomp return uncomp
def encode(uncomp):
raise Exception('Yaz0_encode: unimplemented')

58
Yaz0_fast.pyx Normal file
View file

@ -0,0 +1,58 @@
# decoder ripped from: http://www.amnoid.de/gc/yaz0.txt
# Short aliases for the unsigned C integer types used by the decoder below.
ctypedef unsigned long ulong
ctypedef unsigned char uchar
# Return the uncompressed size stored big-endian in bytes 4..7 of the header.
cdef ulong get_size(uchar *comp):
    # Widen each byte to ulong *before* shifting. Without the casts the
    # arithmetic is done on the byte promoted to a signed C int, which
    # overflows (undefined behaviour, and sign-extends into a wider ulong)
    # as soon as the top byte is >= 0x80.
    return (((<ulong>comp[4]) << 24) |
            ((<ulong>comp[5]) << 16) |
            ((<ulong>comp[6]) << 8) |
            (<ulong>comp[7]))
# Decompress the stream in `comp` into `uncomp`.
#
# `uncomp` must have room for get_size(comp) bytes. The first 16 bytes of
# `comp` are skipped as header. Each code byte supplies 8 flag bits, consumed
# from the most significant bit down: a set bit copies one literal byte from
# the input, a clear bit copies a run of bytes from earlier in the output.
#
# Raises ValueError when a run refers to data before the start of the output.
# NOTE: the length of `comp` is not known here, so reads from `comp` are not
# bounds-checked; callers must pass a complete stream.
cdef void _decode(uchar *comp, uchar *uncomp) except *:
    cdef:
        ulong src = 16  # skip header
        ulong dst = 0
        uchar valid = 0  # flag bits left in curr
        uchar curr = 0  # code byte
        ulong size = get_size(comp)
        uchar byte1, byte2
        ulong dist, copy, i, n

    while dst < size:
        if not valid:
            # out of flag bits; fetch the next code byte
            curr = comp[src]
            src += 1
            valid = 8

        if curr & 0x80:
            # literal byte
            uncomp[dst] = comp[src]
            dst += 1
            src += 1
        else:
            byte1 = comp[src]
            byte2 = comp[src + 1]
            src += 2

            # low nibble of byte1 and all of byte2 form a 12-bit distance
            dist = ((byte1 & 0xF) << 8) | byte2
            if dist >= dst:
                # dst - (dist + 1) would wrap around and read outside uncomp
                raise ValueError('Yaz0: back-reference before start of output')
            copy = dst - (dist + 1)

            # high nibble of byte1 is the run length; 0 means the length is
            # stored in an extra byte instead
            n = byte1 >> 4
            if n:
                n += 2
            else:
                n = comp[src] + 0x12
                src += 1

            # never write past the size announced in the header
            if n > size - dst:
                n = size - dst

            # byte-by-byte on purpose: the run may overlap its own output
            for i in range(n):
                uncomp[dst] = uncomp[copy]
                copy += 1
                dst += 1

        curr <<= 1
        valid -= 1
def decode(comp):
    """Decompress a Yaz0 stream and return the result as a bytearray.

    comp is the complete compressed stream including its 16-byte header.
    The output length is taken from the header (see get_size).

    Raises ValueError if comp is too short to contain the header.
    """
    # get_size and _decode read through a raw pointer, so make sure the
    # header bytes really exist before touching them.
    if len(comp) < 16:
        raise ValueError('Yaz0: input is shorter than the 16-byte header')
    size = get_size(comp)
    uncomp = bytearray(size)
    _decode(comp, uncomp)
    return uncomp

4
n64.py
View file

@ -47,9 +47,7 @@ def crc(f, bootcode=6105):
t3 ^= d t3 ^= d
b = d & 0x1F r = ROL(d, d & 0x1F)
r = (d << b) | (d >> (32 - b))
r &= MAX32
t5 += r t5 += r
t5 &= MAX32 t5 &= MAX32

87
n64_fast.pyx Normal file
View file

@ -0,0 +1,87 @@
# Based on uCON64's N64 checksum algorithm by Andreas Sterbenz
# The checksum arithmetic below relies on ulong wrapping at exactly 32 bits.
# C's `unsigned long` is 64 bits wide on LP64 platforms (64-bit Linux, macOS),
# where the assertion below would fail at import time, so alias the
# fixed-width type instead.
from libc.stdint cimport uint32_t
ctypedef uint32_t ulong
ctypedef unsigned char uchar
# ulong must be 32 bits since we expect them to overflow as such
assert(sizeof(ulong) == 4)
from zlib import crc32
# Maps a bootcode number to the seed that _crc() uses as the initial value of
# all six of its accumulators.
crc_seeds = {
6101: 0xF8CA4DDC,
6102: 0xF8CA4DDC,
6103: 0xA3886759,
6105: 0xDF26F436,
6106: 0x1FEA617A,
}
# Maps the CRC-32 of file bytes 0x40..0x1000 to a bootcode number; used by
# bootcode_version() to identify which bootcode a file carries.
bootcode_crcs = {
0x6170A4A1: 6101,
0x90BB6CB5: 6102,
0x0B050EE0: 6103,
0x98BC2C86: 6105,
0xACC8580A: 6106,
}
# Rotate the 32-bit value i left by b bits (b is taken modulo 32).
cdef ulong ROL(ulong i, ulong b):
    b &= 31
    # When b is 0 the naive `i >> (32 - b)` shifts a 32-bit value by its full
    # width, which is undefined behaviour in C. Masking the right-shift count
    # turns that case into a shift by 0 and leaves 1..31 untouched.
    return (i << b) | (i >> ((32 - b) & 31))
# Read a 32-bit big-endian unsigned integer from the four bytes at b.
cdef ulong R4(uchar *b):
    # Widen each byte to ulong *before* shifting. Without the casts the
    # arithmetic is done on the byte promoted to a signed C int, which
    # overflows (undefined behaviour) whenever b[0] >= 0x80.
    return (((<ulong>b[0]) << 24) |
            ((<ulong>b[1]) << 16) |
            ((<ulong>b[2]) << 8) |
            (<ulong>b[3]))
# Compute the two checksum words over data[0x1000:0x101000].
#
# data     -- pointer to the file contents; at least 0x101000 bytes must be
#             readable, this is not checked here
# bootcode -- bootcode number; must be a key of crc_seeds (KeyError otherwise)
# lookup   -- pointer to 0x100 bytes, only read when bootcode is 6105
#
# Returns the tuple (crc1, crc2). All arithmetic is expected to wrap at
# 32 bits.
cdef object _crc(uchar *data, ulong bootcode, uchar *lookup):
    cdef:
        ulong seed = crc_seeds[bootcode]
        ulong t1, t2, t3, t4, t5, t6
        ulong i, d, r, o
        ulong crc1, crc2

    t1 = t2 = t3 = t4 = t5 = t6 = seed

    for i in range(0x1000, 0x101000, 4):
        d = R4(data + i)

        # t4 counts how often the running sum t6 wraps around
        if t6 + d < t6:
            t4 += 1
        t6 += d

        t3 ^= d

        r = ROL(d, d & 0x1F)
        t5 += r

        if t2 > d:
            t2 ^= r
        else:
            t2 ^= t6 ^ d

        if bootcode == 6105:
            o = i & 0xFF
            t1 += R4(lookup + o) ^ d
        else:
            # the reference algorithm XORs with the current word in this
            # branch as well; a bare `t1 += t5` yields a wrong second
            # checksum word for every bootcode other than 6105
            t1 += t5 ^ d

    if bootcode == 6103:
        crc1 = (t6 ^ t4) + t3
        crc2 = (t5 ^ t2) + t1
    elif bootcode == 6106:
        crc1 = t6*t4 + t3
        crc2 = t5*t2 + t1
    else:
        crc1 = t6 ^ t4 ^ t3
        crc2 = t5 ^ t2 ^ t1

    return crc1, crc2
def crc(f, bootcode=6105):
    """Compute the two checksum words of the file object f.

    f is rewound and read in full. bootcode selects the seed and the
    checksum variant and must be a key of crc_seeds.

    Returns the tuple (crc1, crc2).
    Raises ValueError if the file is too short to be checksummed.
    """
    f.seek(0)
    data = f.read()
    # _crc reads data[0x1000:0x101000] through a raw pointer without any
    # bounds checking, so reject short input here instead of reading past
    # the end of the buffer.
    if len(data) < 0x101000:
        raise ValueError('file is too short to checksum '
                         '(need at least 0x101000 bytes)')
    lookup = data[0x750:0x850]
    return _crc(data, bootcode, lookup)
def bootcode_version(f):
    """Identify the bootcode of the file object f.

    Hashes bytes 0x40..0x1000 of f with CRC-32 and looks the result up in
    bootcode_crcs. Raises KeyError when the hash is not in that table.
    """
    f.seek(0x40)
    bootcode = f.read(0x1000 - 0x40)
    checksum = crc32(bootcode) & 0xFFFFFFFF
    return bootcode_crcs[checksum]

View file

@ -1,18 +1,32 @@
#!/bin/python #!/bin/python
# shoutouts to spinout182
import sys import sys
import os, os.path import os, os.path
from io import BytesIO from io import BytesIO
from hashlib import sha1 from hashlib import sha1
from util import * # check for cython
from heuristics import * try:
import n64 import pyximport
except ImportError:
fast = False
else:
pyximport.install()
fast = True
if fast:
import Yaz0_fast as Yaz0
import n64_fast as n64
else:
import Yaz0 import Yaz0
import n64
from util import *
from heuristics import detect_format
lament = lambda *args, **kwargs: print(*args, file=sys.stderr, **kwargs) lament = lambda *args, **kwargs: print(*args, file=sys.stderr, **kwargs)
# shoutouts to spinout182
# assume first entry is makerom (0x1060), and second entry begins from makerom # assume first entry is makerom (0x1060), and second entry begins from makerom
dma_sig = b"\x00\x00\x00\x00\x00\x00\x10\x60\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x60" dma_sig = b"\x00\x00\x00\x00\x00\x00\x10\x60\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x60"
@ -22,7 +36,7 @@ def dump_wrap(data, fn, size):
fn += '.' + kind fn += '.' + kind
dump_as(data, fn, size) dump_as(data, fn, size)
def z_dump_file(f, i=0, name=None): def z_dump_file(f, i=0, name=None, uncompress=True):
vs = R4(f.read(4)) # virtual start vs = R4(f.read(4)) # virtual start
ve = R4(f.read(4)) # virtual end ve = R4(f.read(4)) # virtual end
ps = R4(f.read(4)) # physical start ps = R4(f.read(4)) # physical start
@ -54,11 +68,18 @@ def z_dump_file(f, i=0, name=None):
f.seek(ps) f.seek(ps)
compressed = f.read(pe - ps) compressed = f.read(pe - ps)
if compressed[:4] == b'Yaz0': if compressed[:4] == b'Yaz0':
if uncompress:
data = Yaz0.decode(compressed) data = Yaz0.decode(compressed)
dump_wrap(data, fn, size) dump_wrap(data, fn, size)
else: else:
dump_wrap(compressed, fn+'.Yaz0', len(compressed))
else:
if uncompress:
lament('unknown compression; skipping:', fn) lament('unknown compression; skipping:', fn)
lament(compressed[:4]) lament(compressed[:4])
else:
lament('unknown compression:', fn)
dump_wrap(compressed, fn, len(compressed))
f.seek(here) f.seek(here)
return True return True