From 43ce5f0d7e3c6dc4aea4829fae78351451a71427 Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Wed, 4 Mar 2015 07:17:50 -0800 Subject: [PATCH] rewrite functions in pyrex for performance on my machine; * n64_fast.crc is 29 times faster * Yaz0_fast.decode is 226 times faster --- Yaz0.py | 5 --- Yaz0_fast.pyx | 58 ++++++++++++++++++++++++++++++++++ n64.py | 4 +-- n64_fast.pyx | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++ z64dump.py | 39 +++++++++++++++++------ 5 files changed, 176 insertions(+), 17 deletions(-) create mode 100644 Yaz0_fast.pyx create mode 100644 n64_fast.pyx diff --git a/Yaz0.py b/Yaz0.py index 1e9ec4a..426468e 100644 --- a/Yaz0.py +++ b/Yaz0.py @@ -1,6 +1,4 @@ # decoder ripped from: http://www.amnoid.de/gc/yaz0.txt -# encoder ripped from: -# https://bitbucket.org/ottehr/z64-fm/src/9fdc704ca42ff15c8e01b1566d4692d986920c6a/yaz0.c def decode(comp): src = 16 # skip header @@ -51,6 +49,3 @@ def decode(comp): valid -= 1 return uncomp - -def encode(uncomp): - raise Exception('Yaz0_encode: unimplemented') diff --git a/Yaz0_fast.pyx b/Yaz0_fast.pyx new file mode 100644 index 0000000..ae129ad --- /dev/null +++ b/Yaz0_fast.pyx @@ -0,0 +1,58 @@ +# decoder ripped from: http://www.amnoid.de/gc/yaz0.txt + +ctypedef unsigned long ulong +ctypedef unsigned char uchar + +cdef ulong get_size(uchar *comp): + return comp[4]*0x1000000 + comp[5]*0x10000 + comp[6]*0x100 + comp[7] + +cdef void _decode(uchar *comp, uchar *uncomp): + cdef: + ulong src = 16 # skip header + ulong dst = 0 + uchar valid = 0 # bit count + uchar curr = 0 # code byte + + ulong size = get_size(comp) + + uchar byte1, byte2 + ulong dist, copy, i, n + + while dst < size: + if not valid: + curr = comp[src] + src += 1 + valid = 8 + + if curr & 0x80: + uncomp[dst] = comp[src] + dst += 1 + src += 1 + else: + byte1 = comp[src] + byte2 = comp[src + 1] + src += 2 + + dist = ((byte1 & 0xF) << 8) | byte2 + copy = dst - (dist + 1) + + n = byte1 >> 4 + if n: + n += 2 + else: + n = comp[src] + 0x12 + src += 1 + + for i in range(n): + uncomp[dst] = uncomp[copy] + copy += 1 + dst += 1 + + curr <<= 1 + valid -= 1 + +def decode(comp): + size = get_size(comp) + uncomp = bytearray(size) + _decode(comp, uncomp) + return uncomp diff --git a/n64.py b/n64.py index e548c0d..e684a89 100644 --- a/n64.py +++ b/n64.py @@ -47,9 +47,7 @@ def crc(f, bootcode=6105): t3 ^= d - b = d & 0x1F - r = (d << b) | (d >> (32 - b)) - r &= MAX32 + r = ROL(d, d & 0x1F) t5 += r t5 &= MAX32 diff --git a/n64_fast.pyx b/n64_fast.pyx new file mode 100644 index 0000000..ad7040b --- /dev/null +++ b/n64_fast.pyx @@ -0,0 +1,87 @@ +# Based on uCON64's N64 checksum algorithm by Andreas Sterbenz + +ctypedef unsigned long ulong +ctypedef unsigned char uchar + +# ulong must be 32 bits since we expect them to overflow as such +# TODO: test on different machines +assert(sizeof(ulong) == 4) + +from zlib import crc32 + +crc_seeds = { + 6101: 0xF8CA4DDC, + 6102: 0xF8CA4DDC, + 6103: 0xA3886759, + 6105: 0xDF26F436, + 6106: 0x1FEA617A, +} + +bootcode_crcs = { + 0x6170A4A1: 6101, + 0x90BB6CB5: 6102, + 0x0B050EE0: 6103, + 0x98BC2C86: 6105, + 0xACC8580A: 6106, +} + +cdef ulong ROL(ulong i, ulong b): + return (i << b) | (i >> (32 - b)) + +cdef ulong R4(uchar *b): + return b[0]*0x1000000 + b[1]*0x10000 + b[2]*0x100 + b[3] + +cdef object _crc(uchar *data, ulong bootcode, uchar *lookup): + cdef: + ulong seed = crc_seeds[bootcode] + ulong t1, t2, t3, t4, t5, t6 + ulong i, d, b, r, o + ulong crc1, crc2 + + t1 = t2 = t3 = t4 = t5 = t6 = seed + + for i in range(0x1000, 0x101000, 4): + d = R4(data + i) + + if t6 + d < t6: + t4 += 1 + + t6 += d + + t3 ^= d + + r = ROL(d, d & 0x1F) + + t5 += r + + if t2 > d: + t2 ^= r + else: + t2 ^= t6 ^ d + + if bootcode == 6105: + o = i & 0xFF + t1 += R4(lookup + o)^ d + else: + t1 += t5 + + if bootcode == 6103: + crc1 = (t6 ^ t4) + t3 + crc2 = (t5 ^ t2) + t1 + elif bootcode == 6106: + crc1 = t6*t4 + t3 + crc2 = t5*t2 + t1 + else: + crc1 = t6 ^ t4 ^ t3 + crc2 = t5 ^ t2 ^ t1 + return crc1, crc2 + +def crc(f, bootcode=6105): + f.seek(0) + data = f.read() + lookup = data[0x750:0x850] + return _crc(data, bootcode, lookup) + +def bootcode_version(f): + f.seek(0x40) + return bootcode_crcs[crc32(f.read(0x1000 - 0x40)) & 0xFFFFFFFF] diff --git a/z64dump.py b/z64dump.py index 2130674..7f1bac3 100755 --- a/z64dump.py +++ b/z64dump.py @@ -1,18 +1,32 @@ #!/bin/python -# shoutouts to spinout182 import sys import os, os.path from io import BytesIO from hashlib import sha1 +# check for cython +try: + import pyximport +except ImportError: + fast = False +else: + pyximport.install() + fast = True + +if fast: + import Yaz0_fast as Yaz0 + import n64_fast as n64 +else: + import Yaz0 + import n64 + from util import * -from heuristics import * -import n64 -import Yaz0 +from heuristics import detect_format lament = lambda *args, **kwargs: print(*args, file=sys.stderr, **kwargs) +# shoutouts to spinout182 # assume first entry is makerom (0x1060), and second entry begins from makerom dma_sig = b"\x00\x00\x00\x00\x00\x00\x10\x60\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x60" @@ -22,7 +36,7 @@ def dump_wrap(data, fn, size): fn += '.' + kind dump_as(data, fn, size) -def z_dump_file(f, i=0, name=None): +def z_dump_file(f, i=0, name=None, uncompress=True): vs = R4(f.read(4)) # virtual start ve = R4(f.read(4)) # virtual end ps = R4(f.read(4)) # physical start @@ -54,11 +68,18 @@ def z_dump_file(f, i=0, name=None): f.seek(ps) compressed = f.read(pe - ps) if compressed[:4] == b'Yaz0': - data = Yaz0.decode(compressed) - dump_wrap(data, fn, size) + if uncompress: + data = Yaz0.decode(compressed) + dump_wrap(data, fn, size) + else: + dump_wrap(compressed, fn+'.Yaz0', len(compressed)) else: - lament('unknown compression; skipping:', fn) - lament(compressed[:4]) + if uncompress: + lament('unknown compression; skipping:', fn) + lament(compressed[:4]) + else: + lament('unknown compression:', fn) + dump_wrap(compressed, fn, len(compressed)) f.seek(here) return True