#!/usr/bin/env python3 import sys import os, os.path from io import BytesIO import struct import re lament = lambda *args, **kwargs: print(*args, file=sys.stderr, **kwargs) unpack = lambda fmt, data: struct.unpack(fmt, bytes(data)) extended='°ÀÁÂÄÇÈÉÊËÌÍÎÏÑÒÓÔÖÙÚÛÜßàáâäçèéêëìíîïñòóôöùúûü¡ª¿' def parse_jp_text(f): s = '' bs = bytearray() def arg2(): return '{:04X}'.format(unpack('>H', f.read(2))[0]).encode('shift-jis') lastx = 0 special = f.read(12) # TODO while 1: b1 = f.read(1) if not b1: break b2 = f.read(1) if not b2: break x1 = ord(b1) x2 = ord(b2) x = x1*0x100 + x2 odd = x1 % 2 xs = bytes('{:04X}'.format(x), 'shift-jis') # shift-jis shifty1 = (0x81 <= x1 <= 0x9F and x1 != 0x85 and x1 != 0x86) or 0xE0 <= x1 <= 0xEF shifty2e = 0x9F <= x2 <= 0xFC shifty2o = 0x40 <= x2 <= 0x9E try: (b1 + b2).decode('shift-jis') shifty = True except: shifty = False if x == 0x0020: bs += b' ' # doesn't seem to be fullwidth? elif x == 0x0500: # end mark break elif x == 0x0009: # if verbose: # bs += b'[pause??]' bs += b'\n\n' elif x == 0x000A: bs += b'\n' elif x == 0x000B: # if verbose: # bs += b'[pause?]' bs += b'\n\n' elif x == 0x000C: if verbose: bs += b'[pause]' bs += b'\n\n' elif x == 0x001F: if verbose: bs += b'[spaces '+arg2()+b']' else: bs += b' '*int(arg2(), 16) elif x == 0x0100: bs += b'[Link]' elif x == 0x0101: if verbose: bs += b'[instant on]' elif x == 0x0102: if verbose: bs += b'[instant off]' elif x == 0x0103: if verbose: bs += b'[no skip sound?]' elif x == 0x0104: if verbose: bs += b'[keepalive]' elif x == 0x0110: if verbose: bs += b'[next wait '+arg2()+b']' else: arg2() bs += b'\n\n' elif x == 0x0111: # 0x1C if verbose: bs += b'[end wait '+arg2()+b']' else: arg2() elif x == 0x0112: # 0x1D if verbose: bs += b'[end wait alt 0112 '+arg2()+b']' else: arg2() elif x == 0x0120: bs += b'[sound '+arg2()+b']' elif x == 0x0128: if verbose: bs += b'[wait '+arg2()+b']' else: arg2() elif x == 0x0135: # unique to JP? bs += b'[unk 0135]' elif x == 0x0201: bs += b'[failed song]' elif x == 0x0202: if verbose: bs += b'[two-choice]' elif x == 0x0203: if verbose: bs += b'[three-choice]' elif x == 0x0204: bs += b'[postman timer]' elif x == 0x0208: bs += b'[deku score?]' elif x == 0x020B: bs += b'[highscore? 020B]' elif x == 0x020C: bs += b'[rupee prompt]' elif x == 0x020D: bs += b'[rupees selected]' elif x == 0x020E: bs += b'[rupees]' elif x == 0x020F: bs += b'[hours/minutes remaining]' elif x == 0x021C: # X人目 bs += b'[fairies]' elif x == 0x021D: # X匹 bs += b'[gold skulltulas]' elif x == 0x021E: bs += b'[score? 021E]' elif x == 0x021F: bs += b'[score? 021F]' elif x == 0x0220: bs += b'[doggy prompt]' elif x == 0x0221: bs += b'[bombers code prompt]' elif x == 0x0222: bs += b'[item prompt]' elif x == 0x0224: bs += b'[soar destination]' elif x == 0x0225: bs += b'[lottery prompt]' elif x == 0x0227: bs += b'[fairies remaining]' elif x == 0x0228: bs += b'[fairies remaining]' elif x == 0x0229: bs += b'[fairies remaining]' elif x == 0x022A: bs += b'[fairies remaining]' elif x == 0x022B: bs += b'[witch archery]' elif x == 0x022C: bs += b'[winning numbers]' elif x == 0x022D: bs += b'[ticket numbers]' elif x == 0x022E: bs += b'[item worth]' elif x == 0x022F: bs += b'[bombers code]' elif x == 0x0230: if verbose: bs += b'[end convo]' elif x == 0x0231: bs += b'[skull color]' elif x == 0x0232: bs += b'[skull color]' elif x == 0x0233: bs += b'[skull color]' elif x == 0x0234: bs += b'[skull color]' elif x == 0x0235: bs += b'[skull color]' elif x == 0x0236: bs += b'[skull color]' elif x == 0x0237: bs += b'[hours remaining]' elif x == 0x0238: bs += b'[time until morning]' elif x == 0x0240: if verbose: bs += b'[no skip?]' elif x == 0x0306: bs += b'[octorok archery highscore]' elif x == 0x0307: bs += b'[epona highscore?]' elif x == 0x0308: bs += b'[highscore? 0308]' elif x == 0x0309: bs += b'[epona highscore]' elif x == 0x030A: bs += b'[deku highscore]' elif x == 0x030B: bs += b'[deku highscore]' elif x == 0x030C: bs += b'[deku highscore]' elif x == 0x030D: bs += b'[unused name 1]' elif x == 0x030E: bs += b'[unused name 2]' elif x == 0x030F: bs += b'[unused name 3]' elif x == 0x0310: bs += b'[highscore? 0310]' elif x == 0x037E: bs += b'[unk 037E]' elif x1 == 0x20: # 0x00-0x08 if x2 == 0x00: if verbose: bs += b'[white]' elif x2 == 0x01: if verbose: bs += b'[red]' elif x2 == 0x02: if verbose: bs += b'[green]' elif x2 == 0x03: if verbose: bs += b'[dark blue]' elif x2 == 0x04: if verbose: bs += b'[yellow]' elif x2 == 0x05: if verbose: bs += b'[light blue]' elif x2 == 0x06: if verbose: bs += b'[pink]' elif x2 == 0x07: if verbose: bs += b'[silver]' elif x2 == 0x08: if verbose: bs += b'[orange]' else: raise Exception('unknown color') elif x == 0x839F: bs += b'[A]' elif x == 0x83A0: bs += b'[B]' elif x == 0x83A1: bs += b'[C]' elif x == 0x83A2: bs += b'[L]' elif x == 0x83A3: bs += b'[R]' elif x == 0x83A4: bs += b'[Z]' elif x == 0x83A5: bs += b'[C Up]' elif x == 0x83A6: bs += b'[C Down]' elif x == 0x83A7: bs += b'[C Left]' elif x == 0x83A8: bs += b'[C Right]' elif x == 0x83A9: bs += b'[Triangle]' elif x == 0x83AA: bs += b'[Control Stick]' elif x == 0x83AB: bs += b'[DPad]' elif x == 0x0000: lament(bs) lament('{:04X}'.format(lastx)) raise Exception('unexpected 0000') elif shifty1 and shifty2o: if not shifty: lament('CRAP {:02X}{:02X}'.format(x1, x2)) raise Exception('not actually shifty') bs += b1 bs += b2 elif shifty1 and shifty2e: if not shifty: lament('CRAP {:02X}{:02X}'.format(x1, x2)) raise Exception('not actually shifty') bs += b1 bs += b2 elif shifty: # last resort... lament() lament(bs.decode('shift-jis')) lament('unknown {:04X}'.format(x)) raise Exception('looks shifty') bs += b1 bs += b2 else: lament(bs) lament('unknown {:02X}{:02X}'.format(x1, x2)) raise Exception('unknown character') lastx = x s = bs.decode('shift-jis') return s def parse_en_text(f): s = '' bs = bytearray() def arg(): return '{:02X}'.format(ord(f.read(1))).encode('utf-8') lastx = 0 special = f.read(11) # TODO while 1: b1 = f.read(1) if not b1: break x = ord(b1) if b'\x20' <= b1 <= b'\x7E': # ascii if b1 == b'"': bs += b'""' else: bs += b1 continue if b'\x7F' <= b1 <= b'\xAF': bs += extended[x - 0x7F].encode('utf-8') continue elif x == 0x00: if verbose: bs += b'[white]' elif x == 0x01: if verbose: bs += b'[red]' elif x == 0x02: if verbose: bs += b'[green]' elif x == 0x03: if verbose: bs += b'[dark blue]' elif x == 0x04: if verbose: bs += b'[yellow]' elif x == 0x05: if verbose: bs += b'[light blue]' elif x == 0x06: if verbose: bs += b'[pink]' elif x == 0x07: if verbose: bs += b'[silver]' elif x == 0x08: if verbose: bs += b'[orange]' elif x == 0x0A: if verbose: bs += b'[spaces? '+arg()+b']' else: arg() elif x == 0x0B: bs += b'[record?]' elif x == 0x0C: bs += b'[fairies]' elif x == 0x0D: bs += b'[gold skulltulas]' elif x == 0x10: if verbose: bs += b'[pause]' bs += b'\n\n' elif x == 0x11: bs += b'\n' elif x == 0x12: bs += b'\n\n' elif x == 0x13: bs += b'\n' elif x == 0x15: if verbose: bs += b'[no skip?]' elif x == 0x16: bs += b'[Link]' elif x == 0x17: if verbose: bs += b'[instant on]' elif x == 0x18: if verbose: bs += b'[instant off]' elif x == 0x19: if verbose: bs += b'[no skip sound?]' elif x == 0x1A: if verbose: bs += b'[keepalive]' elif x == 0x1B: if verbose: bs += b'[next wait '+arg()+arg()+b']' else: arg() arg() bs += b'\n\n' elif x == 0x1C: if verbose: bs += b'[end wait '+arg()+arg()+b']' else: arg() arg() elif x == 0x1D: if verbose: bs += b'[end wait alt '+arg()+arg()+b']' else: arg() arg() elif x == 0x1E: bs += b'[sound '+arg()+arg()+b']' elif x == 0x1F: if verbose: bs += b'[wait '+arg()+arg()+b']' else: arg() arg() elif x == 0xC2: if verbose: bs += b'[two-choice]' elif x == 0xC3: if verbose: bs += b'[three-choice]' elif x == 0xB0: bs += b'[A]' elif x == 0xB1: bs += b'[B]' elif x == 0xB2: bs += b'[C]' elif x == 0xB3: bs += b'[L]' elif x == 0xB4: bs += b'[R]' elif x == 0xB5: bs += b'[Z]' elif x == 0xB6: bs += b'[C Up]' elif x == 0xB7: bs += b'[C Down]' elif x == 0xB8: bs += b'[C Left]' elif x == 0xB9: bs += b'[C Right]' elif x == 0xBA: bs += b'[Triangle]' elif x == 0xBB: bs += b'[Control Stick]' elif x == 0xBF: # end marker break elif x == 0xC1: bs += b'[failed song]' elif x == 0xC4: bs += b'[postman timer]' elif x == 0xC8: bs += b'[deku score]' elif x == 0xCB: bs += b'[score]' elif x == 0xCC: bs += b'[rupee prompt]' elif x == 0xCD: bs += b'[rupees selected]' elif x == 0xCE: bs += b'[rupees]' elif x == 0xCF: bs += b'[hours remaining CF]' elif x == 0xD0: bs += b'[doggy bet]' elif x == 0xD1: bs += b'[bombers code prompt]' elif x == 0xD2: bs += b'[item prompt]' elif x == 0xD4: bs += b'[soar destination]' elif x == 0xD5: bs += b'[lottery prompt]' elif x == 0xD7: bs += b'[fairies remaining]' elif x == 0xD8: bs += b'[fairies remaining]' elif x == 0xD9: bs += b'[fairies remaining]' elif x == 0xDA: bs += b'[fairies remaining]' elif x == 0xDB: bs += b'[witch archery]' elif x == 0xDC: bs += b'[winning numbers]' elif x == 0xDD: bs += b'[ticket numbers]' elif x == 0xDE: bs += b'[item worth]' elif x == 0xDF: bs += b'[bombers code]' elif x == 0xE0: if verbose: bs += b'[end convo]' elif x == 0xE1: bs += b'[skull color]' elif x == 0xE2: bs += b'[skull color]' elif x == 0xE3: bs += b'[skull color]' elif x == 0xE4: bs += b'[skull color]' elif x == 0xE5: bs += b'[skull color]' elif x == 0xE6: bs += b'[skull color]' elif x == 0xE7: bs += b'[hours remaining E7]' elif x == 0xE8: bs += b'[time until morning]' elif x == 0xFA: bs += b'[deku highscore]' elif x == 0xFB: bs += b'[deku highscore]' elif x == 0xFC: bs += b'[deku highscore]' elif x == 0xF6: bs += b'[octorok archery highscore]' elif x == 0xF9: bs += b'[epona highscore]' else: lament(bs) lament('unknown {:02X}'.format(x)) raise Exception('unknown character') lastx = x s = bs.decode('utf-8') return s def dump_text(parser, msgtable, msgs): if not isinstance(msgtable, BytesIO): msgtable = BytesIO(msgtable) if not isinstance(msgs, BytesIO): msgs = BytesIO(msgs) msgtable_end = msgtable.seek(0, 2) msgs_end = msgs.seek(0, 2) msgtable.seek(0) msgs.seek(0) lastid = 0 for i in range(msgtable_end//8): msgid, = unpack('>H', msgtable.read(2)) if msgid >= 0xFFFC: break if msgid != lastid + 1: print('"",""') xy = msgtable.read(1) unused = msgtable.read(1) bank = msgtable.read(1) offset, = unpack('>L', b'\x00'+msgtable.read(3)) msgs.seek(offset) text = parser(msgs) text = re.sub('\n\n+', '\n\n', text) #print('{:04} {:04X} {:06X}'.format(i, msgid, offset)) print('"${:04X}","{}"'.format(msgid, text)) lastid = msgid def dumpit(codefile, textfile, language, table_offset, table_size): if os.path.exists(codefile) and os.path.exists(textfile): with open(codefile, 'rb') as f: f.seek(table_offset) msgtable = f.read(table_size) with open(textfile, 'rb') as f: msgs = f.read() if language == 'jp': dump_text(parse_jp_text, msgtable, msgs) elif language == 'en': dump_text(parse_en_text, msgtable, msgs) else: raise Exception('unsupported') return True else: return False import sys args = sys.argv[1:] # MM (U) 1.0 and (J) 1.0 if len(args) > 1 and (args[1].startswith('v') or args[1].startswith('V')): verbose = True else: verbose = False if args[0] == 'jp': dirname = 'dump/mm-JP10-5fb2301aacbf85278af30dca3e4194ad48599e36' codefile = dirname+'/0028 V00B5F000' jp_start = 0x11A398 jp_end = 0x123128 textfile = dirname+'/0026 V00AF9000' dumpit(codefile, textfile, 'jp', jp_start, jp_end - jp_start) elif args[0] == 'en': dirname = 'dump/mm-US10-d6133ace5afaa0882cf214cf88daba39e266c078' codefile = dirname+'/0031 V00B3C000' en_start = 0x1210D8 en_end = 0x12A048 textfile = dirname+'/0029 V00AD1000' dumpit(codefile, textfile, 'en', en_start, en_end - en_start) else: raise Exception('unknown language to dump')