mm/dumptext.py

435 lines
12 KiB
Python

#!/usr/bin/env python3
import sys
import os, os.path
from io import BytesIO
import struct
def lament(*args, **kwargs):
    """Print a diagnostic to stderr; takes print()'s arguments except file."""
    print(*args, file=sys.stderr, **kwargs)


def unpack(fmt, data):
    """Call struct.unpack after copying data (e.g. a bytearray) into bytes."""
    return struct.unpack(fmt, bytes(data))


# Replacement characters for the single-byte codes 0x7F..0x9E handled by
# parse_en_text, which indexes this string with (code - 0x7F).
extended = '‾ÀÎÂÄÇÈÉÊËÏÔÖÙÛÜßàáâäçèéêëïôöùûü'
# Two-byte control codes that always expand to a fixed tag.  Where given,
# the trailing comment is the matching single-byte code in parse_en_text.
_JP_FIXED_TAGS = {
    0x874F: b'[Link]',  # 0x0F
    0x8791: b'[Marathon Time]',  # 0x16
    0x8792: b'[Race Time]',  # 0x17
    0x879B: b'[Points]',  # 0x18
    0x86A3: b'[Gold Skulltulas]',  # 0x19
    0x81BC: b'[two-choice]',  # 0x1B
    0x81B8: b'[three-choice]',  # 0x1C
    0x86A4: b'[weight]',  # 0x1D
    0x81A1: b'[World Time]',  # 0x1F
    0x839F: b'[A]',  # 0x9F
    0x83A0: b'[B]',  # 0xA0
    0x83A1: b'[C]',  # 0xA1
    0x83A2: b'[L]',  # 0xA2
    0x83A3: b'[R]',  # 0xA3
    0x83A4: b'[Z]',  # 0xA4
    0x83A5: b'[C Up]',  # 0xA5
    0x83A6: b'[C Down]',  # 0xA6
    0x83A7: b'[C Left]',  # 0xA7
    0x83A8: b'[C Right]',  # 0xA8
    0x83A9: b'[Triangle]',  # 0xA9
    0x83AA: b'[Control Stick]',  # 0xAA
    0x83AB: b'[DPad]',  # 0xAB
}

# Argument-less control codes that are only rendered in verbose mode.
_JP_VERBOSE_TAGS = {
    0x8189: b'[instant on]',  # 0x08
    0x818A: b'[instant off]',  # 0x09
    0x86C8: b'[keepalive]',  # 0x0A; shop-related? keeps dialog open?
    0x819F: b'[event start]',  # 0x0B
    0x81F0: b'[Ocarina]',  # 0x10
    0x8199: b'[no skip]',  # 0x1A
}

# Control codes followed by one 16-bit argument, always rendered.
_JP_ARG_TAGS = {
    0x81CB: b'[goto ',  # 0x07
    0x81F3: b'[sound ',  # 0x12
}

# Control codes followed by one 16-bit argument, rendered only in verbose
# mode (the argument is consumed either way).
_JP_VERBOSE_ARG_TAGS = {
    0x86C7: b'[spaces ',  # 0x06
    0x819E: b'[fade wait ',  # 0x0E
}

# Known arguments of the 0x869F code; anything else becomes "[time XXXX]".
_JP_SCORE_TAGS = {
    b'0000': b'[Horseback Archery Score]',
    b'0001': b'[Poe Points]',
    b'0002': b'[Largest Fish]',
    b'0003': b'[Horse Race Time]',
    b'0004': b'[Marathon Time]',
    b'0006': b'[Dampe Race time]',
}


def parse_jp_text(f, verbose=None):
    """Decode one Shift-JIS message from a binary stream into a str.

    Reading starts at the stream's current position and stops at the end
    marker 0x8170 or at end of stream.  Control codes are expanded into
    bracketed tags such as "[Link]"; some tags are only emitted in verbose
    mode, but their arguments are always consumed.  ASCII double quotes are
    doubled so the result can sit inside a double-quoted field.

    f: binary file-like object whose read() returns bytes.
    verbose: True/False to force the mode; None (default) uses the
        module-level `verbose` flag, or False when that flag is not defined.

    Raises Exception on a truncated two-byte code or argument, on the code
    0x0000, and on any byte pair that is neither a known control code nor
    an accepted Shift-JIS character.
    """
    if verbose is None:
        verbose = globals().get('verbose', False)

    def arg2():
        # Read a big-endian 16-bit argument and return it as 4 hex digits.
        raw = f.read(2)
        if len(raw) < 2:
            raise Exception('unexpected EOF')
        return '{:04X}'.format(int.from_bytes(raw, 'big')).encode('ascii')

    bs = bytearray()
    lastx = 0  # previous two-byte code, reported in diagnostics only
    while True:
        b1 = f.read(1)
        if not b1:
            break
        x1 = b1[0]
        if 0x20 <= x1 <= 0x7F:
            # ascii
            bs += b'""' if b1 == b'"' else b1
            continue
        if 0xA1 <= x1 <= 0xDF:
            # single-byte half-width katakana
            bs += b1
            continue

        b2 = f.read(1)
        if not b2:
            raise Exception('unexpected EOF')
        x2 = b2[0]
        x = x1 * 0x100 + x2

        if x == 0x8170:
            # end marker
            break
        if x == 0x000A:
            bs += b'\n'
        elif x in _JP_FIXED_TAGS:
            bs += _JP_FIXED_TAGS[x]
        elif x in _JP_VERBOSE_TAGS:
            if verbose:
                bs += _JP_VERBOSE_TAGS[x]
        elif x in _JP_ARG_TAGS:
            bs += _JP_ARG_TAGS[x] + arg2() + b']'
        elif x in _JP_VERBOSE_ARG_TAGS:
            a = arg2()
            if verbose:
                bs += _JP_VERBOSE_ARG_TAGS[x] + a + b']'
        elif x == 0x81A5:  # 0x04
            if verbose:
                bs += b'[pause]'
            bs += b'\n\n'
        elif x == 0x000B:  # 0x05
            a = arg2()
            if verbose:
                # only the low byte of the argument is shown
                bs += b'[color ' + a[2:] + b']'
        elif x == 0x81A3:  # 0x0C
            a = arg2()
            if verbose:
                bs += b'[wait ' + a + b']'
            bs += b'\n\n'
        elif x == 0x819A:  # 0x13
            # only the low byte of the argument is shown
            bs += b'[Item Icon ' + arg2()[2:] + b']'
        elif x == 0x86C9:  # 0x14
            a = arg2()
            if verbose:
                # the tag echoes the code itself before its argument
                xs = '{:04X}'.format(x).encode('ascii')
                bs += b'[speed ' + xs + b' ' + a + b']'
        elif x == 0x86B3:  # 0x15
            first = arg2()
            bs += b'[background ' + first + arg2() + b']'
        elif x == 0x869F:  # 0x1E
            a = arg2()
            bs += _JP_SCORE_TAGS.get(a, b'[time ' + a + b']')
        elif x == 0x0000:
            lament(bs)
            lament('{:04X}'.format(lastx))
            raise Exception('unexpected 0000')
        else:
            # Not a control code: accept it only as a Shift-JIS character.
            pair = b1 + b2
            try:
                pair.decode('shift-jis')
                shifty = True
            except UnicodeDecodeError:
                shifty = False
            lead_ok = ((0x81 <= x1 <= 0x9F and x1 not in (0x85, 0x86))
                       or 0xE0 <= x1 <= 0xEF)
            trail_ok = 0x40 <= x2 <= 0xFC
            if lead_ok and trail_ok:
                if not shifty:
                    lament('CRAP {:02X}{:02X}'.format(x1, x2))
                    raise Exception('not actually shifty')
                bs += pair
            elif shifty:
                # decodes, but lies outside the accepted byte ranges
                lament('SJS {:02X}{:02X}'.format(x1, x2))
                lament('{:04X}'.format(lastx))
                raise Exception('looks shifty')
            else:
                lament(bs)
                lament('unknown {:02X}{:02X}'.format(x1, x2))
                raise Exception('unknown character')
        lastx = x
    return bs.decode('shift-jis')
# Single-byte control codes that always expand to a fixed tag.
_EN_FIXED_TAGS = {
    0x0F: b'[Link]',
    0x10: b'[Ocarina]',
    0x16: b'[Marathon Time]',
    0x17: b'[Race Time]',
    0x18: b'[Points]',
    0x19: b'[Gold Skulltulas]',
    0x1D: b'[weight]',
    0x1F: b'[World Time]',
    0x9F: b'[A]',
    0xA0: b'[B]',
    0xA1: b'[C]',
    0xA2: b'[L]',
    0xA3: b'[R]',
    0xA4: b'[Z]',
    0xA5: b'[C Up]',
    0xA6: b'[C Down]',
    0xA7: b'[C Left]',
    0xA8: b'[C Right]',
    0xA9: b'[Triangle]',
    0xAA: b'[Control Stick]',
    0xAB: b'[DPad]',
}

# Argument-less control codes that are only rendered in verbose mode.
_EN_VERBOSE_TAGS = {
    0x08: b'[instant on]',
    0x09: b'[instant off]',
    0x0A: b'[keepalive]',  # shop-related? keeps dialog open?
    0x0B: b'[event start]',
    0x1A: b'[no skip]',
    0x1B: b'[two-choice]',
    0x1C: b'[three-choice]',
}

# Always-rendered control codes: tag prefix and number of argument bytes.
_EN_ARG_TAGS = {
    0x07: (b'[goto ', 2),
    0x12: (b'[sound ', 2),
    0x13: (b'[Item Icon ', 1),
    0x15: (b'[background ', 3),
}

# Control codes with one argument byte, rendered only in verbose mode (the
# argument is consumed either way).
_EN_VERBOSE_ARG_TAGS = {
    0x05: b'[color ',
    0x06: b'[spaces ',
    0x0E: b'[fade wait ',
    0x14: b'[speed ',
}

# Known arguments of the 0x1E code; anything else becomes "[time XX]".
_EN_SCORE_TAGS = {
    b'00': b'[Horseback Archery Score]',
    b'01': b'[Poe Points]',
    b'02': b'[Largest Fish]',
    b'03': b'[Horse Race Time]',
    b'04': b'[Marathon Time]',
    b'06': b'[Dampe Race time]',
}


def parse_en_text(f, verbose=None):
    """Decode one single-byte-encoded message from a binary stream into a str.

    Reading starts at the stream's current position and stops at the end
    marker 0x02 or at end of stream.  Bytes 0x20..0x7E are copied as ASCII
    (double quotes are doubled so the result can sit inside a double-quoted
    field), bytes 0x7F..0x9E are mapped through the module-level `extended`
    string, and control codes are expanded into bracketed tags such as
    "[Link]".  Some tags are only emitted in verbose mode, but their
    arguments are always consumed.

    f: binary file-like object whose read() returns bytes.
    verbose: True/False to force the mode; None (default) uses the
        module-level `verbose` flag, or False when that flag is not defined.

    Raises Exception on a truncated argument, on the byte 0x00, and on any
    other byte that is not a known code.
    """
    if verbose is None:
        verbose = globals().get('verbose', False)

    def arg():
        # Read one argument byte and return it as 2 hex digits.
        raw = f.read(1)
        if not raw:
            raise Exception('unexpected EOF')
        return '{:02X}'.format(raw[0]).encode('ascii')

    bs = bytearray()
    lastx = 0  # previous control code, reported in diagnostics only
    while True:
        b1 = f.read(1)
        if not b1:
            break
        x = b1[0]
        if 0x20 <= x <= 0x7E:
            # ascii
            bs += b'""' if b1 == b'"' else b1
            continue
        if 0x7F <= x <= 0x9E:
            bs += extended[x - 0x7F].encode('utf-8')
            continue

        if x == 0x02:
            # end marker
            break
        if x == 0x01:
            bs += b'\n'
        elif x in _EN_FIXED_TAGS:
            bs += _EN_FIXED_TAGS[x]
        elif x in _EN_VERBOSE_TAGS:
            if verbose:
                bs += _EN_VERBOSE_TAGS[x]
        elif x in _EN_ARG_TAGS:
            prefix, count = _EN_ARG_TAGS[x]
            bs += prefix + b''.join(arg() for _ in range(count)) + b']'
        elif x in _EN_VERBOSE_ARG_TAGS:
            a = arg()
            if verbose:
                bs += _EN_VERBOSE_ARG_TAGS[x] + a + b']'
        elif x == 0x04:
            if verbose:
                bs += b'[pause]'
            bs += b'\n\n'
        elif x == 0x0C:
            a = arg()
            if verbose:
                bs += b'[wait ' + a + b']'
            bs += b'\n\n'
        elif x == 0x1E:
            a = arg()
            bs += _EN_SCORE_TAGS.get(a, b'[time ' + a + b']')
        elif x == 0x00:
            lament(bs)
            lament('{:02X}'.format(lastx))
            raise Exception('unexpected 00')
        else:
            lament(bs)
            lament('unknown {:02X}'.format(x))
            raise Exception('unknown character')
        lastx = x
    return bs.decode('utf-8')
def dump_text(parser, msgtable, msgs):
    """Print every message listed in a message table as two-column rows.

    The table is read as consecutive 8-byte entries: a big-endian 16-bit
    message id in bytes 0-1 and a big-endian 24-bit offset into `msgs` in
    bytes 5-7.  Bytes 2-4 are not used here.  Processing stops at the first
    id >= 0xFFFC or when no complete entry is left.

    For each entry the row `"$XXXX","text"` is printed to stdout, where
    XXXX is the id in hex and text is what `parser` returns.  An empty row
    `"",""` is printed first whenever the id does not directly follow the
    previous one (the previous id starts at 0).

    parser: callable taking a binary stream positioned at the message start
        and returning the decoded text.
    msgtable, msgs: BytesIO objects, or bytes-like data to wrap in one.
    """
    if not isinstance(msgtable, BytesIO):
        msgtable = BytesIO(msgtable)
    if not isinstance(msgs, BytesIO):
        msgs = BytesIO(msgs)
    msgtable.seek(0)
    msgs.seek(0)

    lastid = 0
    while True:
        entry = msgtable.read(8)
        if len(entry) < 8:
            # no complete entry left
            break
        msgid = int.from_bytes(entry[0:2], 'big')
        if msgid >= 0xFFFC:
            break
        if msgid != lastid + 1:
            # mark a gap in the id sequence
            print('"",""')
        offset = int.from_bytes(entry[5:8], 'big')
        msgs.seek(offset)
        text = parser(msgs)
        print('"${:04X}","{}"'.format(msgid, text))
        lastid = msgid
def dumpit(codefile, textfile, language, table_offset, table_size):
    """Dump the messages of one language from a pair of files.

    Reads `table_size` bytes at `table_offset` of `codefile` as the message
    table and all of `textfile` as the message data, then hands both to
    dump_text with the parser for `language` ('jp' or 'en').

    Returns False without reading anything when either file is missing,
    True after a dump.  Raises Exception('unsupported') for any other
    language, after the files have been read.
    """
    both_present = os.path.exists(codefile) and os.path.exists(textfile)
    if not both_present:
        return False

    with open(codefile, 'rb') as code:
        code.seek(table_offset)
        msgtable = code.read(table_size)
    with open(textfile, 'rb') as data:
        msgs = data.read()

    if language == 'jp':
        parser = parse_jp_text
    elif language == 'en':
        parser = parse_en_text
    else:
        raise Exception('unsupported')
    dump_text(parser, msgtable, msgs)
    return True
import sys

# Command line: <language> [v...]
#   language  'jp' or 'en'
#   v...      any second argument starting with 'v' or 'V' turns on verbose
#             output (extra control-code tags in the dumped text)
args = sys.argv[1:]

# OoT NTSC 1.0
dirname = 'dump/oot-US10-ad69c91157f6705e8ab06c79fe08aad47bb57ba7'
codefile = dirname+'/0027 V00A87000 code'
# Offsets inside the code file: the jp table runs from jp_start to en_start
# and the en table from en_start to en_end.
jp_start = 0xF98AC
en_start = 0xFD9EC
en_end = 0x101D94

# Module-level flag read by the text parsers.
verbose = len(args) > 1 and args[1].startswith(('v', 'V'))

if not args:
    # previously this fell through to an IndexError on args[0]
    lament('usage: {} jp|en [verbose]'.format(sys.argv[0]))
    sys.exit(2)

if args[0] == 'jp':
    textfile = dirname+'/0019 V008EB000 jpn_message_data_static'
    dumped = dumpit(codefile, textfile, 'jp', jp_start, en_start - jp_start)
elif args[0] == 'en':
    textfile = dirname+'/0022 V0092D000 nes_message_data_static'
    dumped = dumpit(codefile, textfile, 'en', en_start, en_end - en_start)
else:
    raise Exception('unknown language to dump')

if not dumped:
    # dumpit returns False when either input file is missing; report it
    # instead of exiting silently with success.
    lament('missing input file: {!r} or {!r}'.format(codefile, textfile))
    sys.exit(1)