-- i've lost control of my life

-- A small MIPS (N64-flavoured) assembler: Lexer -> Parser -> Dumper.
-- lexer and parser somewhat based on http://chunkbake.luaforge.net/
-- instruction reference:
-- https://github.com/mikeryan/n64dev/tree/master/docs/n64ops
-- cajeasm style assembly

-- TODO: maybe support reg# style too
-- General-purpose register names, indexed by register number.
local registers = {
    [0]=
    'R0', 'AT', 'V0', 'V1', 'A0', 'A1', 'A2', 'A3',
    'T0', 'T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7',
    'S0', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7',
    'T8', 'T9', 'K0', 'K1', 'GP', 'SP', 'S8', 'RA',
}

-- Floating-point register names, indexed by register number.
local fpu_registers = {
    [0]=
    'F0',  'F1',  'F2',  'F3',  'F4',  'F5',  'F6',  'F7',
    'F8',  'F9',  'F10', 'F11', 'F12', 'F13', 'F14', 'F15',
    'F16', 'F17', 'F18', 'F19', 'F20', 'F21', 'F22', 'F23',
    'F24', 'F25', 'F26', 'F27', 'F28', 'F29', 'F30', 'F31',
}

-- Every mnemonic the lexer classifies as an instruction (dotted spelling).
local all_instructions = {
    'ADD', 'ADDI', 'ADDIU', 'ADDU', 'AND', 'ANDI',
    'BC1F', 'BC1FL', 'BC1T', 'BC1TL',
    'BEQ', 'BEQL', 'BGEZ', 'BGEZAL', 'BGEZALL', 'BGEZL',
    'BGTZ', 'BGTZL', 'BLEZ', 'BLEZL',
    'BLTZ', 'BLTZAL', 'BLTZALL', 'BLTZL', 'BNE', 'BNEL',
    'BREAK', 'CACHE', 'CFC1', 'CTC1',
    'DADD', 'DADDI', 'DADDIU', 'DADDU', 'DDIV', 'DDIVU', 'DIV', 'DIVU',
    'DMFC1', 'DMTC1', 'DMULT', 'DMULTU',
    'DSLL', 'DSLL32', 'DSLLV', 'DSRA', 'DSRA32', 'DSRAV',
    'DSRL', 'DSRL32', 'DSRLV', 'DSUB', 'DSUBU',
    'ERET', 'J', 'JAL', 'JALR', 'JR',
    'LB', 'LBU', 'LD', 'LDC1', 'LDC2', 'LDL', 'LDR', 'LH', 'LHU',
    'LL', 'LLD', 'LUI', 'LW', 'LWC1', 'LWL', 'LWR', 'LWU',
    'MFC0', 'MFC1', 'MFHI', 'MFLO', 'MTC0', 'MTC1', 'MTHI', 'MTLO',
    'MULT', 'MULTU', 'NOR', 'OR', 'ORI',
    'SB', 'SC', 'SCD', 'SD', 'SDC1', 'SDC2', 'SDL', 'SDR', 'SH',
    'SLL', 'SLLV', 'SLT', 'SLTI', 'SLTIU', 'SLTU',
    'SRA', 'SRAV', 'SRL', 'SRLV', 'SUB', 'SUBU',
    'SW', 'SWC1', 'SWL', 'SWR', 'SYNC', 'SYSCALL',
    'TEQ', 'TEQI', 'TGE', 'TGEI', 'TGEIU', 'TGEU',
    'TLBP', 'TLBR', 'TLBWI', 'TLBWR',
    'TLT', 'TLTI', 'TLTIU', 'TLTU', 'TNE', 'TNEI',
    'XOR', 'XORI',

    'ABS.D', 'ABS.S', 'ADD.D', 'ADD.S',
    'CEIL.L.D', 'CEIL.L.S', 'CEIL.W.D', 'CEIL.W.S',
    'CVT.D.L', 'CVT.D.S', 'CVT.D.W', 'CVT.L.D', 'CVT.L.S',
    'CVT.S.D', 'CVT.S.L', 'CVT.S.W', 'CVT.W.D', 'CVT.W.S',
    'DIV.D', 'DIV.S',
    'FLOOR.L.D', 'FLOOR.L.S', 'FLOOR.W.D', 'FLOOR.W.S',
    -- these three used to be spelled with a non-existent ".F" format,
    -- which made the MUL_D handler unreachable
    'MOV.D', 'MOV.S', 'MUL.D', 'MUL.S', 'NEG.D', 'NEG.S',
    'ROUND.L.D', 'ROUND.L.S', 'ROUND.W.D', 'ROUND.W.S',
    'SQRT.D', 'SQRT.S', 'SUB.D', 'SUB.S',
    'TRUNC.L.D', 'TRUNC.L.S', 'TRUNC.W.D', 'TRUNC.W.S',

    'C.EQ.D', 'C.EQ.S', 'C.F.D', 'C.F.S',
    'C.LE.D', 'C.LE.S', 'C.LT.D', 'C.LT.S',
    'C.NGE.D', 'C.NGE.S', 'C.NGL.D', 'C.NGL.S',
    'C.NGLE.D', 'C.NGLE.S', 'C.NGT.D', 'C.NGT.S',
    'C.OLE.D', 'C.OLE.S', 'C.OLT.D', 'C.OLT.S',
    'C.SEQ.D', 'C.SEQ.S', 'C.SF.D', 'C.SF.S',
    'C.UEQ.D', 'C.UEQ.S', 'C.ULE.D', 'C.ULE.S',
    'C.ULT.D', 'C.ULT.S', 'C.UN.D', 'C.UN.S',

    -- pseudo-instructions
    'B', 'BAL',
    'BEQI', 'BNEI', 'BGE', 'BGEI', 'BLE', 'BLEI',
    'BLT', 'BLTI', 'BGT', 'BGTI',
    'CL', 'LI', 'MOV', 'NOP', 'SUBI', 'SUBIU',
}

-- Directive names accepted after a leading dot.
local all_directives = {
    'ALIGN', 'SKIP',
    'ASCII', 'ASCIIZ',
    'BYTE', 'HALFWORD', 'WORD', 'FLOAT',
    --'HEX', -- excluded here due to different syntax
    'INC', 'INCASM', 'INCLUDE', 'INCBIN',
    'ORG',
}

-- Token types produced by Lexer:lex().
local all_tokens = {
    'DEF', 'DEFSYM', 'DEREF', 'DIR',
    'EOF', 'EOL', 'INSTR',
    'LABEL', 'LABELSYM',
    'NUM', 'REG', 'SEP',
}

-- General registers at 0..31 followed by FPU registers at 32..63.
local all_registers = {}
for k, v in pairs(registers) do
    all_registers[k] = v
end
for k, v in pairs(fpu_registers) do
    all_registers[k + 32] = v
end

-- set up reverse table lookups
-- Adds value -> key entries to t. The reverse pairs are gathered in a scratch
-- table first because assigning new keys to a table while traversing it with
-- pairs() is undefined behaviour.
local function revtable(t)
    local reversed = {}
    for k, v in pairs(t) do
        reversed[v] = k
    end
    for v, k in pairs(reversed) do
        t[v] = k
    end
end

revtable(registers)
revtable(fpu_registers)
revtable(all_registers)
revtable(all_instructions)
revtable(all_directives) -- the lexer looks directives up by name
revtable(all_tokens)

-- Operand layouts. The strings are only ever compared for equality, so each
-- one MUST be unique (tsdf and tsdd used to be identical and thus equal).
local argtypes = {
    bto = 'base rt offset',
    sti = 'rs rt immediate',
    std = 'rs rt rd', -- ending with 5 unset bits and a const
    st  = 'rs rt', -- ending with 10 unset bits and a const
    tds = 'rs rd rs/sa', -- shifts; ending with a const
    s   = 'rs', -- followed by 15 unset bits and a const
    sto = 'rs rt offset',
    stc = 'rs rt code', -- followed by a const
    so  = 'rs offset', -- with a const inbetween
    sync= 'stype', -- starting with 15 unset bits, ending with a const
    indx= 'index',
    lui = 'rt immediate', -- starting with 5 unset bits
    mf  = 'rd', -- 10 unset bits on left, 5 on right, ending with a const
    jalr= 'rs rd', -- 5 unset bits inbetween, 5 on right, ending with a const
    code= 'code', -- ending with a const
    movf= 'rd fs', -- starting with const, ending with 11 unset bits
    bfo = 'base fs offset',
    tsdf= 'ft fs fd (single)', -- starting with a const of 16, ending with a const
    tsdd= 'ft fs fd (double)', -- starting with a const of 17, ending with a const
}

-- Shifts whose amount comes from a register: OP rd, rt, rs.
local variable_shifts = {
    DSLLV = true, DSRAV = true, DSRLV = true,
    SLLV = true, SRAV = true, SRLV = true,
}

local at = argtypes -- temporary shorthand
-- name -> {primary opcode, operand layout, function/sub-opcode constant}
local instruction_handlers = {
    ADD_D   = {17, at.tsdd, 0},
    ADD_S   = {17, at.tsdf, 0},
    DIV_D   = {17, at.tsdd, 3},
    DIV_S   = {17, at.tsdf, 3},
    MUL_D   = {17, at.tsdd, 2},
    MUL_S   = {17, at.tsdf, 2},
    SUB_D   = {17, at.tsdd, 1},
    SUB_S   = {17, at.tsdf, 1},

    CFC1    = {17, at.movf, 2},
    CTC1    = {17, at.movf, 6},
    DMFC1   = {17, at.movf, 1},
    DMTC1   = {17, at.movf, 5},
    -- COP0 is opcode 16 and COP1 is opcode 17
    MFC0    = {16, at.movf, 0},
    MFC1    = {17, at.movf, 0},
    MTC0    = {16, at.movf, 4},
    MTC1    = {17, at.movf, 4},

    LDC1    = {53, at.bfo},
    LWC1    = {49, at.bfo},
    SDC1    = {61, at.bfo},
    SWC1    = {57, at.bfo},

    --

    MTHI    = { 0, at.s, 17},
    MTLO    = { 0, at.s, 19},
    JR      = { 0, at.s, 8},

    BREAK   = { 0, at.code, 13},
    SYSCALL = { 0, at.code, 12},

    SYNC    = { 0, at.sync, 15},

    BEQ     = { 4, at.sto},
    BEQL    = {20, at.sto},
    BNE     = { 5, at.sto},
    BNEL    = {21, at.sto},

    TEQ     = { 0, at.stc, 52},
    TGE     = { 0, at.stc, 48},
    TGEU    = { 0, at.stc, 49},
    TLT     = { 0, at.stc, 50},
    TLTU    = { 0, at.stc, 51},
    TNE     = { 0, at.stc, 54},

    J       = { 2, at.indx},
    JAL     = { 3, at.indx},

    JALR    = { 0, at.jalr, 9},

    LUI     = {15, at.lui},

    MFHI    = { 0, at.mf, 16},
    MFLO    = { 0, at.mf, 18},

    BGEZ    = { 1, at.so, 1},
    BGEZAL  = { 1, at.so, 17},
    BGEZALL = { 1, at.so, 19},
    BGEZL   = { 1, at.so, 3},
    BGTZ    = { 7, at.so, 0},
    BGTZL   = {23, at.so, 0},
    BLEZ    = { 6, at.so, 0},
    BLEZL   = {22, at.so, 0},
    BLTZ    = { 1, at.so, 0},
    BLTZAL  = { 1, at.so, 16},
    BLTZALL = { 1, at.so, 18},
    BLTZL   = { 1, at.so, 2},

    --

    NOP     = { 0, at.code, 0},

    LB      = {32, at.bto},
    LBU     = {36, at.bto},
    LD      = {55, at.bto},
    LDL     = {26, at.bto},
    LDR     = {27, at.bto},
    LH      = {33, at.bto},
    LHU     = {37, at.bto},
    LL      = {48, at.bto},
    LLD     = {52, at.bto},
    LW      = {35, at.bto},
    LWL     = {34, at.bto},
    LWR     = {38, at.bto},
    LWU     = {39, at.bto},
    SB      = {40, at.bto},
    SC      = {56, at.bto},
    SCD     = {60, at.bto},
    SD      = {63, at.bto},
    SDL     = {44, at.bto},
    SDR     = {45, at.bto},
    SH      = {41, at.bto},
    SW      = {43, at.bto},
    SWL     = {42, at.bto},
    SWR     = {46, at.bto},

    ADDI    = { 8, at.sti},
    ADDIU   = { 9, at.sti},
    ANDI    = {12, at.sti},
    DADDI   = {24, at.sti},
    DADDIU  = {25, at.sti},
    ORI     = {13, at.sti},
    SLTI    = {10, at.sti},
    SLTIU   = {11, at.sti},
    XORI    = {14, at.sti},

    ADD     = { 0, at.std, 32},
    ADDU    = { 0, at.std, 33},
    AND     = { 0, at.std, 36},
    DADD    = { 0, at.std, 44},
    DADDU   = { 0, at.std, 45},
    DSUB    = { 0, at.std, 46},
    DSUBU   = { 0, at.std, 47},
    NOR     = { 0, at.std, 39},
    OR      = { 0, at.std, 37},
    SLT     = { 0, at.std, 42},
    SLTU    = { 0, at.std, 43},
    SUB     = { 0, at.std, 34},
    SUBU    = { 0, at.std, 35},
    XOR     = { 0, at.std, 38},

    DDIV    = { 0, at.st, 30},
    DDIVU   = { 0, at.st, 31},
    DIV     = { 0, at.st, 26},
    DIVU    = { 0, at.st, 27},
    DMULT   = { 0, at.st, 28},
    DMULTU  = { 0, at.st, 29},
    MULT    = { 0, at.st, 24},
    MULTU   = { 0, at.st, 25},

    -- all shifts share one layout; see variable_shifts for the "V" forms
    DSLL    = { 0, at.tds, 56},
    DSLL32  = { 0, at.tds, 60},
    DSLLV   = { 0, at.tds, 20},
    DSRA    = { 0, at.tds, 59},
    DSRA32  = { 0, at.tds, 63},
    DSRAV   = { 0, at.tds, 23},
    DSRL    = { 0, at.tds, 58},
    DSRL32  = { 0, at.tds, 62},
    DSRLV   = { 0, at.tds, 22},
    SLL     = { 0, at.tds, 0},
    SLLV    = { 0, at.tds, 4},
    SRA     = { 0, at.tds, 3},
    SRAV    = { 0, at.tds, 7},
    SRL     = { 0, at.tds, 2},
    SRLV    = { 0, at.tds, 6},
}
at = nil

-- Character-level scanner. State lives on the global Lexer table itself.
Lexer = {}

-- Resets the scanner to the start of the source string `asm`.
function Lexer:setup(asm)
    self.asm = asm
    self.pos = 1
    self.line = 1
    self.EOF = -1
    -- clear leftovers from a previous run so a stale '\n' cannot bump `line`
    self.chr = ''
    self.chr2 = ''
    self.chrchr = ''
    self.buff = ''
    self:nextc()
end

-- Raises a formatted error carrying the current line number.
function Lexer:error(msg)
    error(string.format('%s:%d: Error: %s', 'file.asm', self.line, msg), 2)
end

-- Advances one character. Updates ord/chr (current character), ord2/chr2
-- (one character of lookahead) and chrchr (both concatenated).
function Lexer:nextc()
    if self.pos > #self.asm then
        self.ord = self.EOF
        self.chr = ''
        self.ord2 = self.EOF
        self.chr2 = ''
        self.chrchr = ''
        return
    end

    if self.chr == '\n' then
        self.line = self.line + 1
    end

    self.ord = string.byte(self.asm, self.pos)
    self.pos = self.pos + 1

    -- handle newlines; translate CRLF to LF
    if self.ord == 13 then
        if self.pos <= #self.asm and string.byte(self.asm, self.pos) == 10 then
            self.pos = self.pos + 1
        end
        self.ord = 10
    end

    self.chr = string.char(self.ord)
    if self.pos <= #self.asm then
        self.ord2 = string.byte(self.asm, self.pos)
        self.chr2 = string.char(self.ord2)
        self.chrchr = string.char(self.ord, self.ord2)
    else
        -- no lookahead left; do not keep the previous character's lookahead
        self.ord2 = self.EOF
        self.chr2 = ''
        self.chrchr = self.chr
    end
end

-- Skips forward until the current character is a newline or EOF.
function Lexer:skip_to_EOL()
    while self.chr ~= '\n' and self.ord ~= self.EOF do
        self:nextc()
    end
end

-- Appends the current character to the token buffer and advances.
function Lexer:save_next()
    self.buff = self.buff..self.chr
    self:nextc()
end

-- Buffers characters for as long as they match the Lua pattern `pattern`.
function Lexer:read_chars(pattern)
    while string.find(self.chr, pattern) do
        self:save_next()
    end
end

-- Reads a decimal number starting at the current character (a digit).
function Lexer:read_number()
    self.buff = ''
    -- the current character is the first digit, so it must not be skipped
    self:read_chars('%d')
    local num = tonumber(self.buff)
    if not num then
        self:error('invalid number')
    end
    return num
end

-- Reads a hexadecimal number written as $ABC or 0xABC.
function Lexer:read_hex()
    self.buff = ''
    if self.chr ~= '$' then
        self:nextc() -- the '0' of a "0x" prefix
    end
    self:nextc() -- the '$' or the 'x'
    self:read_chars('%x')
    local num = tonumber(self.buff, 16)
    if not num then
        self:error('invalid hex number')
    end
    return num
end

-- Reads a binary number written as %0101.
function Lexer:read_binary()
    self.buff = ''
    self:nextc() -- the '%'
    self:read_chars('[01]')
    local num = tonumber(self.buff, 2)
    if not num then
        self:error('invalid binary number')
    end
    return num
end

-- Skips a /* ... */ comment; raises if the input ends before it is closed.
function Lexer:skip_block_comment()
    self:nextc()
    self:nextc()
    while true do
        if self.ord == self.EOF then
            self:error('incomplete block comment')
        elseif self.chrchr == '*/' then
            self:nextc()
            self:nextc()
            break
        else
            self:nextc()
        end
    end
end

-- Returns the next token as (type, value). Comments and whitespace other
-- than newlines are skipped.
function Lexer:lex()
    while true do
        if self.chr == '\n' then
            self:nextc()
            return 'EOL', '\n'
        elseif self.ord == self.EOF then
            return 'EOF', self.EOF
        elseif self.chr == ';' then
            self:skip_to_EOL()
        elseif self.chrchr == '//' then
            self:skip_to_EOL()
        elseif self.chrchr == '/*' then
            self:skip_block_comment()
        elseif self.chr:find('%s') then
            self:nextc()
        elseif self.chr == '$' then
            return 'NUM', self:read_hex()
        elseif self.chr == '%' then
            return 'NUM', self:read_binary()
        elseif self.chr:find('%d') then
            -- TODO: check if cajaasm accepts 0X0
            if self.chr2 == 'x' or self.chr2 == 'X' then
                return 'NUM', self:read_hex()
            end
            return 'NUM', self:read_number()
        elseif self.chr == ',' then
            self:nextc()
            return 'SEP', ','
        elseif self.chr == '[' then
            -- [name]: value
            self.buff = ''
            self:nextc()
            self:read_chars('[%w_]')
            if self.chr ~= ']' then
                self:error('invalid define name')
            end
            self:nextc()
            if self.chr ~= ':' then
                self:error('define requires a colon')
            end
            self:nextc()
            return 'DEF', self.buff
        elseif self.chr == '(' then
            -- (register)
            self.buff = ''
            self:nextc()
            self:read_chars('[%w_]')
            if self.chr ~= ')' then
                self:error('invalid register name')
            end
            self:nextc()
            local up = self.buff:upper()
            if not all_registers[up] then
                self:error('not a register')
            end
            return 'DEREF', up
        elseif self.chr == '.' then
            self.buff = ''
            self:nextc() -- skip the dot, otherwise the name is always empty
            self:read_chars('[%w]')
            local up = self.buff:upper()
            if not all_directives[up] then
                self:error('not a directive')
            end
            if up == 'INC' or up == 'INCASM' or up == 'INCLUDE' then
                -- the three spellings are aliases; report the canonical one
                return 'DIR', 'INC'
            end
            return 'DIR', up
        elseif self.chr == '@' then
            self.buff = ''
            self:nextc()
            self:read_chars('[%w_]')
            return 'DEFSYM', self.buff
        elseif self.chr:find('[%a_]') then
            self.buff = ''
            -- now that we know we're looking at an identifier,
            -- we can start matching numbers and dots too.
            self:read_chars('[%w_.]')
            if self.chr == ':' then
                if self.buff:find('%.') then
                    self:error('labels cannot contain dots')
                end
                self:nextc()
                return 'LABEL', self.buff
            end
            local up = self.buff:upper()
            if up == 'HEX' then
                return 'DIR', up
            elseif all_registers[up] then
                return 'REG', up
            elseif all_instructions[up] then
                -- handler names use underscores where mnemonics use dots;
                -- the parentheses drop gsub's second (count) result
                return 'INSTR', (up:gsub('%.', '_'))
            else
                if self.buff:find('%.') then
                    self:error('labels cannot contain dots')
                end
                return 'LABELSYM', self.buff
            end
        elseif self.chr == ']' then
            self:error('unmatched closing bracket')
        elseif self.chr == ')' then
            self:error('unmatched closing parenthesis')
        else
            self:error('unknown character or control character')
        end
    end
end

-- Token-level parser. Pulls tokens from Lexer and feeds Dumper.
Parser = {}

-- Raises a formatted error carrying the line of the current token.
function Parser:error(msg)
    error(string.format('%s:%d: Error: %s', 'file.asm', self.line, msg), 2)
end

-- Fetches the next token into self.tt / self.tok and returns both.
function Parser:advance()
    self.tt, self.tok = Lexer:lex()
    self.line = Lexer.line
    return self.tt, self.tok
end

-- True when the current token ends a statement (EOL or EOF).
function Parser:is_EOL()
    return self.tt == 'EOL' or self.tt == 'EOF'
end

-- Consumes the end of the statement or raises.
function Parser:expect_EOL()
    if self:is_EOL() then
        self:advance()
        return
    end
    self:error('expected end of line')
end

-- Consumes a comma if one is present; returns true when it did.
function Parser:optional_comma()
    if self.tt == 'SEP' and self.tok == ',' then
        self:advance()
        return true
    end
end

-- Consumes a NUM token and returns its numeric value.
function Parser:number()
    if self.tt ~= 'NUM' then
        self:error('expected number')
    end
    local value = self.tok
    self:advance()
    return value
end

-- Parses the operands of the directive held in the current token.
function Parser:directive()
    local name = self.tok
    self:advance()
    if name == 'ORG' then
        Dumper:add_directive(name, self:number())
        self:expect_EOL()
    elseif name == 'ALIGN' or name == 'SKIP' then
        local size = self:number()
        if self:optional_comma() then
            Dumper:add_directive(name, size, self:number())
        else
            Dumper:add_directive(name, size)
        end
        self:expect_EOL()
    elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then
        Dumper:add_directive(name, self:number())
        while not self:is_EOL() do
            -- values may be separated by commas or by whitespace alone
            self:optional_comma()
            Dumper:add_directive(name, self:number())
        end
        self:expect_EOL()
    elseif name == 'HEX' then
        self:error('unimplemented')
    elseif name == 'INC' or name == 'INCBIN' then
        self:error('unimplemented')
    elseif name == 'FLOAT' or name == 'ASCII' or name == 'ASCIIZ' then
        self:error('unimplemented')
    else
        self:error('unknown directive')
    end
end

-- Consumes a register token and returns its name. `t` is the table the name
-- must belong to and defaults to the general-purpose registers.
function Parser:register(t)
    t = t or registers
    if self.tt ~= 'REG' then
        -- NUM tokens carry numbers, so compare against 0 rather than '0'
        if self.tt == 'NUM' and self.tok == 0 then
            -- i don't think cajeasm actually does this
            self.tt = 'REG'
            self.tok = 'R0'
        else
            self:error('expected register')
        end
    end
    local reg = self.tok
    if not t[reg] then
        self:error('wrong type of register')
    end
    self:advance()
    return reg
end

-- Consumes a "(register)" token and returns the register name.
function Parser:deref()
    if self.tt ~= 'DEREF' then
        self:error('expected register to dereference')
    end
    local reg = self.tok
    self:advance()
    return reg
end

-- Consumes a number, define reference or label reference and returns it as
-- an unresolved {type, value} pair for the Dumper.
function Parser:const()
    if self.tt ~= 'NUM' and self.tt ~= 'DEFSYM' and self.tt ~= 'LABELSYM' then
        self:error('expected constant')
    end
    local t = {self.tt, self.tok}
    self:advance()
    return t
end

-- Parses one instruction statement and queues its fields on the Dumper.
function Parser:instruction()
    local name = self.tok
    self:advance()
    local h = instruction_handlers[name]

    if h == nil then
        self:error('undefined instruction')
    elseif h[2] == argtypes.bto then
        -- OP rt, offset(base)
        local rt = self:register()
        self:optional_comma()
        local offset = {'LOWER', self:const()}
        local base = self:deref()
        Dumper:add_instruction_5_5_16(h[1], base, rt, offset)
    elseif h[2] == argtypes.bfo then
        -- OP ft, offset(base)
        local ft = self:register(fpu_registers)
        self:optional_comma()
        local offset = {'LOWER', self:const()}
        local base = self:deref()
        Dumper:add_instruction_5_5_16(h[1], base, ft, offset)
    elseif h[2] == argtypes.sti then
        -- OP rt, rs, immediate (the destination is written first)
        local rt = self:register()
        self:optional_comma()
        local rs = self:register()
        self:optional_comma()
        local immediate = {'LOWER', self:const()}
        Dumper:add_instruction_5_5_16(h[1], rs, rt, immediate)
    elseif h[2] == argtypes.std then
        -- OP rd, rs, rt
        local rd = self:register()
        self:optional_comma()
        local rs = self:register()
        self:optional_comma()
        local rt = self:register()
        local const = h[3] or self:error('internal error: expected const')
        Dumper:add_instruction_5_5_5_11(h[1], rs, rt, rd, const)
    elseif h[2] == argtypes.st then
        -- OP rs, rt
        local rs = self:register()
        self:optional_comma()
        local rt = self:register()
        local const = h[3] or self:error('internal error: expected const')
        Dumper:add_instruction_5_5_16(h[1], rs, rt, const)
    elseif h[2] == argtypes.tds then
        local rd = self:register()
        self:optional_comma()
        local rt = self:register()
        self:optional_comma()
        local const = h[3] or self:error('internal error: expected const')
        if variable_shifts[name] then
            -- OP rd, rt, rs: the amount register goes in the rs field
            local rs = self:register()
            Dumper:add_instruction_5_5_5_5_6(h[1], rs, rt, rd, 0, const)
        else
            -- OP rd, rt, sa
            local sa = self:const()
            Dumper:add_instruction_5_5_5_5_6(h[1], 0, rt, rd, sa, const)
        end
    elseif h[2] == argtypes.s then
        -- OP rs
        local rs = self:register()
        local const = h[3] or self:error('internal error: expected const')
        Dumper:add_instruction_5_5_16(h[1], rs, 0, const)
    elseif h[2] == argtypes.sto then
        -- OP rs, rt, offset
        local rs = self:register()
        self:optional_comma()
        local rt = self:register()
        self:optional_comma()
        -- FIXME: branches are relative
        local offset = self:const()
        Dumper:add_instruction_5_5_16(h[1], rs, rt, offset)
    elseif h[2] == argtypes.stc then
        -- OP rs, rt
        local rs = self:register()
        self:optional_comma()
        local rt = self:register()
        local const = h[3] or self:error('internal error: expected const')
        -- FIXME: there's supposed to be 'code' before const
        -- but i dunno what it's supposed to be
        -- so i'm leaving it as zero here
        Dumper:add_instruction_5_5_16(h[1], rs, rt, const)
    elseif h[2] == argtypes.so then
        -- OP rs, offset
        local rs = self:register()
        self:optional_comma()
        local offset = self:const()
        local const = h[3] or self:error('internal error: expected const')
        -- FIXME: branches are relative
        Dumper:add_instruction_5_5_16(h[1], rs, const, offset)
    elseif h[2] == argtypes.sync then
        -- OP
        local const = h[3] or self:error('internal error: expected const')
        Dumper:add_instruction_26(h[1], const)
    elseif h[2] == argtypes.indx then
        -- OP target
        local target = {'INDEX', self:const()}
        Dumper:add_instruction_26(h[1], target)
    elseif h[2] == argtypes.lui then
        -- OP rt, immediate
        local rt = self:register()
        self:optional_comma()
        local immediate = {'UPPER', self:const()}
        Dumper:add_instruction_5_5_16(h[1], 0, rt, immediate)
    elseif h[2] == argtypes.mf then
        -- OP rd
        local rd = self:register()
        local const = h[3] or self:error('internal error: expected const')
        Dumper:add_instruction_5_5_5_5_6(h[1], 0, 0, rd, 0, const)
    elseif h[2] == argtypes.jalr then
        -- OP rs, rd
        -- NOTE(review): standard MIPS syntax is "JALR rd, rs"; the operand
        -- order is left as found, confirm against cajeasm.
        local rs = self:register()
        self:optional_comma()
        local rd = self:register()
        local const = h[3] or self:error('internal error: expected const')
        Dumper:add_instruction_5_5_5_5_6(h[1], rs, 0, rd, 0, const)
    elseif h[2] == argtypes.code then
        -- OP
        local const = h[3] or self:error('internal error: expected const')
        Dumper:add_instruction_26(h[1], const)
    elseif h[2] == argtypes.movf then
        local rt = self:register()
        self:optional_comma()
        local rd
        if name == 'MFC0' or name == 'MTC0' then
            -- OP rt, rd
            rd = self:register()
        else
            -- OP rt, fs
            rd = self:register(fpu_registers)
        end
        local const = h[3] or self:error('internal error: expected const')
        Dumper:add_instruction_5_5_5_5_6(h[1], const, rt, rd, 0, 0)
    elseif h[2] == argtypes.tsdf or h[2] == argtypes.tsdd then
        -- OP fd, fs, ft
        local fd = self:register(fpu_registers)
        self:optional_comma()
        local fs = self:register(fpu_registers)
        self:optional_comma()
        local ft = self:register(fpu_registers)
        local const = h[3] or self:error('internal error: expected const')
        -- format field: 16 selects single precision, 17 double precision
        local fmt = 16
        if h[2] == argtypes.tsdd then
            fmt = 17
        end
        -- encoded field order is fmt, ft, fs, fd
        Dumper:add_instruction_5_5_5_5_6(h[1], fmt, ft, fs, fd, const)
    else
        self:error('TODO')
    end
    self:expect_EOL()
end

-- Assembles the source string `asm`. Returns whatever Dumper:dump() returns
-- (the list of emitted words as 8-digit uppercase hex strings).
function Parser:parse(asm)
    self.asm = asm
    Lexer:setup(asm)
    Dumper:setup()
    self:advance()
    while self.tt ~= 'EOF' do
        if self.tt == 'EOL' then
            -- empty line
            self:advance()
        elseif self.tt == 'DEF' then
            local name = self.tok
            self:advance()
            Dumper:add_define(name, self:number())
        elseif self.tt == 'DIR' then
            self:directive()
        elseif self.tt == 'LABEL' then
            Dumper:add_label(self.tok)
            self:advance()
        elseif self.tt == 'INSTR' then
            self:instruction()
        else
            self:error('unexpected token (unknown instruction?)')
        end
    end
    return Dumper:dump()
end

-- Collects parsed instructions, resolves symbols and emits machine words.
Dumper = {}

-- Clears all defines, labels and queued instructions.
function Dumper:setup()
    self.defines = {}
    self.labels = {}
    self.lines = {}
end

-- Raises an internal (non-source-positioned) error.
function Dumper:error(msg)
    error(string.format('Internal Error: %s', msg), 2)
end

-- Queues one instruction record.
function Dumper:push(t)
    --print(t.data)
    table.insert(self.lines, t)
end

-- The add_instruction_* methods queue an instruction whose 26 bits after the
-- opcode `i` are split into fields of the widths named in the method.
function Dumper:add_instruction_26(i, a)
    self:push{sizes = {26}, data = {i, a}}
end

function Dumper:add_instruction_5_5_16(i, a, b, c)
    self:push{sizes = {5, 5, 16}, data = {i, a, b, c}}
end

function Dumper:add_instruction_5_5_5_11(i, a, b, c, d)
    self:push{sizes = {5, 5, 5, 11}, data = {i, a, b, c, d}}
end

function Dumper:add_instruction_5_5_5_5_6(i, a, b, c, d, e)
    self:push{sizes = {5, 5, 5, 5, 6}, data = {i, a, b, c, d, e}}
end

-- Records the value of a [name]: define.
function Dumper:add_define(name, number)
    self.defines[name] = number
end

-- Records a label as the 1-based index of the next queued instruction.
function Dumper:add_label(name)
    self.labels[name] = #self.lines + 1
end

-- Directives are parsed but not implemented yet; always raises.
function Dumper:add_directive(...)
    self:error('unimplemented directive')
end

-- Prints one machine word given its upper and lower 16-bit halves.
function Dumper:print(uw, lw)
    print(('%04X%04X'):format(uw, lw))
end

-- Resolves a {type, value} constant (as built by Parser:const) to a number.
function Dumper:desym(tok)
    if type(tok[2]) == 'number' then
        return tok[2]
    elseif tok[1] == 'LABELSYM' then
        print('(label)', tok[2])
        local index = self.labels[tok[2]]
        if index == nil then
            self:error('undefined label')
        end
        -- NOTE(review): labels hold a 1-based instruction index, so the
        -- first instruction resolves to 4 rather than 0. Left as found.
        return index*4
    elseif tok[1] == 'DEFSYM' then
        print('(define)')
        local val = self.defines[tok[2]]
        if val == nil then
            self:error('unknown define')
        end
        return val
    end
    print(tok)
    self:error('failed to desym')
end

-- Converts one queued field to its numeric value. Accepts a plain number, a
-- register name, a {type, value} constant, or such a constant wrapped in an
-- 'UPPER' / 'LOWER' / 'INDEX' tag.
function Dumper:toval(tok)
    if tok == nil then
        self:error('nil value')
    elseif type(tok) == 'number' then
        return tok
    elseif all_registers[tok] then
        return registers[tok] or fpu_registers[tok]
    end
    if type(tok) == 'table' then
        if #tok ~= 2 then
            print('toval', tok)
            self:error('invalid token')
        end
        if tok[1] == 'UPPER' then
            local val = self:desym(tok[2])
            -- values that already fit in 16 bits are used as given; larger
            -- ones contribute their upper half-word
            if val >= 0x10000 then
                val = math.floor(val/0x10000)
            end
            return val
        elseif tok[1] == 'LOWER' then
            return self:desym(tok[2]) % 0x10000
        elseif tok[1] == 'INDEX' then
            -- jump targets are word indices: the low 28 address bits / 4
            local addr = self:desym(tok[2])
            local val = math.floor((addr % 0x10000000)/4)
            print('(index)', val)
            return val
        else
            return self:desym(tok)
        end
    end
    print('toval', tok)
    self:error('invalid value')
end

-- Raises unless n is an integer in the range [0, 2^bits).
function Dumper:validate(n, bits)
    local max = 2^bits
    if n == nil then
        self:error('value is nil')
    end
    if n >= max or n < 0 then
        print(("n %08X"):format(math.floor(math.abs(n))))
        self:error('value out of range')
    end
    if n ~= math.floor(n) then
        self:error('value is not an integer')
    end
end

-- Encodes every queued instruction, prints each word through Dumper:print
-- and returns the words as a list of 8-digit uppercase hex strings.
function Dumper:dump()
    local words = {}
    for _, t in ipairs(self.lines) do
        local total = 0
        for _, size in ipairs(t.sizes) do
            total = total + size
        end
        -- a 6-bit opcode plus the fields must fill exactly 32 bits
        if total ~= 26 or #t.data ~= #t.sizes + 1 then
            self:error('unknown n-size')
        end

        -- pack left to right: opcode first, then each field in order
        local word = t.data[1]
        for n, size in ipairs(t.sizes) do
            local val = self:toval(t.data[n + 1])
            self:validate(val, size)
            word = word*2^size + val
        end

        local uw = math.floor(word/0x10000)
        local lw = math.floor(word % 0x10000)
        self:print(uw, lw)
        words[#words + 1] = ('%04X%04X'):format(uw, lw)
    end
    return words
end

-- Loads the hard-coded assembly file and assembles it to stdout.
function main()
    local f = io.open('Moonjump 2.asm', 'r')
    if not f then
        f = io.open('inject/Moonjump 2.asm', 'r')
        if not f then
            error('could not load assembly', 1)
        end
    end
    local asm = f:read('*a')
    f:close()
    Parser:parse(asm)
end

local ok, msg = pcall(main)
if not ok then
    print(msg)
end