-- i've lost control of my life

-- instructions: https://github.com/mikeryan/n64dev/tree/master/docs/n64ops
-- CajeASM style assembly; refer to the manual included with CajeASM.
-- lexer and parser are somewhat based on http://chunkbake.luaforge.net/

local assembler = {
    _VERSION = 'assembler v5',
    _DESCRIPTION = 'Assembles MIPS assembly files for the R4300i CPU.',
    _URL = 'https://github.com/notwa/mm/blob/master/Lua/inject/assembler.lua',
    _LICENSE = [[
Copyright (C) 2015 Connor Olding

This program is licensed under the terms of the MIT License, and
is distributed without any warranty.  You should have received a
copy of the license along with this program; see the file LICENSE.
]],
}

--- Create a minimal class table.
-- Calling the returned table constructs an object and forwards all
-- arguments to its `init` method. Lookups that miss on the class fall
-- back to `inherit` (which may be nil).
local Class = function(inherit)
    local class = {}
    local mt_obj = {__index = class}
    local mt_class = {
        __call = function(self, ...)
            local obj = setmetatable({}, mt_obj)
            obj:init(...)
            return obj
        end,
        __index = inherit,
    }

    return setmetatable(class, mt_class)
end

-- TODO: maybe support reg# style too
local registers = {
    [0]=
    'R0', 'AT', 'V0', 'V1', 'A0', 'A1', 'A2', 'A3',
    'T0', 'T1', 'T2', 'T3', 'T4', 'T5', 'T6', 'T7',
    'S0', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7',
    'T8', 'T9', 'K0', 'K1', 'GP', 'SP', 'S8', 'RA',
}

local fpu_registers = {
    [0]=
    'F0',  'F1',  'F2',  'F3',  'F4',  'F5',  'F6',  'F7',
    'F8',  'F9',  'F10', 'F11', 'F12', 'F13', 'F14', 'F15',
    'F16', 'F17', 'F18', 'F19', 'F20', 'F21', 'F22', 'F23',
    'F24', 'F25', 'F26', 'F27', 'F28', 'F29', 'F30', 'F31',
}

local all_directives = {
    'ALIGN', 'SKIP',
    'ASCII', 'ASCIIZ',
    'BYTE', 'HALFWORD', 'WORD', 'FLOAT',
    --'HEX', -- excluded here due to different syntax
    'INC', 'INCASM', 'INCLUDE', 'INCBIN',
    'ORG',
}

-- general-purpose registers occupy 0..31, FPU registers 32..63
local all_registers = {}
for k, v in pairs(registers) do
    all_registers[k] = v
end
for k, v in pairs(fpu_registers) do
    all_registers[k + 32] = v
end

--- Add value -> key entries to `t`, in place.
-- The reverse pairs are collected first: assigning new fields to a table
-- while it is being traversed with pairs() is undefined behaviour.
local function revtable(t)
    local reversed = {}
    for k, v in pairs(t) do
        reversed[v] = k
    end
    for v, k in pairs(reversed) do
        t[v] = k
    end
end

-- set up reverse table lookups
revtable(registers)
revtable(fpu_registers)
revtable(all_registers)
-- the lexer looks directives up by name, so they need reverse entries too
revtable(all_directives)

-- values placed in the "fmt" field of FPU instructions
local fmt_single = 16
local fmt_double = 17
local fmt_word   = 20
local fmt_long   = 21

-- Each handler is laid out as:
--
--   first 6 bits of instruction
--   |   input format
--   |   |      output format
--   |   |      |        const (only if used in output)
--   |   |      |        |   format-const (only if used in output)
--   |   |____  |______  |_  |_
--  {0, 'dst', 'std0C', 32}
--
-- Input format characters are consumed by Parser:format_in and output
-- format characters by Parser:format_out. An empty handler ({}) marks an
-- instruction that is recognized but not implemented yet.
local instruction_handlers = {
    J       = {2, 'I', 'I'},
    JAL     = {3, 'I', 'I'},

    JALR    = {0, 'sd', 's0d0C', 9},

    MTHI    = {0, 's', 's000C', 17},
    MTLO    = {0, 's', 's000C', 19},
    JR      = {0, 's', 's000C', 8},

    BREAK   = {0, '', '0000C', 13},
    SYSCALL = {0, '', '0000C', 12},
    SYNC    = {0, '', '0000C', 15},

    LB      = {32, 'tob', 'bto'},
    LBU     = {36, 'tob', 'bto'},
    LD      = {55, 'tob', 'bto'},
    LDL     = {26, 'tob', 'bto'},
    LDR     = {27, 'tob', 'bto'},
    LH      = {33, 'tob', 'bto'},
    LHU     = {37, 'tob', 'bto'},
    LL      = {48, 'tob', 'bto'},
    LLD     = {52, 'tob', 'bto'},
    LW      = {35, 'tob', 'bto'},
    LWL     = {34, 'tob', 'bto'},
    LWR     = {38, 'tob', 'bto'},
    LWU     = {39, 'tob', 'bto'},
    SB      = {40, 'tob', 'bto'},
    SC      = {56, 'tob', 'bto'},
    SCD     = {60, 'tob', 'bto'},
    SD      = {63, 'tob', 'bto'},
    SDL     = {44, 'tob', 'bto'},
    SDR     = {45, 'tob', 'bto'},
    SH      = {41, 'tob', 'bto'},
    SW      = {43, 'tob', 'bto'},
    SWL     = {42, 'tob', 'bto'},
    SWR     = {46, 'tob', 'bto'},

    LUI     = {15, 'tJ', '0ti'},

    MFHI    = {0, 'd', '00d0C', 16},
    MFLO    = {0, 'd', '00d0C', 18},

    ADDI    = { 8, 'tsj', 'sti'},
    ADDIU   = { 9, 'tsj', 'sti'},
    ANDI    = {12, 'tsj', 'sti'},
    DADDI   = {24, 'tsj', 'sti'},
    DADDIU  = {25, 'tsj', 'sti'},
    ORI     = {13, 'tsj', 'sti'},
    SLTI    = {10, 'tsj', 'sti'},
    SLTIU   = {11, 'tsj', 'sti'},
    XORI    = {14, 'tsj', 'sti'},

    -- NOTE(review): the variable shifts below (DSLLV, SLLV, SRAV, SRLV)
    -- read their operands as rd, rs, rt, whereas DSRAV/DSRLV read them as
    -- rd, rt, rs -- confirm which order CajeASM expects before changing.
    ADD     = {0, 'dst', 'std0C', 32},
    ADDU    = {0, 'dst', 'std0C', 33},
    AND     = {0, 'dst', 'std0C', 36},
    DADD    = {0, 'dst', 'std0C', 44},
    DADDU   = {0, 'dst', 'std0C', 45},
    DSLLV   = {0, 'dst', 'std0C', 20},
    DSUB    = {0, 'dst', 'std0C', 46},
    DSUBU   = {0, 'dst', 'std0C', 47},
    NOR     = {0, 'dst', 'std0C', 39},
    OR      = {0, 'dst', 'std0C', 37},
    SLLV    = {0, 'dst', 'std0C', 4},
    SLT     = {0, 'dst', 'std0C', 42},
    SLTU    = {0, 'dst', 'std0C', 43},
    SRAV    = {0, 'dst', 'std0C', 7},
    SRLV    = {0, 'dst', 'std0C', 6},
    SUB     = {0, 'dst', 'std0C', 34},
    SUBU    = {0, 'dst', 'std0C', 35},
    XOR     = {0, 'dst', 'std0C', 38},

    DDIV    = {0, 'st', 'st00C', 30},
    DDIVU   = {0, 'st', 'st00C', 31},
    DIV     = {0, 'st', 'st00C', 26},
    DIVU    = {0, 'st', 'st00C', 27},
    DMULT   = {0, 'st', 'st00C', 28},
    DMULTU  = {0, 'st', 'st00C', 29},
    MULT    = {0, 'st', 'st00C', 24},
    MULTU   = {0, 'st', 'st00C', 25},

    DSLL    = {0, 'dti', '0tdiC', 56},
    DSLL32  = {0, 'dti', '0tdiC', 60},
    DSRA    = {0, 'dti', '0tdiC', 59},
    DSRA32  = {0, 'dti', '0tdiC', 63},
    -- the shift amount of the variable shifts is a register: it belongs in
    -- the rs field with the sa field left zero, not in the sa field.
    DSRAV   = {0, 'dts', 'std0C', 23},
    DSRL    = {0, 'dti', '0tdiC', 58},
    DSRL32  = {0, 'dti', '0tdiC', 62},
    DSRLV   = {0, 'dts', 'std0C', 22},
    SLL     = {0, 'dti', '0tdiC', 0},
    SRA     = {0, 'dti', '0tdiC', 3},
    SRL     = {0, 'dti', '0tdiC', 2},

    BEQ     = { 4, 'str', 'sto'},
    BEQL    = {20, 'str', 'sto'},
    BNE     = { 5, 'str', 'sto'},
    BNEL    = {21, 'str', 'sto'},

    BGEZ    = { 1, 'sr', 'sCo', 1},
    BGEZAL  = { 1, 'sr', 'sCo', 17},
    BGEZALL = { 1, 'sr', 'sCo', 19},
    BGEZL   = { 1, 'sr', 'sCo', 3},
    BGTZ    = { 7, 'sr', 'sCo', 0},
    BGTZL   = {23, 'sr', 'sCo', 0},
    BLEZ    = { 6, 'sr', 'sCo', 0},
    BLEZL   = {22, 'sr', 'sCo', 0},
    BLTZ    = { 1, 'sr', 'sCo', 0},
    BLTZAL  = { 1, 'sr', 'sCo', 16},
    BLTZALL = { 1, 'sr', 'sCo', 18},
    BLTZL   = { 1, 'sr', 'sCo', 2},

    TEQ     = {0, 'st', 'st00C', 52},
    TGE     = {0, 'st', 'st00C', 48},
    TGEU    = {0, 'st', 'st00C', 49},
    TLT     = {0, 'st', 'st00C', 50},
    TLTU    = {0, 'st', 'st00C', 51},
    TNE     = {0, 'st', 'st00C', 54},

    ADD_D   = {17, 'DST', 'FTSDC', 0, fmt_double},
    ADD_S   = {17, 'DST', 'FTSDC', 0, fmt_single},
    DIV_D   = {17, 'DST', 'FTSDC', 3, fmt_double},
    DIV_S   = {17, 'DST', 'FTSDC', 3, fmt_single},
    MUL_D   = {17, 'DST', 'FTSDC', 2, fmt_double},
    MUL_S   = {17, 'DST', 'FTSDC', 2, fmt_single},
    SUB_D   = {17, 'DST', 'FTSDC', 1, fmt_double},
    SUB_S   = {17, 'DST', 'FTSDC', 1, fmt_single},

    -- coprocessor moves: opcode 16 is COP0, opcode 17 is COP1
    CFC1    = {17, 'tS', 'CtS00', 2},
    CTC1    = {17, 'tS', 'CtS00', 6},
    DMFC1   = {17, 'tS', 'CtS00', 1},
    DMTC1   = {17, 'tS', 'CtS00', 5},
    MFC0    = {16, 'tS', 'CtS00', 0},
    MFC1    = {17, 'tS', 'CtS00', 0},
    MTC0    = {16, 'tS', 'CtS00', 4},
    MTC1    = {17, 'tS', 'CtS00', 4},

    LDC1    = {53, 'Tob', 'bTo'},
    LWC1    = {49, 'Tob', 'bTo'},
    SDC1    = {61, 'Tob', 'bTo'},
    SWC1    = {57, 'Tob', 'bTo'},

    C_EQ_D  = {17, 'ST', 'FTS0C', 50, fmt_double},
    C_EQ_S  = {17, 'ST', 'FTS0C', 50, fmt_single},
    C_F_D   = {17, 'ST', 'FTS0C', 48, fmt_double},
    C_F_S   = {17, 'ST', 'FTS0C', 48, fmt_single},
    C_LE_D  = {17, 'ST', 'FTS0C', 62, fmt_double},
    C_LE_S  = {17, 'ST', 'FTS0C', 62, fmt_single},
    C_LT_D  = {17, 'ST', 'FTS0C', 60, fmt_double},
    C_LT_S  = {17, 'ST', 'FTS0C', 60, fmt_single},
    C_NGE_D = {17, 'ST', 'FTS0C', 61, fmt_double},
    C_NGE_S = {17, 'ST', 'FTS0C', 61, fmt_single},
    C_NGL_D = {17, 'ST', 'FTS0C', 59, fmt_double},
    C_NGL_S = {17, 'ST', 'FTS0C', 59, fmt_single},
    C_NGLE_D= {17, 'ST', 'FTS0C', 57, fmt_double},
    C_NGLE_S= {17, 'ST', 'FTS0C', 57, fmt_single},
    C_NGT_D = {17, 'ST', 'FTS0C', 63, fmt_double},
    C_NGT_S = {17, 'ST', 'FTS0C', 63, fmt_single},
    C_OLE_D = {17, 'ST', 'FTS0C', 54, fmt_double},
    C_OLE_S = {17, 'ST', 'FTS0C', 54, fmt_single},
    C_OLT_D = {17, 'ST', 'FTS0C', 52, fmt_double},
    C_OLT_S = {17, 'ST', 'FTS0C', 52, fmt_single},
    C_SEQ_D = {17, 'ST', 'FTS0C', 58, fmt_double},
    C_SEQ_S = {17, 'ST', 'FTS0C', 58, fmt_single},
    C_SF_D  = {17, 'ST', 'FTS0C', 56, fmt_double},
    C_SF_S  = {17, 'ST', 'FTS0C', 56, fmt_single},
    C_UEQ_D = {17, 'ST', 'FTS0C', 51, fmt_double},
    C_UEQ_S = {17, 'ST', 'FTS0C', 51, fmt_single},
    C_ULE_D = {17, 'ST', 'FTS0C', 55, fmt_double},
    C_ULE_S = {17, 'ST', 'FTS0C', 55, fmt_single},
    C_ULT_D = {17, 'ST', 'FTS0C', 53, fmt_double},
    C_ULT_S = {17, 'ST', 'FTS0C', 53, fmt_single},
    C_UN_D  = {17, 'ST', 'FTS0C', 49, fmt_double},
    C_UN_S  = {17, 'ST', 'FTS0C', 49, fmt_single},

    CVT_D_L = {17, 'DS', 'F0SDC', 33, fmt_long},
    CVT_D_S = {17, 'DS', 'F0SDC', 33, fmt_single},
    CVT_D_W = {17, 'DS', 'F0SDC', 33, fmt_word},
    CVT_L_D = {17, 'DS', 'F0SDC', 37, fmt_double},
    CVT_L_S = {17, 'DS', 'F0SDC', 37, fmt_single},
    CVT_S_D = {17, 'DS', 'F0SDC', 32, fmt_double},
    CVT_S_L = {17, 'DS', 'F0SDC', 32, fmt_long},
    CVT_S_W = {17, 'DS', 'F0SDC', 32, fmt_word},
    CVT_W_D = {17, 'DS', 'F0SDC', 36, fmt_double},
    CVT_W_S = {17, 'DS', 'F0SDC', 36, fmt_single},

    ABS_D   = {17, 'DS', 'F0SDC', 5, fmt_double},
    ABS_S   = {17, 'DS', 'F0SDC', 5, fmt_single},
    CEIL_L_D= {17, 'DS', 'F0SDC', 10, fmt_double},
    CEIL_L_S= {17, 'DS', 'F0SDC', 10, fmt_single},
    CEIL_W_D= {17, 'DS', 'F0SDC', 14, fmt_double},
    CEIL_W_S= {17, 'DS', 'F0SDC', 14, fmt_single},
    FLOOR_L_D={17, 'DS', 'F0SDC', 11, fmt_double},
    FLOOR_L_S={17, 'DS', 'F0SDC', 11, fmt_single},
    FLOOR_W_D={17, 'DS', 'F0SDC', 15, fmt_double},
    FLOOR_W_S={17, 'DS', 'F0SDC', 15, fmt_single},
    MOV_D   = {17, 'DS', 'F0SDC', 6, fmt_double},
    MOV_S   = {17, 'DS', 'F0SDC', 6, fmt_single},
    NEG_D   = {17, 'DS', 'F0SDC', 7, fmt_double},
    NEG_S   = {17, 'DS', 'F0SDC', 7, fmt_single},
    ROUND_L_D={17, 'DS', 'F0SDC', 8, fmt_double},
    ROUND_L_S={17, 'DS', 'F0SDC', 8, fmt_single},
    ROUND_W_D={17, 'DS', 'F0SDC', 12, fmt_double},
    ROUND_W_S={17, 'DS', 'F0SDC', 12, fmt_single},
    SQRT_D  = {17, 'DS', 'F0SDC', 4, fmt_double},
    SQRT_S  = {17, 'DS', 'F0SDC', 4, fmt_single},
    TRUNC_L_D={17, 'DS', 'F0SDC', 9, fmt_double},
    TRUNC_L_S={17, 'DS', 'F0SDC', 9, fmt_single},
    TRUNC_W_D={17, 'DS', 'F0SDC', 13, fmt_double},
    TRUNC_W_S={17, 'DS', 'F0SDC', 13, fmt_single},

    TEQI    = {1, 'sj', 'sCi', 12},
    TGEI    = {1, 'sj', 'sCi', 8},
    TGEIU   = {1, 'sj', 'sCi', 9},
    TLTI    = {1, 'sj', 'sCi', 10},
    TLTIU   = {1, 'sj', 'sCi', 11},
    TNEI    = {1, 'sj', 'sCi', 14},

    CACHE   = {},
    ERET    = {},
    TLBP    = {},
    TLBR    = {},
    TLBWI   = {},
    TLBWR   = {},

    BC1F    = {},
    BC1FL   = {},
    BC1T    = {},
    BC1TL   = {},

    -- are these undocumented?
    LDC2    = {},
    SDC2    = {},

    -- pseudo-instructions
    B       = {4, 'r', '00o'},
    BAL     = {1, 'r', '0Co', 17},
    CL      = {0, 'd', '00d0C', 32},
    MOV     = {0, 'dt', '0td0C', 32},
    NOP     = {0, '', '0'},
    SUBI    = {8, 'tsk', 'sti'},
    SUBIU   = {9, 'tsk', 'sti'},

    -- ...that expand to multiple instructions
    LI      = {}, -- only one instruction for values < 0x10000
    BEQI    = {},
    BNEI    = {},
    BGE     = {},
    BGEI    = {},
    BLE     = {},
    BLEI    = {},
    BLT     = {},
    BLTI    = {},
    BGT     = {},
    BGTI    = {},
}

-- Instruction names as they appear in source text: handler keys use '_'
-- where the assembly syntax uses '.', e.g. ADD_D <-> ADD.D.
local all_instructions = {}
do
    local id = 1
    for name in pairs(instruction_handlers) do
        -- parentheses drop gsub's second return value (the match count)
        all_instructions[(name:gsub('_', '.'))] = id
        id = id + 1
    end
end
revtable(all_instructions)

local Lexer = Class()
--- Lexer over the assembly text `asm`.
-- `fn` is only used as the file name in error messages.
function Lexer:init(asm, fn)
    self.asm = asm
    self.fn = fn or '(string)'
    self.pos = 1
    self.line = 1
    self.EOF = -1
    self:nextc()
end

local Dumper = Class()
--- Collects instructions, labels and defines, then writes the encoded
-- words through `writer` (called with one string per instruction).
function Dumper:init(writer, fn)
    self.writer = writer
    self.fn = fn or '(string)'
    self.defines = {}
    self.labels = {}
    self.lines = {}
end

local Parser = Class()
--- Parser that feeds a Dumper writing through `writer`.
function Parser:init(writer, fn)
    self.fn = fn or '(string)'
    self.dumper = Dumper(writer, fn)
end

--- Raise a lexing error tagged with the file name and current line.
function Lexer:error(msg)
    error(string.format('%s:%d: Error: %s', self.fn, self.line, msg), 2)
end

--- Advance to the next character.
-- Updates `ord`/`chr` (current character), `ord2`/`chr2` (lookahead) and
-- `chrchr` (current + lookahead). At end of input `ord` is `self.EOF` and
-- the string fields are empty.
function Lexer:nextc()
    if self.pos > #self.asm then
        self.ord = self.EOF
        self.chr = ''
        self.ord2 = nil
        self.chr2 = ''
        self.chrchr = ''
        return
    end

    -- the line counter moves once we step past a newline
    if self.chr == '\n' then
        self.line = self.line + 1
    end

    self.ord = string.byte(self.asm, self.pos)
    self.pos = self.pos + 1

    -- handle newlines; translate CRLF to LF
    if self.ord == 13 then
        if self.pos <= #self.asm and string.byte(self.asm, self.pos) == 10 then
            self.pos = self.pos + 1
        end
        self.ord = 10
    end

    self.chr = string.char(self.ord)
    if self.pos <= #self.asm then
        self.ord2 = string.byte(self.asm, self.pos)
        self.chr2 = string.char(self.ord2)
        self.chrchr = string.char(self.ord, self.ord2)
    else
        -- no lookahead on the last character; don't leave stale values
        self.ord2 = nil
        self.chr2 = ''
        self.chrchr = self.chr
    end
end

--- Skip characters up to (not including) the next newline or end of input.
function Lexer:skip_to_EOL()
    while self.chr ~= '\n' and self.ord ~= self.EOF do
        self:nextc()
    end
end

--- Append the current character to `self.buff` and advance.
function Lexer:save_next()
    self.buff = self.buff..self.chr
    self:nextc()
end

--- Accumulate characters into `self.buff` while they match `pattern`.
function Lexer:read_chars(pattern)
    while string.find(self.chr, pattern) do
        self:save_next()
    end
end

--- Read a decimal number starting at the current character.
function Lexer:read_number()
    self.buff = ''
    -- the current character is already the first digit; do not skip it
    self:read_chars('%d')
    local num = tonumber(self.buff)
    if not num then self:error('invalid number') end
    return num
end

--- Read a hexadecimal number written as `$FF` or `0xFF`.
function Lexer:read_hex()
    self.buff = ''
    -- "0x" has two prefix characters to skip, "$" only one
    if self.chr ~= '$' then
        self:nextc()
    end
    self:nextc()
    self:read_chars('%x')
    local num = tonumber(self.buff, 16)
    if not num then self:error('invalid hex number') end
    return num
end

--- Read a binary number written as `%1010`.
function Lexer:read_binary()
    self.buff = ''
    self:nextc() -- skip the '%'
    self:read_chars('[01]')
    local num = tonumber(self.buff, 2)
    if not num then self:error('invalid binary number') end
    return num
end

--- Skip a `/* ... */` comment; errors if the input ends first.
function Lexer:skip_block_comment()
    self:nextc()
    self:nextc()
    while true do
        if self.ord == self.EOF then
            self:error('incomplete block comment')
        elseif self.chrchr == '*/' then
            self:nextc()
            self:nextc()
            break
        else
            self:nextc()
        end
    end
end

--- Return the next token as a (type, value) pair.
-- Token types: EOL, EOF, NUM, SEP, DEF, DEREF, DIR, DEFSYM, LABEL, REG,
-- INSTR and LABELSYM. Comments and whitespace are skipped.
function Lexer:lex()
    while true do
        if self.chr == '\n' then
            self:nextc()
            return 'EOL', '\n'
        elseif self.ord == self.EOF then
            return 'EOF', self.EOF
        elseif self.chr == ';' then
            self:skip_to_EOL()
        elseif self.chrchr == '//' then
            self:skip_to_EOL()
        elseif self.chrchr == '/*' then
            self:skip_block_comment()
        elseif self.chr:find('%s') then
            self:nextc()
        elseif self.chr == '$' then
            return 'NUM', self:read_hex()
        elseif self.chr == '%' then
            return 'NUM', self:read_binary()
        elseif self.chr:find('%d') then
            -- TODO: check if cajaasm accepts 0X0
            if self.chr2 == 'x' or self.chr2 == 'X' then
                return 'NUM', self:read_hex()
            end
            return 'NUM', self:read_number()
        elseif self.chr == ',' then
            self:nextc()
            return 'SEP', ','
        elseif self.chr == '[' then
            -- define: [name]: value
            self.buff = ''
            self:nextc()
            self:read_chars('[%w_]')
            if self.chr ~= ']' then
                self:error('invalid define name')
            end
            self:nextc()
            if self.chr ~= ':' then
                self:error('define requires a colon')
            end
            self:nextc()
            return 'DEF', self.buff
        elseif self.chr == '(' then
            -- register dereference: (reg)
            self.buff = ''
            self:nextc()
            self:read_chars('[%w_]')
            if self.chr ~= ')' then
                self:error('invalid register name')
            end
            self:nextc()
            local up = self.buff:upper()
            if not all_registers[up] then
                self:error('not a register')
            end
            return 'DEREF', up
        elseif self.chr == '.' then
            self.buff = ''
            -- step over the dot itself, otherwise nothing is ever read
            self:nextc()
            self:read_chars('[%w]')
            local up = self.buff:upper()
            if not all_directives[up] then
                self:error('not a directive')
            end
            -- the include spellings all map onto a single directive name
            if up == 'INC' or up == 'INCASM' or up == 'INCLUDE' then
                return 'DIR', 'INC'
            end
            return 'DIR', up
        elseif self.chr == '@' then
            self.buff = ''
            self:nextc()
            self:read_chars('[%w_]')
            return 'DEFSYM', self.buff
        elseif self.chr:find('[%a_]') then
            self.buff = ''
            -- now that we know we're looking at an identifier,
            -- we can start matching numbers and dots too.
            self:read_chars('[%w_.]')
            if self.chr == ':' then
                if self.buff:find('%.') then
                    self:error('labels cannot contain dots')
                end
                self:nextc()
                return 'LABEL', self.buff
            end
            local up = self.buff:upper()
            if up == 'HEX' then
                return 'DIR', up
            elseif all_registers[up] then
                return 'REG', up
            elseif all_instructions[up] then
                -- handler keys use '_' in place of '.'; the parentheses
                -- drop gsub's match count
                return 'INSTR', (up:gsub('%.', '_'))
            else
                if self.buff:find('%.') then
                    self:error('labels cannot contain dots')
                end
                return 'LABELSYM', self.buff
            end
        elseif self.chr == ']' then
            self:error('unmatched closing bracket')
        elseif self.chr == ')' then
            self:error('unmatched closing parenthesis')
        else
            self:error('unknown character or control character')
        end
    end
end

--- Raise a parse error tagged with the file name and current line.
function Parser:error(msg)
    error(string.format('%s:%d: Error: %s', self.fn, self.line, msg), 2)
end

--- Fetch the next token into `self.tt` / `self.tok` and return both.
function Parser:advance()
    self.tt, self.tok = self.lexer:lex()
    self.line = self.lexer.line
    return self.tt, self.tok
end

--- True when the current token ends a line (EOL or EOF).
function Parser:is_EOL()
    return self.tt == 'EOL' or self.tt == 'EOF'
end

--- Consume an end-of-line token, or raise an error.
function Parser:expect_EOL()
    if self:is_EOL() then
        self:advance()
        return
    end
    self:error('expected end of line')
end

--- Consume a comma if one is present; returns true when it did.
function Parser:optional_comma()
    if self.tt == 'SEP' and self.tok == ',' then
        self:advance()
        return true
    end
end

--- Consume a NUM token and return its value.
function Parser:number()
    if self.tt ~= 'NUM' then
        self:error('expected number')
    end
    local value = self.tok
    self:advance()
    return value
end

--- Parse the arguments of the directive in the current token and hand
-- them to the dumper.
function Parser:directive()
    local name = self.tok
    self:advance()
    if name == 'ORG' then
        self.dumper:add_directive(name, self:number())
        self:expect_EOL()
    elseif name == 'ALIGN' or name == 'SKIP' then
        local size = self:number()
        if self:optional_comma() then
            self.dumper:add_directive(name, size, self:number())
        else
            self.dumper:add_directive(name, size)
        end
        self:expect_EOL()
    elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then
        self.dumper:add_directive(name, self:number())
        while not self:is_EOL() do
            -- only a separating comma may be skipped here; advancing
            -- unconditionally would drop values
            self:optional_comma()
            self.dumper:add_directive(name, self:number())
        end
        self:expect_EOL()
    elseif name == 'HEX' then
        self:error('unimplemented')
    elseif name == 'INC' or name == 'INCBIN' then
        self:error('unimplemented')
    elseif name == 'FLOAT' or name == 'ASCII' or name == 'ASCIIZ' then
        self:error('unimplemented')
    else
        self:error('unknown directive')
    end
end

--- Consume a register token and return its upper-case name.
-- `t` is the register table the name must belong to (defaults to the
-- general-purpose registers).
function Parser:register(t)
    t = t or registers
    if self.tt ~= 'REG' then
        -- NUM tokens carry numbers, so compare against the number 0
        if self.tt == 'NUM' and self.tok == 0 then
            -- i don't think cajeasm actually does this
            self.tt = 'REG'
            self.tok = 'R0'
        else
            self:error('expected register')
        end
    end
    local reg = self.tok
    if not t[reg] then
        self:error('wrong type of register')
    end
    self:advance()
    return reg
end

--- Consume a DEREF token, e.g. `(sp)`, and return the register name.
function Parser:deref()
    if self.tt ~= 'DEREF' then
        self:error('expected register to dereference')
    end
    local reg = self.tok
    self:advance()
    return reg
end

--- Consume a constant (number, define or label) and return it as a
-- `{type, value}` pair. With `relative` set, label references are
-- retagged LABELREL so the dumper resolves them as branch offsets.
function Parser:const(relative)
    if self.tt ~= 'NUM' and self.tt ~= 'DEFSYM' and self.tt ~= 'LABELSYM' then
        self:error('expected constant')
    end
    if relative and self.tt == 'LABELSYM' then
        self.tt = 'LABELREL'
    end
    local t = {self.tt, self.tok}
    self:advance()
    return t
end

--- Parse instruction arguments as described by `informat`.
-- Each character names one argument: d/s/t general registers, D/S/T FPU
-- registers, o offset, r relative offset, i raw immediate, I jump index,
-- j lower-half immediate, J upper-half immediate, k negated immediate,
-- b base register in parentheses. Returns a table of parsed arguments.
function Parser:format_in(informat)
    local args = {}
    for i=1,#informat do
        local c = informat:sub(i, i)
        local c2 = informat:sub(i + 1, i + 1)
        if c == 'd' and not args.rd then
            args.rd = self:register()
        elseif c == 's' and not args.rs then
            args.rs = self:register()
        elseif c == 't' and not args.rt then
            args.rt = self:register()
        elseif c == 'D' and not args.fd then
            args.fd = self:register(fpu_registers)
        elseif c == 'S' and not args.fs then
            args.fs = self:register(fpu_registers)
        elseif c == 'T' and not args.ft then
            args.ft = self:register(fpu_registers)
        elseif c == 'o' and not args.offset then
            args.offset = {'LOWER', self:const()}
        elseif c == 'r' and not args.offset then
            args.offset = {'LOWER', self:const('relative')}
        elseif c == 'i' and not args.immediate then
            args.immediate = self:const()
        elseif c == 'I' and not args.index then
            args.index = {'INDEX', self:const()}
        elseif c == 'j' and not args.immediate then
            args.immediate = {'LOWER', self:const()}
        elseif c == 'J' and not args.immediate then
            args.immediate = {'UPPER', self:const()}
        elseif c == 'k' and not args.immediate then
            args.immediate = {'NEGATE', self:const()}
        elseif c == 'b' and not args.base then
            args.base = self:deref()
        else
            error('Internal Error: invalid input formatting string', 1)
        end
        -- every argument kind except 'b' may be preceded by a comma;
        -- 'b' directly follows its offset, as in `4(sp)`
        if c2:find('[dstDSToiIjJkr]') then
            self:optional_comma()
        end
    end
    return args
end

--- Arrange parsed arguments according to `outformat` and queue the
-- instruction on the dumper.
-- `first` is the 6-bit opcode; `const` and `formatconst` fill the 'C' and
-- 'F' slots. The length of `outformat` selects the field layout.
function Parser:format_out(outformat, first, args, const, formatconst)
    local lookup = {
        [1]=self.dumper.add_instruction_26,
        [3]=self.dumper.add_instruction_5_5_16,
        [4]=self.dumper.add_instruction_5_5_5_11,
        [5]=self.dumper.add_instruction_5_5_5_5_6,
    }
    local out = {}
    for i=1,#outformat do
        local c = outformat:sub(i, i)
        if c == 'd' then
            out[#out+1] = args.rd
        elseif c == 's' then
            out[#out+1] = args.rs
        elseif c == 't' then
            out[#out+1] = args.rt
        elseif c == 'D' then
            out[#out+1] = args.fd
        elseif c == 'S' then
            out[#out+1] = args.fs
        elseif c == 'T' then
            out[#out+1] = args.ft
        elseif c == 'o' then
            out[#out+1] = args.offset
        elseif c == 'i' then
            out[#out+1] = args.immediate
        elseif c == 'I' then
            out[#out+1] = args.index
        elseif c == 'b' then
            out[#out+1] = args.base
        elseif c == '0' then
            out[#out+1] = 0
        elseif c == 'C' then
            out[#out+1] = const
        elseif c == 'F' then
            out[#out+1] = formatconst
        end
    end
    local f = lookup[#outformat]
    if f == nil then
        error('Internal Error: invalid output formatting string', 1)
    end
    f(self.dumper, first, out[1], out[2], out[3], out[4], out[5])
end

--- Parse one instruction line using its entry in instruction_handlers.
function Parser:instruction()
    local name = self.tok
    self:advance()
    local h = instruction_handlers[name]

    if h == nil then
        self:error('undefined instruction')
    elseif h[2] ~= nil then
        -- get format from strings
        local args = self:format_in(h[2])
        self:format_out(h[3], h[1], args, h[4], h[5])
    else
        self:error('unimplemented instruction')
    end
    self:expect_EOL()
end

--- Parse the assembly text `asm` and dump the result through the writer.
function Parser:parse(asm)
    self.asm = asm
    -- share the file name so lexer errors are tagged like parser errors
    self.lexer = Lexer(asm, self.fn)

    self:advance()
    while self.tt ~= 'EOF' do
        if self.tt == 'EOL' then
            -- empty line
            self:advance()
        elseif self.tt == 'DEF' then
            local name = self.tok
            self:advance()
            self.dumper:add_define(name, self:number())
        elseif self.tt == 'DIR' then
            self:directive()
        elseif self.tt == 'LABEL' then
            self.dumper:add_label(self.tok)
            self:advance()
        elseif self.tt == 'INSTR' then
            self:instruction()
        else
            self:error('unexpected token (unknown instruction?)')
        end
    end
    return self.dumper:dump()
end

--- Raise a dumper error.
-- `self.line` is the index of the instruction being dumped; it is unset
-- until dump() runs, so 0 is reported before that.
function Dumper:error(msg)
    -- TODO: sometimes internal error, sometimes not.
    -- also, we should pass line numbers down to add_instruction.
    error(string.format('%s:%d: Dumper Error: %s', self.fn, self.line or 0, msg), 2)
end

--- Queue one instruction record for dump().
function Dumper:push(t)
    --print(t.data)
    table.insert(self.lines, t)
end

--- Queue an instruction made of the opcode and one 26-bit field.
function Dumper:add_instruction_26(i, a)
    local t = {}
    t.sizes = {26}
    t.data = {i, a}
    self:push(t)
end

--- Queue an instruction with 5, 5 and 16-bit fields after the opcode.
function Dumper:add_instruction_5_5_16(i, a, b, c)
    local t = {}
    t.sizes = {5, 5, 16}
    t.data = {i, a, b, c}
    self:push(t)
end

--- Queue an instruction with 5, 5, 5 and 11-bit fields after the opcode.
function Dumper:add_instruction_5_5_5_11(i, a, b, c, d)
    local t = {}
    t.sizes = {5, 5, 5, 11}
    t.data = {i, a, b, c, d}
    self:push(t)
end

--- Queue an instruction with 5, 5, 5, 5 and 6-bit fields after the opcode.
function Dumper:add_instruction_5_5_5_5_6(i, a, b, c, d, e)
    local t = {}
    t.sizes = {5, 5, 5, 5, 6}
    t.data = {i, a, b, c, d, e}
    self:push(t)
end

--- Record the value of a define so `@name` can be resolved later.
function Dumper:add_define(name, number)
    self.defines[name] = number
end

--- Record a label as the 1-based index of the next queued instruction.
function Dumper:add_label(name)
    self.labels[name] = #self.lines + 1
end

--- Directives are not implemented yet; always raises an error.
function Dumper:add_directive(...)
    self:error('unimplemented directive')
end

--- Write one instruction as eight hex digits from its two halfwords.
function Dumper:print(uw, lw)
    self.writer(('%04X%04X'):format(uw, lw))
end

--- Resolve a `{type, value}` constant to a number.
function Dumper:desym(tok)
    if type(tok[2]) == 'number' then
        return tok[2]
    elseif all_registers[tok] then
        return registers[tok] or fpu_registers[tok]
    elseif tok[1] == 'LABELSYM' then
        --print('(label)', tok[2])
        local target = self.labels[tok[2]]
        if target == nil then
            self:error('undefined label')
        end
        -- NOTE(review): labels hold 1-based instruction indices, so this
        -- is one word past the label's byte offset -- confirm the intent.
        return target*4
    elseif tok[1] == 'LABELREL' then
        local target = self.labels[tok[2]]
        if target == nil then
            self:error('undefined label')
        end
        -- branch offsets count words from the delay slot, which is the
        -- instruction after the current one (self.line)
        local rel = target - 1 - self.line
        -- signed 16-bit range
        if rel >= 0x8000 or rel < -0x8000 then
            self:error('branch too far')
        end
        return (0x10000 + rel) % 0x10000
    elseif tok[1] == 'DEFSYM' then
        --print('(define)')
        local val = self.defines[tok[2]]
        if val == nil then
            self:error('unknown define')
        end
        return val
    end
    --print(tok)
    self:error('failed to desym')
end

--- Convert an argument queued by the parser into a plain number.
-- Accepts numbers, register names, `{type, value}` constants, and
-- `{modifier, constant}` pairs where modifier is UPPER, LOWER, NEGATE or
-- INDEX.
function Dumper:toval(tok)
    if tok == nil then
        self:error('nil value')
    elseif type(tok) == 'number' then
        return tok
    elseif all_registers[tok] then
        return registers[tok] or fpu_registers[tok]
    end
    if type(tok) == 'table' then
        if #tok ~= 2 then
            --print('toval', tok)
            self:error('invalid token')
        end
        if tok[1] == 'UPPER' then
            local val = self:desym(tok[2])
            -- values that already fit in 16 bits are used as given;
            -- wider values contribute their bits 16..31
            if val >= 0x10000 then
                val = math.floor(val/0x10000) % 0x10000
            end
            return val
        elseif tok[1] == 'LOWER' then
            local val = self:desym(tok[2])
            return val % 0x10000
        elseif tok[1] == 'NEGATE' then
            local val = -self:desym(tok[2])
            return val % 0x10000
        elseif tok[1] == 'INDEX' then
            -- NOTE(review): jump targets are usually encoded as the
            -- address divided by 4; this multiplies instead -- confirm.
            local val
            if type(tok[2]) == 'table' and tok[2][1] == 'LABELSYM' then
                -- don't multiply by 4 twice
                val = self:desym(tok[2])
            else
                val = self:desym(tok[2])*4
            end
            --print('(index)', val)
            return val
        else
            return self:desym(tok)
        end
    end
    --print('toval', tok)
    self:error('invalid value')
end

--- Check that `n` fits in an unsigned field of `bits` bits.
function Dumper:validate(n, bits)
    local max = 2^bits
    if n == nil then
        self:error('value is nil')
    end
    -- the largest value a field can hold is 2^bits - 1
    if n >= max or n < 0 then
        --print(('n %08X'):format(math.abs(n)))
        self:error('value out of range')
    end
end

--- Resolve `tok` to a number and check it against the field width.
function Dumper:valvar(tok, bits)
    local val = self:toval(tok)
    self:validate(val, bits)
    return val
end

--- Encode every queued instruction and write it out.
-- Each instruction is built as an upper and a lower 16-bit halfword; the
-- opcode occupies the top 6 bits of the upper one.
function Dumper:dump()
    for index, t in ipairs(self.lines) do
        self.line = index
        local uw = 0
        local lw = 0
        local val = nil

        local opcode = t.data[1]
        uw = uw + opcode*0x400

        if #t.sizes == 1 then
            if t.sizes[1] == 26 then
                val = self:valvar(t.data[2], 26)
                uw = uw + math.floor(val/0x10000)
                lw = lw + val % 0x10000
            else
                self:error('bad 1-size')
            end
        elseif #t.sizes == 3 then
            if t.sizes[1] == 5 and t.sizes[2] == 5 and t.sizes[3] == 16 then
                val = self:valvar(t.data[2], 5)
                uw = uw + val*0x20
                val = self:valvar(t.data[3], 5)
                uw = uw + val
                val = self:valvar(t.data[4], 16)
                lw = lw + val
            else
                self:error('bad 3-size')
            end
        elseif #t.sizes == 4 then
            if t.sizes[1] == 5 and t.sizes[2] == 5 and t.sizes[3] == 5 and t.sizes[4] == 11 then
                val = self:valvar(t.data[2], 5)
                uw = uw + val*0x20
                val = self:valvar(t.data[3], 5)
                uw = uw + val
                val = self:valvar(t.data[4], 5)
                lw = lw + val*0x800
                val = self:valvar(t.data[5], 11)
                lw = lw + val
            else
                self:error('bad 4-size')
            end
        elseif #t.sizes == 5 then
            if t.sizes[1] == 5 and t.sizes[2] == 5 and t.sizes[3] == 5 and t.sizes[4] == 5 and t.sizes[5] == 6 then
                val = self:valvar(t.data[2], 5)
                uw = uw + val*0x20
                val = self:valvar(t.data[3], 5)
                uw = uw + val
                val = self:valvar(t.data[4], 5)
                lw = lw + val*0x800
                val = self:valvar(t.data[5], 5)
                lw = lw + val*0x40
                val = self:valvar(t.data[6], 6)
                lw = lw + val
            else
                self:error('bad 5-size')
            end
        else
            self:error('unknown n-size')
        end
        self:print(uw, lw)
    end
end

--- Assemble MIPS R4300i assembly code.
-- If `fn_or_asm` contains a newline it is treated as assembly text,
-- otherwise as the name of a file to load. `writer` is called with one
-- string per assembled instruction and defaults to io.write.
-- Returns the error message on error, or nil on success.
local function assemble(fn_or_asm, writer)
    fn_or_asm = tostring(fn_or_asm)
    writer = writer or io.write

    local function main()
        local asm = ''
        local fn = nil
        if fn_or_asm:find('[\r\n]') then
            asm = fn_or_asm
        else
            fn = fn_or_asm
            local f = io.open(fn, 'r')
            if not f then
                error('could not read assembly file', 1)
            end
            asm = f:read('*a')
            f:close()
        end

        local parser = Parser(writer, fn)
        return parser:parse(asm)
    end

    local ok, err = pcall(main)
    if not ok then
        return err
    end
    return nil
end

-- The helper tables live on the metatable, as before; `__index` points
-- back at the metatable so they are also reachable as assembler.Lexer etc.
local exports = {
    __call = function(_, ...)
        return assemble(...)
    end,

    Lexer = Lexer,
    Parser = Parser,
    Dumper = Dumper,

    registers = registers,
    fpu_registers = fpu_registers,
    all_registers = all_registers,
    all_instructions = all_instructions,
    all_directives = all_directives,
}
exports.__index = exports

return setmetatable(assembler, exports)