mirror of
https://github.com/notwa/lips
synced 2024-11-14 18:19:03 -08:00
413 lines
12 KiB
Lua
413 lines
12 KiB
Lua
|
local insert = table.insert
|
||
|
local format = string.format
|
||
|
|
||
|
local data = require "data"
|
||
|
local overrides = require "overrides"
|
||
|
local Dumper = require "Dumper"
|
||
|
local Lexer = require "Lexer"
|
||
|
|
||
|
local Parser = require("Class")()
|
||
|
function Parser:init(writer, fn, options)
|
||
|
self.fn = fn or '(string)'
|
||
|
self.main_fn = self.fn
|
||
|
self.options = options or {}
|
||
|
self.dumper = Dumper(writer, fn, options)
|
||
|
self.defines = {}
|
||
|
end
|
||
|
|
||
|
function Parser:error(msg)
|
||
|
error(format('%s:%d: Error: %s', self.fn, self.line, msg), 2)
|
||
|
end
|
||
|
|
||
|
function Parser:advance()
|
||
|
self.i = self.i + 1
|
||
|
local t = self.tokens[self.i]
|
||
|
self.tt = t.tt
|
||
|
self.tok = t.tok
|
||
|
self.fn = t.fn
|
||
|
self.line = t.line
|
||
|
return t.tt, t.tok
|
||
|
end
|
||
|
|
||
|
function Parser:is_EOL()
|
||
|
return self.tt == 'EOL' or self.tt == 'EOF'
|
||
|
end
|
||
|
|
||
|
function Parser:expect_EOL()
|
||
|
if self:is_EOL() then
|
||
|
self:advance()
|
||
|
return
|
||
|
end
|
||
|
self:error('expected end of line')
|
||
|
end
|
||
|
|
||
|
function Parser:optional_comma()
|
||
|
if self.tt == 'SEP' and self.tok == ',' then
|
||
|
self:advance()
|
||
|
return true
|
||
|
end
|
||
|
end
|
||
|
|
||
|
function Parser:number()
|
||
|
if self.tt ~= 'NUM' then
|
||
|
self:error('expected number')
|
||
|
end
|
||
|
local value = self.tok
|
||
|
self:advance()
|
||
|
return value
|
||
|
end
|
||
|
|
||
|
function Parser:directive()
|
||
|
local name = self.tok
|
||
|
self:advance()
|
||
|
local line = self.line
|
||
|
if name == 'ORG' then
|
||
|
self.dumper:add_directive(line, name, self:number())
|
||
|
elseif name == 'ALIGN' or name == 'SKIP' then
|
||
|
if self:is_EOL() and name == 'ALIGN' then
|
||
|
self.dumper:add_directive(line, name, 0)
|
||
|
else
|
||
|
local size = self:number()
|
||
|
if self:is_EOL() then
|
||
|
self.dumper:add_directive(line, name, size)
|
||
|
else
|
||
|
self:optional_comma()
|
||
|
self.dumper:add_directive(line, name, size, self:number())
|
||
|
end
|
||
|
self:expect_EOL()
|
||
|
end
|
||
|
elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then
|
||
|
self.dumper:add_directive(line, name, self:number())
|
||
|
while not self:is_EOL() do
|
||
|
self:advance()
|
||
|
self:optional_comma()
|
||
|
self.dumper:add_directive(line, name, self:number())
|
||
|
end
|
||
|
self:expect_EOL()
|
||
|
elseif name == 'HEX' then
|
||
|
self:error('unimplemented')
|
||
|
elseif name == 'INC' then
|
||
|
-- noop
|
||
|
elseif name == 'INCBIN' then
|
||
|
self:error('unimplemented')
|
||
|
elseif name == 'FLOAT' or name == 'ASCII' or name == 'ASCIIZ' then
|
||
|
self:error('unimplemented')
|
||
|
else
|
||
|
self:error('unknown directive')
|
||
|
end
|
||
|
end
|
||
|
|
||
|
function Parser:register(t)
|
||
|
t = t or data.registers
|
||
|
if self.tt ~= 'REG' then
|
||
|
self:error('expected register')
|
||
|
end
|
||
|
local reg = self.tok
|
||
|
if not t[reg] then
|
||
|
self:error('wrong type of register')
|
||
|
end
|
||
|
self:advance()
|
||
|
return reg
|
||
|
end
|
||
|
|
||
|
function Parser:deref()
|
||
|
if self.tt ~= 'DEREF' then
|
||
|
self:error('expected register to dereference')
|
||
|
end
|
||
|
local reg = self.tok
|
||
|
self:advance()
|
||
|
return reg
|
||
|
end
|
||
|
|
||
|
function Parser:const(relative, no_label)
|
||
|
if self.tt ~= 'NUM' and self.tt ~= 'LABELSYM' then
|
||
|
self:error('expected constant')
|
||
|
end
|
||
|
if no_label and self.tt == 'LABELSYM' then
|
||
|
self:error('labels are not allowed here')
|
||
|
end
|
||
|
if relative and self.tt == 'LABELSYM' then
|
||
|
self.tt = 'LABELREL'
|
||
|
end
|
||
|
local t = {self.tt, self.tok}
|
||
|
self:advance()
|
||
|
return t
|
||
|
end
|
||
|
|
||
|
function Parser:format_in(informat)
|
||
|
local args = {}
|
||
|
for i=1,#informat do
|
||
|
local c = informat:sub(i, i)
|
||
|
local c2 = informat:sub(i + 1, i + 1)
|
||
|
if c == 'd' and not args.rd then
|
||
|
args.rd = self:register()
|
||
|
elseif c == 's' and not args.rs then
|
||
|
args.rs = self:register()
|
||
|
elseif c == 't' and not args.rt then
|
||
|
args.rt = self:register()
|
||
|
elseif c == 'D' and not args.fd then
|
||
|
args.fd = self:register(fpu_registers)
|
||
|
elseif c == 'S' and not args.fs then
|
||
|
args.fs = self:register(fpu_registers)
|
||
|
elseif c == 'T' and not args.ft then
|
||
|
args.ft = self:register(fpu_registers)
|
||
|
elseif c == 'X' and not args.rd then
|
||
|
args.rd = self:register(sys_registers)
|
||
|
elseif c == 'Y' and not args.rs then
|
||
|
args.rs = self:register(sys_registers)
|
||
|
elseif c == 'Z' and not args.rt then
|
||
|
args.rt = self:register(sys_registers)
|
||
|
elseif c == 'o' and not args.offset then
|
||
|
args.offset = {'SIGNED', self:const()}
|
||
|
elseif c == 'r' and not args.offset then
|
||
|
args.offset = {'SIGNED', self:const('relative')}
|
||
|
elseif c == 'i' and not args.immediate then
|
||
|
args.immediate = self:const(nil, 'no label')
|
||
|
elseif c == 'I' and not args.index then
|
||
|
args.index = {'INDEX', self:const()}
|
||
|
elseif c == 'k' and not args.immediate then
|
||
|
args.immediate = {'NEGATE', self:const(nil, 'no label')}
|
||
|
elseif c == 'K' and not args.immediate then
|
||
|
args.immediate = {'SIGNED', self:const(nil, 'no label')}
|
||
|
elseif c == 'b' and not args.base then
|
||
|
args.base = self:deref()
|
||
|
else
|
||
|
error('Internal Error: invalid input formatting string', 1)
|
||
|
end
|
||
|
if c2:find('[dstDSTorIikKXYZ]') then
|
||
|
self:optional_comma()
|
||
|
end
|
||
|
end
|
||
|
return args
|
||
|
end
|
||
|
|
||
|
function Parser:format_out_raw(outformat, first, args, const, formatconst)
|
||
|
local lookup = {
|
||
|
[1]=self.dumper.add_instruction_j,
|
||
|
[3]=self.dumper.add_instruction_i,
|
||
|
[5]=self.dumper.add_instruction_r,
|
||
|
}
|
||
|
local out = {}
|
||
|
for i=1,#outformat do
|
||
|
local c = outformat:sub(i, i)
|
||
|
if c == 'd' then
|
||
|
out[#out+1] = args.rd
|
||
|
elseif c == 's' then
|
||
|
out[#out+1] = args.rs
|
||
|
elseif c == 't' then
|
||
|
out[#out+1] = args.rt
|
||
|
elseif c == 'D' then
|
||
|
out[#out+1] = args.fd
|
||
|
elseif c == 'S' then
|
||
|
out[#out+1] = args.fs
|
||
|
elseif c == 'T' then
|
||
|
out[#out+1] = args.ft
|
||
|
elseif c == 'o' then
|
||
|
out[#out+1] = args.offset
|
||
|
elseif c == 'i' then
|
||
|
out[#out+1] = args.immediate
|
||
|
elseif c == 'I' then
|
||
|
out[#out+1] = args.index
|
||
|
elseif c == 'b' then
|
||
|
out[#out+1] = args.base
|
||
|
elseif c == '0' then
|
||
|
out[#out+1] = 0
|
||
|
elseif c == 'C' then
|
||
|
out[#out+1] = const
|
||
|
elseif c == 'F' then
|
||
|
out[#out+1] = formatconst
|
||
|
end
|
||
|
end
|
||
|
local f = lookup[#outformat]
|
||
|
if f == nil then
|
||
|
error('Internal Error: invalid output formatting string', 1)
|
||
|
end
|
||
|
f(self.dumper, self.line, first, out[1], out[2], out[3], out[4], out[5])
|
||
|
end
|
||
|
|
||
|
function Parser:format_out(t, args)
|
||
|
self:format_out_raw(t[3], t[1], args, t[4], t[5])
|
||
|
end
|
||
|
|
||
|
function Parser:instruction()
|
||
|
local name = self.tok
|
||
|
local h = data.instructions[name]
|
||
|
self:advance()
|
||
|
|
||
|
-- FIXME: errors thrown here probably have the wrong line number (+1)
|
||
|
|
||
|
if h == nil then
|
||
|
self:error('undefined instruction')
|
||
|
elseif overrides[name] then
|
||
|
overrides[name](self, name)
|
||
|
elseif h[2] == 'tob' then -- or h[2] == 'Tob' then
|
||
|
local lui = data.instructions['LUI']
|
||
|
local args = {}
|
||
|
args.rt = self:register()
|
||
|
self:optional_comma()
|
||
|
local o = self:const()
|
||
|
local is_label = o[1] == 'LABELSYM'
|
||
|
if self:is_EOL() then
|
||
|
local lui_args = {}
|
||
|
lui_args.immediate = {'UPPEROFF', o}
|
||
|
lui_args.rt = 'AT'
|
||
|
self:format_out(lui, lui_args)
|
||
|
args.offset = {'LOWER', o}
|
||
|
args.base = 'AT'
|
||
|
else
|
||
|
if is_label then
|
||
|
self:error('labels cannot be used as offsets')
|
||
|
end
|
||
|
args.offset = {'SIGNED', o}
|
||
|
self:optional_comma()
|
||
|
args.base = self:deref()
|
||
|
end
|
||
|
self:format_out(h, args)
|
||
|
elseif h[2] ~= nil then
|
||
|
local args = self:format_in(h[2])
|
||
|
self:format_out(h, args)
|
||
|
else
|
||
|
self:error('unimplemented instruction')
|
||
|
end
|
||
|
self:expect_EOL()
|
||
|
end
|
||
|
|
||
|
function Parser:tokenize(asm)
|
||
|
self.tokens = {}
|
||
|
self.i = 0
|
||
|
|
||
|
local routine = coroutine.create(function()
|
||
|
local lexer = Lexer(asm, self.main_fn, self.options)
|
||
|
lexer:lex(coroutine.yield)
|
||
|
end)
|
||
|
|
||
|
local function lex()
|
||
|
local t = {}
|
||
|
local ok, a, b, c, d = coroutine.resume(routine)
|
||
|
if not ok then
|
||
|
a = a or 'Internal Error: lexer coroutine has stopped'
|
||
|
error(a)
|
||
|
end
|
||
|
t.tt = a
|
||
|
t.tok = b
|
||
|
t.fn = c
|
||
|
t.line = d
|
||
|
insert(self.tokens, t)
|
||
|
return t.tt, t.tok, t.fn, t.line
|
||
|
end
|
||
|
|
||
|
-- first pass: collect tokens, constants, and relative labels.
|
||
|
-- can't do more because instruction size can depend on a constant's size
|
||
|
-- and labels depend on instruction size.
|
||
|
-- note however, instruction size does not depend on label size.
|
||
|
-- this would cause a recursive problem to solve,
|
||
|
-- which is too much for our simple assembler.
|
||
|
local plus_labels = {} -- constructed forwards
|
||
|
local minus_labels = {} -- constructed backwards
|
||
|
while true do
|
||
|
local tt, tok, fn, line = lex()
|
||
|
self.fn = fn
|
||
|
self.line = line
|
||
|
if tt == 'DEF' then
|
||
|
local tt2, tok2 = lex()
|
||
|
if tt2 ~= 'NUM' then
|
||
|
self:error('expected number for define')
|
||
|
end
|
||
|
self.defines[tok] = tok2
|
||
|
elseif tt == 'RELLABEL' then
|
||
|
if tok == '+' then
|
||
|
insert(plus_labels, #self.tokens)
|
||
|
elseif tok == '-' then
|
||
|
insert(minus_labels, 1, #self.tokens)
|
||
|
else
|
||
|
error('Internal Error: unexpected token for relative label', 1)
|
||
|
end
|
||
|
elseif tt == 'EOL' then
|
||
|
-- noop
|
||
|
elseif tt == 'EOF' then
|
||
|
if fn == self.main_fn then
|
||
|
break
|
||
|
end
|
||
|
elseif tt == nil then
|
||
|
error('Internal Error: missing token', 1)
|
||
|
end
|
||
|
end
|
||
|
|
||
|
-- resolve defines and relative labels
|
||
|
for i, t in ipairs(self.tokens) do
|
||
|
self.fn = t.fn
|
||
|
self.line = t.line
|
||
|
if t.tt == 'DEFSYM' then
|
||
|
t.tt = 'NUM'
|
||
|
t.tok = self.defines[t.tok]
|
||
|
if t.tok == nil then
|
||
|
self:error('undefined define') -- uhhh nice wording
|
||
|
end
|
||
|
elseif t.tt == 'RELLABEL' then
|
||
|
t.tt = 'LABEL'
|
||
|
-- exploits the fact that user labels can't begin with a number
|
||
|
t.tok = tostring(i)
|
||
|
elseif t.tt == 'RELLABELSYM' then
|
||
|
t.tt = 'LABELSYM'
|
||
|
local rel = t.tok
|
||
|
local seen = 0
|
||
|
-- TODO: don't iterate over *every* label, just the ones nearby
|
||
|
if rel > 0 then
|
||
|
for _, label_i in ipairs(plus_labels) do
|
||
|
if label_i > i then
|
||
|
seen = seen + 1
|
||
|
if seen == rel then
|
||
|
t.tok = tostring(label_i)
|
||
|
break
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
else
|
||
|
for _, label_i in ipairs(minus_labels) do
|
||
|
if label_i < i then
|
||
|
seen = seen - 1
|
||
|
if seen == rel then
|
||
|
t.tok = tostring(label_i)
|
||
|
break
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
if seen ~= rel then
|
||
|
self:error('could not find appropriate relative label')
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
end
|
||
|
|
||
|
function Parser:parse(asm)
|
||
|
self:tokenize(asm)
|
||
|
self:advance()
|
||
|
while true do
|
||
|
if self.tt == 'EOF' then
|
||
|
if self.fn == self.main_fn then
|
||
|
break
|
||
|
end
|
||
|
self:advance()
|
||
|
elseif self.tt == 'EOL' then
|
||
|
-- empty line
|
||
|
self:advance()
|
||
|
elseif self.tt == 'DEF' then
|
||
|
self:advance()
|
||
|
self:advance()
|
||
|
elseif self.tt == 'DIR' then
|
||
|
self:directive()
|
||
|
elseif self.tt == 'LABEL' then
|
||
|
self.dumper:add_label(self.tok)
|
||
|
self:advance()
|
||
|
elseif self.tt == 'INSTR' then
|
||
|
self:instruction()
|
||
|
else
|
||
|
self:error('unexpected token (unknown instruction?)')
|
||
|
end
|
||
|
end
|
||
|
return self.dumper:dump()
|
||
|
end
|
||
|
|
||
|
return Parser
|