From 8f4c4a41b98b2ad36135d42d1eeede61dc0ce0e9 Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Thu, 21 Apr 2016 13:04:49 -0700 Subject: [PATCH] update lips --- Lua/lib/lips/Base.lua | 17 ++ Lua/lib/lips/Class.lua | 14 -- Lua/lib/lips/Collector.lua | 241 ++++++++++++++++++ Lua/lib/lips/Dumper.lua | 500 ++++++++++++++++++++++--------------- Lua/lib/lips/Lexer.lua | 56 ++++- Lua/lib/lips/Muncher.lua | 26 +- Lua/lib/lips/Parser.lua | 317 +++++------------------ Lua/lib/lips/Preproc.lua | 285 +++++++++++++-------- Lua/lib/lips/Reader.lua | 93 +++++++ Lua/lib/lips/Statement.lua | 53 ++++ Lua/lib/lips/Token.lua | 61 ++++- Lua/lib/lips/data.lua | 2 +- Lua/lib/lips/init.lua | 16 +- Lua/lib/lips/overrides.lua | 376 ++++++++++++---------------- Lua/lib/lips/util.lua | 60 +++-- 15 files changed, 1267 insertions(+), 850 deletions(-) create mode 100644 Lua/lib/lips/Base.lua delete mode 100644 Lua/lib/lips/Class.lua create mode 100644 Lua/lib/lips/Collector.lua create mode 100644 Lua/lib/lips/Reader.lua create mode 100644 Lua/lib/lips/Statement.lua diff --git a/Lua/lib/lips/Base.lua b/Lua/lib/lips/Base.lua new file mode 100644 index 0000000..83acf74 --- /dev/null +++ b/Lua/lib/lips/Base.lua @@ -0,0 +1,17 @@ +-- mostly just semantics over knife.base +-- https://github.com/airstruck/knife/blob/master/knife/base.lua +return { + extend = function(self, subtype) + subtype = subtype or {} + local meta = { __index = subtype } + return setmetatable(subtype, { + __index = self, + __call = function(self, ...) + local obj = setmetatable({}, meta) + return obj, obj:init(...) + end + }) + end, + + init = function() end, +} diff --git a/Lua/lib/lips/Class.lua b/Lua/lib/lips/Class.lua deleted file mode 100644 index f03bef3..0000000 --- a/Lua/lib/lips/Class.lua +++ /dev/null @@ -1,14 +0,0 @@ -return function(inherit) - local class = {} - local mt_obj = {__index = class} - local mt_class = { - __call = function(self, ...) - local obj = setmetatable({}, mt_obj) - obj:init(...) - return obj - end, - __index = inherit, - } - - return setmetatable(class, mt_class) -end diff --git a/Lua/lib/lips/Collector.lua b/Lua/lib/lips/Collector.lua new file mode 100644 index 0000000..3bd338d --- /dev/null +++ b/Lua/lib/lips/Collector.lua @@ -0,0 +1,241 @@ +local insert = table.insert + +local path = string.gsub(..., "[^.]+$", "") +local Token = require(path.."Token") +local Statement = require(path.."Statement") +local Muncher = require(path.."Muncher") + +local arg_types = { -- for instructions + NUM = true, + REG = true, + VARSYM = true, + LABELSYM = true, + RELLABELSYM = true, +} + +local Collector = Muncher:extend() +function Collector:init(options) + self.options = options or {} +end + +function Collector:statement(...) + local s = Statement(self.fn, self.line, ...) + return s +end + +function Collector:push_data(datum, size) + --[[ pseudo-example: + Statement{type='!DATA', + {tt='BYTES', tok={0, 1, 2}}, + {tt='HALFWORDS', tok={3, 4, 5}}, + {tt='WORDS', tok={6, 7, 8}}, + {tt='LABEL', tok='myLabel'}, + } + --]] + + -- TODO: consider not scrunching data statements, just their tokens + + local last_statement = self.statements[#self.statements] + local s + if last_statement and last_statement.type == '!DATA' then + s = last_statement + else + s = self:statement('!DATA') + insert(self.statements, s) + end + + if type(datum) == 'string' and size == 'WORD' then + -- labels will be assembled to words + insert(s, Token('LABEL', datum)) + return + end + + if size ~= 'BYTE' and size ~= 'HALFWORD' and size ~= 'WORD' then + error('Internal Error: unknown data size argument') + end + + local sizes = size..'S' + + local last_token = s[#s] + local t + if last_token and last_token.tt == sizes then + t = last_token + else + t = self:token(sizes, {}) + insert(s, t) + s:validate() + end + insert(t.tok, datum) +end + +function Collector:variable() + local t = self.t + local t2 = self:advance() + + local s = self:statement('!VAR', t, t2) + insert(self.statements, s) + self:advance() +end + +function Collector:directive() + local name = self.tok + self:advance() + local function add(kind, ...) + insert(self.statements, self:statement('!'..kind, ...)) + end + if name == 'ORG' or name == 'BASE' then + add(name, self:const(nil, 'no labels')) + elseif name == 'ALIGN' or name == 'SKIP' then + if self:is_EOL() and name == 'ALIGN' then + add(name) + else + local size = self:number() + if self:is_EOL() then + add(name, size) + else + self:optional_comma() + add(name, size, self:number()) + end + self:expect_EOL() + end + elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then + self:push_data(self:const().tok, name) + while not self:is_EOL() do + self:advance() + self:optional_comma() + self:push_data(self:const().tok, name) + end + self:expect_EOL() + elseif name == 'INC' or name == 'INCBIN' then + -- noop, handled by lexer + elseif name == 'ASCII' or name == 'ASCIIZ' then + local bytes = self:string() + for i, number in ipairs(bytes.tok) do + self:push_data(number, 'BYTE') + end + if name == 'ASCIIZ' then + self:push_data(0, 'BYTE') + end + self:expect_EOL() + elseif name == 'FLOAT' then + self:error('unimplemented directive', name) + else + self:error('unknown directive', name) + end +end + +function Collector:basic_special() + local name, args = self:special() + + local portion + if name == 'hi' then + portion = 'upperoff' + elseif name == 'up' then + portion = 'upper' + elseif name == 'lo' then + portion = 'lower' + else + self:error('unknown special', name) + end + + if #args ~= 1 then + self:error(name..' expected one argument', #args) + end + + local t = self:token(args[1]):set('portion', portion) + return t +end + +function Collector:instruction() + local s = self:statement(self.tok) + insert(self.statements, s) + self:advance() + + while self.tt ~= 'EOL' do + local t = self.t + if self.tt == 'OPEN' then + t = self:deref() + t.tt = 'DEREF' -- TODO: should just be returned by :deref + insert(s, t) + elseif self.tt == 'UNARY' then + local peek = self.tokens[self.i + 1] + if peek.tt == 'VARSYM' then + local negate = t.tok == -1 + t = self:advance() + t = Token(t):set('negate', negate) + insert(s, t) + self:advance() + elseif peek.tt == 'EOL' or peek.tt == 'SEP' then + local tok = t.tok == 1 and '+' or t.tok == -1 and '-' + t = Token(self.fn, self.line, 'RELLABELSYM', tok) + insert(s, t) + self:advance() + else + self:error('unexpected token after unary operator', peek.tt) + end + elseif self.tt == 'SPECIAL' then + t = self:basic_special() + insert(s, t) + self:advance() + elseif self.tt == 'SEP' then + self:error('extraneous comma') + elseif not arg_types[self.tt] then + self:error('unexpected argument type in instruction', self.tt) + else + insert(s, t) + self:advance() + end + self:optional_comma() + end + + self:expect_EOL() + s:validate() +end + +function Collector:collect(tokens, fn) + self.tokens = tokens + self.fn = fn or '(string)' + self.main_fn = self.fn + + self.statements = {} + + -- this works, but probably shouldn't be in this function specifically + if self.options.offset then + local s = Statement('(options)', 0, '!ORG', self.options.offset) + insert(self.statements, s) + end + if self.options.base then + local s = Statement('(options)', 0, '!BASE', self.options.base) + insert(self.statements, s) + end + + self.i = 0 -- set up Muncher iteration + self:advance() -- load up the first token + while true do + if self.tt == 'EOF' then + -- don't break if this is an included file's EOF + if self.fn == self.main_fn then + break + end + self:advance() + elseif self.tt == 'EOL' then + -- empty line + self:advance() + elseif self.tt == 'VAR' then + self:variable() -- handles advancing + elseif self.tt == 'LABEL' or self.tt == 'RELLABEL' then + insert(self.statements, self:statement('!LABEL', self.t)) + self:advance() + elseif self.tt == 'DIR' then + self:directive() -- handles advancing + elseif self.tt == 'INSTR' then + self:instruction() -- handles advancing + else + self:error('expected starting token for statement', self.tt) + end + end + + return self.statements +end + +return Collector diff --git a/Lua/lib/lips/Dumper.lua b/Lua/lib/lips/Dumper.lua index c02345e..6a2d320 100644 --- a/Lua/lib/lips/Dumper.lua +++ b/Lua/lib/lips/Dumper.lua @@ -1,25 +1,33 @@ local floor = math.floor local format = string.format local insert = table.insert +local unpack = unpack or table.unpack -local data = require "lips.data" -local util = require "lips.util" +local path = string.gsub(..., "[^.]+$", "") +local data = require(path.."data") +local util = require(path.."util") +local Token = require(path.."Token") +local Statement = require(path.."Statement") +local Reader = require(path.."Reader") local bitrange = util.bitrange -local Dumper = util.Class() -function Dumper:init(writer, fn, options) +local function label_delta(from, to) + -- TODO: consider removing the % here since .base should handle that now + to = to + from = from + return floor(to/4) - 1 - floor(from/4) +end + +local Dumper = Reader:extend() +function Dumper:init(writer, options) self.writer = writer - self.fn = fn or '(string)' self.options = options or {} self.labels = setmetatable({}, {__index=options.labels}) self.commands = {} - self.pos = options.offset or 0 self.lastcommand = nil -end - -function Dumper:error(msg) - error(format('%s:%d: Error: %s', self.fn, self.line, msg), 2) + self.pos = 0 + self.base = 0 end function Dumper:export_labels(t) @@ -33,117 +41,9 @@ function Dumper:export_labels(t) return t end -function Dumper:advance(by) - self.pos = self.pos + by -end - -function Dumper:push_instruction(t) - t.kind = 'instruction' - insert(self.commands, t) - self:advance(4) -end - -function Dumper:add_instruction_j(fn, line, o, T) - self:push_instruction{fn=fn, line=line, o, T} -end - -function Dumper:add_instruction_i(fn, line, o, s, t, i) - self:push_instruction{fn=fn, line=line, o, s, t, i} -end - -function Dumper:add_instruction_r(fn, line, o, s, t, d, f, c) - self:push_instruction{fn=fn, line=line, o, s, t, d, f, c} -end - -function Dumper:add_label(name) - self.labels[name] = self.pos -end - -function Dumper:add_bytes(line, ...) - local use_last = self.lastcommand and self.lastcommand.kind == 'bytes' - local t - if use_last then - t = self.lastcommand - else - t = {} - t.kind = 'bytes' - t.size = 0 - t.fn = self.fn - t.line = self.line - end - t.line = line - for _, b in ipairs{...} do - t.size = t.size + 1 - t[t.size] = b - end - if not use_last then - insert(self.commands, t) - end - self:advance(t.size) -end - -function Dumper:add_directive(fn, line, name, a, b) - self.fn = fn - self.line = line - local t = {} - t.fn = self.fn - t.line = self.line - if name == 'BYTE' then - self:add_bytes(line, a % 0x100) - elseif name == 'HALFWORD' then - local b0 = bitrange(a, 0, 7) - local b1 = bitrange(a, 8, 15) - self:add_bytes(line, b1, b0) - elseif name == 'WORD' then - if type(a) == 'string' then - t.kind = 'label' - t.name = a - insert(self.commands, t) - self:advance(4) - else - local b0 = bitrange(a, 0, 7) - local b1 = bitrange(a, 8, 15) - local b2 = bitrange(a, 16, 23) - local b3 = bitrange(a, 24, 31) - self:add_bytes(line, b3, b2, b1, b0) - end - elseif name == 'ORG' then - t.kind = 'goto' - t.addr = a - insert(self.commands, t) - self.pos = a - self:advance(0) - elseif name == 'ALIGN' then - t.kind = 'ahead' - local align - if a == 0 then - align = 4 - elseif a < 0 then - self:error('negative alignment') - else - align = 2^a - end - local temp = self.pos + align - 1 - t.skip = temp - (temp % align) - self.pos - t.fill = t.fill or 0 - insert(self.commands, t) - self:advance(t.skip) - elseif name == 'SKIP' then - t.kind = 'ahead' - t.skip = a - t.fill = b - insert(self.commands, t) - self:advance(t.skip) - else - self:error('unimplemented directive') - end -end - function Dumper:desym(t) if t.tt == 'REL' then - local target = t.tok % 0x80000000 - local pos = self.pos % 0x80000000 - local rel = floor(target/4) - 1 - floor(pos/4) + local rel = label_delta(self:pc(), t.tok) if rel > 0x8000 or rel <= -0x8000 then self:error('branch too far') end @@ -159,7 +59,7 @@ function Dumper:desym(t) elseif t.tt == 'LABELSYM' or t.tt == 'LABELREL' then local label = self.labels[t.tok] if label == nil then - self:error('undefined label') + self:error('undefined label', t.tok) end if t.offset then label = label + t.offset @@ -168,9 +68,7 @@ function Dumper:desym(t) return label end - label = label % 0x80000000 - local pos = self.pos % 0x80000000 - local rel = floor(label/4) - 1 - floor(pos/4) + local rel = label_delta(self:pc(), label) if rel > 0x8000 or rel <= -0x8000 then self:error('branch too far') end @@ -179,56 +77,29 @@ function Dumper:desym(t) error('Internal Error: failed to desym') end -function Dumper:toval(t) - assert(type(t) == 'table', 'Internal Error: invalid value') - - local val = self:desym(t) - - if t.index then - val = val % 0x80000000 - val = floor(val/4) - end - if t.negate then - val = -val - end - if t.negate or t.signed then - if val >= 0x10000 or val < -0x8000 then - self:error('value out of range') - end - val = val % 0x10000 - end - - if t.portion == 'upper' then - val = bitrange(val, 16, 31) - elseif t.portion == 'lower' then - val = bitrange(val, 0, 15) - elseif t.portion == 'upperoff' then - local upper = bitrange(val, 16, 31) - local lower = bitrange(val, 0, 15) - if lower >= 0x8000 then - -- accommodate for offsets being signed - upper = (upper + 1) % 0x10000 - end - val = upper - end - - return val -end - function Dumper:validate(n, bits) local max = 2^bits if n == nil then self:error('value is nil') -- internal error? end if n > max or n < 0 then - self:error('value out of range') + self:error('value out of range', ("%X"):format(n)) end + return n end function Dumper:valvar(t, bits) - local val = self:toval(t) - self:validate(val, bits) - return val + local val = t + local err + if type(val) ~= 'number' then + t.tok = self:desym(t) + t.tt = 'NUM' + val, err = t:compute() + if err then + self:error(err, val) + end + end + return self:validate(val, bits) end function Dumper:write(t) @@ -266,42 +137,281 @@ function Dumper:dump_instruction(t) return uw, lw end -function Dumper:dump() - self.pos = self.options.offset or 0 - for i, t in ipairs(self.commands) do - assert(t.fn, 'Internal Error: no file name available') - assert(t.line, 'Internal Error: no line number available') - self.fn = t.fn - self.line = t.line - if t.kind == 'instruction' then - local uw, lw = self:dump_instruction(t) - local b0 = bitrange(lw, 0, 7) - local b1 = bitrange(lw, 8, 15) - local b2 = bitrange(uw, 0, 7) - local b3 = bitrange(uw, 8, 15) - self:write{b3, b2, b1, b0} - elseif t.kind == 'bytes' then - self:write(t) - elseif t.kind == 'goto' then - self.pos = t.addr - elseif t.kind == 'ahead' then - if t.fill then - for i=1, t.skip do - self:write{t.fill} +function Dumper:assemble_j(first, out) + local w = 0 + w = w + self:valvar(first, 6) * 0x04000000 + w = w + self:valvar(out[1], 26) * 0x00000001 + local t = Token(self.fn, self.line, 'WORDS', {w}) + local s = Statement(self.fn, self.line, '!DATA', t) + return s +end +function Dumper:assemble_i(first, out) + local w = 0 + w = w + self:valvar(first, 6) * 0x04000000 + w = w + self:valvar(out[1], 5) * 0x00200000 + w = w + self:valvar(out[2], 5) * 0x00010000 + w = w + self:valvar(out[3], 16) * 0x00000001 + local t = Token(self.fn, self.line, 'WORDS', {w}) + local s = Statement(self.fn, self.line, '!DATA', t) + return s +end +function Dumper:assemble_r(first, out) + local w = 0 + w = w + self:valvar(first, 6) * 0x04000000 + w = w + self:valvar(out[1], 5) * 0x00200000 + w = w + self:valvar(out[2], 5) * 0x00010000 + w = w + self:valvar(out[3], 5) * 0x00000800 + w = w + self:valvar(out[4], 5) * 0x00000040 + w = w + self:valvar(out[5], 6) * 0x00000001 + local t = Token(self.fn, self.line, 'WORDS', {w}) + local s = Statement(self.fn, self.line, '!DATA', t) + return s +end + +function Dumper:format_in(informat) + -- see data.lua for a guide on what all these mean + local args = {} + --if #informat ~= #s then error('mismatch') end + for i=1, #informat do + self.i = i + local c = informat:sub(i, i) + if c == 'd' and not args.rd then + args.rd = self:register(data.registers) + elseif c == 's' and not args.rs then + args.rs = self:register(data.registers) + elseif c == 't' and not args.rt then + args.rt = self:register(data.registers) + elseif c == 'D' and not args.fd then + args.fd = self:register(data.fpu_registers) + elseif c == 'S' and not args.fs then + args.fs = self:register(data.fpu_registers) + elseif c == 'T' and not args.ft then + args.ft = self:register(data.fpu_registers) + elseif c == 'X' and not args.rd then + args.rd = self:register(data.sys_registers) + elseif c == 'Y' and not args.rs then + args.rs = self:register(data.sys_registers) + elseif c == 'Z' and not args.rt then + args.rt = self:register(data.sys_registers) + elseif c == 'o' and not args.offset then + args.offset = self:const():set('signed') + elseif c == 'r' and not args.offset then + args.offset = self:const('relative'):set('signed') + elseif c == 'i' and not args.immediate then + args.immediate = self:const(nil, 'no label') + elseif c == 'I' and not args.index then + args.index = self:const():set('index') + elseif c == 'k' and not args.immediate then + args.immediate = self:const(nil, 'no label'):set('negate') + elseif c == 'K' and not args.immediate then + args.immediate = self:const(nil, 'no label'):set('signed') + elseif c == 'b' and not args.base then + args.base = self:deref():set('tt', 'REG') + else + error('Internal Error: invalid input formatting string') + end + end + return args +end + +function Dumper:format_out_raw(outformat, first, args, const, formatconst) + -- see data.lua for a guide on what all these mean + local lookup = { + [1]=self.assemble_j, + [3]=self.assemble_i, + [5]=self.assemble_r, + } + local out = {} + for i=1, #outformat do + local c = outformat:sub(i, i) + if c == 'd' then out[#out+1] = args.rd + elseif c == 's' then insert(out, args.rs) + elseif c == 't' then insert(out, args.rt) + elseif c == 'D' then insert(out, args.fd) + elseif c == 'S' then insert(out, args.fs) + elseif c == 'T' then insert(out, args.ft) + elseif c == 'o' then insert(out, args.offset) + elseif c == 'i' then insert(out, args.immediate) + elseif c == 'I' then insert(out, args.index) + elseif c == 'b' then insert(out, args.base) + elseif c == '0' then insert(out, 0) + elseif c == 'C' then insert(out, const) + elseif c == 'F' then insert(out, formatconst) + end + end + local f = lookup[#outformat] + assert(f, 'Internal Error: invalid output formatting string') + return f(self, first, out) +end + +function Dumper:format_out(t, args) + return self:format_out_raw(t[3], t[1], args, t[4], t[5]) +end + +function Dumper:assemble(s) + local name = s.type + local h = data.instructions[name] + self.s = s + if h[2] ~= nil then + local args = self:format_in(h[2]) + return self:format_out(h, args) + else + self:error('unimplemented instruction', name) + end +end + +function Dumper:fill(length, content) + self:validate(content, 8) + local bytes = {} + for i=1, length do + insert(bytes, content) + end + local t = Token(self.fn, self.line, 'BYTES', bytes) + local s = Statement(self.fn, self.line, '!DATA', t) + return s +end + +function Dumper:pc() + return self.pos + self.base +end + +function Dumper:load(statements) + local new_statements = {} + self.pos = 0 + self.base = 0 + for i=1, #statements do + local s = statements[i] + self.fn = s.fn + self.line = s.line + if s.type:sub(1, 1) == '!' then + if s.type == '!LABEL' then + self.labels[s[1].tok] = self:pc() + elseif s.type == '!DATA' then + s.length = util.measure_data(s) -- cache for next pass + self.pos = self.pos + s.length + insert(new_statements, s) + elseif s.type == '!ORG' then + self.pos = s[1].tok + insert(new_statements, s) + elseif s.type == '!BASE' then + self.base = s[1].tok + insert(new_statements, s) + elseif s.type == '!ALIGN' or s.type == '!SKIP' then + local length, content + if s.type == '!ALIGN' then + local align = s[1] and s[1].tok or 2 + content = s[2] and s[2].tok or 0 + if align < 0 then + self:error('negative alignment') + else + align = 2^align + end + local temp = self:pc() + align - 1 + length = temp - (temp % align) - self:pc() + else + length = s[1] and s[1].tok or 0 + content = s[2] and s[2].tok or nil + end + + self.pos = self.pos + length + if content == nil then + local new = Statement(self.fn, self.line, '!ORG', self.pos) + insert(new_statements, new) + elseif length > 0 then + insert(new_statements, self:fill(length, content)) + elseif length < 0 then + local new = Statement(self.fn, self.line, '!ORG', self.pos) + insert(new_statements, new) + insert(new_statements, self:fill(length, content)) + local new = Statement(self.fn, self.line, '!ORG', self.pos) + insert(new_statements, new) + else + -- length is 0, noop end else - self.pos = self.pos + t.skip + error('Internal Error: unknown statement, got '..s.type) end - elseif t.kind == 'label' then - local val = self:desym{tt='LABELSYM', tok=t.name} - val = (val % 0x80000000) + 0x80000000 - local b0 = bitrange(val, 0, 7) - local b1 = bitrange(val, 8, 15) - local b2 = bitrange(val, 16, 23) - local b3 = bitrange(val, 24, 31) - self:write{b3, b2, b1, b0} else - error('Internal Error: unknown command') + self.pos = self.pos + 4 + insert(new_statements, s) + end + end + + statements = new_statements + + new_statements = {} + self.pos = 0 + self.base = 0 + for i=1, #statements do + local s = statements[i] + self.fn = s.fn + self.line = s.line + if s.type:sub(1, 1) ~= '!' then + local new = self:assemble(s) + self.pos = self.pos + 4 + insert(new_statements, new) + elseif s.type == '!DATA' then + for i, t in ipairs(s) do + if t.tt == 'LABEL' then + local label = self.labels[t.tok] + if label == nil then + self:error('undefined label', t.tok) + end + t.tt = 'WORDS' + t.tok = {label} + end + end + self.pos = self.pos + (s.length or util.measure_data(s)) + insert(new_statements, s) + elseif s.type == '!ORG' then + self.pos = s[1].tok + insert(new_statements, s) + elseif s.type == '!BASE' then + self.base = s[1].tok + elseif s.type == '!LABEL' then + -- noop + else + error('Internal Error: unknown statement, got '..s.type) + end + end + + self.statements = new_statements + return self.statements +end + +function Dumper:dump() + self.pos = 0 + self.base = nil + for i, s in ipairs(self.statements) do + if s.type == '!DATA' then + for j, t in ipairs(s) do + if t.tt == 'WORDS' then + for _, w in ipairs(t.tok) do + local b0 = bitrange(w, 0, 7) + local b1 = bitrange(w, 8, 15) + local b2 = bitrange(w, 16, 23) + local b3 = bitrange(w, 24, 31) + self:write{b3, b2, b1, b0} + end + elseif t.tt == 'HALFWORDS' then + for _, h in ipairs(t.tok) do + local b0 = bitrange(h, 0, 7) + local b1 = bitrange(h, 8, 15) + self:write{b1, b0} + end + elseif t.tt == 'BYTES' then + for _, b in ipairs(t.tok) do + local b0 = bitrange(b, 0, 7) + self:write{b0} + end + else + error('Internal Error: unknown !DATA token') + end + end + elseif s.type == '!ORG' then + self.pos = s[1].tok + else + error('Internal Error: cannot dump unassembled statement') end end end diff --git a/Lua/lib/lips/Lexer.lua b/Lua/lib/lips/Lexer.lua index 29e9bfe..d035991 100644 --- a/Lua/lib/lips/Lexer.lua +++ b/Lua/lib/lips/Lexer.lua @@ -4,8 +4,10 @@ local find = string.find local format = string.format local insert = table.insert -local data = require "lips.data" -local util = require "lips.util" +local path = string.gsub(..., "[^.]+$", "") +local data = require(path.."data") +local util = require(path.."util") +local Base = require(path.."Base") local simple_escapes = { ['0'] = 0x00, @@ -20,7 +22,7 @@ local simple_escapes = { ['v'] = 0x0B, } -local Lexer = util.Class() +local Lexer = Base:extend() function Lexer:init(asm, fn, options) self.asm = asm self.fn = fn or '(string)' @@ -37,6 +39,19 @@ function Lexer:error(msg) end function Lexer:nextc() + -- iterate to the next character while translating newlines. + -- outputs: + --self.chr the character as a string + --self.chr2 the character after it as a string + --self.chrchr both characters as a string + -- chr values can be empty + --self.ord numeric value of the character + --self.ord2 numeric value of the character after it + -- ord values can be self.EOF + --self.was_EOL if the character was an EOL + -- this EOL state is preserved past the EOF + -- so it can be used to determine if the file lacks a final EOL + if self.pos > #self.asm then self.ord = self.EOF self.ord2 = self.EOF @@ -129,10 +144,18 @@ function Lexer:read_number() self:nextc() return self:read_hex() elseif self.chr:find('%d') then - if self.chr2 == 'x' or self.chr2 == 'X' then + if self.chr2 == 'x' then self:nextc() self:nextc() return self:read_hex() + elseif self.chr2 == 'o' then + self:nextc() + self:nextc() + return self:read_octal() + elseif self.chr2 == 'b' then + self:nextc() + self:nextc() + return self:read_binary() elseif self.chr == '0' and self.chr2:find('%d') then self:nextc() return self:read_octal() @@ -284,11 +307,12 @@ function Lexer:lex_include_binary(_yield) self:lex_string_naive(function(tt, tok) fn = tok end) + -- TODO: allow optional offset and size arguments if self.options.path then fn = self.options.path..fn end - -- NOTE: this allocates two tables for each byte. - -- this could easily cause performance issues on big files. + -- FIXME: this allocates two tables for each byte. + -- this could easily cause performance issues on big files. local data = util.readfile(fn, true) for b in string.gfind(data, '.') do _yield('DIR', 'BYTE', fn, 0) @@ -327,14 +351,14 @@ function Lexer:lex(_yield) self:nextc() local buff = self:read_chars('[%w_]') if self.chr ~= ']' then - self:error('invalid define name') + self:error('invalid variable name') end self:nextc() if self.chr ~= ':' then - self:error('define requires a colon') + self:error('expected a colon after closing bracket') end self:nextc() - yield('DEF', buff) + yield('VAR', buff) elseif self.chr == ']' then self:error('unmatched closing bracket') elseif self.chr == '(' then @@ -367,11 +391,19 @@ function Lexer:lex(_yield) elseif self.chr == '@' then self:nextc() local buff = self:read_chars('[%w_]') - yield('DEFSYM', buff) + yield('VARSYM', buff) elseif self.chr == '%' then self:nextc() - local call = self:read_chars('[%w_]') - yield('SPECIAL', call) + if self.chr:find('[%a_]') then + local call = self:read_chars('[%w_]') + if call ~= '' then + yield('SPECIAL', call) + end + elseif self.chr:find('[01]') then + yield('NUM', self:read_binary()) + else + self:error('unknown % syntax') + end elseif self.chr:find('[%a_]') then local buff = self:read_chars('[%w_.]') local up = buff:upper() diff --git a/Lua/lib/lips/Muncher.lua b/Lua/lib/lips/Muncher.lua index c0ab378..b3f52fa 100644 --- a/Lua/lib/lips/Muncher.lua +++ b/Lua/lib/lips/Muncher.lua @@ -1,22 +1,26 @@ local format = string.format local insert = table.insert -local data = require "lips.data" -local util = require "lips.util" -local Token = require "lips.Token" +local path = string.gsub(..., "[^.]+$", "") +local data = require(path.."data") +local Base = require(path.."Base") +local Token = require(path.."Token") local arg_types = { NUM = true, REG = true, - DEFSYM = true, + VARSYM = true, LABELSYM = true, RELLABELSYM = true, } -local Muncher = util.Class() +local Muncher = Base:extend() -- no base init method -function Muncher:error(msg) +function Muncher:error(msg, got) + if got ~= nil then + msg = msg..', got '..tostring(got) + end error(format('%s:%d: Error: %s', self.fn, self.line, msg), 2) end @@ -25,9 +29,11 @@ function Muncher:token(t, val) if type(t) == 'table' then t.fn = self.fn t.line = self.line - return Token(t) + local token = Token(t) + return token else - return Token(self.fn, self.line, t, val) + local token = Token(self.fn, self.line, t, val) + return token end end @@ -109,8 +115,8 @@ function Muncher:deref() end function Muncher:const(relative, no_label) - if self.tt ~= 'NUM' and self.tt ~= 'LABELSYM' then - self:error('expected constant') + if self.tt ~= 'NUM' and self.tt ~= 'VARSYM' and self.tt ~= 'LABELSYM' then + self:error('expected constant', self.tt) end if no_label and self.tt == 'LABELSYM' then self:error('labels are not allowed here') diff --git a/Lua/lib/lips/Parser.lua b/Lua/lib/lips/Parser.lua index 2a760d0..78c66f6 100644 --- a/Lua/lib/lips/Parser.lua +++ b/Lua/lib/lips/Parser.lua @@ -1,285 +1,80 @@ local insert = table.insert -local data = require "lips.data" -local util = require "lips.util" -local overrides = require "lips.overrides" -local Token = require "lips.Token" -local Lexer = require "lips.Lexer" -local Dumper = require "lips.Dumper" -local Muncher = require "lips.Muncher" -local Preproc = require "lips.Preproc" +local path = string.gsub(..., "[^.]+$", "") +local Base = require(path.."Base") +local Token = require(path.."Token") +local Lexer = require(path.."Lexer") +local Collector = require(path.."Collector") +local Preproc = require(path.."Preproc") +local Dumper = require(path.."Dumper") -local Parser = util.Class(Muncher) +local Parser = Base:extend() function Parser:init(writer, fn, options) + self.writer = writer self.fn = fn or '(string)' self.main_fn = self.fn self.options = options or {} - self.dumper = Dumper(writer, fn, options) -end - -function Parser:directive() - local name = self.tok - self:advance() - local function add(...) - self.dumper:add_directive(self.fn, self.line, ...) - end - if name == 'ORG' then - add(name, self:number().tok) - elseif name == 'ALIGN' or name == 'SKIP' then - if self:is_EOL() and name == 'ALIGN' then - add(name, 0) - else - local size = self:number().tok - if self:is_EOL() then - add(name, size) - else - self:optional_comma() - add(name, size, self:number().tok) - end - self:expect_EOL() - end - elseif name == 'BYTE' or name == 'HALFWORD' then - add(name, self:number().tok) - while not self:is_EOL() do - self:advance() - self:optional_comma() - add(name, self:number().tok) - end - self:expect_EOL() - elseif name == 'WORD' then -- allow labels in word directives - add(name, self:const().tok) - while not self:is_EOL() do - self:advance() - self:optional_comma() - add(name, self:const().tok) - end - self:expect_EOL() - elseif name == 'INC' or name == 'INCBIN' then - -- noop, handled by lexer - elseif name == 'ASCII' or name == 'ASCIIZ' then - local bytes = self:string() - for i, number in ipairs(bytes.tok) do - add('BYTE', number) - end - if name == 'ASCIIZ' then - add('BYTE', 0) - end - self:expect_EOL() - elseif name == 'FLOAT' then - self:error('unimplemented') - else - self:error('unknown directive') - end -end - -function Parser:format_in(informat) - local args = {} - for i=1,#informat do - local c = informat:sub(i, i) - local c2 = informat:sub(i + 1, i + 1) - if c == 'd' and not args.rd then - args.rd = self:register() - elseif c == 's' and not args.rs then - args.rs = self:register() - elseif c == 't' and not args.rt then - args.rt = self:register() - elseif c == 'D' and not args.fd then - args.fd = self:register(data.fpu_registers) - elseif c == 'S' and not args.fs then - args.fs = self:register(data.fpu_registers) - elseif c == 'T' and not args.ft then - args.ft = self:register(data.fpu_registers) - elseif c == 'X' and not args.rd then - args.rd = self:register(data.sys_registers) - elseif c == 'Y' and not args.rs then - args.rs = self:register(data.sys_registers) - elseif c == 'Z' and not args.rt then - args.rt = self:register(data.sys_registers) - elseif c == 'o' and not args.offset then - args.offset = Token(self:const()):set('signed') - elseif c == 'r' and not args.offset then - args.offset = Token(self:const('relative')):set('signed') - elseif c == 'i' and not args.immediate then - args.immediate = self:const(nil, 'no label') - elseif c == 'I' and not args.index then - args.index = Token(self:const()):set('index') - elseif c == 'k' and not args.immediate then - args.immediate = Token(self:const(nil, 'no label')):set('negate') - elseif c == 'K' and not args.immediate then - args.immediate = Token(self:const(nil, 'no label')):set('signed') - elseif c == 'b' and not args.base then - args.base = self:deref() - else - error('Internal Error: invalid input formatting string') - end - if c2:find('[dstDSTorIikKXYZ]') then - self:optional_comma() - end - end - return args -end - -function Parser:format_out_raw(outformat, first, args, const, formatconst) - local lookup = { - [1]=self.dumper.add_instruction_j, - [3]=self.dumper.add_instruction_i, - [5]=self.dumper.add_instruction_r, - } - local out = {} - for i=1,#outformat do - local c = outformat:sub(i, i) - if c == 'd' then - out[#out+1] = self:token(args.rd) - elseif c == 's' then - out[#out+1] = self:token(args.rs) - elseif c == 't' then - out[#out+1] = self:token(args.rt) - elseif c == 'D' then - out[#out+1] = self:token(args.fd) - elseif c == 'S' then - out[#out+1] = self:token(args.fs) - elseif c == 'T' then - out[#out+1] = self:token(args.ft) - elseif c == 'o' then - out[#out+1] = self:token(args.offset) - elseif c == 'i' then - out[#out+1] = self:token(args.immediate) - elseif c == 'I' then - out[#out+1] = self:token(args.index) - elseif c == 'b' then - out[#out+1] = self:token(args.base) - elseif c == '0' then - out[#out+1] = self:token(0) - elseif c == 'C' then - out[#out+1] = self:token(const) - elseif c == 'F' then - out[#out+1] = self:token(formatconst) - end - end - local f = lookup[#outformat] - if f == nil then - error('Internal Error: invalid output formatting string') - end - f(self.dumper, self.fn, self.line, first, out[1], out[2], out[3], out[4], out[5]) -end - -function Parser:format_out(t, args) - self:format_out_raw(t[3], t[1], args, t[4], t[5]) -end - -function Parser:instruction() - local name = self.tok - local h = data.instructions[name] - self:advance() - - if h == nil then - error('Internal Error: undefined instruction') - elseif overrides[name] then - overrides[name](self, name) - elseif h[2] == 'tob' then -- TODO: or h[2] == 'Tob' then - -- handle all the addressing modes for lw/sw-like instructions - local lui = data.instructions['LUI'] - local addu = data.instructions['ADDU'] - local args = {} - args.rt = self:register() - self:optional_comma() - if self.tt == 'OPEN' then - args.offset = 0 - args.base = self:deref() - else -- NUM or LABELSYM - local lui_args = {} - local addu_args = {} - local o = self:const() - if self.tt == 'NUM' then - o:set('offset', self:const().tok) - end - args.offset = self:token(o) - if not o.portion then - args.offset:set('portion', 'lower') - end - if not o.portion and (o.tt == 'LABELSYM' or o.tok >= 0x80000000) then - lui_args.immediate = Token(o):set('portion', 'upperoff') - lui_args.rt = 'AT' - self:format_out(lui, lui_args) - if not self:is_EOL() then - addu_args.rd = 'AT' - addu_args.rs = 'AT' - addu_args.rt = self:deref() - self:format_out(addu, addu_args) - end - args.base = 'AT' - else - args.base = self:deref() - end - end - self:format_out(h, args) - elseif h[2] ~= nil then - local args = self:format_in(h[2]) - self:format_out(h, args) - else - self:error('unimplemented instruction') - end - self:expect_EOL() end function Parser:tokenize(asm) - self.i = 0 - - local routine = coroutine.create(function() - local lexer = Lexer(asm, self.main_fn, self.options) - lexer:lex(coroutine.yield) - end) - + local lexer = Lexer(asm, self.main_fn, self.options) local tokens = {} - while true do - local ok, a, b, c, d = coroutine.resume(routine) - if not ok then - a = a or 'Internal Error: lexer coroutine has stopped' - error(a) - end - assert(a, 'Internal Error: missing token') - local t = Token(c, d, a, b) - insert(tokens, t) - - if t.tt == 'EOF' and t.fn == self.main_fn then - break - end + local loop = true + while loop do + lexer:lex(function(tt, tok, fn, line) + assert(tt, 'Internal Error: missing token') + local t = Token(fn, line, tt, tok) + insert(tokens, t) + -- don't break if this is an included file's EOF + if tt == 'EOF' and fn == self.main_fn then + loop = false + end + end) end - local preproc = Preproc(self.options) - self.tokens = preproc:process(tokens) + -- the lexer guarantees an EOL and EOF for a blank file + assert(#tokens > 0, 'Internal Error: no tokens after preprocessing') - assert(#self.tokens > 0, 'Internal Error: no tokens after preprocessing') + local collector = Collector(self.options) + self.statements = collector:collect(tokens, self.main_fn) +end + +function Parser:debug_dump() + for i, s in ipairs(self.statements) do + local values = '' + for j, v in ipairs(s) do + local tok = v.tok + if type(tok) == 'number' then + tok = ("$%X"):format(tok) + end + values = values..'\t'..v.tt..'('..tostring(tok)..')' + end + values = values:sub(2) + print(s.line, s.type, values) + end end function Parser:parse(asm) self:tokenize(asm) - self:advance() - while true do - if self.tt == 'EOF' then - if self.fn == self.main_fn then - break - end - self:advance() - elseif self.tt == 'EOL' then - -- empty line - self:advance() - elseif self.tt == 'DIR' then - self:directive() - elseif self.tt == 'LABEL' then - self.dumper:add_label(self.tok) - self:advance() - elseif self.tt == 'INSTR' then - self:instruction() - else - self:error('unexpected token (unknown instruction?)') - end - end + + if self.options.debug_token then self:debug_dump() end + + local preproc = Preproc(self.options) + self.statements = preproc:process(self.statements) + self.statements = preproc:expand(self.statements) + + if self.options.debug_pre then self:debug_dump() end + + local dumper = Dumper(self.writer, self.options) + self.statements = dumper:load(self.statements) + + if self.options.debug_dump then self:debug_dump() end + if self.options.labels then - self.dumper:export_labels(self.options.labels) + dumper:export_labels(self.options.labels) end - return self.dumper:dump() + return dumper:dump() end return Parser diff --git a/Lua/lib/lips/Preproc.lua b/Lua/lib/lips/Preproc.lua index 89116e8..7a09e22 100644 --- a/Lua/lib/lips/Preproc.lua +++ b/Lua/lib/lips/Preproc.lua @@ -1,9 +1,10 @@ local insert = table.insert -local data = require "lips.data" -local util = require "lips.util" -local Muncher = require "lips.Muncher" -local Token = require "lips.Token" +local path = string.gsub(..., "[^.]+$", "") +local data = require(path.."data") +local overrides = require(path.."overrides") +local Statement = require(path.."Statement") +local Reader = require(path.."Reader") local abs = math.abs @@ -19,98 +20,41 @@ local function signs(s) end end -local function RelativeLabel(index, name) - return { - index = index, - name = name, - } -end - -local Preproc = util.Class(Muncher) +local Preproc = Reader:extend() function Preproc:init(options) self.options = options or {} end -function Preproc:process(tokens) - self.tokens = tokens - - local defines = {} - local plus_labels = {} -- constructed forwards - local minus_labels = {} -- constructed backwards - - -- first pass: resolve unary ops, defines, and collect relative labels - local new_tokens = {} - self.i = 0 - while self.i < #self.tokens do - local t = self:advance() - local sign = 1 - if t.tt == 'UNARY' then - sign = t.tok - local peek = self.tokens[self.i + 1] - if peek.tt == 'UNARY' then - self:error('unary operators cannot be chained') - elseif peek.tt == 'EOL' or peek.tt == 'SEP' then - t.tt = 'RELLABELSYM' - t.tok = sign == 1 and '+' or sign == -1 and '-' - elseif peek.tt == 'DEFSYM' then - t = self:advance() - else - self:error('expected a symbolic constant after unary operator') - end +function Preproc:lookup(t) + if t.tt == 'VARSYM' then + local name = t.tok + t.tt = 'NUM' + t.tok = self.variables[name] + if t.tok == nil then + self:error('undefined variable', name) end - if t.tt == nil then - error('Internal Error: missing token') - elseif t.tt == 'DEF' then - local t2 = self:advance() - if t2.tt ~= 'NUM' then - self:error('expected number for define') - end - defines[t.tok] = t2.tok - elseif t.tt == 'DEFSYM' then - local tt = 'NUM' - local tok = defines[t.tok] - if tok == nil then - self:error('undefined define') -- uhhh nice wording - end - insert(new_tokens, self:token(tt, tok * sign)) - elseif t.tt == 'RELLABEL' then - local label = t.tok or '' - local rl = RelativeLabel(#new_tokens + 1, label:sub(2)) - if label:sub(1, 1) == '+' then - insert(plus_labels, rl) - elseif label:sub(1, 1) == '-' then - insert(minus_labels, 1, rl) - else - error('Internal Error: unexpected token for relative label') - end - insert(new_tokens, t) - else - insert(new_tokens, t) - end - end - - -- second pass: resolve relative labels - for i, t in ipairs(new_tokens) do - self.fn = t.fn - self.line = t.line + elseif self.do_labels and t.tt == 'RELLABELSYM' or t.tt == 'RELLABEL' then if t.tt == 'RELLABEL' then t.tt = 'LABEL' -- exploits the fact that user labels can't begin with a number local name = t.tok:sub(2) - t.tok = tostring(i)..name + t.tok = tostring(self.i)..name elseif t.tt == 'RELLABELSYM' then + local i = self.i t.tt = 'LABELSYM' local rel = signs(t.tok) - if rel == 0 then - error('Internal Error: relative label without signs') - end + assert(rel ~= 0, 'Internal Error: relative label without signs') + local name = t.tok:sub(abs(rel) + 1) local seen = 0 - -- TODO: don't iterate over *every* label, just the ones nearby + -- TODO: don't iterate over *every* label, just the ones nearby. + -- we could do this by popping labels as we pass over them. + -- (would need to iterate once forwards and once backwards + -- for plus and minus labels respectively) if rel > 0 then - for _, rl in ipairs(plus_labels) do + for _, rl in ipairs(self.plus_labels) do if rl.name == name and rl.index > i then seen = seen + 1 if seen == rel then @@ -120,7 +64,7 @@ function Preproc:process(tokens) end end else - for _, rl in ipairs(minus_labels) do + for _, rl in ipairs(self.minus_labels) do if rl.name == name and rl.index < i then seen = seen - 1 if seen == rel then @@ -132,50 +76,169 @@ function Preproc:process(tokens) end if seen ~= rel then - self:error('could not find appropriate relative label') + self:error('could not find appropriate relative label', t.tok) end end + else + return false + end + return true +end + +function Preproc:check(s, i, tt) + s = s or self.s + i = i or self.i + local t = s[i] + if t == nil then + self:error("expected another argument") end - self.tokens = new_tokens - new_tokens = {} + self.fn = t.fn + self.line = t.line - -- third pass: resolve specials - self.i = 0 - while self.i < #self.tokens do - local t = self:advance() - if t.tt == 'SPECIAL' then - local name, args = self:special() - -- TODO: split to its own file, not unlike overrides.lua - if name == 'hi' then - if #args ~= 1 then - self:error('%hi expected exactly one argument') + if t.tt ~= tt then + self:lookup(t) + end + + if t.tt ~= tt then + local err = ("argument %i of %s expected type %s"):format(i, s.type, tt) + self:error(err, t.tt) + end + return t.tok +end + +function Preproc:process(statements) + self.statements = statements + + self.variables = {} + self.plus_labels = {} -- constructed forwards + self.minus_labels = {} -- constructed backwards + self.do_labels = false + + -- first pass: resolve variables and collect relative labels + local new_statements = {} + for i=1, #self.statements do + local s = self.statements[i] + self.fn = s.fn + self.line = s.line + if s.type:sub(1, 1) == '!' then + -- directive, label, etc. + if s.type == '!VAR' then + local a = self:check(s, 1, 'VAR') + local b = self:check(s, 2, 'NUM') + self.variables[a] = b + elseif s.type == '!LABEL' then + if s[1].tt == 'RELLABEL' then + local label = s[1].tok + local rl = { + index = #new_statements + 1, + name = label:sub(2) + } + local c = label:sub(1, 1) + if c == '+' then + insert(self.plus_labels, rl) + elseif c == '-' then + insert(self.minus_labels, 1, rl) -- remember, it's backwards + else + error('Internal Error: unexpected token for relative label') + end end - local tnew = self:token(args[1]):set('portion', 'upperoff') - insert(new_tokens, tnew) - elseif name == 'up' then - if #args ~= 1 then - self:error('%up expected exactly one argument') - end - local tnew = self:token(args[1]):set('portion', 'upper') - insert(new_tokens, tnew) - elseif name == 'lo' then - if #args ~= 1 then - self:error('%lo expected exactly one argument') - end - local tnew = self:token(args[1]):set('portion', 'lower') - insert(new_tokens, tnew) + insert(new_statements, s) else - self:error('unknown special') + insert(new_statements, s) end else - insert(new_tokens, t) + -- regular instruction + for j, t in ipairs(s) do + self:lookup(t) + end + insert(new_statements, s) end end - self.tokens = new_tokens + -- second pass: resolve relative labels + self.do_labels = true + for i=1, #new_statements do + self.i = i -- make visible to :lookup + local s = new_statements[i] + self.fn = s.fn + self.line = s.line + for j, t in ipairs(s) do + self:lookup(t) + end + end - return self.tokens + return new_statements +end + +function Preproc:statement(...) + self.fn = self.s.fn + self.line = self.s.line + local s = Statement(self.fn, self.line, ...) + return s +end + +function Preproc:push(s) + s:validate() + insert(self.statements, s) +end + +function Preproc:push_new(...) + self:push(self:statement(...)) +end + +function Preproc:pop(kind) + local ret + if kind == nil then + -- noop + elseif kind == 'CPU' then + ret = self:register(data.registers) + elseif kind == 'DEREF' then + ret = self:deref() + elseif kind == 'CONST' then + ret = self:const() + elseif kind == 'REL' then + ret = self:const('REL') + elseif kind == 'END' then + if self.s[self.i + 1] ~= nil then + self:error('too many arguments') + end + return -- don't increment self.i past end of arguments + else + error('Internal Error: unknown kind, got '..tostring(kind)) + end + self.i = self.i + 1 + return ret +end + +function Preproc:expand(statements) + -- third pass: expand pseudo-instructions + self.statements = {} + for i=1, #statements do + local s = statements[i] + self.s = s + self.fn = s.fn + self.line = s.line + if s.type:sub(1, 1) == '!' then + self:push(s) + else + local name = s.type + local h = data.instructions[name] + if h == nil then + error('Internal Error: unknown instruction') + end + + if overrides[name] then + self.i = 1 + overrides[name](self, name) + self:pop('END') + else + self:push(s) + end + end + end + + return self.statements end return Preproc diff --git a/Lua/lib/lips/Reader.lua b/Lua/lib/lips/Reader.lua new file mode 100644 index 0000000..8c31484 --- /dev/null +++ b/Lua/lib/lips/Reader.lua @@ -0,0 +1,93 @@ +local path = string.gsub(..., "[^.]+$", "") +local Base = require(path.."Base") +local Token = require(path.."Token") + +local Reader = Base:extend() +-- no base init method + +-- Reader expects self.s to be set to a statement, and self.i to a token index + +function Reader:error(msg, got) + if got ~= nil then + msg = msg..', got '..tostring(got) + end + error(('%s:%d: Error: %s'):format(self.fn, self.line, msg), 2) +end + +function Reader:token(t, ...) + local new + if type(t) == 'table' then + new = Token(t, ...) + else + new = Token(self.fn, self.line, t, ...) + end + return new +end + +function Reader:expect(tts) + local t = self.s[self.i] + if t == nil then + self:error("expected another argument") -- TODO: more verbose + end + + self.fn = t.fn + self.line = t.line + + for _, tt in pairs(tts) do + if t.tt == tt then + return t.ok + end + end + + --local err = ("argument %i of %s expected type %s"):format(self.i, self.s.type, tt) + local err = ("unexpected type for argument %i of %s"):format(self.i, self.s.type) + self:error(err, t.tt) +end + +function Reader:register(registers) + self:expect{'REG'} + local t = self.s[self.i] + local numeric = registers[t.tok] + if not numeric then + self:error('wrong type of register') + end + local new = Token(t) + return new +end + +function Reader:const(relative, no_label) + self:expect{'NUM', 'LABELSYM', 'LABELREL'} + local t = self.s[self.i] + -- overrides will want to LUI a label; let portioned labels pass + if no_label and not t.portion then + self:expect{'NUM', 'LABELREL'} + end + local new = Token(t) + if relative then + if t.tt == 'LABELSYM' then + new.tt = 'LABELREL' + elseif t.tt == 'NUM' then + new.tt = 'REL' + end + end + return new +end + +function Reader:deref() + self:expect{'DEREF'} + local t = self.s[self.i] + local new = Token(t) + --new.tt = 'REG' + return new +end + +function Reader:peek(tt) + local t = self.s[self.i] + local seen = t and t.tt or nil + if tt ~= nil then + return seen == tt + end + return t +end + +return Reader diff --git a/Lua/lib/lips/Statement.lua b/Lua/lib/lips/Statement.lua new file mode 100644 index 0000000..384d17a --- /dev/null +++ b/Lua/lib/lips/Statement.lua @@ -0,0 +1,53 @@ +local path = string.gsub(..., "[^.]+$", "") +local util = require(path.."util") +local Base = require(path.."Base") +local Token = require(path.."Token") + +local Statement = Base:extend() +function Statement:init(...) + local args = {...} + if #args == 1 then + local t = args[1] + if util.parent(t) ~= Statement then + error('Internal Error: 1-arg Statement:init expected a Statement', 3) + end + if type(t) == 'table' then + for k, v in pairs(t) do + self[k] = v + end + end + elseif #args >= 3 then + self.fn = args[1] + self.line = args[2] + self.type = args[3] + for i, v in ipairs(args) do + if i > 3 then + self[i - 3] = v + end + end + else + error('Internal Error: Statement:init takes 1 or 3+ arguments', 3) + end + self:validate(1) + return self +end + +function Statement:validate(n) + n = (n or 0) + 3 -- depth for error message + if not self.fn then + error('Internal Error: statements require a filename', n) + end + if not self.line then + error('Internal Error: statements require a line number', n) + end + if not self.type then + error('Internal Error: statement is missing a type', n) + end + for i, v in ipairs(self) do + if util.parent(v) ~= Token then + self[i] = Token(self.fn, self.line, v) + end + end +end + +return Statement diff --git a/Lua/lib/lips/Token.lua b/Lua/lib/lips/Token.lua index 7978fec..482b9f8 100644 --- a/Lua/lib/lips/Token.lua +++ b/Lua/lib/lips/Token.lua @@ -1,6 +1,12 @@ -local util = require "lips.util" +local floor = math.floor -local Token = util.Class() +local path = string.gsub(..., "[^.]+$", "") +local Base = require(path.."Base") +local util = require(path.."util") + +local bitrange = util.bitrange + +local Token = Base:extend() function Token:init(...) local args = {...} if #args == 1 then @@ -34,19 +40,24 @@ function Token:init(...) else error('Internal Error: init takes 1, 3 or 4 arguments', 3) end + self:validate(1) + return self +end + +function Token:validate(n) + n = (n or 0) + 3 -- depth for error message if not self.fn then - error('Internal Error: tokens require a filename', 3) + error('Internal Error: tokens require a filename', n) end if not self.line then - error('Internal Error: tokens require a line number', 3) + error('Internal Error: tokens require a line number', n) end if not self.tt then - error('Internal Error: token is missing a type', 3) + error('Internal Error: token is missing a type', n) end if not self.tok then - error('Internal Error: token is missing a value', 3) + error('Internal Error: token is missing a value', n) end - return self end function Token:set(key, value) @@ -57,4 +68,40 @@ function Token:set(key, value) return self end +function Token:compute() + assert(self.tt == 'NUM', 'Internal Error: cannot compute a non-number token') + local n = self.tok + if self.index then + -- TODO: should this still be here now that we have .base? + n = n % 0x80000000 + n = floor(n/4) + end + if self.negate then + n = -n + end + + if self.portion == 'upper' then + n = bitrange(n, 16, 31) + elseif self.portion == 'lower' then + n = bitrange(n, 0, 15) + elseif self.portion == 'upperoff' then + local upper = bitrange(n, 16, 31) + local lower = bitrange(n, 0, 15) + if lower >= 0x8000 then + -- accommodate for offsets being signed + upper = (upper + 1) % 0x10000 + end + n = upper + end + + if self.negate or self.signed then + if n >= 0x10000 or n < -0x8000 then + return n, 'value out of range' + end + n = n % 0x10000 + end + + return n +end + return Token diff --git a/Lua/lib/lips/data.lua b/Lua/lib/lips/data.lua index 7d880e7..b693848 100644 --- a/Lua/lib/lips/data.lua +++ b/Lua/lib/lips/data.lua @@ -29,7 +29,7 @@ data.fpu_registers = { } data.all_directives = { - 'ORG', 'ALIGN', 'SKIP', + 'ORG', 'BASE', 'ALIGN', 'SKIP', 'ASCII', 'ASCIIZ', 'BYTE', 'HALFWORD', 'WORD', --'HEX', -- excluded here due to different syntax diff --git a/Lua/lib/lips/init.lua b/Lua/lib/lips/init.lua index 74bc83b..9e823b7 100644 --- a/Lua/lib/lips/init.lua +++ b/Lua/lib/lips/init.lua @@ -10,8 +10,9 @@ local lips = { ]], } -local util = require "lips.util" -local Parser = require "lips.Parser" +local path = string.gsub(..., "%.init$", "").."." +local util = require(path.."util") +local Parser = require(path.."Parser") function lips.word_writer() local buff = {} @@ -44,6 +45,17 @@ function lips.assemble(fn_or_asm, writer, options) options = options or {} local function main() + if options.offset then + if options.origin or options.base then + error('offset and origin/base options are mutually exclusive') + end + io.stderr:write('Warning: options.offset is deprecated.\n') + options.origin = options.offset + options.base = 0 + else + options.base = options.base or 0x80000000 + end + local fn = nil local asm if fn_or_asm:find('[\r\n]') then diff --git a/Lua/lib/lips/overrides.lua b/Lua/lib/lips/overrides.lua index 28ec0c0..55f231d 100644 --- a/Lua/lib/lips/overrides.lua +++ b/Lua/lib/lips/overrides.lua @@ -1,21 +1,58 @@ local insert = table.insert -local data = require "lips.data" -local util = require "lips.util" - -local instructions = data.instructions +local path = string.gsub(..., "[^.]+$", "") +local data = require(path.."data") local overrides = {} --- note: "self" is an instance of Parser +-- note: "self" is an instance of Preproc + +local function tob_override(self, name) + -- handle all the addressing modes for lw/sw-like instructions + local rt = self:pop('CPU') + local offset, base + if self:peek('DEREF') then + offset = 0 + base = self:pop('DEREF') + else -- NUM or LABELSYM + local o = self:pop('CONST') + if self:peek('NUM') then + local temp, err = self:pop('CONST'):compute() + if err then + self:error(err, temp) + end + o:set('offset', temp) + end + offset = self:token(o) + if not o.portion then + offset:set('portion', 'lower') + end + -- attempt to use the fewest possible instructions for this offset + if not o.portion and (o.tt == 'LABELSYM' or o.tok >= 0x80000000) then + local immediate = self:token(o):set('portion', 'upperoff') + self:push_new('LUI', 'AT', immediate) + if self.s[self.i] ~= nil then + local reg = self:pop('DEREF'):set('tt', 'REG') + if reg.tok ~= 'R0' then + self:push_new('ADDU', 'AT', 'AT', 'R0') + end + end + base = self:token('DEREF', 'AT') + else + base = self:pop('DEREF') + end + end + self:push_new(name, rt, offset, base) +end + +for k, v in pairs(data.instructions) do + if v[2] == 'tob' then + overrides[k] = tob_override + end +end function overrides.LI(self, name) - local lui = instructions['LUI'] - local ori = instructions['ORI'] - local addiu = instructions['ADDIU'] - local args = {} - args.rt = self:register() - self:optional_comma() - local im = self:const() + local rt = self:pop('CPU') + local im = self:pop('CONST') -- for us, this is just semantics. for a "real" assembler, -- LA could add appropriate RELO LUI/ADDIU directives. @@ -24,272 +61,180 @@ function overrides.LI(self, name) end if im.portion then - args.rs = 'R0' - args.immediate = im - self:format_out(addiu, args) + -- FIXME: use appropriate instruction based on portion? + self:push_new('ADDIU', rt, 'R0', im) return end im.tok = im.tok % 0x100000000 if im.tok >= 0x10000 and im.tok <= 0xFFFF8000 then - args.rs = args.rt - args.immediate = self:token(im):set('portion', 'upper') - self:format_out(lui, args) + local rs = rt + local immediate = self:token(im):set('portion', 'upper') + self:push_new('LUI', rt, immediate) if im.tok % 0x10000 ~= 0 then - args.immediate = self:token(im):set('portion', 'lower') - self:format_out(ori, args) + local immediate = self:token(im):set('portion', 'lower') + self:push_new('ORI', rt, rs, immediate) end elseif im.tok >= 0x8000 and im.tok < 0x10000 then - args.rs = 'R0' - args.immediate = self:token(im):set('portion', 'lower') - self:format_out(ori, args) + local immediate = self:token(im):set('portion', 'lower') + self:push_new('ORI', rt, 'R0', immediate) else - args.rs = 'R0' - args.immediate = self:token(im):set('portion', 'lower') - self:format_out(addiu, args) + local immediate = self:token(im):set('portion', 'lower') + self:push_new('ADDIU', rt, 'R0', immediate) end end function overrides.LA(self, name) - local lui = instructions['LUI'] - local addiu = instructions['ADDIU'] - local args = {} - args.rt = self:register() - self:optional_comma() - local im = self:const() + local rt = self:pop('CPU') + local im = self:pop('CONST') - args.rs = args.rt - args.immediate = self:token(im):set('portion', 'upperoff') - self:format_out(lui, args) - args.immediate = self:token(im):set('portion', 'lower') - self:format_out(addiu, args) + local rs = rt + local immediate = self:token(im):set('portion', 'upperoff') + self:push_new('LUI', rt, immediate) + local immediate = self:token(im):set('portion', 'lower') + self:push_new('ADDIU', rt, rt, immediate) end function overrides.PUSH(self, name) - local addi = instructions['ADDI'] - local w = instructions[name == 'PUSH' and 'SW' or 'LW'] - local jr = instructions['JR'] + local w = name == 'PUSH' and 'SW' or 'LW' local stack = {} - while not self:is_EOL() do - if self.tt == 'NUM' then - if self.tok < 0 then - self:error("can't push a negative number of spaces") + for _, t in ipairs(self.s) do + if t.tt == 'NUM' then + if t.tok < 0 then + self:error("can't push a negative number of spaces", t.tok) end - for i=1,self.tok do + for i=1, t.tok do insert(stack, '') end - self:advance() + self:pop() else - insert(stack, self:register()) - end - if not self:is_EOL() then - self:optional_comma() + insert(stack, self:pop('CPU')) end end if #stack == 0 then self:error(name..' requires at least one argument') end - local args = {} if name == 'PUSH' then - args.rt = 'SP' - args.rs = 'SP' - args.immediate = self:token(#stack*4):set('negate') - self:format_out(addi, args) + local immediate = self:token(#stack*4):set('negate') + self:push_new('ADDIU', 'SP', 'SP', immediate) end - args.base = 'SP' for i, r in ipairs(stack) do - args.rt = r if r ~= '' then - args.offset = (i - 1)*4 - self:format_out(w, args) + local offset = (i - 1)*4 + self:push_new(w, r, offset, self:token('DEREF', 'SP')) end end if name == 'JPOP' then - args.rs = 'RA' - self:format_out(jr, args) + self:push_new('JR', 'RA') end if name == 'POP' or name == 'JPOP' then - args.rt = 'SP' - args.rs = 'SP' - args.immediate = #stack*4 - self:format_out(addi, args) + local immediate = #stack * 4 + self:push_new('ADDIU', 'SP', 'SP', immediate) end end overrides.POP = overrides.PUSH overrides.JPOP = overrides.PUSH function overrides.NAND(self, name) - local and_ = instructions['AND'] - local nor = instructions['NOR'] - local args = {} - args.rd = self:register() - self:optional_comma() - args.rs = self:register() - self:optional_comma() - args.rt = self:register() - self:format_out(and_, args) - args.rs = args.rd - args.rt = 'R0' - self:format_out(nor, args) + local rd = self:pop('CPU') + local rs = self:pop('CPU') + local rt = self:pop('CPU') + self:push_new('AND', rd, rs, rt) + local rs = rd + local rt = 'R0' + self:push_new('NOR', rd, rs, rt) end function overrides.NANDI(self, name) - local andi = instructions['ANDI'] - local nor = instructions['NOR'] - local args = {} - args.rt = self:register() - self:optional_comma() - args.rs = self:register() - self:optional_comma() - args.immediate = self:const() - self:format_out(andi[3], andi[1], args, andi[4], andi[5]) - args.rd = args.rt - args.rs = args.rt - args.rt = 'R0' - self:format_out(nor[3], nor[1], args, nor[4], nor[5]) + local rt = self:pop('CPU') + local rs = self:pop('CPU') + local immediate = self:pop('CONST') + self:push_new('ANDI', rt, rs, immediate) + local rd = rt + local rs = rt + local rt = 'R0' + self:push_new('NOR', rd, rs, rt) end function overrides.NORI(self, name) - local ori = instructions['ORI'] - local nor = instructions['NOR'] - local args = {} - args.rt = self:register() - self:optional_comma() - args.rs = self:register() - self:optional_comma() - args.immediate = self:const() - self:format_out(ori, args) - args.rd = args.rt - args.rs = args.rt - args.rt = 'R0' - self:format_out(nor, args) + local rt = self:pop('CPU') + local rs = self:pop('CPU') + local immediate = self:pop('CONST') + self:push_new('ORI', rt, rs, immediate) + local rd = rt + local rs = rt + local rt = 'R0' + self:push_new('NOR', rd, rs, rt) end function overrides.ROL(self, name) - local sll = instructions['SLL'] - local srl = instructions['SRL'] - local or_ = instructions['OR'] - local args = {} - local left = self:register() - self:optional_comma() - args.rt = self:register() - self:optional_comma() - args.immediate = self:const() - args.rd = left - if args.rd == 'AT' or args.rt == 'AT' then - self:error('registers cannot be AT in this pseudo-instruction') - end - if args.rd == args.rt and args.rd ~= 'R0' then - self:error('registers cannot be the same') - end - self:format_out(sll, args) - args.rd = 'AT' - args.immediate = 32 - args.immediate[2] - self:format_out(srl, args) - args.rd = left - args.rs = left - args.rt = 'AT' - self:format_out(or_, args) + -- FIXME + local rd, rs, rt + local left = self:pop('CPU') + rt = self:pop('CPU') + local immediate = self:pop('CONST') + error('Internal Error: unimplemented') end function overrides.ROR(self, name) - local sll = instructions['SLL'] - local srl = instructions['SRL'] - local or_ = instructions['OR'] - local args = {} - local right = self:register() - self:optional_comma() - args.rt = self:register() - self:optional_comma() - args.immediate = self:const() - args.rd = right - if args.rt == 'AT' or args.rd == 'AT' then - self:error('registers cannot be AT in a pseudo-instruction that uses AT') - end - if args.rd == args.rt and args.rd ~= 'R0' then - self:error('registers cannot be the same') - end - self:format_out(srl, args) - args.rd = 'AT' - args.immediate = 32 - args.immediate[2] - self:format_out(sll, args) - args.rd = right - args.rs = right - args.rt = 'AT' - self:format_out(or_, args) + -- FIXME + local right = self:pop('CPU') + local rt = self:pop('CPU') + local immediate = self:pop('CONST') + error('Internal Error: unimplemented') end function overrides.JR(self, name) - local jr = instructions['JR'] - local args = {} - if self:is_EOL() then - args.rs = 'RA' - else - args.rs = self:register() - end - self:format_out(jr, args) + local rs = self:peek() and self:pop('CPU') or 'RA' + self:push_new('JR', rs) end local branch_basics = { - BEQI = "BEQ", - BGEI = "BEQ", - BGTI = "BEQ", - BLEI = "BNE", - BLTI = "BNE", - BNEI = "BNE", - BEQIL = "BEQL", - BGEIL = "BEQL", - BGTIL = "BEQL", - BLEIL = "BNEL", - BLTIL = "BNEL", - BNEIL = "BNEL", + BEQI = 'BEQ', + BGEI = 'BEQ', + BGTI = 'BEQ', + BLEI = 'BNE', + BLTI = 'BNE', + BNEI = 'BNE', + BEQIL = 'BEQL', + BGEIL = 'BEQL', + BGTIL = 'BEQL', + BLEIL = 'BNEL', + BLTIL = 'BNEL', + BNEIL = 'BNEL', } function overrides.BEQI(self, name) - local addiu = instructions['ADDIU'] - local branch = instructions[branch_basics[name]] - local args = {} - local reg = self:register() - self:optional_comma() - args.immediate = self:const() - self:optional_comma() - args.offset = self:token(self:const('relative')):set('signed') + local branch = branch_basics[name] + local reg = self:pop('CPU') + local immediate = self:pop('CONST') + local offset = self:pop('REL'):set('signed') if reg == 'AT' then self:error('register cannot be AT in this pseudo-instruction') end - args.rt = 'AT' - args.rs = 'R0' - self:format_out(addiu, args) + self:push_new('ADDIU', 'AT', 'R0', immediate) - args.rs = reg - self:format_out(branch, args) + self:push_new(branch, reg, 'AT', offset) end overrides.BNEI = overrides.BEQI overrides.BEQIL = overrides.BEQI overrides.BNEIL = overrides.BEQI function overrides.BLTI(self, name) - local slti = instructions['SLTI'] - local branch = instructions[branch_basics[name]] - local args = {} - args.rs = self:register() - self:optional_comma() - args.immediate = self:const() - self:optional_comma() - args.offset = self:token(self:const('relative')):set('signed') + local branch = branch_basics[name] + local reg = self:pop('CPU') + local immediate = self:pop('CONST') + local offset = self:pop('REL'):set('signed') - if args.rs == 'AT' then + if reg == 'AT' then self:error('register cannot be AT in this pseudo-instruction') end - args.rt = 'AT' - self:format_out(slti, args) + self:push_new('SLTI', 'AT', reg, immediate) - args.rs = 'AT' - args.rt = 'R0' - self:format_out(branch, args) + self:push_new(branch, 'R0', 'AT', offset) end overrides.BGEI = overrides.BLTI overrides.BLTIL = overrides.BLTI @@ -297,40 +242,29 @@ overrides.BGEIL = overrides.BLTI function overrides.BLEI(self, name) -- TODO: this can probably be optimized - local addiu = instructions['ADDIU'] - local slt = instructions['SLT'] - local branch = instructions[branch_basics[name]] - local beq = instructions['BEQ'] - local args = {} - local reg = self:register() - self:optional_comma() - args.immediate = self:const() - self:optional_comma() - local offset = self:token(self:const('relative')):set('signed') + local branch = branch_basics[name] + local reg = self:pop('CPU') + local immediate = self:pop('CONST') + local offset = self:pop('REL'):set('signed') if reg == 'AT' then self:error('register cannot be AT in this pseudo-instruction') end - args.rt = 'AT' - args.rs = 'R0' - self:format_out(addiu, args) + self:push_new('ADDIU', 'AT', 'R0', immediate) + local beq_offset if name == 'BLEI' then - args.offset = offset + beq_offset = offset else - args.offset = 2 -- branch to delay slot of the next branch + -- FIXME: this probably isn't correct for branch-likely instructions + beq_offset = 2 -- branch to delay slot of the next branch end - args.rs = reg - self:format_out(beq, args) + self:push_new('BEQ', reg, 'R0', beq_offset) - args.rd = 'AT' - self:format_out(slt, args) + self:push_new('SLT', 'AT', reg, immediate) - args.rs = 'AT' - args.rt = 'R0' - args.offset = offset - self:format_out(branch, args) + self:push_new(branch, 'AT', 'R0', offset) end overrides.BGTI = overrides.BLEI overrides.BLEIL = overrides.BLEI diff --git a/Lua/lib/lips/util.lua b/Lua/lib/lips/util.lua index fabd7e9..4b80607 100644 --- a/Lua/lib/lips/util.lua +++ b/Lua/lib/lips/util.lua @@ -1,21 +1,6 @@ local floor = math.floor local open = io.open -local function Class(inherit) - local class = {} - local mt_obj = {__index = class} - local mt_class = { - __call = function(self, ...) - local obj = setmetatable({}, mt_obj) - obj:init(...) - return obj - end, - __index = inherit, - } - - return setmetatable(class, mt_class) -end - local function readfile(fn, binary) local mode = binary and 'rb' or 'r' local f = open(fn, mode) @@ -32,8 +17,51 @@ local function bitrange(x, lower, upper) return floor(x/2^lower) % 2^(upper - lower + 1) end +local function parent(t) + local mt = getmetatable(t) + if mt == nil then + return nil + end + return mt.__index +end + +-- http://stackoverflow.com/a/9279009 +local loadcode +if setfenv and loadstring then -- 5.1, JIT + loadcode = function(code, environment) + local f = assert(loadstring(code)) + setfenv(f, environment) + return f + end +else -- 5.2, 5.3 + loadcode = function(code, environment) + return assert(load(code, nil, 't', environment)) + end +end + +local function measure_data(s) + assert(s and s.type == '!DATA', 'Internal Error: expected !DATA statement') + local n = 0 + for i, t in ipairs(s) do + if t.tt == 'LABEL' then + n = n + 4 + elseif t.tt == 'WORDS' then + n = n + #t.tok * 4 + elseif t.tt == 'HALFWORDS' then + n = n + #t.tok * 2 + elseif t.tt == 'BYTES' then + n = n + #t.tok * 1 + else + error('Internal Error: unknown data type in !DATA') + end + end + return n +end + return { - Class = Class, readfile = readfile, bitrange = bitrange, + parent = parent, + loadcode = loadcode, + measure_data = measure_data, }