From 486ccb99af0d3d1bbb177bc28c737bf7a42cad78 Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Wed, 20 Apr 2016 16:23:44 -0700 Subject: [PATCH] begin refactor; add statement collection --- lips/Collector.lua | 236 +++++++++++++++++++++++++++++++++++++++++++++ lips/Muncher.lua | 9 +- lips/Parser.lua | 104 +++++--------------- lips/Statement.lua | 53 ++++++++++ lips/util.lua | 24 +++++ 5 files changed, 342 insertions(+), 84 deletions(-) create mode 100644 lips/Collector.lua create mode 100644 lips/Statement.lua diff --git a/lips/Collector.lua b/lips/Collector.lua new file mode 100644 index 0000000..5ce86a5 --- /dev/null +++ b/lips/Collector.lua @@ -0,0 +1,236 @@ +local insert = table.insert +local unpack = unpack or table.unpack + +local path = string.gsub(..., "[^.]+$", "") +local Token = require(path.."Token") +local Statement = require(path.."Statement") +local Muncher = require(path.."Muncher") + +local arg_types = { -- for instructions + NUM = true, + REG = true, + DEFSYM = true, + LABELSYM = true, + RELLABELSYM = true, +} + +local Collector = Muncher:extend() +function Collector:init(options) + self.options = options or {} +end + +function Collector:statement(...) + local s = Statement(self.fn, self.line, ...) + return s +end + +function Collector:format_out(t, args) + self:format_out_raw(t[3], t[1], args, t[4], t[5]) +end + +function Collector:push_data(data, size) + -- FIXME: local 'data' name clashes with lips.data + --[[ pseudo-example: + Statement{type='!DATA', + {tt='BYTES', tok={0, 1, 2}}, + {tt='HALFWORDS', tok={3, 4, 5}}, + {tt='WORDS', tok={6, 7, 8}}, + {tt='LABEL', tok='myLabel'}, + } + --]] + + -- TODO: consider not scrunching data statements, just their tokens + + local last_statement = self.statements[#self.statements] + local s + if last_statement and last_statement.type == '!DATA' then + s = last_statement + else + s = self:statement('!DATA') + insert(self.statements, s) + end + + if type(data) == 'string' and size == 'WORD' then + -- labels will be assembled to words + insert(s, Token('LABEL', data)) + return + end + + if size ~= 'BYTE' and size ~= 'HALFWORD' and size ~= 'WORD' then + error('Internal Error: unknown data size argument') + end + + local sizes = size..'S' + + local last_token = s[#s] + local t + if last_token and last_token.tt == sizes then + t = last_token + else + t = self:token(sizes, {}) + insert(s, t) + s:validate() + end + insert(t.tok, data) +end + +function Collector:variable() + local t = self.t + local t2 = self:advance() + + local s = self:statement('!DEF', t, t2) + insert(self.statements, s) + self:advance() +end + +function Collector:directive() + local name = self.tok + self:advance() + local function add(kind, ...) + insert(self.statements, self:statement('!'..kind, ...)) + end + if name == 'ORG' then + add(name, self:const(false, true)) + elseif name == 'ALIGN' or name == 'SKIP' then + if self:is_EOL() and name == 'ALIGN' then + add(name, self:token('NUM', 0)) + else + local size = self:number() + if self:is_EOL() then + add(name, size) + else + self:optional_comma() + add(name, size, self:number()) + end + self:expect_EOL() + end + elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then + self:push_data(self:const().tok, name) + while not self:is_EOL() do + self:advance() + self:optional_comma() + self:push_data(self:const().tok, name) + end + self:expect_EOL() + elseif name == 'INC' or name == 'INCBIN' then + -- noop, handled by lexer + elseif name == 'ASCII' or name == 'ASCIIZ' then + local bytes = self:string() + for i, number in ipairs(bytes.tok) do + self:push_data(number, 'BYTE') + end + if name == 'ASCIIZ' then + self:push_data(0, 'BYTE') + end + self:expect_EOL() + elseif name == 'FLOAT' then + self:error('unimplemented directive', name) + else + self:error('unknown directive', name) + end +end + +function Collector:basic_special() + local name, args = self:special() + + local portion + if name == 'hi' then + portion = 'upperoff' + elseif name == 'up' then + portion = 'upper' + elseif name == 'lo' then + portion = 'lower' + else + self:error('unknown special', name) + end + + if #args ~= 1 then + self:error(name..' expected one argument', #args) + end + + local t = self:token(args[1]):set('portion', portion) + return t +end + +function Collector:instruction() + local s = self:statement(self.tok) + insert(self.statements, s) + self:advance() + + while self.tt ~= 'EOL' do + local t = self.t + if self.tt == 'OPEN' then + t = self:deref() + t.tt = 'DEREF' -- TODO: should just be returned by :deref + insert(s, t) + elseif self.tt == 'UNARY' then + local peek = self.tokens[self.i + 1] + if peek.tt == 'DEFSYM' then + t = self:advance() + t = Token(t):set('negate') + insert(s, t) + self:advance() + elseif peek.tt == 'EOL' or peek.tt == 'SEP' then + local tok = t.tok == 1 and '+' or t.tok == -1 and '-' + t = Token(self.fn, self.line, 'RELLABELSYM', tok) + insert(s, t) + self:advance() + else + self:error('unexpected token after unary operator', peek.tt) + end + elseif self.tt == 'SPECIAL' then + t = self:basic_special() + insert(s, t) + self:advance() + elseif self.tt == 'SEP' then + self:error('extraneous comma') + elseif not arg_types[self.tt] then + self:error('unexpected argument type in instruction', self.tt) + else + insert(s, t) + self:advance() + end + self:optional_comma() + end + + self:expect_EOL() + s:validate() +end + +function Collector:collect(tokens, fn) + self.tokens = tokens + self.fn = fn or '(string)' + self.main_fn = self.fn + + self.statements = {} + + self.i = 0 -- set up Muncher iteration + self:advance() -- load up the first token + while true do + if self.tt == 'EOF' then + -- don't break if this is an included file's EOF + if self.fn == self.main_fn then + break + end + self:advance() + elseif self.tt == 'EOL' then + -- empty line + self:advance() + elseif self.tt == 'DEF' then + self:variable() -- handles advancing + elseif self.tt == 'LABEL' or self.tt == 'RELLABEL' then + insert(self.statements, self:statement('!LABEL', self.t)) + self:advance() + elseif self.tt == 'DIR' then + self:directive() -- handles advancing + elseif self.tt == 'INSTR' then + self:instruction() -- handles advancing + else + self:error('expected starting token for statement', self.tt) + end + end + + return self.statements +end + +return Collector diff --git a/lips/Muncher.lua b/lips/Muncher.lua index 235fee0..1d8842b 100644 --- a/lips/Muncher.lua +++ b/lips/Muncher.lua @@ -17,7 +17,10 @@ local arg_types = { local Muncher = Base:extend() -- no base init method -function Muncher:error(msg) +function Muncher:error(msg, got) + if got ~= nil then + msg = msg..', got '..tostring(got) + end error(format('%s:%d: Error: %s', self.fn, self.line, msg), 2) end @@ -112,8 +115,8 @@ function Muncher:deref() end function Muncher:const(relative, no_label) - if self.tt ~= 'NUM' and self.tt ~= 'LABELSYM' then - self:error('expected constant') + if self.tt ~= 'NUM' and self.tt ~= 'DEFSYM' and self.tt ~= 'LABELSYM' then + self:error('expected constant', self.tt) end if no_label and self.tt == 'LABELSYM' then self:error('labels are not allowed here') diff --git a/lips/Parser.lua b/lips/Parser.lua index 1129dcf..2a64ddb 100644 --- a/lips/Parser.lua +++ b/lips/Parser.lua @@ -3,13 +3,14 @@ local insert = table.insert local path = string.gsub(..., "[^.]+$", "") local data = require(path.."data") local overrides = require(path.."overrides") +local Base = require(path.."Base") local Token = require(path.."Token") local Lexer = require(path.."Lexer") -local Dumper = require(path.."Dumper") -local Muncher = require(path.."Muncher") +local Collector = require(path.."Collector") local Preproc = require(path.."Preproc") +local Dumper = require(path.."Dumper") -local Parser = Muncher:extend() +local Parser = Base:extend() function Parser:init(writer, fn, options) self.fn = fn or '(string)' self.main_fn = self.fn @@ -17,61 +18,7 @@ function Parser:init(writer, fn, options) self.dumper = Dumper(writer, fn, options) end -function Parser:directive() - local name = self.tok - self:advance() - local function add(...) - self.dumper:add_directive(self.fn, self.line, ...) - end - if name == 'ORG' then - add(name, self:number().tok) - elseif name == 'ALIGN' or name == 'SKIP' then - if self:is_EOL() and name == 'ALIGN' then - add(name, 0) - else - local size = self:number().tok - if self:is_EOL() then - add(name, size) - else - self:optional_comma() - add(name, size, self:number().tok) - end - self:expect_EOL() - end - elseif name == 'BYTE' or name == 'HALFWORD' then - add(name, self:number().tok) - while not self:is_EOL() do - self:advance() - self:optional_comma() - add(name, self:number().tok) - end - self:expect_EOL() - elseif name == 'WORD' then - -- allow labels in word directives - add(name, self:const().tok) - while not self:is_EOL() do - self:advance() - self:optional_comma() - add(name, self:const().tok) - end - self:expect_EOL() - elseif name == 'INC' or name == 'INCBIN' then - -- noop, handled by lexer - elseif name == 'ASCII' or name == 'ASCIIZ' then - local bytes = self:string() - for i, number in ipairs(bytes.tok) do - add('BYTE', number) - end - if name == 'ASCIIZ' then - add('BYTE', 0) - end - self:expect_EOL() - elseif name == 'FLOAT' then - self:error('unimplemented directive') - else - self:error('unknown directive') - end -end +--[[ function Parser:format_in(informat) -- see data.lua for a guide on what all these mean @@ -222,10 +169,9 @@ function Parser:instruction() end self:expect_EOL() end +--]] function Parser:tokenize(asm) - self.i = 0 - local lexer = Lexer(asm, self.main_fn, self.options) local tokens = {} @@ -242,41 +188,37 @@ function Parser:tokenize(asm) end) end + local collector = Collector(self.options) + local statements = collector:collect(tokens, self.main_fn) + + --[[ local preproc = Preproc(self.options) - self.tokens = preproc:process(tokens) + self.statements = preproc:process(statements) -- the lexer guarantees an EOL and EOF for a blank file assert(#self.tokens > 0, 'Internal Error: no tokens after preprocessing') + --]] + self.statements = statements end function Parser:parse(asm) self:tokenize(asm) - self:advance() -- load up the first token - while true do - if self.tt == 'EOF' then - -- don't break if this is an included file's EOF - if self.fn == self.main_fn then - break - end - self:advance() - elseif self.tt == 'EOL' then - -- empty line - self:advance() - elseif self.tt == 'DIR' then - self:directive() -- handles advancing - elseif self.tt == 'LABEL' then - self.dumper:add_label(self.tok) - self:advance() - elseif self.tt == 'INSTR' then - self:instruction() -- handles advancing - else - self:error('unexpected token (unknown instruction?)') + + -- DEBUG + for i, s in ipairs(self.statements) do + local values = '' + for j, v in ipairs(s) do + values = values..'\t'..v.tt end + values = values:sub(2) + print(i, s.type, values) end + --[[ if self.options.labels then self.dumper:export_labels(self.options.labels) end return self.dumper:dump() + --]] end return Parser diff --git a/lips/Statement.lua b/lips/Statement.lua new file mode 100644 index 0000000..6f1f726 --- /dev/null +++ b/lips/Statement.lua @@ -0,0 +1,53 @@ +local path = string.gsub(..., "[^.]+$", "") +local util = require(path.."util") +local Base = require(path.."Base") +local Token = require(path.."Token") + +local Statement = Base:extend() +function Statement:init(...) + local args = {...} + if #args == 1 then + local t = args[1] + if util.parent(t) ~= Statement then + error('Internal Error: 1-arg Statement:init expected a Statement') + end + if type(t) == 'table' then + for k, v in pairs(t) do + self[k] = v + end + end + elseif #args >= 3 then + self.fn = args[1] + self.line = args[2] + self.type = args[3] + for i, v in ipairs(args) do + if i > 3 then + self[i - 3] = v + end + end + else + error('Internal Error: Statement:init takes 1 or 3+ arguments', 3) + end + self:validate(1) + return self +end + +function Statement:validate(n) + n = (n or 0) + 3 -- depth for error message + if not self.fn then + error('Internal Error: statements require a filename', n) + end + if not self.line then + error('Internal Error: statements require a line number', n) + end + if not self.type then + error('Internal Error: statement is missing a type', n) + end + for i, v in ipairs(self) do + if util.parent(v) ~= Token then + error(('Internal Error: Statement[%i] is not a Token'):format(i), n) + end + end +end + +return Statement diff --git a/lips/util.lua b/lips/util.lua index 717fa2d..d2cdcf5 100644 --- a/lips/util.lua +++ b/lips/util.lua @@ -17,7 +17,31 @@ local function bitrange(x, lower, upper) return floor(x/2^lower) % 2^(upper - lower + 1) end +local function parent(t) + local mt = getmetatable(t) + if mt == nil then + return nil + end + return mt.__index +end + +-- http://stackoverflow.com/a/9279009 +local loadcode +if setfenv and loadstring then -- 5.1, JIT + loadcode = function(code, environment) + local f = assert(loadstring(code)) + setfenv(f, environment) + return f + end +else -- 5.2, 5.3 + loadcode = function(code, environment) + return assert(load(code, nil, 't', environment)) + end +end + return { readfile = readfile, bitrange = bitrange, + parent = parent, + loadcode = loadcode, }