1
0
Fork 0
mirror of https://github.com/notwa/lips synced 2024-05-02 17:43:23 -07:00

refactor Muncher into separate TokenIter class

this also fixes lexing the EOL after an include directive.
This commit is contained in:
Connor Olding 2016-11-27 05:52:45 -08:00
parent e1ae1fdd64
commit 0d1527a773
4 changed files with 217 additions and 168 deletions

7
TODO
View File

@ -1,3 +1,10 @@
unify/optimize ascii/asciiz/byte/halfword/word into BIN directives
directive aliases, are these right?
DB = 'BYTE',
DH = 'HALFWORD',
DW = 'WORD',
add basic command-line interface (patch.lua)
add macros

View File

@ -1,11 +1,17 @@
local insert = table.insert local insert = table.insert
local path = string.gsub(..., "[^.]+$", "") local path = string.gsub(..., "[^.]+$", "")
local Base = require(path.."Base")
local Token = require(path.."Token") local Token = require(path.."Token")
local TokenIter = require(path.."TokenIter")
local Statement = require(path.."Statement") local Statement = require(path.."Statement")
local Muncher = require(path.."Muncher")
local arg_types = { -- for instructions local Collector = Base:extend()
function Collector:init(options)
self.options = options or {}
end
Collector.arg_types = { -- for instructions
NUM = true, NUM = true,
REG = true, REG = true,
VARSYM = true, VARSYM = true,
@ -13,17 +19,14 @@ local arg_types = { -- for instructions
RELLABELSYM = true, RELLABELSYM = true,
} }
local Collector = Muncher:extend()
function Collector:init(options)
self.options = options or {}
end
function Collector:statement(...) function Collector:statement(...)
local s = Statement(self.fn, self.line, ...) local I = self.iter
local s = Statement(I.fn, I.line, ...)
return s return s
end end
function Collector:push_data(datum, size) function Collector:push_data(datum, size)
local I = self.iter
--[[ pseudo-example: --[[ pseudo-example:
Statement{type='!DATA', Statement{type='!DATA',
{tt='BYTES', tok={0, 1, 2}}, {tt='BYTES', tok={0, 1, 2}},
@ -35,9 +38,10 @@ function Collector:push_data(datum, size)
-- FIXME: optimize the hell out of this garbage, preferably in the lexer -- FIXME: optimize the hell out of this garbage, preferably in the lexer
-- TODO: consider not scrunching data statements, just their tokens -- TODO: consider not scrunching data statements, just their tokens
-- TODO: concatenate strings; use !BIN instead of !DATA
if type(datum) == 'number' then if type(datum) == 'number' then
datum = self:token(datum) datum = I:token(datum)
end end
local last_statement = self.statements[#self.statements] local last_statement = self.statements[#self.statements]
@ -59,13 +63,13 @@ function Collector:push_data(datum, size)
insert(s, datum) insert(s, datum)
return return
else else
self:error('labels are too large to be used in this directive') I:error('labels are too large to be used in this directive')
end end
elseif datum.tt == 'VARSYM' then elseif datum.tt == 'VARSYM' then
insert(s, datum:set('size', size)) insert(s, datum:set('size', size))
return return
elseif datum.tt ~= 'NUM' then elseif datum.tt ~= 'NUM' then
self:error('unsupported data type', datum.tt) I:error('unsupported data type', datum.tt)
end end
local sizes = size..'S' local sizes = size..'S'
@ -75,77 +79,67 @@ function Collector:push_data(datum, size)
if last_token and last_token.tt == sizes then if last_token and last_token.tt == sizes then
t = last_token t = last_token
else else
t = self:token(sizes, {}) t = I:token(sizes, {})
insert(s, t) insert(s, t)
s:validate() s:validate()
end end
insert(t.tok, datum.tok) insert(t.tok, datum.tok)
end end
function Collector:variable() function Collector:directive(name)
local t = self.t local I = self.iter
local t2 = self:advance()
local s = self:statement('!VAR', t, t2)
insert(self.statements, s)
self:advance()
end
function Collector:directive()
local name = self.tok
self:advance()
local function add(kind, ...) local function add(kind, ...)
insert(self.statements, self:statement('!'..kind, ...)) insert(self.statements, self:statement('!'..kind, ...))
end end
if name == 'ORG' or name == 'BASE' then if name == 'ORG' or name == 'BASE' then
add(name, self:const(nil, 'no labels')) add(name, I:const(nil, 'no labels'))
elseif name == 'PUSH' or name == 'POP' then elseif name == 'PUSH' or name == 'POP' then
add(name, self:const()) add(name, I:const())
while not self:is_EOL() do while not I:is_EOL() do
self:optional_comma() I:eat_comma()
add(name, self:const()) add(name, I:const())
end end
elseif name == 'ALIGN' or name == 'SKIP' then elseif name == 'ALIGN' or name == 'SKIP' then
if self:is_EOL() and name == 'ALIGN' then if I:is_EOL() and name == 'ALIGN' then
add(name) add(name)
else else
local size = self:const(nil, 'no label') local size = I:const(nil, 'no label')
if self:is_EOL() then if I:is_EOL() then
add(name, size) add(name, size)
else else
self:optional_comma() I:eat_comma()
add(name, size, self:const(nil, 'no label')) add(name, size, I:const(nil, 'no label'))
end end
end end
elseif name == 'BIN' then elseif name == 'BIN' then
-- FIXME: not a real directive, just a workaround -- FIXME: not a real directive, just a workaround
add(name, self:string()) add(name, I:string())
elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then
self:push_data(self:const(), name) self:push_data(I:const(), name)
while not self:is_EOL() do while not I:is_EOL() do
self:optional_comma() I:eat_comma()
self:push_data(self:const(), name) self:push_data(I:const(), name)
end end
elseif name == 'HEX' then elseif name == 'HEX' then
if self.tt ~= 'OPEN' then if I.tt ~= 'OPEN' then
self:error('expected opening brace for hex directive', self.tt) I:error('expected opening brace for hex directive', I.tt)
end end
self:advance() I:next()
while self.tt ~= 'CLOSE' do while I.tt ~= 'CLOSE' do
if self.tt == 'EOL' then if I.tt == 'EOL' then
self:advance() I:next()
else else
self:push_data(self:const(), 'BYTE') self:push_data(I:const(), 'BYTE')
end end
end end
self:advance() I:next()
elseif name == 'INC' or name == 'INCBIN' then elseif name == 'INC' or name == 'INCBIN' then
-- noop, handled by lexer -- noop, handled by lexer
self:string() I:string()
return -- don't expect EOL
elseif name == 'ASCII' or name == 'ASCIIZ' then elseif name == 'ASCII' or name == 'ASCIIZ' then
local bytes = self:string() local bytes = I:string()
for i, number in ipairs(bytes.tok) do for i, number in ipairs(bytes.tok) do
self:push_data(number, 'BYTE') self:push_data(number, 'BYTE')
end end
@ -153,85 +147,61 @@ function Collector:directive()
self:push_data(0, 'BYTE') self:push_data(0, 'BYTE')
end end
elseif name == 'FLOAT' then elseif name == 'FLOAT' then
self:error('unimplemented directive', name) I:error('unimplemented directive', name)
else else
self:error('unknown directive', name) I:error('unknown directive', name)
end end
self:expect_EOL()
I:expect_EOL()
end end
function Collector:basic_special() function Collector:instruction(name)
local name, args = self:special() local I = self.iter
local s = self:statement(name)
local portion
if name == 'hi' then
portion = 'upperoff'
elseif name == 'up' then
portion = 'upper'
elseif name == 'lo' then
portion = 'lower'
else
self:error('unknown special', name)
end
if #args ~= 1 then
self:error(name..' expected one argument', #args)
end
local t = self:token(args[1]):set('portion', portion)
return t
end
function Collector:instruction()
local s = self:statement(self.tok)
insert(self.statements, s) insert(self.statements, s)
self:advance()
while self.tt ~= 'EOL' do while I.tt ~= 'EOL' do
local t = self.t local t = I.t
if self.tt == 'OPEN' then if I.tt == 'OPEN' then
t = self:deref() insert(s, I:deref())
t.tt = 'DEREF' -- TODO: should just be returned by :deref elseif I.tt == 'UNARY' then
insert(s, t) local peek = assert(I:peek())
elseif self.tt == 'UNARY' then
local peek = self.tokens[self.i + 1]
if peek.tt == 'VARSYM' then if peek.tt == 'VARSYM' then
local negate = t.tok == -1 local negate = t.tok == -1
t = self:advance() t = I:next()
t = Token(t):set('negate', negate) t = Token(t):set('negate', negate)
insert(s, t) insert(s, t)
self:advance() I:next()
elseif peek.tt == 'EOL' or peek.tt == 'SEP' then elseif peek.tt == 'EOL' or peek.tt == 'SEP' then
local tok = t.tok == 1 and '+' or t.tok == -1 and '-' local tok = t.tok == 1 and '+' or t.tok == -1 and '-'
t = Token(self.fn, self.line, 'RELLABELSYM', tok) t = Token(I.fn, I.line, 'RELLABELSYM', tok)
insert(s, t) insert(s, t)
self:advance() I:next()
else else
self:error('unexpected token after unary operator', peek.tt) I:error('unexpected token after unary operator', peek.tt)
end end
elseif self.tt == 'SPECIAL' then elseif I.tt == 'SPECIAL' then
t = self:basic_special() t = I:basic_special()
insert(s, t) insert(s, t)
self:advance() I:next()
elseif self.tt == 'SEP' then elseif I.tt == 'SEP' then
self:error('extraneous comma') I:error('extraneous comma')
elseif not arg_types[self.tt] then elseif not self.arg_types[I.tt] then
self:error('unexpected argument type in instruction', self.tt) I:error('unexpected argument type in instruction', I.tt)
else else
insert(s, t) insert(s, t)
self:advance() I:next()
end end
self:optional_comma() I:eat_comma()
end end
self:expect_EOL() I:expect_EOL()
s:validate() s:validate()
end end
function Collector:collect(tokens, fn) function Collector:collect(tokens, fn)
self.tokens = tokens self.iter = TokenIter(tokens)
self.fn = fn or '(string)' local I = self.iter
self.main_fn = self.fn
self.statements = {} self.statements = {}
@ -245,29 +215,28 @@ function Collector:collect(tokens, fn)
insert(self.statements, s) insert(self.statements, s)
end end
self.i = 0 -- set up Muncher iteration for t in I do
self:advance() -- load up the first token print(t.tt, t.tok)
while true do if t.tt == 'EOF' then
if self.tt == 'EOF' then -- noop
-- don't break if this is an included file's EOF elseif t.tt == 'EOL' then
if self.fn == self.main_fn then -- noop; empty line
break elseif t.tt == 'LABEL' or t.tt == 'RELLABEL' then
end insert(self.statements, self:statement('!LABEL', t))
self:advance() elseif t.tt == 'VAR' then
elseif self.tt == 'EOL' then local t2 = I:next()
-- empty line I:next()
self:advance() local s = self:statement('!VAR', t, t2)
elseif self.tt == 'VAR' then insert(self.statements, s)
self:variable() -- handles advancing I:expect_EOL()
elseif self.tt == 'LABEL' or self.tt == 'RELLABEL' then elseif t.tt == 'DIR' then
insert(self.statements, self:statement('!LABEL', self.t)) I:next()
self:advance() self:directive(t.tok)
elseif self.tt == 'DIR' then elseif t.tt == 'INSTR' then
self:directive() -- handles advancing I:next()
elseif self.tt == 'INSTR' then self:instruction(t.tok)
self:instruction() -- handles advancing
else else
self:error('expected starting token for statement', self.tt) I:error('expected starting token for statement', t.tt)
end end
end end

View File

@ -289,7 +289,7 @@ function Lexer:lex_string_naive(yield) -- no escape sequences
yield('STRING', buff) yield('STRING', buff)
end end
function Lexer:lex_include(_yield) function Lexer:lex_filename(_yield)
self:read_spaces() self:read_spaces()
local fn local fn
self:lex_string_naive(function(tt, tok) self:lex_string_naive(function(tt, tok)
@ -297,6 +297,18 @@ function Lexer:lex_include(_yield)
end) end)
_yield('STRING', fn, self.fn, self.line) _yield('STRING', fn, self.fn, self.line)
if self.chr ~= '\n' then
self:error('expected EOL after filename')
end
_yield('EOL', '\n', self.fn, self.line)
self:nextc()
return fn
end
function Lexer:lex_include(_yield)
local fn = self:lex_filename(_yield)
if self.options.path then if self.options.path then
fn = self.options.path..fn fn = self.options.path..fn
end end
@ -308,12 +320,7 @@ function Lexer:lex_include(_yield)
end end
function Lexer:lex_include_binary(_yield) function Lexer:lex_include_binary(_yield)
self:read_spaces() local fn = self:lex_filename(_yield)
local fn
self:lex_string_naive(function(tt, tok)
fn = tok
end)
_yield('STRING', fn, self.fn, self.line)
-- TODO: allow optional offset and size arguments -- TODO: allow optional offset and size arguments
if self.options.path then if self.options.path then

View File

@ -2,29 +2,71 @@ local format = string.format
local insert = table.insert local insert = table.insert
local path = string.gsub(..., "[^.]+$", "") local path = string.gsub(..., "[^.]+$", "")
local data = require(path.."data")
local Base = require(path.."Base")
local Token = require(path.."Token") local Token = require(path.."Token")
local arg_types = { local Iter = {}
NUM = true, function Iter:__call()
REG = true, return self:next(1)
VARSYM = true, end
LABELSYM = true,
RELLABELSYM = true,
}
local Muncher = Base:extend() local TokenIter = {}
-- no base init method function TokenIter:init(tokens)
assert(tokens ~= nil)
self.tokens = tokens
self:reset()
end
function Muncher:error(msg, got) function TokenIter:error(msg, got)
if got ~= nil then if got ~= nil then
msg = msg..', got '..tostring(got) msg = msg..', got '..tostring(got)
end end
error(format('%s:%d: Error: %s', self.fn, self.line, msg), 2) error(format('%s:%d: Error: %s', self.fn, self.line, msg), 2)
end end
function Muncher:token(t, val) function TokenIter:reset()
self.i = 0
self.tt = nil
self.tok = nil
self.fn = nil
self.line = nil
self.ended = false
end
--- Step the iterator forward by one token and mirror that token's fields
-- (tt, tok, fn, line) onto the iterator itself.
-- When the token list runs out, the mirrored fields are cleared and the
-- iterator is flagged as ended; advancing again after that is an error.
-- @param n optional number of extra stack levels to skip when reporting
--          the "past end" error, so the blame lands on an outer caller
function TokenIter:advance(n)
    if self.ended then
        -- level 2 is whoever called advance(); n pushes the blame further up
        error('Internal Error: attempted to advance iterator past end', 2 + (n or 0))
    end

    self.i = self.i + 1
    local current = self.tokens[self.i]
    self.t = current

    if current ~= nil then
        self.tt = current.tt
        self.tok = current.tok
        self.fn = current.fn
        self.line = current.line
    else
        -- ran off the end of the token list
        self.tt, self.tok, self.fn, self.line = nil, nil, nil, nil
        self.ended = true
    end
end
--- Advance to the following token and hand it back.
-- Returns nothing at all once the token list is exhausted.
-- @param n optional extra stack levels for error reporting; one more is
--          added before forwarding to advance() (presumably to account
--          for this function's own stack frame)
function TokenIter:next(n)
    self:advance((n or 0) + 1)
    local current = self.t
    if current then
        return current
    end
end
--- Look at the token after the current position without moving the iterator.
-- @return the upcoming token, or nil if there is none
function TokenIter:peek()
    local upcoming = self.i + 1
    return self.tokens[upcoming]
end
-- now begins the parsing stuff
function TokenIter:token(t, val)
-- note: call Token directly if you want to specify fn and line manually -- note: call Token directly if you want to specify fn and line manually
if type(t) == 'table' then if type(t) == 'table' then
t.fn = self.fn t.fn = self.fn
@ -37,36 +79,25 @@ function Muncher:token(t, val)
end end
end end
function Muncher:advance() function TokenIter:is_EOL()
self.i = self.i + 1
self.t = self.tokens[self.i]
self.tt = self.t.tt
self.tok = self.t.tok
self.fn = self.t.fn
self.line = self.t.line
return self.t
end
function Muncher:is_EOL()
return self.tt == 'EOL' or self.tt == 'EOF' return self.tt == 'EOL' or self.tt == 'EOF'
end end
function Muncher:expect_EOL() function TokenIter:expect_EOL()
if self:is_EOL() then if self:is_EOL() then
self:advance()
return return
end end
self:error('expected end of line', self.tt) self:error('expected end of line', self.tt)
end end
function Muncher:optional_comma() function TokenIter:eat_comma()
if self.tt == 'SEP' and self.tok == ',' then if self.tt == 'SEP' and self.tok == ',' then
self:advance() self:advance()
return true return true
end end
end end
function Muncher:number() function TokenIter:number()
if self.tt ~= 'NUM' then if self.tt ~= 'NUM' then
self:error('expected number', self.tt) self:error('expected number', self.tt)
end end
@ -75,7 +106,7 @@ function Muncher:number()
return self:token(t) return self:token(t)
end end
function Muncher:string() function TokenIter:string()
if self.tt ~= 'STRING' then if self.tt ~= 'STRING' then
self:error('expected string', self.tt) self:error('expected string', self.tt)
end end
@ -84,7 +115,7 @@ function Muncher:string()
return self:token(t) return self:token(t)
end end
function Muncher:register(registers) function TokenIter:register(registers)
registers = registers or data.registers registers = registers or data.registers
if self.tt ~= 'REG' then if self.tt ~= 'REG' then
self:error('expected register', self.tt) self:error('expected register', self.tt)
@ -97,7 +128,7 @@ function Muncher:register(registers)
return self:token(t) return self:token(t)
end end
function Muncher:deref() function TokenIter:deref()
if self.tt ~= 'OPEN' then if self.tt ~= 'OPEN' then
self:error('expected opening parenthesis for dereferencing', self.tt) self:error('expected opening parenthesis for dereferencing', self.tt)
end end
@ -111,10 +142,10 @@ function Muncher:deref()
self:error('expected closing parenthesis for dereferencing', self.tt) self:error('expected closing parenthesis for dereferencing', self.tt)
end end
self:advance() self:advance()
return self:token(t) return self:token(t):set('tt', 'DEREF')
end end
function Muncher:const(relative, no_label) function TokenIter:const(relative, no_label)
local good = { local good = {
NUM = true, NUM = true,
EXPR = true, EXPR = true,
@ -132,7 +163,7 @@ function Muncher:const(relative, no_label)
return t return t
end end
function Muncher:special() function TokenIter:special()
if self.tt ~= 'SPECIAL' then if self.tt ~= 'SPECIAL' then
self:error('expected special name to call', self.tt) self:error('expected special name to call', self.tt)
end end
@ -163,4 +194,39 @@ function Muncher:special()
return name, args return name, args
end end
return Muncher function TokenIter:basic_special()
local name, args = self:special()
local portion
if name == 'hi' then
portion = 'upperoff'
elseif name == 'up' then
portion = 'upper'
elseif name == 'lo' then
portion = 'lower'
else
self:error('unknown special', name)
end
if #args ~= 1 then
self:error(name..' expected one argument', #args)
end
local t = self:token(args[1]):set('portion', portion)
return t
end
-- TODO: move this boilerplate elsewhere

-- Metatable given to every instance: methods are looked up on TokenIter,
-- and calling an instance like a function is the same as calling :next()
-- on it (which lets an instance serve as a generic-for iterator).
local MetaBlah = {}
MetaBlah.__index = TokenIter
MetaBlah.__call = TokenIter.next

-- Metatable for the TokenIter table itself: calling TokenIter(...) builds
-- a fresh instance, runs init(...) on it, and returns the instance
-- followed by whatever init returned.
local ClassBlah = {
    __call = function(_, ...)
        local instance = setmetatable({}, MetaBlah)
        return instance, instance:init(...)
    end,
}

return setmetatable(TokenIter, ClassBlah)