mirror of
https://github.com/notwa/lips
synced 2024-11-12 22:29:03 -08:00
begin refactor; add statement collection
This commit is contained in:
parent
c47136442e
commit
486ccb99af
5 changed files with 342 additions and 84 deletions
236
lips/Collector.lua
Normal file
236
lips/Collector.lua
Normal file
|
@ -0,0 +1,236 @@
|
|||
local insert = table.insert
|
||||
local unpack = unpack or table.unpack
|
||||
|
||||
local path = string.gsub(..., "[^.]+$", "")
|
||||
local Token = require(path.."Token")
|
||||
local Statement = require(path.."Statement")
|
||||
local Muncher = require(path.."Muncher")
|
||||
|
||||
-- Set of token types that Collector:instruction appends to a statement as-is.
local arg_types = {}
for _, tt in ipairs{'NUM', 'REG', 'DEFSYM', 'LABELSYM', 'RELLABELSYM'} do
    arg_types[tt] = true
end
|
||||
|
||||
-- Collector turns a flat token list into a list of Statements
-- (see Collector:collect). It builds on Muncher.
local Collector = Muncher:extend()

--- Remember the option table for this collector.
-- @param options  optional table; an empty table is stored when it is omitted
function Collector:init(options)
    if not options then
        options = {}
    end
    self.options = options
end
|
||||
|
||||
--- Build a Statement stamped with the collector's current fn and line.
-- @param ...  statement type followed by any tokens; forwarded to Statement
-- @return the new Statement (it is NOT added to self.statements here)
function Collector:statement(...)
    -- parentheses keep this a plain single-value return rather than a tail call
    return (Statement(self.fn, self.line, ...))
end
|
||||
|
||||
--- Forward selected fields of `t` to format_out_raw.
-- Fields 3 and 1 of `t` are passed before `args`, fields 4 and 5 after it.
-- NOTE(review): format_out_raw is not defined in this file; presumably it is
-- provided elsewhere (or this is a leftover from Parser) -- confirm.
-- @param t     indexable value holding at least fields 1, 3, 4 and 5
-- @param args  passed through unchanged as the third argument
function Collector:format_out(t, args)
    local third, first, fourth, fifth = t[3], t[1], t[4], t[5]
    self:format_out_raw(third, first, args, fourth, fifth)
end
|
||||
|
||||
--- Append one datum to the trailing '!DATA' statement, creating it if needed.
--
-- Data is "scrunched": when the most recent statement is already '!DATA' it
-- is reused, and inside it consecutive data of the same size share a single
-- token whose `tok` is a list of values.
--
-- pseudo-example of a resulting statement:
--   Statement{type='!DATA',
--       {tt='BYTES', tok={0, 1, 2}},
--       {tt='HALFWORDS', tok={3, 4, 5}},
--       {tt='WORDS', tok={6, 7, 8}},
--       {tt='LABEL', tok='myLabel'},
--   }
--
-- @param data  the value to store; a string combined with size 'WORD' is
--              stored as its own LABEL token instead of joining a list
-- @param size  'BYTE', 'HALFWORD' or 'WORD'; anything else raises an
--              internal error
function Collector:push_data(data, size)
    -- FIXME: local 'data' name clashes with lips.data
    -- TODO: consider not scrunching data statements, just their tokens

    local statements = self.statements
    local s = statements[#statements]
    if not (s and s.type == '!DATA') then
        s = self:statement('!DATA')
        insert(statements, s)
    end

    if type(data) == 'string' and size == 'WORD' then
        -- labels will be assembled to words.
        -- Built through self:token(tt, tok), exactly like the size tokens
        -- below, instead of a bare 2-argument Token(...) call.
        insert(s, self:token('LABEL', data))
        return
    end

    if size ~= 'BYTE' and size ~= 'HALFWORD' and size ~= 'WORD' then
        error('Internal Error: unknown data size argument')
    end

    local sizes = size..'S' -- token type for a list of this size, e.g. 'BYTES'

    -- reuse the trailing token when it already collects this size
    local t = s[#s]
    if not (t and t.tt == sizes) then
        t = self:token(sizes, {})
        insert(s, t)
        s:validate()
    end
    insert(t.tok, data)
end
|
||||
|
||||
--- Collect a definition into a '!DEF' statement.
-- The current token and the token returned by the next advance become the
-- statement's two tokens; the cursor is then advanced once more.
function Collector:variable()
    local def_token = self.t
    local next_token = self:advance()

    local def = self:statement('!DEF', def_token, next_token)
    insert(self.statements, def)

    self:advance()
end
|
||||
|
||||
--- Collect one directive, starting at its DIR token. Handles advancing.
--
--   ORG                 -> '!ORG' statement holding one constant (labels refused)
--   ALIGN / SKIP        -> '!ALIGN' / '!SKIP' with one or two number tokens;
--                          a bare ALIGN gets a NUM token of 0
--   BYTE/HALFWORD/WORD  -> each constant is handed to push_data
--   INC / INCBIN        -> nothing to do here
--   ASCII / ASCIIZ      -> each element of the string token goes to push_data
--                          as a BYTE; ASCIIZ appends a trailing 0
--   FLOAT, anything else -> reported through self:error
function Collector:directive()
    local name = self.tok
    self:advance()

    -- append a statement of type '!'..kind holding the given tokens
    local function emit(kind, ...)
        insert(self.statements, self:statement('!'..kind, ...))
    end

    -- read one constant and store its value using this directive's size
    local function push_const()
        self:push_data(self:const().tok, name)
    end

    if name == 'ORG' then
        emit(name, self:const(false, true))

    elseif name == 'ALIGN' or name == 'SKIP' then
        local bare_align = self:is_EOL() and name == 'ALIGN'
        if bare_align then
            emit(name, self:token('NUM', 0))
            return
        end

        local size = self:number()
        if not self:is_EOL() then
            self:optional_comma()
            emit(name, size, self:number())
        else
            emit(name, size)
        end
        self:expect_EOL()

    elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then
        push_const()
        while not self:is_EOL() do
            self:advance()
            self:optional_comma()
            push_const()
        end
        self:expect_EOL()

    elseif name == 'INC' or name == 'INCBIN' then
        -- noop, handled by lexer

    elseif name == 'ASCII' or name == 'ASCIIZ' then
        local bytes = self:string()
        for _, byte in ipairs(bytes.tok) do
            self:push_data(byte, 'BYTE')
        end
        if name == 'ASCIIZ' then
            self:push_data(0, 'BYTE')
        end
        self:expect_EOL()

    elseif name == 'FLOAT' then
        self:error('unimplemented directive', name)

    else
        self:error('unknown directive', name)
    end
end
|
||||
|
||||
--- Collect a one-argument special ('hi', 'up' or 'lo') into a single token.
-- The special's name selects the 'portion' value set on a token built from
-- its only argument: hi -> 'upperoff', up -> 'upper', lo -> 'lower'.
-- Unknown names and argument counts other than one go through self:error.
-- @return the token with its 'portion' set
function Collector:basic_special()
    local name, args = self:special()

    local portions = {hi = 'upperoff', up = 'upper', lo = 'lower'}
    local portion = portions[name]
    if portion == nil then
        self:error('unknown special', name)
    end

    if #args ~= 1 then
        self:error(name..' expected one argument', #args)
    end

    local result = self:token(args[1]):set('portion', portion)
    return result
end
|
||||
|
||||
--- Collect one instruction line into a statement typed by the instruction name.
--
-- The statement is appended to self.statements first; then, until an EOL
-- token is reached, each argument is turned into a token and appended:
--   OPEN      -> the result of self:deref(), retagged as 'DEREF'
--   UNARY     -> before a DEFSYM: Token(...) built from that DEFSYM, with
--                'negate' set; before EOL or SEP: a RELLABELSYM token whose
--                value is '+' (unary value 1) or '-' (unary value -1)
--   SPECIAL   -> the result of self:basic_special()
--   arg_types -> the current token itself
-- A stray separator or any other token type goes through self:error.
-- Handles advancing.
function Collector:instruction()
    local s = self:statement(self.tok)
    insert(self.statements, s)
    self:advance()

    while self.tt ~= 'EOL' do
        local tt = self.tt
        local arg                  -- token to append for this argument
        local skip_advance = false -- true when no advance follows the insert

        if tt == 'OPEN' then
            arg = self:deref()
            arg.tt = 'DEREF' -- TODO: should just be returned by :deref
            skip_advance = true
        elseif tt == 'UNARY' then
            local sign = self.t
            local peek = self.tokens[self.i + 1]
            if peek.tt == 'DEFSYM' then
                local sym = self:advance()
                arg = Token(sym):set('negate')
            elseif peek.tt == 'EOL' or peek.tt == 'SEP' then
                -- a lone sign stands for a relative label
                local symbol = false
                if sign.tok == 1 then
                    symbol = '+'
                elseif sign.tok == -1 then
                    symbol = '-'
                end
                arg = Token(self.fn, self.line, 'RELLABELSYM', symbol)
            else
                self:error('unexpected token after unary operator', peek.tt)
            end
        elseif tt == 'SPECIAL' then
            arg = self:basic_special()
        elseif tt == 'SEP' then
            self:error('extraneous comma')
        elseif not arg_types[tt] then
            self:error('unexpected argument type in instruction', tt)
        else
            arg = self.t
        end

        insert(s, arg)
        if not skip_advance then
            self:advance()
        end
        self:optional_comma()
    end

    self:expect_EOL()
    s:validate()
end
|
||||
|
||||
--- Turn a flat token list into a list of Statements.
-- @param tokens  array of tokens to consume (stored as self.tokens)
-- @param fn      name of the main file; '(string)' when omitted
-- @return the array of collected Statements (also kept in self.statements)
function Collector:collect(tokens, fn)
    self.tokens = tokens
    self.fn = fn or '(string)'
    self.main_fn = self.fn
    self.statements = {}

    self.i = 0 -- set up Muncher iteration
    self:advance() -- load up the first token

    -- Only an EOF seen while self.fn is the main file ends collection;
    -- an EOF under any other fn (an included file's) is stepped over.
    while not (self.tt == 'EOF' and self.fn == self.main_fn) do
        local tt = self.tt
        if tt == 'EOF' or tt == 'EOL' then
            -- included file's EOF, or an empty line
            self:advance()
        elseif tt == 'DEF' then
            self:variable() -- handles advancing
        elseif tt == 'LABEL' or tt == 'RELLABEL' then
            local label = self:statement('!LABEL', self.t)
            insert(self.statements, label)
            self:advance()
        elseif tt == 'DIR' then
            self:directive() -- handles advancing
        elseif tt == 'INSTR' then
            self:instruction() -- handles advancing
        else
            self:error('expected starting token for statement', tt)
        end
    end

    return self.statements
end
|
||||
|
||||
return Collector
|
|
@ -17,7 +17,10 @@ local arg_types = {
|
|||
local Muncher = Base:extend()
|
||||
-- no base init method
|
||||
|
||||
function Muncher:error(msg)
|
||||
--- Raise an error prefixed with the current fn and line.
-- The error is raised at level 2, i.e. attributed to the caller of this method.
-- @param msg  description of the problem
-- @param got  optional; when not nil, ', got <tostring(got)>' is appended
function Muncher:error(msg, got)
    local text = msg
    if got ~= nil then
        text = text..', got '..tostring(got)
    end
    error(format('%s:%d: Error: %s', self.fn, self.line, text), 2)
end
|
||||
|
||||
|
@ -112,8 +115,8 @@ function Muncher:deref()
|
|||
end
|
||||
|
||||
function Muncher:const(relative, no_label)
|
||||
if self.tt ~= 'NUM' and self.tt ~= 'LABELSYM' then
|
||||
self:error('expected constant')
|
||||
if self.tt ~= 'NUM' and self.tt ~= 'DEFSYM' and self.tt ~= 'LABELSYM' then
|
||||
self:error('expected constant', self.tt)
|
||||
end
|
||||
if no_label and self.tt == 'LABELSYM' then
|
||||
self:error('labels are not allowed here')
|
||||
|
|
104
lips/Parser.lua
104
lips/Parser.lua
|
@ -3,13 +3,14 @@ local insert = table.insert
|
|||
local path = string.gsub(..., "[^.]+$", "")
|
||||
local data = require(path.."data")
|
||||
local overrides = require(path.."overrides")
|
||||
local Base = require(path.."Base")
|
||||
local Token = require(path.."Token")
|
||||
local Lexer = require(path.."Lexer")
|
||||
local Dumper = require(path.."Dumper")
|
||||
local Muncher = require(path.."Muncher")
|
||||
local Collector = require(path.."Collector")
|
||||
local Preproc = require(path.."Preproc")
|
||||
local Dumper = require(path.."Dumper")
|
||||
|
||||
local Parser = Muncher:extend()
|
||||
local Parser = Base:extend()
|
||||
function Parser:init(writer, fn, options)
|
||||
self.fn = fn or '(string)'
|
||||
self.main_fn = self.fn
|
||||
|
@ -17,61 +18,7 @@ function Parser:init(writer, fn, options)
|
|||
self.dumper = Dumper(writer, fn, options)
|
||||
end
|
||||
|
||||
function Parser:directive()
|
||||
local name = self.tok
|
||||
self:advance()
|
||||
local function add(...)
|
||||
self.dumper:add_directive(self.fn, self.line, ...)
|
||||
end
|
||||
if name == 'ORG' then
|
||||
add(name, self:number().tok)
|
||||
elseif name == 'ALIGN' or name == 'SKIP' then
|
||||
if self:is_EOL() and name == 'ALIGN' then
|
||||
add(name, 0)
|
||||
else
|
||||
local size = self:number().tok
|
||||
if self:is_EOL() then
|
||||
add(name, size)
|
||||
else
|
||||
self:optional_comma()
|
||||
add(name, size, self:number().tok)
|
||||
end
|
||||
self:expect_EOL()
|
||||
end
|
||||
elseif name == 'BYTE' or name == 'HALFWORD' then
|
||||
add(name, self:number().tok)
|
||||
while not self:is_EOL() do
|
||||
self:advance()
|
||||
self:optional_comma()
|
||||
add(name, self:number().tok)
|
||||
end
|
||||
self:expect_EOL()
|
||||
elseif name == 'WORD' then
|
||||
-- allow labels in word directives
|
||||
add(name, self:const().tok)
|
||||
while not self:is_EOL() do
|
||||
self:advance()
|
||||
self:optional_comma()
|
||||
add(name, self:const().tok)
|
||||
end
|
||||
self:expect_EOL()
|
||||
elseif name == 'INC' or name == 'INCBIN' then
|
||||
-- noop, handled by lexer
|
||||
elseif name == 'ASCII' or name == 'ASCIIZ' then
|
||||
local bytes = self:string()
|
||||
for i, number in ipairs(bytes.tok) do
|
||||
add('BYTE', number)
|
||||
end
|
||||
if name == 'ASCIIZ' then
|
||||
add('BYTE', 0)
|
||||
end
|
||||
self:expect_EOL()
|
||||
elseif name == 'FLOAT' then
|
||||
self:error('unimplemented directive')
|
||||
else
|
||||
self:error('unknown directive')
|
||||
end
|
||||
end
|
||||
--[[
|
||||
|
||||
function Parser:format_in(informat)
|
||||
-- see data.lua for a guide on what all these mean
|
||||
|
@ -222,10 +169,9 @@ function Parser:instruction()
|
|||
end
|
||||
self:expect_EOL()
|
||||
end
|
||||
--]]
|
||||
|
||||
function Parser:tokenize(asm)
|
||||
self.i = 0
|
||||
|
||||
local lexer = Lexer(asm, self.main_fn, self.options)
|
||||
local tokens = {}
|
||||
|
||||
|
@ -242,41 +188,37 @@ function Parser:tokenize(asm)
|
|||
end)
|
||||
end
|
||||
|
||||
local collector = Collector(self.options)
|
||||
local statements = collector:collect(tokens, self.main_fn)
|
||||
|
||||
--[[
|
||||
local preproc = Preproc(self.options)
|
||||
self.tokens = preproc:process(tokens)
|
||||
self.statements = preproc:process(statements)
|
||||
|
||||
-- the lexer guarantees an EOL and EOF for a blank file
|
||||
assert(#self.tokens > 0, 'Internal Error: no tokens after preprocessing')
|
||||
--]]
|
||||
self.statements = statements
|
||||
end
|
||||
|
||||
function Parser:parse(asm)
|
||||
self:tokenize(asm)
|
||||
self:advance() -- load up the first token
|
||||
while true do
|
||||
if self.tt == 'EOF' then
|
||||
-- don't break if this is an included file's EOF
|
||||
if self.fn == self.main_fn then
|
||||
break
|
||||
end
|
||||
self:advance()
|
||||
elseif self.tt == 'EOL' then
|
||||
-- empty line
|
||||
self:advance()
|
||||
elseif self.tt == 'DIR' then
|
||||
self:directive() -- handles advancing
|
||||
elseif self.tt == 'LABEL' then
|
||||
self.dumper:add_label(self.tok)
|
||||
self:advance()
|
||||
elseif self.tt == 'INSTR' then
|
||||
self:instruction() -- handles advancing
|
||||
else
|
||||
self:error('unexpected token (unknown instruction?)')
|
||||
|
||||
-- DEBUG
|
||||
for i, s in ipairs(self.statements) do
|
||||
local values = ''
|
||||
for j, v in ipairs(s) do
|
||||
values = values..'\t'..v.tt
|
||||
end
|
||||
values = values:sub(2)
|
||||
print(i, s.type, values)
|
||||
end
|
||||
--[[
|
||||
if self.options.labels then
|
||||
self.dumper:export_labels(self.options.labels)
|
||||
end
|
||||
return self.dumper:dump()
|
||||
--]]
|
||||
end
|
||||
|
||||
return Parser
|
||||
|
|
53
lips/Statement.lua
Normal file
53
lips/Statement.lua
Normal file
|
@ -0,0 +1,53 @@
|
|||
local path = string.gsub(..., "[^.]+$", "")
|
||||
local util = require(path.."util")
|
||||
local Base = require(path.."Base")
|
||||
local Token = require(path.."Token")
|
||||
|
||||
local Statement = Base:extend()
|
||||
--- Initialize a Statement.
--
-- Two call forms are accepted:
--   Statement(other)                -- copy every field of an existing Statement
--   Statement(fn, line, type, ...)  -- file name, line number, statement type,
--                                   -- then zero or more Tokens, stored at
--                                   -- [1], [2], ... up to the first nil
--
-- Raises an internal error for any other argument count, when the 1-argument
-- form is not given a Statement, or when self:validate rejects the result.
-- @return self
function Statement:init(...)
    -- #{...} is not dependable when an argument is nil (e.g. a missing type
    -- would be miscounted as 2 arguments), so ask select for the real count.
    local count = select('#', ...)
    local args = {...}

    if count == 1 then
        local t = args[1]
        if util.parent(t) ~= Statement then
            -- level 3, matching the argument-count error below
            error('Internal Error: 1-arg Statement:init expected a Statement', 3)
        end
        if type(t) == 'table' then
            for k, v in pairs(t) do
                self[k] = v
            end
        end
    elseif count >= 3 then
        self.fn = args[1]
        self.line = args[2]
        self.type = args[3]
        -- everything after the type is a token; stop at the first nil
        for i = 4, count do
            local v = args[i]
            if v == nil then
                break
            end
            self[i - 3] = v
        end
    else
        error('Internal Error: Statement:init takes 1 or 3+ arguments', 3)
    end

    -- the extra level of 1 accounts for validate being called from init
    self:validate(1)
    return self
end
|
||||
|
||||
--- Check that this statement is well-formed, raising an internal error if not.
-- Requires truthy `fn`, `line` and `type` fields, and every array element
-- (as visited by ipairs) must have Token as its util.parent.
-- @param n  optional extra stack levels added to the base error level of 3
function Statement:validate(n)
    local level = (n or 0) + 3 -- depth for error message

    -- required fields, checked in this order
    local required = {
        {'fn', 'Internal Error: statements require a filename'},
        {'line', 'Internal Error: statements require a line number'},
        {'type', 'Internal Error: statement is missing a type'},
    }
    for _, req in ipairs(required) do
        if not self[req[1]] then
            error(req[2], level)
        end
    end

    for i, v in ipairs(self) do
        if util.parent(v) ~= Token then
            local msg = ('Internal Error: Statement[%i] is not a Token'):format(i)
            error(msg, level)
        end
    end
end
|
||||
|
||||
return Statement
|
|
@ -17,7 +17,31 @@ local function bitrange(x, lower, upper)
|
|||
return floor(x/2^lower) % 2^(upper - lower + 1)
|
||||
end
|
||||
|
||||
--- Return the `__index` field of a value's metatable.
-- @param t  any value
-- @return the metatable's __index, or nil when `t` has no metatable
local function parent(t)
    local meta = getmetatable(t)
    if meta ~= nil then
        return meta.__index
    end
    return nil
end
|
||||
|
||||
-- http://stackoverflow.com/a/9279009
|
||||
--- loadcode(code, environment): compile a string of Lua source into a
-- function that uses `environment` as its environment table.
-- Raises (through assert) when the source does not compile.
local loadcode
if not (setfenv and loadstring) then -- 5.2, 5.3
    function loadcode(code, environment)
        return assert(load(code, nil, 't', environment))
    end
else -- 5.1, JIT
    function loadcode(code, environment)
        local chunk = assert(loadstring(code))
        setfenv(chunk, environment)
        return chunk
    end
end
|
||||
|
||||
return {
|
||||
readfile = readfile,
|
||||
bitrange = bitrange,
|
||||
parent = parent,
|
||||
loadcode = loadcode,
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue