mirror of
https://github.com/notwa/lips
synced 2024-11-14 09:39:03 -08:00
begin refactor; add statement collection
This commit is contained in:
parent
c47136442e
commit
486ccb99af
5 changed files with 342 additions and 84 deletions
236
lips/Collector.lua
Normal file
236
lips/Collector.lua
Normal file
|
@ -0,0 +1,236 @@
|
||||||
|
local insert = table.insert
|
||||||
|
local unpack = unpack or table.unpack
|
||||||
|
|
||||||
|
local path = string.gsub(..., "[^.]+$", "")
|
||||||
|
local Token = require(path.."Token")
|
||||||
|
local Statement = require(path.."Statement")
|
||||||
|
local Muncher = require(path.."Muncher")
|
||||||
|
|
||||||
|
-- Set of token types accepted verbatim as instruction arguments
-- (looked up by Collector:instruction).
local arg_types = {}
for _, tt in ipairs{'NUM', 'REG', 'DEFSYM', 'LABELSYM', 'RELLABELSYM'} do
    arg_types[tt] = true
end
|
||||||
|
|
||||||
|
local Collector = Muncher:extend()
|
||||||
|
-- Initialise a Collector.
-- options: optional table of options; when it is nil (or false) a fresh empty
--          table is stored instead, so self.options is always a table.
function Collector:init(options)
    if not options then
        options = {}
    end
    self.options = options
end
|
||||||
|
|
||||||
|
-- Construct a Statement stamped with the collector's current file name
-- (self.fn) and line (self.line). The varargs are forwarded unchanged; at the
-- call sites in this file they are the statement type followed by its tokens.
function Collector:statement(...)
    -- parenthesised: yields exactly one value and is not a tail call, so the
    -- stack levels used by Statement's error reporting are unaffected
    return (Statement(self.fn, self.line, ...))
end
|
||||||
|
|
||||||
|
-- Forward an output-format description to self:format_out_raw.
-- t:    table whose entries 3, 1, 4 and 5 are passed on (in that order,
--       with `args` placed third).
-- args: passed through untouched.
-- NOTE(review): format_out_raw is not defined in this file; presumably it is
-- provided elsewhere -- confirm before relying on this method.
function Collector:format_out(t, args)
    local third, first, fourth, fifth = t[3], t[1], t[4], t[5]
    self:format_out_raw(third, first, args, fourth, fifth)
end
|
||||||
|
|
||||||
|
-- Append one datum to the trailing '!DATA' statement, starting a new '!DATA'
-- statement when the most recent statement is of any other type.
--
-- data: the value to store. A string together with size 'WORD' is stored as
--       its own LABEL token; every other value is appended to the `tok` list
--       of a BYTES/HALFWORDS/WORDS token (reusing the statement's last token
--       when it already has the matching type).
-- size: 'BYTE', 'HALFWORD' or 'WORD'. Anything else raises an internal error
--       before self.statements is touched.
function Collector:push_data(data, size)
    -- FIXME: local 'data' name clashes with lips.data
    --[[ pseudo-example:
    Statement{type='!DATA',
        {tt='BYTES', tok={0, 1, 2}},
        {tt='HALFWORDS', tok={3, 4, 5}},
        {tt='WORDS', tok={6, 7, 8}},
        {tt='LABEL', tok='myLabel'},
    }
    --]]

    -- TODO: consider not scrunching data statements, just their tokens

    -- validate first so a bad call cannot leave an empty !DATA statement behind
    if size ~= 'BYTE' and size ~= 'HALFWORD' and size ~= 'WORD' then
        error('Internal Error: unknown data size argument')
    end

    local last_statement = self.statements[#self.statements]
    local s
    if last_statement and last_statement.type == '!DATA' then
        s = last_statement
    else
        s = self:statement('!DATA')
        insert(self.statements, s)
    end

    if type(data) == 'string' and size == 'WORD' then
        -- labels will be assembled to words
        -- Built with self:token, exactly like the size tokens below, instead
        -- of a bare two-argument Token(...) call: no other token in this file
        -- is constructed that way.
        insert(s, self:token('LABEL', data))
        return
    end

    local sizes = size..'S'

    local last_token = s[#s]
    local t
    if last_token and last_token.tt == sizes then
        t = last_token
    else
        t = self:token(sizes, {})
        insert(s, t)
        s:validate()
    end
    insert(t.tok, data)
end
|
||||||
|
|
||||||
|
-- Collect a '!DEF' statement from the current token and the token that
-- follows it, then step past both.
function Collector:variable()
    local def_token = self.t
    local next_token = self:advance()

    insert(self.statements, self:statement('!DEF', def_token, next_token))
    self:advance()
end
|
||||||
|
|
||||||
|
-- Collect the directive named by the current token (self.tok).
--
-- ORG / ALIGN / SKIP become '!ORG' / '!ALIGN' / '!SKIP' statements carrying
-- their argument tokens; BYTE / HALFWORD / WORD / ASCII / ASCIIZ are fed to
-- push_data one value at a time; INC / INCBIN produce nothing here.
-- FLOAT and unrecognised names are reported through self:error.
function Collector:directive()
    local name = self.tok
    self:advance()

    -- append a '!'..kind statement holding the given tokens
    local function emit(kind, ...)
        local s = self:statement('!'..kind, ...)
        insert(self.statements, s)
    end

    if name == 'INC' or name == 'INCBIN' then
        -- noop, handled by lexer
        return
    end

    local data_sizes = { BYTE = true, HALFWORD = true, WORD = true }

    if name == 'ORG' then
        emit(name, self:const(false, true))
    elseif name == 'ALIGN' or name == 'SKIP' then
        local at_eol = self:is_EOL()
        if at_eol and name == 'ALIGN' then
            -- a bare ALIGN is recorded with a NUM token of 0
            emit(name, self:token('NUM', 0))
            return
        end
        local size = self:number()
        if self:is_EOL() then
            emit(name, size)
        else
            self:optional_comma()
            emit(name, size, self:number())
        end
        self:expect_EOL()
    elseif data_sizes[name] then
        -- one value is mandatory; further values follow until end of line
        while true do
            self:push_data(self:const().tok, name)
            if self:is_EOL() then
                break
            end
            self:advance()
            self:optional_comma()
        end
        self:expect_EOL()
    elseif name == 'ASCII' or name == 'ASCIIZ' then
        local str = self:string()
        for _, byte in ipairs(str.tok) do
            self:push_data(byte, 'BYTE')
        end
        if name == 'ASCIIZ' then
            -- ASCIIZ appends one extra 0 byte after the string
            self:push_data(0, 'BYTE')
        end
        self:expect_EOL()
    elseif name == 'FLOAT' then
        self:error('unimplemented directive', name)
    else
        self:error('unknown directive', name)
    end
end
|
||||||
|
|
||||||
|
-- Read a special via self:special() and turn it into a copy of its single
-- argument token tagged with a 'portion' field:
--   hi -> 'upperoff',  up -> 'upper',  lo -> 'lower'
-- Any other name, or an argument count other than one, is reported through
-- self:error. Returns the tagged token.
function Collector:basic_special()
    local name, args = self:special()

    local portions = { hi = 'upperoff', up = 'upper', lo = 'lower' }
    local portion = portions[name]
    if portion == nil then
        self:error('unknown special', name)
    end

    local argc = #args
    if argc ~= 1 then
        self:error(name..' expected one argument', argc)
    end

    local tagged = self:token(args[1]):set('portion', portion)
    return tagged
end
|
||||||
|
|
||||||
|
-- Collect one instruction: a statement whose type is the current token's
-- value (self.tok) and whose entries are the argument tokens up to EOL.
--
-- Argument handling, per token type:
--   OPEN     the result of self:deref(), retagged as 'DEREF'
--   UNARY    before a DEFSYM: a copy of that DEFSYM, flagged 'negate' only
--            when the operator is a minus (tok == -1);
--            before EOL/SEP: a RELLABELSYM token holding '+' or '-'
--   SPECIAL  the token returned by self:basic_special()
--   others   taken as-is when their type is listed in arg_types
-- Stray commas and any other token type are reported through self:error.
-- The statement is appended to self.statements immediately and validated
-- once the line has been consumed.
function Collector:instruction()
    local s = self:statement(self.tok)
    insert(self.statements, s)
    self:advance()

    while self.tt ~= 'EOL' do
        local t = self.t
        if self.tt == 'OPEN' then
            t = self:deref()
            t.tt = 'DEREF' -- TODO: should just be returned by :deref
            insert(s, t)
        elseif self.tt == 'UNARY' then
            local peek = self.tokens[self.i + 1]
            if peek == nil then
                -- guard against indexing nil if the token list ends here
                self:error('unexpected token after unary operator', nil)
            end
            if peek.tt == 'DEFSYM' then
                -- remember the sign before advancing: a unary plus (tok == 1)
                -- must leave the symbol untouched, only a minus negates it
                local negative = t.tok == -1
                t = Token(self:advance())
                if negative then
                    t = t:set('negate')
                end
                insert(s, t)
                self:advance()
            elseif peek.tt == 'EOL' or peek.tt == 'SEP' then
                -- a lone sign is a relative label reference
                local tok = t.tok == 1 and '+' or t.tok == -1 and '-'
                t = Token(self.fn, self.line, 'RELLABELSYM', tok)
                insert(s, t)
                self:advance()
            else
                self:error('unexpected token after unary operator', peek.tt)
            end
        elseif self.tt == 'SPECIAL' then
            t = self:basic_special()
            insert(s, t)
            self:advance()
        elseif self.tt == 'SEP' then
            self:error('extraneous comma')
        elseif not arg_types[self.tt] then
            self:error('unexpected argument type in instruction', self.tt)
        else
            insert(s, t)
            self:advance()
        end
        self:optional_comma()
    end

    self:expect_EOL()
    s:validate()
end
|
||||||
|
|
||||||
|
-- Walk a token list and group it into statements.
--
-- tokens: the token list to consume.
-- fn:     name of the main file; defaults to '(string)'.
-- Returns the list of collected statements (also kept in self.statements).
-- A token that cannot start a statement is reported through self:error.
function Collector:collect(tokens, fn)
    self.tokens = tokens
    self.fn = fn or '(string)'
    self.main_fn = self.fn

    self.statements = {}

    -- token types whose handler method also takes care of advancing
    local handlers = {
        DEF = 'variable',
        DIR = 'directive',
        INSTR = 'instruction',
    }

    self.i = 0 -- set up Muncher iteration
    self:advance() -- load up the first token
    while true do
        local tt = self.tt
        local handler = handlers[tt]
        if tt == 'EOF' then
            -- don't break if this is an included file's EOF
            if self.fn == self.main_fn then
                break
            end
            self:advance()
        elseif tt == 'EOL' then
            -- empty line
            self:advance()
        elseif handler then
            self[handler](self)
        elseif tt == 'LABEL' or tt == 'RELLABEL' then
            insert(self.statements, self:statement('!LABEL', self.t))
            self:advance()
        else
            self:error('expected starting token for statement', tt)
        end
    end

    return self.statements
end
|
||||||
|
|
||||||
|
return Collector
|
|
@ -17,7 +17,10 @@ local arg_types = {
|
||||||
local Muncher = Base:extend()
|
local Muncher = Base:extend()
|
||||||
-- no base init method
|
-- no base init method
|
||||||
|
|
||||||
function Muncher:error(msg)
|
function Muncher:error(msg, got)
|
||||||
|
if got ~= nil then
|
||||||
|
msg = msg..', got '..tostring(got)
|
||||||
|
end
|
||||||
error(format('%s:%d: Error: %s', self.fn, self.line, msg), 2)
|
error(format('%s:%d: Error: %s', self.fn, self.line, msg), 2)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -112,8 +115,8 @@ function Muncher:deref()
|
||||||
end
|
end
|
||||||
|
|
||||||
function Muncher:const(relative, no_label)
|
function Muncher:const(relative, no_label)
|
||||||
if self.tt ~= 'NUM' and self.tt ~= 'LABELSYM' then
|
if self.tt ~= 'NUM' and self.tt ~= 'DEFSYM' and self.tt ~= 'LABELSYM' then
|
||||||
self:error('expected constant')
|
self:error('expected constant', self.tt)
|
||||||
end
|
end
|
||||||
if no_label and self.tt == 'LABELSYM' then
|
if no_label and self.tt == 'LABELSYM' then
|
||||||
self:error('labels are not allowed here')
|
self:error('labels are not allowed here')
|
||||||
|
|
104
lips/Parser.lua
104
lips/Parser.lua
|
@ -3,13 +3,14 @@ local insert = table.insert
|
||||||
local path = string.gsub(..., "[^.]+$", "")
|
local path = string.gsub(..., "[^.]+$", "")
|
||||||
local data = require(path.."data")
|
local data = require(path.."data")
|
||||||
local overrides = require(path.."overrides")
|
local overrides = require(path.."overrides")
|
||||||
|
local Base = require(path.."Base")
|
||||||
local Token = require(path.."Token")
|
local Token = require(path.."Token")
|
||||||
local Lexer = require(path.."Lexer")
|
local Lexer = require(path.."Lexer")
|
||||||
local Dumper = require(path.."Dumper")
|
local Collector = require(path.."Collector")
|
||||||
local Muncher = require(path.."Muncher")
|
|
||||||
local Preproc = require(path.."Preproc")
|
local Preproc = require(path.."Preproc")
|
||||||
|
local Dumper = require(path.."Dumper")
|
||||||
|
|
||||||
local Parser = Muncher:extend()
|
local Parser = Base:extend()
|
||||||
function Parser:init(writer, fn, options)
|
function Parser:init(writer, fn, options)
|
||||||
self.fn = fn or '(string)'
|
self.fn = fn or '(string)'
|
||||||
self.main_fn = self.fn
|
self.main_fn = self.fn
|
||||||
|
@ -17,61 +18,7 @@ function Parser:init(writer, fn, options)
|
||||||
self.dumper = Dumper(writer, fn, options)
|
self.dumper = Dumper(writer, fn, options)
|
||||||
end
|
end
|
||||||
|
|
||||||
function Parser:directive()
|
--[[
|
||||||
local name = self.tok
|
|
||||||
self:advance()
|
|
||||||
local function add(...)
|
|
||||||
self.dumper:add_directive(self.fn, self.line, ...)
|
|
||||||
end
|
|
||||||
if name == 'ORG' then
|
|
||||||
add(name, self:number().tok)
|
|
||||||
elseif name == 'ALIGN' or name == 'SKIP' then
|
|
||||||
if self:is_EOL() and name == 'ALIGN' then
|
|
||||||
add(name, 0)
|
|
||||||
else
|
|
||||||
local size = self:number().tok
|
|
||||||
if self:is_EOL() then
|
|
||||||
add(name, size)
|
|
||||||
else
|
|
||||||
self:optional_comma()
|
|
||||||
add(name, size, self:number().tok)
|
|
||||||
end
|
|
||||||
self:expect_EOL()
|
|
||||||
end
|
|
||||||
elseif name == 'BYTE' or name == 'HALFWORD' then
|
|
||||||
add(name, self:number().tok)
|
|
||||||
while not self:is_EOL() do
|
|
||||||
self:advance()
|
|
||||||
self:optional_comma()
|
|
||||||
add(name, self:number().tok)
|
|
||||||
end
|
|
||||||
self:expect_EOL()
|
|
||||||
elseif name == 'WORD' then
|
|
||||||
-- allow labels in word directives
|
|
||||||
add(name, self:const().tok)
|
|
||||||
while not self:is_EOL() do
|
|
||||||
self:advance()
|
|
||||||
self:optional_comma()
|
|
||||||
add(name, self:const().tok)
|
|
||||||
end
|
|
||||||
self:expect_EOL()
|
|
||||||
elseif name == 'INC' or name == 'INCBIN' then
|
|
||||||
-- noop, handled by lexer
|
|
||||||
elseif name == 'ASCII' or name == 'ASCIIZ' then
|
|
||||||
local bytes = self:string()
|
|
||||||
for i, number in ipairs(bytes.tok) do
|
|
||||||
add('BYTE', number)
|
|
||||||
end
|
|
||||||
if name == 'ASCIIZ' then
|
|
||||||
add('BYTE', 0)
|
|
||||||
end
|
|
||||||
self:expect_EOL()
|
|
||||||
elseif name == 'FLOAT' then
|
|
||||||
self:error('unimplemented directive')
|
|
||||||
else
|
|
||||||
self:error('unknown directive')
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
function Parser:format_in(informat)
|
function Parser:format_in(informat)
|
||||||
-- see data.lua for a guide on what all these mean
|
-- see data.lua for a guide on what all these mean
|
||||||
|
@ -222,10 +169,9 @@ function Parser:instruction()
|
||||||
end
|
end
|
||||||
self:expect_EOL()
|
self:expect_EOL()
|
||||||
end
|
end
|
||||||
|
--]]
|
||||||
|
|
||||||
function Parser:tokenize(asm)
|
function Parser:tokenize(asm)
|
||||||
self.i = 0
|
|
||||||
|
|
||||||
local lexer = Lexer(asm, self.main_fn, self.options)
|
local lexer = Lexer(asm, self.main_fn, self.options)
|
||||||
local tokens = {}
|
local tokens = {}
|
||||||
|
|
||||||
|
@ -242,41 +188,37 @@ function Parser:tokenize(asm)
|
||||||
end)
|
end)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
local collector = Collector(self.options)
|
||||||
|
local statements = collector:collect(tokens, self.main_fn)
|
||||||
|
|
||||||
|
--[[
|
||||||
local preproc = Preproc(self.options)
|
local preproc = Preproc(self.options)
|
||||||
self.tokens = preproc:process(tokens)
|
self.statements = preproc:process(statements)
|
||||||
|
|
||||||
-- the lexer guarantees an EOL and EOF for a blank file
|
-- the lexer guarantees an EOL and EOF for a blank file
|
||||||
assert(#self.tokens > 0, 'Internal Error: no tokens after preprocessing')
|
assert(#self.tokens > 0, 'Internal Error: no tokens after preprocessing')
|
||||||
|
--]]
|
||||||
|
self.statements = statements
|
||||||
end
|
end
|
||||||
|
|
||||||
function Parser:parse(asm)
|
function Parser:parse(asm)
|
||||||
self:tokenize(asm)
|
self:tokenize(asm)
|
||||||
self:advance() -- load up the first token
|
|
||||||
while true do
|
-- DEBUG
|
||||||
if self.tt == 'EOF' then
|
for i, s in ipairs(self.statements) do
|
||||||
-- don't break if this is an included file's EOF
|
local values = ''
|
||||||
if self.fn == self.main_fn then
|
for j, v in ipairs(s) do
|
||||||
break
|
values = values..'\t'..v.tt
|
||||||
end
|
|
||||||
self:advance()
|
|
||||||
elseif self.tt == 'EOL' then
|
|
||||||
-- empty line
|
|
||||||
self:advance()
|
|
||||||
elseif self.tt == 'DIR' then
|
|
||||||
self:directive() -- handles advancing
|
|
||||||
elseif self.tt == 'LABEL' then
|
|
||||||
self.dumper:add_label(self.tok)
|
|
||||||
self:advance()
|
|
||||||
elseif self.tt == 'INSTR' then
|
|
||||||
self:instruction() -- handles advancing
|
|
||||||
else
|
|
||||||
self:error('unexpected token (unknown instruction?)')
|
|
||||||
end
|
end
|
||||||
|
values = values:sub(2)
|
||||||
|
print(i, s.type, values)
|
||||||
end
|
end
|
||||||
|
--[[
|
||||||
if self.options.labels then
|
if self.options.labels then
|
||||||
self.dumper:export_labels(self.options.labels)
|
self.dumper:export_labels(self.options.labels)
|
||||||
end
|
end
|
||||||
return self.dumper:dump()
|
return self.dumper:dump()
|
||||||
|
--]]
|
||||||
end
|
end
|
||||||
|
|
||||||
return Parser
|
return Parser
|
||||||
|
|
53
lips/Statement.lua
Normal file
53
lips/Statement.lua
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
local path = string.gsub(..., "[^.]+$", "")
|
||||||
|
local util = require(path.."util")
|
||||||
|
local Base = require(path.."Base")
|
||||||
|
local Token = require(path.."Token")
|
||||||
|
|
||||||
|
local Statement = Base:extend()
|
||||||
|
-- Initialise a Statement in one of two ways:
--   Statement(other)                 copy every field of an existing Statement
--   Statement(fn, line, type, ...)   file name, line, statement type, followed
--                                    by the statement's tokens, stored at
--                                    self[1], self[2], ...
-- Any other argument count raises an internal error. The result is validated
-- before being returned.
function Statement:init(...)
    -- Count with select('#', ...): the length of {...} is unspecified when an
    -- argument is nil, so e.g. a nil line number could make a 3+-argument call
    -- look like a 1-argument call and raise the wrong error.
    local argc = select('#', ...)
    local args = {...}
    if argc == 1 then
        local t = args[1]
        if util.parent(t) ~= Statement then
            error('Internal Error: 1-arg Statement:init expected a Statement')
        end
        if type(t) == 'table' then
            for k, v in pairs(t) do
                self[k] = v
            end
        end
    elseif argc >= 3 then
        self.fn = args[1]
        self.line = args[2]
        self.type = args[3]
        for i = 4, argc do
            local v = args[i]
            if v == nil then
                -- stop at the first missing token so self stays a sequence
                break
            end
            self[i - 3] = v
        end
    else
        error('Internal Error: Statement:init takes 1 or 3+ arguments', 3)
    end
    self:validate(1)
    return self
end
|
||||||
|
|
||||||
|
-- Fields every statement must carry, checked in this order, each paired with
-- the message raised when the field is missing.
local required_fields = {
    {'fn', 'Internal Error: statements require a filename'},
    {'line', 'Internal Error: statements require a line number'},
    {'type', 'Internal Error: statement is missing a type'},
}

-- Check that the statement has a file name, a line number and a type, and
-- that each of its array entries is a Token; raise an internal error if not.
-- n: optional number of extra stack levels to add to the error position
--    (defaults to 0).
function Statement:validate(n)
    n = (n or 0) + 3 -- depth for error message
    for _, field in ipairs(required_fields) do
        if not self[field[1]] then
            error(field[2], n)
        end
    end
    for i, v in ipairs(self) do
        local is_token = util.parent(v) == Token
        if not is_token then
            error(('Internal Error: Statement[%i] is not a Token'):format(i), n)
        end
    end
end
|
||||||
|
|
||||||
|
return Statement
|
|
@ -17,7 +17,31 @@ local function bitrange(x, lower, upper)
|
||||||
return floor(x/2^lower) % 2^(upper - lower + 1)
|
return floor(x/2^lower) % 2^(upper - lower + 1)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Return the __index field of t's metatable, or nil when t has no metatable.
local function parent(t)
    local mt = getmetatable(t)
    if mt ~= nil then
        return mt.__index
    end
    return nil
end
|
||||||
|
|
||||||
|
-- http://stackoverflow.com/a/9279009
|
||||||
|
-- loadcode(code, environment): compile the string `code` into a function that
-- runs with `environment` as its environment; raises if compilation fails.
local loadcode
do
    -- 5.1, JIT: compile, then attach the environment with setfenv
    local function load_with_setfenv(code, environment)
        local f = assert(loadstring(code))
        setfenv(f, environment)
        return f
    end

    -- 5.2, 5.3: load accepts the environment directly (text chunks only)
    local function load_with_env(code, environment)
        return assert(load(code, nil, 't', environment))
    end

    if setfenv and loadstring then
        loadcode = load_with_setfenv
    else
        loadcode = load_with_env
    end
end
|
||||||
|
|
||||||
return {
|
return {
|
||||||
readfile = readfile,
|
readfile = readfile,
|
||||||
bitrange = bitrange,
|
bitrange = bitrange,
|
||||||
|
parent = parent,
|
||||||
|
loadcode = loadcode,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue