1
0
Fork 0
mirror of https://github.com/notwa/lips synced 2024-04-25 15:03:22 -07:00
lips/lips/Collector.lua

238 lines
6.7 KiB
Lua

-- Local alias for table.insert, used for every append in this module.
local insert = table.insert
-- Prefix for sibling modules: the module name received in `...` with its
-- final dot-separated component removed (the trailing dot is kept), so the
-- requires below resolve relative to this file's package.
local path = string.gsub(..., "[^.]+$", "")
local Base = require(path.."Base")
local Token = require(path.."Token")
local TokenIter = require(path.."TokenIter")
local Statement = require(path.."Statement")
-- Collector is derived from Base via extend(); its methods are defined below.
-- NOTE(review): the exact semantics of extend() live in Base -- confirm there.
local Collector = Base:extend()
--- Store the collector's options.
-- @param options optional table of settings; an empty table is used when
--                this argument is nil or false
function Collector:init(options)
    if not options then
        options = {}
    end
    self.options = options
end
--- Build a Statement stamped with the iterator's current file name and line.
-- @param ... forwarded unchanged to Statement after the fn/line pair
-- @return the newly constructed statement (exactly one value)
function Collector:statement(...)
    local iter = self.iter
    -- the parentheses truncate the call to a single result
    return (Statement(iter.fn, iter.line, ...))
end
--- Append one datum to the trailing !DATA statement, creating that
-- statement first when the last collected statement is not a !DATA one.
--
-- Numbers of the same size that arrive back to back are packed into a single
-- token whose `tt` is the pluralized size ('BYTES', 'HALFWORDS', 'WORDS') and
-- whose `tok` is a list of the raw values.
--
-- @param datum token to store; a plain Lua number is first wrapped with
--              self.iter:token(datum)
-- @param size  'BYTE', 'HALFWORD' or 'WORD'
-- Raises via error() when `size` is not one of the three names above.
-- Unsupported data is reported through self.iter:error().
function Collector:push_data(datum, size)
    -- Validate the size before touching any state, so that an internal
    -- misuse cannot leave a freshly appended, empty !DATA statement behind.
    if size ~= 'BYTE' and size ~= 'HALFWORD' and size ~= 'WORD' then
        error('Internal Error: unknown data size argument')
    end

    local I = self.iter

    --[[ pseudo-example:
    Statement{type='!DATA',
        {tt='BYTES', tok={0, 1, 2}},
        {tt='HALFWORDS', tok={3, 4, 5}},
        {tt='WORDS', tok={6, 7, 8}},
        {tt='LABEL', tok='myLabel'},
    }
    --]]

    -- FIXME: optimize the hell out of this garbage, preferably in the lexer
    -- TODO: consider not scrunching data statements, just their tokens
    -- TODO: concatenate strings; use !BIN instead of !DATA

    if type(datum) == 'number' then
        datum = I:token(datum)
    end

    -- reuse the trailing !DATA statement, or start a new one
    local last_statement = self.statements[#self.statements]
    local s
    if last_statement and last_statement.type == '!DATA' then
        s = last_statement
    else
        s = self:statement('!DATA')
        insert(self.statements, s)
    end

    if datum.tt == 'LABELSYM' then
        if size == 'WORD' then
            -- labels will be assembled to words
            insert(s, datum)
            return
        else
            I:error('labels are too large to be used in this directive')
        end
    elseif datum.tt == 'VARSYM' then
        -- variables are stored as their own token, tagged with the size
        insert(s, datum:set('size', size))
        return
    elseif datum.tt ~= 'NUM' then
        I:error('unsupported data type', datum.tt)
    end

    -- plain number: extend the trailing token when it already holds values
    -- of this size, otherwise open a new one
    local sizes = size..'S'
    local last_token = s[#s]
    local t
    if last_token and last_token.tt == sizes then
        t = last_token
    else
        t = I:token(sizes, {})
        insert(s, t)
        s:validate()
    end
    insert(t.tok, datum.tok)
end
--- Collect the arguments of a single directive.
-- Reads the directive's operands from self.iter, appends the resulting
-- statements or data to self.statements, and finally requires that the
-- line ends.
-- @param name directive name as compared below, e.g. 'ORG', 'BYTE', 'HEX'
function Collector:directive(name)
    local iter = self.iter

    -- append a statement of type '!'..kind built from the given arguments
    local function emit(kind, ...)
        insert(self.statements, self:statement('!'..kind, ...))
    end

    if name == 'ORG' or name == 'BASE' then
        emit(name, iter:const(nil, 'no labels'))

    elseif name == 'PUSH' or name == 'POP' then
        -- one statement per listed constant
        emit(name, iter:const())
        while not iter:is_EOL() do
            iter:eat_comma()
            emit(name, iter:const())
        end

    elseif name == 'ALIGN' or name == 'SKIP' then
        if iter:is_EOL() and name == 'ALIGN' then
            -- a bare ALIGN takes no operands
            emit(name)
        else
            local amount = iter:const(nil, 'no label')
            if iter:is_EOL() then
                emit(name, amount)
            else
                -- optional second operand
                iter:eat_comma()
                emit(name, amount, iter:const(nil, 'no label'))
            end
        end

    elseif name == 'BIN' then
        -- FIXME: not a real directive, just a workaround
        emit(name, iter:string())

    elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then
        -- comma-separated list of constants, all of the directive's size
        self:push_data(iter:const(), name)
        while not iter:is_EOL() do
            iter:eat_comma()
            self:push_data(iter:const(), name)
        end

    elseif name == 'HEX' then
        if iter.tt ~= 'OPEN' then
            iter:error('expected opening brace for hex directive', iter.tt)
        end
        iter:next()

        -- every constant up to the closing token is a byte; line breaks
        -- inside the block are skipped
        while iter.tt ~= 'CLOSE' do
            if iter.tt ~= 'EOL' then
                self:push_data(iter:const(), 'BYTE')
            else
                iter:next()
            end
        end
        iter:next()

    elseif name == 'INC' or name == 'INCBIN' then
        -- noop, handled by lexer
        iter:string()

    elseif name == 'ASCII' or name == 'ASCIIZ' then
        local text = iter:string()
        for _, byte in ipairs(text.tok) do
            self:push_data(byte, 'BYTE')
        end
        if name == 'ASCIIZ' then
            -- trailing zero byte
            self:push_data(0, 'BYTE')
        end

    elseif name == 'FLOAT' then
        iter:error('unimplemented directive', name)

    else
        iter:error('unknown directive', name)
    end

    iter:expect_EOL()
end
--- Collect the operands of one instruction into a new statement.
-- The statement is appended to self.statements right away, filled with one
-- entry per operand until the iterator reaches 'EOL', and then validated.
-- @param name instruction name, used as the statement's type
function Collector:instruction(name)
    local iter = self.iter
    local stmt = self:statement(name)
    insert(self.statements, stmt)

    while iter.tt ~= 'EOL' do
        local current = iter.t
        local tt = iter.tt

        if tt == 'OPEN' then
            insert(stmt, iter:deref())

        elseif tt == 'UNARY' then
            local ahead = assert(iter:peek())
            local ahead_tt = ahead.tt

            if ahead_tt == 'VARSYM' then
                -- signed variable: remember whether the sign was a minus
                local negate = current.tok == -1
                local sym = iter:next()
                sym = Token(sym):set('negate', negate)
                insert(stmt, sym)
                iter:next()

            elseif ahead_tt == 'EOL' or ahead_tt == 'SEP' then
                -- a lone sign becomes a relative label symbol
                local sign
                if current.tok == 1 then
                    sign = '+'
                elseif current.tok == -1 then
                    sign = '-'
                else
                    sign = false
                end
                local rel = Token(iter.fn, iter.line, 'RELLABELSYM', sign)
                insert(stmt, rel)
                iter:next()

            else
                iter:error('unexpected token after unary operator', ahead_tt)
            end

        elseif tt == 'SPECIAL' then
            local special = iter:basic_special()
            insert(stmt, special)
            iter:next()

        elseif tt == 'SEP' then
            iter:error('extraneous comma')

        elseif not iter.arg_types[tt] then
            iter:error('unexpected argument type in instruction', tt)

        else
            -- ordinary operand: store the token as-is
            insert(stmt, current)
            iter:next()
        end

        iter:eat_comma()
    end

    iter:expect_EOL()
    stmt:validate()
end
--- Turn a token list into a list of statements.
-- Resets self.iter and self.statements, optionally seeds the list with
-- !ORG / !BASE statements taken from self.options, then walks the tokens
-- line by line.
-- @param tokens token list handed to TokenIter
-- @param fn     accepted but not used by this function
-- @return self.statements
function Collector:collect(tokens, fn)
    self.iter = TokenIter(tokens)
    local iter = self.iter

    self.statements = {}

    -- this works, but probably shouldn't be in this function specifically
    local presets = {
        {'origin', '!ORG'},
        {'base', '!BASE'},
    }
    for _, preset in ipairs(presets) do
        local key, kind = preset[1], preset[2]
        if self.options[key] then
            local s = Statement('(options)', 0, kind, self.options[key])
            insert(self.statements, s)
        end
    end

    for t in iter do
        local tt = t.tt
        if tt == 'EOF' or tt == 'EOL' then
            -- noop; end of input or empty line
        elseif tt == 'LABEL' or tt == 'RELLABEL' then
            insert(self.statements, self:statement('!LABEL', t))
        elseif tt == 'VAR' then
            -- the token after the variable is stored with it
            local value = iter:next()
            iter:next()
            local s = self:statement('!VAR', t, value)
            insert(self.statements, s)
            iter:expect_EOL()
        elseif tt == 'DIR' then
            iter:next()
            self:directive(t.tok)
        elseif tt == 'INSTR' then
            iter:next()
            self:instruction(t.tok)
        else
            iter:error('expected starting token for statement', tt)
        end
    end

    return self.statements
end
-- Module result: the Collector object created above with Base:extend().
return Collector