1
0
Fork 0
mirror of https://github.com/notwa/mm synced 2024-11-05 06:29:02 -08:00
mm/Lua/lib/lips/Lexer.lua

392 lines
11 KiB
Lua

local byte = string.byte
local char = string.char
local find = string.find
local format = string.format
local insert = table.insert
local data = require "lips.data"
local util = require "lips.util"
local simple_escapes = {
['0'] = 0x00,
['\\'] = 0x5C,
['"'] = 0x22,
['a'] = 0x07,
['b'] = 0x08,
['f'] = 0x0C,
['n'] = 0x0A,
['r'] = 0x0D,
['t'] = 0x09,
['v'] = 0x0B,
}
local Lexer = util.Class()
function Lexer:init(asm, fn, options)
self.asm = asm
self.fn = fn or '(string)'
self.options = options or {}
self.pos = 1
self.line = 1
self.EOF = -1
self:nextc()
end
function Lexer:error(msg)
error(format('%s:%d: Error: %s', self.fn, self.line, msg), 2)
end
function Lexer:nextc()
if self.pos > #self.asm then
self.ord = self.EOF
self.ord2 = self.EOF
self.chr = ''
self.chr2 = ''
self.chrchr = ''
return
end
if self.chr == '\n' then
self.line = self.line + 1
end
self.ord = byte(self.asm, self.pos)
self.pos = self.pos + 1
-- handle newlines; translate CRLF to LF
if self.ord == 13 then
if self.pos <= #self.asm and byte(self.asm, self.pos) == 10 then
self.pos = self.pos + 1
end
self.ord = 10
end
self.chr = char(self.ord)
if self.pos <= #self.asm then
self.ord2 = byte(self.asm, self.pos)
self.chr2 = char(self.ord2)
self.chrchr = char(self.ord, self.ord2)
else
self.ord2 = self.EOF
self.chr2 = ''
self.chrchr = self.chr
end
end
function Lexer:skip_to_EOL()
while self.chr ~= '\n' and self.ord ~= self.EOF do
self:nextc()
end
end
function Lexer:read_chars(pattern)
local buff = ''
while find(self.chr, pattern) do
buff = buff..self.chr
self:nextc()
end
return buff
end
function Lexer:read_decimal()
local buff = self:read_chars('%d')
local num = tonumber(buff)
if not num then self:error('invalid decimal number') end
return num
end
function Lexer:read_hex()
local buff = self:read_chars('%x')
local num = tonumber(buff, 16)
if not num then self:error('invalid hex number') end
return num
end
function Lexer:read_octal()
local buff = self:read_chars('[0-7]')
local num = tonumber(buff, 8)
if not num then self:error('invalid octal number') end
return num
end
function Lexer:read_binary()
local buff = self:read_chars('[01]')
local num = tonumber(buff, 2)
if not num then self:error('invalid binary number') end
return num
end
function Lexer:read_number()
if self.chr == '%' then
self:nextc()
return self:read_binary()
elseif self.chr == '$' then
self:nextc()
return self:read_hex()
elseif self.chr:find('%d') then
if self.chr2 == 'x' or self.chr2 == 'X' then
self:nextc()
self:nextc()
return self:read_hex()
elseif self.chr == '0' and self.chr2:find('%d') then
self:nextc()
return self:read_octal()
else
return self:read_decimal()
end
elseif self.chr == '#' then
self:nextc()
return self:read_decimal()
end
end
function Lexer:lex_hex(yield)
local hexmatch = '[0-9A-Fa-f]'
local entered = false
while true do
if self.chr == '\n' then
yield('EOL', '\n')
self:nextc()
elseif self.ord == self.EOF then
self:error('unexpected EOF; incomplete hex directive')
elseif self.chr == ';' then
self:skip_to_EOL()
elseif self.chrchr == '//' then
self:skip_to_EOL()
elseif self.chrchr == '/*' then
self:nextc()
self:nextc()
self:lex_block_comment(yield)
elseif self.chr:find('%s') then
self:nextc()
elseif self.chr == '{' then
if entered then
self:error('unexpected opening brace')
end
self:nextc()
entered = true
elseif self.chr == '}' then
if not entered then
self:error('expected opening brace')
end
self:nextc()
break
elseif self.chr == ',' then
self:error('commas are not allowed in HEX directives')
elseif self.chr:find(hexmatch) and self.chr2:find(hexmatch) then
local num = tonumber(self.chrchr, 16)
self:nextc()
self:nextc()
if self.chr:find(hexmatch) then
self:error('too many hex digits to be a single byte')
end
yield('DIR', 'BYTE')
yield('NUM', num)
elseif self.chr:find(hexmatch) then
self:error('expected two hex digits to make a byte')
else
if entered then
self:error('expected bytes given in hex or closing brace')
else
self:error('expected opening brace')
end
end
end
end
function Lexer:lex_block_comment(yield)
while true do
if self.chr == '\n' then
yield('EOL', '\n')
self:nextc()
elseif self.ord == self.EOF then
self:error('unexpected EOF; incomplete block comment')
elseif self.chrchr == '*/' then
self:nextc()
self:nextc()
break
else
self:nextc()
end
end
end
function Lexer:lex_string(yield)
if self.chr ~= '"' then
self:error('expected opening double quote')
end
self:nextc()
local bytes = {}
while true do
if self.chr == '\n' then
self:error('unimplemented')
yield('EOL', '\n')
self:nextc()
elseif self.ord == self.EOF then
self:nextc()
self:error('unexpected EOF; incomplete string')
elseif self.chr == '"' then
self:nextc()
break
elseif self.chr == '\\' then
self:nextc()
local simple = simple_escapes[self.chr]
if simple then
insert(bytes, simple)
else
self:error('unknown escape sequence')
end
self:nextc()
else
insert(bytes, byte(self.chr))
self:nextc()
end
end
yield('STRING', bytes)
end
function Lexer:lex_string_naive(yield) -- no escape sequences
if self.chr ~= '"' then
self:error('expected opening double quote')
end
self:nextc()
local buff = self:read_chars('[^"\n]')
if self.chr ~= '"' then
self:error('expected closing double quote')
end
self:nextc()
yield('STRING', buff)
end
function Lexer:lex_include(_yield)
self:read_chars('%s')
local fn
self:lex_string_naive(function(tt, tok)
fn = tok
end)
if self.options.path then
fn = self.options.path..fn
end
local sublexer = Lexer(util.readfile(fn), fn, self.options)
sublexer:lex(_yield)
end
function Lexer:lex(_yield)
local function yield(tt, tok)
return _yield(tt, tok, self.fn, self.line)
end
while true do
if self.chr == '\n' then
yield('EOL', '\n')
self:nextc()
elseif self.ord == self.EOF then
yield('EOF', self.EOF)
break
elseif self.chr == ';' then
self:skip_to_EOL()
elseif self.chrchr == '//' then
self:skip_to_EOL()
elseif self.chrchr == '/*' then
self:nextc()
self:nextc()
self:lex_block_comment(yield)
elseif self.chr:find('%s') then
self:nextc()
elseif self.chr == ',' then
self:nextc()
yield('SEP', ',')
elseif self.chr == '[' then
self:nextc()
local buff = self:read_chars('[%w_]')
if self.chr ~= ']' then
self:error('invalid define name')
end
self:nextc()
if self.chr ~= ':' then
self:error('define requires a colon')
end
self:nextc()
yield('DEF', buff)
elseif self.chr == ']' then
self:error('unmatched closing bracket')
elseif self.chr == '(' then
self:nextc()
yield('OPEN', '(')
elseif self.chr == ')' then
self:nextc()
yield('CLOSE', ')')
elseif self.chr == '.' then
self:nextc()
local buff = self:read_chars('[%w]')
local up = buff:upper()
if data.directive_aliases[up] then
up = data.directive_aliases[up]
end
if not data.all_directives[up] then
self:error('unknown directive')
end
if up == 'INC' or up == 'INCASM' or up == 'INCLUDE' then
yield('DIR', 'INC')
self:lex_include(_yield)
else
yield('DIR', up)
end
elseif self.chr == '"' then
self:lex_string(yield)
elseif self.chr == '@' then
self:nextc()
local buff = self:read_chars('[%w_]')
yield('DEFSYM', buff)
elseif self.chr == '%' then
self:nextc()
local call = self:read_chars('[%w_]')
yield('SPECIAL', call)
elseif self.chr:find('[%a_]') then
local buff = self:read_chars('[%w_.]')
local up = buff:upper()
if self.chr == ':' then
if buff:find('%.') then
self:error('labels cannot contain dots')
end
self:nextc()
yield('LABEL', buff)
elseif up == 'HEX' then
self:lex_hex(yield)
elseif data.all_registers[up] then
yield('REG', up)
elseif data.all_instructions[up] then
yield('INSTR', up:gsub('%.', '_'))
else
if buff:find('%.') then
self:error('labels cannot contain dots')
end
yield('LABELSYM', buff)
end
elseif self.chr == '+' or self.chr == '-' then
local sign_chr = self.chr
local sign = sign_chr == '+' and 1 or -1
local buff = self:read_chars('%'..self.chr)
if #buff == 1 and self.chr == ':' then
self:nextc()
yield('RELLABEL', sign_chr)
else
local n = self:read_number()
if n then
yield('NUM', sign*n)
else
yield('RELLABELSYM', sign*#buff)
end
end
else
local n = self:read_number()
if n then
yield('NUM', n)
else
self:error('unknown character or control character')
end
end
end
end
return Lexer