1
0
Fork 0
mirror of https://github.com/notwa/lips synced 2024-05-03 10:03:23 -07:00

remove expressions

This commit is contained in:
Connor Olding 2017-05-24 20:33:11 +00:00
parent b97098a844
commit ac1e52ab09
7 changed files with 7 additions and 441 deletions

View File

@ -63,12 +63,11 @@ currently there is:
dumps the statements table after tokenizing and collecting into statements.
this is after UNARY and RELLABELSYM tokens have been disambiguated.
.debug_pre (default false)
dumps statements after basic preprocessing:
variable substitution, expression parsing,
relative label substitution, etc.
dumps statements after basic preprocessing,
e.g. variable substitution, relative label substitution, etc.
.debug_post (default false)
dumps statements after expanding preprocessor commands:
pseudo-instructions, expression evaluation, etc.
dumps statements after expanding preprocessor commands,
e.g. pseudo-instructions.
.debug_asm (default false)
is arguably the least useful of states to dump in.
this will dump statements after being reduced to
@ -155,9 +154,6 @@ the appropriate files are placed and tokenized inline, not unlike in C.
the `HEX` directive is its own mini-language and thus has its own mini-lexer.
expressions are not parsed nor lexed here.
they are simply extracted as whole strings for later processing.
the `yield` closure wraps around the `_yield` function argument
to pass error-handling metadata: the current filename and line number.
@ -177,7 +173,7 @@ there's a couple TODOs and FIXMEs in here.
collects tokens into statements.
statements are basically our form of an abstract syntax tree,
except statements don't need the depth of a tree (outside of expressions)
except statements don't need the depth of a tree
so they're completely flat.
most of this is just validation of the lexed tokens.
@ -202,7 +198,6 @@ preprocessing is split into two passes:
### pass 1
resolves variables by substitution,
parses and evaluates expressions,
and collects relative labels.
this pass starts by creating a new, empty table of statements to fill.
@ -211,8 +206,7 @@ statements are passed through, possibly modified, or read and left-out.
the reason for the copying is that taking indexes into an array (statements)
that you're removing elements from is A Bad Idea.
all expression tokens are evaluated,
and all variable tokens are substituted.
all variable tokens are substituted.
variable-declaring statements (`!VAR`) are read to a dictionary table
for future substitution of their keys with values.
@ -253,26 +247,7 @@ expansion is kinda messy.
## Expression
handles parsing and evaluation of simple (usually mathematical) expressions.
this class is actually completely independent of the rest of lips,
besides the requirement of the `Base` class, which isn't specific to lips.
### room for improvement
right now, this is just a quick and dirty port of some
C++ code i wrote a while back. so basically, everything could be improved.
bitwise operators need to be implemented.
possibly with LuaJIT and a Lua 5.1 fallback.
maybe that should be its own file?
in the long term,
i'll need to move lexing expressions to the main Lexer class,
and do proper parsing to an AST in Collector.
this will unify the syntax,
and allow for inline expressions, e.g.:
`lw s0, 5*4(sp)`.
expressions are not implemented in this branch.
## Dumper

12
TODO
View File

@ -20,22 +20,10 @@ add basic command-line interface (patch.lua)
improve writer performance (just copypaste what you did in patch.lua)
allow generation of shared object files (zelda overlays specifically)
-> http://wiki.cloudmodding.com/oot/Overlays#Relocation_Entry_Format
macros:
add 'em
implement push/pop/jpop as macros
be able to point to specific args of push/pop using variables
procrastinating the heck over:
lex expressions in Lexer instead of its own separate lexer
write tests for everything (try to focus on code paths and edge cases)
make breaking changes to syntax for the sake of macros, expressions etc.
(and keep a branch with the old syntax for good measure, feature-frozen)
low priority:
improve parser terminology
add delay slot warnings
externally document more stuff like syntax
add file-reading directives (e.g. for automatic hook injection macros)

View File

@ -244,10 +244,6 @@ function Dumper:load(statements)
self.fn = s.fn
self.line = s.line
if s.type:sub(1, 1) == '!' then
if s[1] and s[1].tt == 'EXPR' then
self:error('unevaluated expression')
end
if s.type == '!LABEL' then
self.labels[s[1].tok] = self:pc()
elseif s.type == '!DATA' then

View File

@ -1,341 +0,0 @@
local insert = table.insert
local path = string.gsub(..., "[^.]+$", "")
local Base = require(path.."Base")
-- Expression: parses and evaluates simple infix expressions.
-- independent of the rest of lips except for the generic Base class.
local Expression = Base:extend()
-- variables: optional table mapping bareword names to numeric values,
-- consulted by the lexer when it encounters an identifier.
function Expression:init(variables)
    self.variables = variables or {}
end
Expression.precedence = {
    -- roughly python-style precedence; larger numbers bind tighter.
    -- keys prefixed with "unary " are the prefix forms of the operators.
    [","] = -1,
    ["or"] = 0,  ["||"] = 0,
    ["xor"] = 1,
    ["and"] = 2, ["&&"] = 2,
    ["unary not"] = 3,
    ["=="] = 5, ["!="] = 5,
    ["<"] = 5, [">"] = 5, ["<="] = 5, [">="] = 5,
    ["|"] = 10,
    ["^"] = 11,
    ["&"] = 12,
    ["<<"] = 13, [">>"] = 13,
    ["+"] = 20, ["-"] = 20,
    ["*"] = 21, ["/"] = 21, ["//"] = 21, ["%"] = 21, ["%%"] = 21,
    ["unary !"] = 30, ["unary ~"] = 30,
    ["unary +"] = 30, ["unary -"] = 30,
    -- note: precedence of 40 is hardcoded for right-left association
    -- TODO: also hardcode unary handling on right-hand side of operator
    ["**"] = 40,
}
Expression.unary_ops = {
    -- NOTE(review): "not" and "!" return a boolean rather than a number,
    -- which eval() then pushes onto its value stack — confirm downstream
    -- consumers expect a boolean result here.
    ["not"] = function(a) return a == 0 end,
    ["!"] = function(a) return a == 0 end,
    -- bitwise not is unimplemented pending float<->int helpers:
    -- ["~"] = function(a) return F(~I(a)) end,
    ["+"] = function(a) return a end,
    ["-"] = function(a) return -a end,
}
Expression.binary_ops = {
    -- the comma evaluates both operands and yields the right one
    [","] = function(a, b) return b end,
    -- NOTE(review): these logical ops rely on lua truthiness, so any
    -- number (including 0) counts as true — confirm this is intended.
    ["or"] = function(a, b) return a or b end,
    ["||"] = function(a, b) return a or b end,
    ["xor"] = function(a, b) return (a or b) and not (a and b) end,
    ["and"] = function(a, b) return a and b end,
    ["&&"] = function(a, b) return a and b end,
    ["=="] = function(a, b) return a == b end,
    ["!="] = function(a, b) return a ~= b end,
    ["<"] = function(a, b) return a < b end,
    [">"] = function(a, b) return a > b end,
    ["<="] = function(a, b) return a <= b end,
    [">="] = function(a, b) return a >= b end,
    -- bitwise operators are unimplemented pending float<->int helpers:
    -- ["|"] = function(a, b) return F(I(a) | I(b)) end,
    -- ["^"] = function(a, b) return F(I(a) ^ I(b)) end,
    -- ["&"] = function(a, b) return F(I(a) & I(b)) end,
    -- ["<<"] = function(a, b) return F(I(a) << I(b)) end,
    -- [">>"] = function(a, b) return F(I(a) >> I(b)) end,
    ["+"] = function(a, b) return a + b end,
    ["-"] = function(a, b) return a - b end,
    ["*"] = function(a, b) return a * b end,
    ["/"] = function(a, b) return a / b end,
    -- ["//"] = function(a, b) return trunc(a / trunc(b)) end,
    -- ["%"] = function(a, b) return fmod(a, b) end,
    -- ["%%"] = function(a, b) return trunc(fmod(a, trunc(b))) end,
    ["**"] = function(a, b) return a^b end,
}
-- operator strings grouped by length, plus the longest length seen;
-- used by match_operator to try the longest operators first.
local operators = {}
local operators_maxlen = 0
do
    for k, v in pairs(Expression.precedence) do
        -- strip the "unary " prefix; the lexer matches the bare symbol
        local op = k:find('^unary ') and k:sub(#'unary ' + 1) or k
        -- BUGFIX: group by the length of the bare operator, not by the
        -- length of the precedence key; "unary not" used to be filed
        -- under length 9, so match_operator could never match "not".
        if operators[#op] == nil then
            operators[#op] = {}
        end
        insert(operators[#op], op)
        if #op > operators_maxlen then
            operators_maxlen = #op
        end
    end
end
-- returns the operator found at the very start of str, or nil if none.
-- longer operators are tried before shorter ones so that, for example,
-- "<<" is preferred over "<".
local function match_operator(str)
    for len = operators_maxlen, 1, -1 do
        local candidates = operators[len]
        if candidates then
            local prefix = str:sub(1, len)
            for _, op in ipairs(candidates) do
                if op == prefix then
                    return prefix
                end
            end
        end
    end
    return nil
end
function Expression:lex1(str, tokens)
    -- first lexing pass: splits str into number/paren/operator tokens,
    -- appending each to the given tokens table.
    -- returns an error message string on failure, nil on success.
    local pos = 1
    local rest = str
    -- advance past n characters of input
    local function consume(n)
        pos = pos + n
        rest = rest:sub(n + 1)
    end
    -- the text matched by the most recent successful consider()
    local considered = ''
    -- tests whether the remaining input begins with the given pattern
    local function consider(pattern)
        local start, stop = rest:find('^'..pattern)
        if start == nil then
            considered = ''
            return false
        end
        considered = rest:sub(start, stop)
        return true
    end
    -- tests whether the remaining input begins with a known operator
    local function consider_operator()
        local op = match_operator(rest)
        if op == nil then
            considered = ''
            return false
        end
        considered = op
        return true
    end
    while pos <= #str do
        local old_pos = pos
        local here = " (#"..tostring(pos)..")"
        if consider(' +') then
            consume(#considered)
        elseif consider('[0-9.]') or consider('[%%$#]') then
            -- some kind of number: peek at the first character, then try
            -- the specific notations below, most specific first.
            local num
            -- BUGFIX: the old decimal/exponent patterns were written with
            -- regex alternation ('|') and group quantifiers ('(...)?'),
            -- neither of which exist in lua patterns, so decimals such as
            -- "3.14" and exponents such as "1e5" never actually lexed.
            if consider('[0-9]*%.[0-9]*e[0-9]+')
            or consider('[0-9]*%.[0-9]+')
            or consider('[0-9]+%.[0-9]*') then
                -- decimal point, with optional exponent
                num = tonumber(considered)
            elseif consider('[0-9]+e[0-9]+') then
                -- integer with exponent
                num = tonumber(considered)
            elseif consider('%%[0-9]+') then
                -- %1010 binary
                if considered:match('[2-9]') then
                    return "bad binary number: "..considered..here
                end
                num = tonumber(considered:sub(2), 2)
            elseif consider('$[0-9A-Fa-f]+') then
                -- $FF hexadecimal
                num = tonumber(considered:sub(2), 16)
            elseif consider('0x[0-9A-Fa-f]+') then
                -- 0xFF hexadecimal
                num = tonumber(considered:sub(3), 16)
            elseif consider('0o[0-9]+') then
                -- 0o777 octal
                if considered:match('[89]') then
                    return "bad octal number: "..considered..here
                end
                num = tonumber(considered:sub(3), 8)
            elseif consider('0b[0-9]+') then
                -- 0b1010 binary
                if considered:match('[2-9]') then
                    return "bad binary number: "..considered..here
                end
                num = tonumber(considered:sub(3), 2)
            elseif consider('0[0-9]+') then
                -- 0777 octal (leading zero)
                if considered:match('[89]') then
                    return "bad octal number: "..considered..here
                end
                num = tonumber(considered:sub(2), 8)
            elseif consider('#[0-9]+') then
                -- #123 explicit decimal
                num = tonumber(considered:sub(2))
            elseif consider('[0-9]+') then
                num = tonumber(considered)
            end
            if num == nil then
                return "invalid number"..here
            end
            insert(tokens, {type='number', value=num})
            consume(#considered)
        elseif consider('[(]') then
            insert(tokens, {type='opening', value=considered})
            consume(#considered)
        elseif consider('[)]') then
            insert(tokens, {type='closing', value=considered})
            consume(#considered)
        elseif consider_operator() then
            insert(tokens, {type='operator', value=considered})
            consume(#considered)
        elseif consider('[%w_]+') then
            -- bareword: resolve it as a variable
            local num = self.variables[considered]
            if num == nil then
                return 'undefined variable "'..considered..'"'
            end
            insert(tokens, {type='number', value=num})
            consume(#considered)
        else
            local chr = rest:sub(1, 1)
            return "unexpected character '"..chr.."'"..here
        end
        -- each iteration must consume input, or we'd loop forever
        if pos == old_pos then
            error("Internal Error: expression parser is stuck")
        end
    end
end
function Expression:lex2(tokens)
    -- second lexing pass: detect unary (prefix) operators.
    -- an operator is unary when it does not follow a value, i.e. the
    -- previous token is neither a number nor a closing parenthesis.
    -- TODO: this is probably not the best way to do this
    local follows_value = false
    for _, tok in ipairs(tokens) do
        if tok.type == 'operator' and not follows_value then
            tok.type = 'unary'
        end
        follows_value = tok.type == 'number' or tok.type == 'closing'
    end
end
function Expression:lex(str)
    -- tokenizes an expression string.
    -- returns the token list plus an error message, if any.
    local tokens = {}
    -- BUGFIX: err was accidentally a global variable
    local err = self:lex1(str, tokens)
    if err then return tokens, err end
    err = self:lex2(tokens)
    return tokens, err
end
function Expression:shunt(tokens)
    -- shunting-yard algorithm: converts the infix token list into
    -- reverse polish notation suitable for eval().
    -- returns the rpn token list plus an error message, if any.
    local shunted = {}
    local stack = {}
    local operator_types = {
        unary = true,
        operator = true,
    }
    -- the precedence-table key for a token ("unary -" vs plain "-")
    local function key_of(t)
        return t.type == 'unary' and 'unary '..t.value or t.value
    end
    for _, t in ipairs(tokens) do
        if t.type == 'number' then
            insert(shunted, t)
        elseif t.type == 'opening' then
            insert(stack, t)
        elseif t.type == 'closing' then
            while #stack > 0 and stack[#stack].type ~= 'opening' do
                insert(shunted, stack[#stack])
                stack[#stack] = nil
            end
            if #stack == 0 then return shunted, 'missing opening parenthesis' end
            stack[#stack] = nil
        elseif t.type == 'operator' or t.type == 'unary' then
            local pre = self.precedence[key_of(t)]
            if pre == nil then return shunted, 'unknown operator' end
            -- right-associativity hack: ** (precedence 40) and prefix
            -- unary operators must not pop operators of equal precedence.
            -- (** on the left of a unary is still mishandled; see the
            -- TODO in the precedence table.)
            if pre == 40 or t.type == 'unary' then pre = pre + 1 end
            while #stack > 0 do
                local tail = stack[#stack]
                if not operator_types[tail.type] then break end
                -- BUGFIX: look up the stacked operator by its full key;
                -- previously a stacked unary used its binary precedence
                -- ("unary not" looked up nil and crashed on arithmetic).
                local dpre = pre - self.precedence[key_of(tail)]
                if dpre > 0 then break end
                insert(shunted, tail)
                stack[#stack] = nil
            end
            insert(stack, t)
        else
            error('Internal Error: unknown type of expression token')
        end
    end
    -- flush remaining operators; any leftover '(' is unbalanced
    while #stack > 0 do
        local t = stack[#stack]
        if t.type == 'opening' then return shunted, 'missing closing parenthesis' end
        insert(shunted, t)
        stack[#stack] = nil
    end
    return shunted, nil
end
function Expression:parse(str)
    -- converts an expression string into rpn tokens ready for eval().
    -- returns the token list plus an error message, if any.
    local tokens, err = self:lex(str)
    if not err then
        tokens, err = self:shunt(tokens)
        -- debugging aid:
        --for i, v in ipairs(tokens) do print(i, v.type, v.value) end
    end
    return tokens, err
end
function Expression:eval(tokens_or_str)
    -- evaluates rpn tokens (from parse) or a raw expression string.
    -- returns the resulting value plus an error message, if any;
    -- on error, 0 is returned as the value.
    local tokens, err
    if type(tokens_or_str) == 'string' then
        tokens, err = self:parse(tokens_or_str)
        if err then return 0, err end
    elseif type(tokens_or_str) == 'table' then
        tokens = tokens_or_str
    else
        return 0, "eval(): argument is neither token table nor string"
    end
    -- a plain value stack; rpn evaluation needs nothing more
    local stack = {}
    local function push(v)
        stack[#stack + 1] = v
    end
    local function pop()
        local v = stack[#stack]
        stack[#stack] = nil
        return v
    end
    for _, t in ipairs(tokens) do
        if t.type == 'number' then
            push(t.value)
        elseif t.type == 'unary' then
            if #stack < 1 then return 0, "missing arguments for unary" end
            local f = self.unary_ops[t.value]
            if f == nil then return 0, "unknown unary" end
            push(f(pop()))
        elseif t.type == 'operator' then
            if #stack < 2 then return 0, "missing arguments for operator" end
            local f = self.binary_ops[t.value]
            if f == nil then return 0, "unknown operator" end
            local b = pop()
            local a = pop()
            push(f(a, b))
        else
            return 0, "eval(): unknown token"
        end
    end
    -- a well-formed expression reduces to exactly one value
    if #stack > 1 then return 0, "too many arguments" end
    if #stack == 0 then return 0, "no arguments" end
    return stack[1], nil
end
return Expression

View File

@ -341,38 +341,6 @@ function Lexer:lex_include_binary(_yield)
_yield('EOF', self.EOF, self.fn, self.line)
end
-- lexes a parenthesized expression into a single EXPR token.
-- expects self.chr to be on the opening '(' of the expression; the text
-- between the outermost parentheses is captured verbatim (nested
-- parentheses are tracked and kept) for later processing.
function Lexer:lex_expression(yield)
    if self.chr ~= '(' then
        self:error('expected opening parenthesis for expression')
    end
    self:nextc()
    local expr = ""
    local depth = 1 -- we're already inside the first '('
    while true do
        if self.chr == '\n' then
            -- expressions must fit on a single line
            self:error('unexpected newline; incomplete expression')
        elseif self.ord == self.EOF then
            self:nextc()
            self:error('unexpected EOF; incomplete expression')
        elseif self.chr == '(' then
            depth = depth + 1
            self:nextc()
            expr = expr..'('
        elseif self.chr == ')' then
            depth = depth - 1
            self:nextc()
            -- the matching outermost ')' terminates the expression
            if depth == 0 then break end
            expr = expr..')'
        else
            expr = expr..self.chr
            self:nextc()
        end
    end
    yield('EXPR', expr)
end
function Lexer:lex(_yield)
local function yield(tt, tok)
return _yield(tt, tok, self.fn, self.line)
@ -416,12 +384,6 @@ function Lexer:lex(_yield)
self:nextc()
yield('VAR', buff)
self:read_spaces()
if self.chr == '@' then
-- old syntax; nothing to do here
else
buff = self:read_chars('[^;\n]')
yield('EXPR', buff)
end
elseif self.chr == ']' then
self:error('unmatched closing bracket')
elseif self.chr == '(' then
@ -467,8 +429,6 @@ function Lexer:lex(_yield)
end
elseif self.chr:find('[01]') then
yield('NUM', self:read_binary())
elseif self.chr == '(' then
self:lex_expression(yield)
else
self:error('unknown % syntax')
end

View File

@ -4,7 +4,6 @@ local insert = table.insert
local path = string.gsub(..., "[^.]+$", "")
local Base = require(path.."Base")
local Expression = require(path.."Expression")
local util = require(path.."util")
local signs = util.signs
@ -110,14 +109,6 @@ function Preproc:check(s, i, tt)
end
-- resolves a token in place: EXPR tokens are evaluated down to NUM,
-- then the token is passed through variable/label lookup.
function Preproc:evaluate(t)
    if t.tt == 'EXPR' then
        local result, err = self.expr:eval(t.tok)
        if err then
            -- BUGFIX: corrected "evaulate" typo in user-facing message
            self:error('failed to evaluate ('..t.tok..')', err)
        end
        t.tt = 'NUM'
        t.tok = result
    end
    self:lookup(t)
end
@ -125,7 +116,6 @@ function Preproc:process(statements)
self.variables = {}
self.plus_labels = {} -- constructed forwards
self.minus_labels = {} -- constructed backwards
self.expr = Expression(self.variables)
-- first pass: resolve variables and collect relative labels
local new_statements = {}

View File

@ -18,7 +18,6 @@ end
TokenIter.arg_types = {
NUM = true,
EXPR = true,
REG = true,
VARSYM = true,
LABELSYM = true,
@ -157,7 +156,6 @@ end
function TokenIter:const(relative, no_label)
local good = {
NUM = true,
EXPR = true,
VARSYM = true,
LABELSYM = true,
}