mirror of
https://github.com/notwa/lips
synced 2025-03-09 19:32:49 -07:00
remove expressions
This commit is contained in:
parent
b97098a844
commit
ac1e52ab09
7 changed files with 7 additions and 441 deletions
39
NOTES.md
39
NOTES.md
|
@ -63,12 +63,11 @@ currently there is:
|
|||
dumps the statements table after tokenizing and collecting into statements.
|
||||
this is after UNARY and RELLABELSYM tokens have been disambiguated.
|
||||
.debug_pre (default false)
|
||||
dumps statements after basic preprocessing:
|
||||
variable substitution, expression parsing,
|
||||
relative label substitution, etc.
|
||||
dumps statements after basic preprocessing,
|
||||
e.g. variable substitution, relative label substitution, etc.
|
||||
.debug_post (default false)
|
||||
dumps statements after expanding preprocessor commands:
|
||||
pseudo-instructions, expression evaluation, etc.
|
||||
dumps statements after expanding preprocessor commands,
|
||||
e.g. pseudo-instructions.
|
||||
.debug_asm (default false)
|
||||
is arguably the least useful of states to dump in.
|
||||
this will dump statements after being reduced to
|
||||
|
@ -155,9 +154,6 @@ the appropriate files are placed and tokenized inline, not unlike in C.
|
|||
|
||||
the `HEX` directive is its own mini-language and thus has its own mini-lexer.
|
||||
|
||||
expressions are not parsed nor lexed here.
|
||||
they are simply extracted as whole strings for later processing.
|
||||
|
||||
the `yield` closure wraps around the `_yield` function argument
|
||||
to pass error-handling metadata: the current filename and line number.
|
||||
|
||||
|
@ -177,7 +173,7 @@ there's a couple TODOs and FIXMEs in here.
|
|||
|
||||
collects tokens into statements.
|
||||
statements are basically our form of an abstract syntax tree,
|
||||
except statements don't need the depth of a tree (outside of expressions)
|
||||
except statements don't need the depth of a tree
|
||||
so they're completely flat.
|
||||
|
||||
most of this is just validation of the lexed tokens.
|
||||
|
@ -202,7 +198,6 @@ preprocessing is split into two passes:
|
|||
### pass 1
|
||||
|
||||
resolves variables by substitution,
|
||||
parses and evaluates expressions,
|
||||
and collects relative labels.
|
||||
|
||||
this pass starts by creating a new, empty table of statements to fill.
|
||||
|
@ -211,8 +206,7 @@ statements are passed through, possibly modified, or read and left-out.
|
|||
the reason for the copying is that taking indexes into an array (statements)
|
||||
that you're removing elements from is A Bad Idea.
|
||||
|
||||
all expression tokens are evaluated,
|
||||
and all variable tokens are substituted.
|
||||
all variable tokens are substituted.
|
||||
|
||||
variable-declaring statements (`!VAR`) are read to a dictionary table
|
||||
for future substitution of their keys with values.
|
||||
|
@ -253,26 +247,7 @@ expansion is kinda messy.
|
|||
|
||||
## Expression
|
||||
|
||||
handles parsing and evaluation of simple (usually mathematical) expressions.
|
||||
|
||||
this class is actually completely independent of the rest of lips,
|
||||
besides the requirement of the `Base` class, which isn't specific to lips.
|
||||
|
||||
### room for improvement
|
||||
|
||||
right now, this is just a quick and dirty port of some
|
||||
C++ code i wrote a while back. so basically, everything could be improved.
|
||||
|
||||
bitwise operators need to be implemented.
|
||||
possibly with LuaJIT and a Lua 5.1 fallback.
|
||||
maybe that should be its own file?
|
||||
|
||||
in the long term,
|
||||
i'll need to move lexing expressions to the main Lexer class,
|
||||
and do proper parsing to an AST in Collector.
|
||||
this will unify the syntax,
|
||||
and allow for inline expressions, e.g.:
|
||||
`lw s0, 5*4(sp)`.
|
||||
expressions are not implemented in this branch.
|
||||
|
||||
## Dumper
|
||||
|
||||
|
|
12
TODO
12
TODO
|
@ -20,22 +20,10 @@ add basic command-line interface (patch.lua)
|
|||
|
||||
improve writer performance (just copypaste what you did in patch.lua)
|
||||
|
||||
allow generation of shared object files (zelda overlays specifically)
|
||||
-> http://wiki.cloudmodding.com/oot/Overlays#Relocation_Entry_Format
|
||||
|
||||
macros:
|
||||
add 'em
|
||||
implement push/pop/jpop as macros
|
||||
be able to point to specific args of push/pop using variables
|
||||
|
||||
procrastinating the heck over:
|
||||
lex expressions in Lexer instead of its own separate lexer
|
||||
write tests for everything (try to focus on code paths and edge cases)
|
||||
make breaking changes to syntax for the sake of macros, expressions etc.
|
||||
(and keep a branch with the old syntax for good measure, feature-frozen)
|
||||
|
||||
low priority:
|
||||
improve parser terminology
|
||||
add delay slot warnings
|
||||
externally document more stuff like syntax
|
||||
add file-reading directives (e.g. for automatic hook injection macros)
|
||||
|
|
|
@ -244,10 +244,6 @@ function Dumper:load(statements)
|
|||
self.fn = s.fn
|
||||
self.line = s.line
|
||||
if s.type:sub(1, 1) == '!' then
|
||||
if s[1] and s[1].tt == 'EXPR' then
|
||||
self:error('unevaluated expression')
|
||||
end
|
||||
|
||||
if s.type == '!LABEL' then
|
||||
self.labels[s[1].tok] = self:pc()
|
||||
elseif s.type == '!DATA' then
|
||||
|
|
|
@ -1,341 +0,0 @@
|
|||
local insert = table.insert
|
||||
|
||||
local path = string.gsub(..., "[^.]+$", "")
|
||||
local Base = require(path.."Base")
|
||||
|
||||
local Expression = Base:extend()
|
||||
function Expression:init(variables)
    -- Constructor: remember the variable-name -> value table used when
    -- lexing identifiers; default to an empty table when none is given.
    self.variables = variables or {}
end
|
||||
|
||||
-- Operator precedence, keyed by operator string; unary operators are
-- keyed with a "unary " prefix.  Larger numbers bind more tightly.
Expression.precedence = {
    -- python-ish precedence
    [","] = -1,
    ["or"] = 0,
    ["||"] = 0,
    ["xor"] = 1,
    ["and"] = 2,
    ["&&"] = 2,
    ["unary not"] = 3,
    -- comparisons
    ["=="] = 5,
    ["!="] = 5,
    ["<"] = 5,
    [">"] = 5,
    ["<="] = 5,
    [">="] = 5,
    -- bitwise operators (implementations are still commented out in
    -- unary_ops/binary_ops below)
    ["|"] = 10,
    ["^"] = 11,
    ["&"] = 12,
    ["<<"] = 13,
    [">>"] = 13,
    -- arithmetic
    ["+"] = 20,
    ["-"] = 20,
    ["*"] = 21,
    ["/"] = 21,
    ["//"] = 21,
    ["%"] = 21,
    ["%%"] = 21,
    -- unary operators bind tightest, except for exponentiation
    ["unary !"] = 30,
    ["unary ~"] = 30,
    ["unary +"] = 30,
    ["unary -"] = 30,
    -- note: precedence of 40 is hardcoded for right-left association
    -- TODO: also hardcode unary handling on right-hand side of operator
    ["**"] = 40,
}
|
||||
|
||||
-- Implementations of unary operators, keyed by the bare operator string
-- (without the "unary " prefix used in the precedence table).
Expression.unary_ops = {
    -- logical negation treats 0 as false, so these return a boolean
    ["not"] = function(a) return a == 0 end,
    ["!"] = function(a) return a == 0 end,
    -- bitwise NOT is commented out pending integer support:
    -- ["~"] = function(a) return F(~I(a)) end,
    ["+"] = function(a) return a end,
    ["-"] = function(a) return -a end,
}
|
||||
|
||||
-- Implementations of binary operators, keyed by operator string.
-- NOTE(review): logical and comparison operators return Lua booleans
-- (or pass values through) rather than 0/1 numbers — confirm callers
-- expect non-numeric results on the evaluation stack.
Expression.binary_ops = {
    [","] = function(a, b) return b end,
    ["or"] = function(a, b) return a or b end,
    ["||"] = function(a, b) return a or b end,
    ["xor"] = function(a, b) return (a or b) and not (a and b) end,
    ["and"] = function(a, b) return a and b end,
    ["&&"] = function(a, b) return a and b end,
    ["=="] = function(a, b) return a == b end,
    ["!="] = function(a, b) return a ~= b end,
    ["<"] = function(a, b) return a < b end,
    [">"] = function(a, b) return a > b end,
    ["<="] = function(a, b) return a <= b end,
    [">="] = function(a, b) return a >= b end,
    -- bitwise operators are commented out pending integer support:
    -- ["|"] = function(a, b) return F(I(a) | I(b)) end,
    -- ["^"] = function(a, b) return F(I(a) ^ I(b)) end,
    -- ["&"] = function(a, b) return F(I(a) & I(b)) end,
    -- ["<<"] = function(a, b) return F(I(a) << I(b)) end,
    -- [">>"] = function(a, b) return F(I(a) >> I(b)) end,
    ["+"] = function(a, b) return a + b end,
    ["-"] = function(a, b) return a - b end,
    ["*"] = function(a, b) return a * b end,
    ["/"] = function(a, b) return a / b end,
    -- truncating division/modulo also await integer support:
    -- ["//"] = function(a, b) return trunc(a / trunc(b)) end,
    -- ["%"] = function(a, b) return fmod(a, b) end,
    -- ["%%"] = function(a, b) return trunc(fmod(a, trunc(b))) end,
    ["**"] = function(a, b) return a^b end,
}
|
||||
|
||||
-- Buckets of operator strings keyed by length, so the lexer can try the
-- longest operators first (see match_operator below).
local operators = {}
local operators_maxlen = 0
do
    -- reorder operators so we can match the longest strings first
    for k in pairs(Expression.precedence) do
        -- strip the "unary " prefix; the lexer matches the bare symbol
        local op = k:find('^unary ') and k:sub(#'unary ' + 1) or k
        -- FIX: bucket by the length of the bare operator (#op), not the
        -- full precedence key (#k).  Bucketing by #k filed ops such as
        -- "not", "~" and "!" (keyed "unary not", "unary ~", "unary !")
        -- under the wrong length, so match_operator could never find them.
        if operators[#op] == nil then
            operators[#op] = {}
        end
        insert(operators[#op], op)
        if #op > operators_maxlen then
            operators_maxlen = #op
        end
    end
end
|
||||
|
||||
local function match_operator(str)
    -- Return the longest known operator found at the start of `str`,
    -- or nil when no operator matches.
    for len = operators_maxlen, 1, -1 do
        local bucket = operators[len]
        if bucket ~= nil then
            local head = str:sub(1, len)
            for _, op in ipairs(bucket) do
                if head == op then
                    return head
                end
            end
        end
    end
end
|
||||
|
||||
-- First lexing pass: scan `str` left to right, appending tokens of the
-- form {type=..., value=...} to `tokens`.  Token types produced here are
-- 'number', 'opening', 'closing' and 'operator'.  Returns an error
-- string on failure, or nothing on success.
function Expression:lex1(str, tokens)
    local pos = 1       -- 1-based index of the next unread character
    local rest = str    -- the unread remainder of the input
    -- advance the cursor by n characters
    local function consume(n)
        pos = pos + n
        rest = rest:sub(n + 1)
    end

    -- `consider` anchors `pattern` at the start of `rest`; on a match the
    -- matched text is left in `considered` and true is returned.
    local considered = ''
    local function consider(pattern)
        local start, stop = rest:find('^'..pattern)
        if start == nil then
            considered = ''
            return false
        end
        considered = rest:sub(start, stop)
        return true
    end

    -- like `consider`, but matches against the operator table instead of
    -- a pattern (longest operator wins; see match_operator)
    local function consider_operator()
        local op = match_operator(rest)
        if op == nil then
            considered = ''
            return false
        end
        considered = op
        return true
    end

    while pos <= #str do
        local old_pos = pos
        -- position suffix appended to error messages, e.g. " (#5)"
        local here = " (#"..tostring(pos)..")"
        if consider(' +') then
            -- skip runs of spaces
            consume(#considered)
        elseif consider('[0-9.]') or consider('[%%$#]') then
            -- a numeric literal; try each supported notation in turn
            local num
            -- NOTE(review): these two patterns use regex-style alternation
            -- '|' and optional-group ')?' which Lua patterns do not
            -- support — verify that decimal/scientific literals actually
            -- lex as intended.  TODO confirm.
            if consider('((0|[1-9][0-9]*)%.[0-9]*|%.[0-9]+)(e0|e[1-9][0-9]*)?') then
                num = tonumber(considered)
            elseif consider('(0|[1-9][0-9]*)e(0|[1-9][0-9]*)') then
                num = tonumber(considered)
            elseif consider('%%[0-9]+') then
                -- %1010 style binary
                if considered:match('[2-9]') then
                    return "bad binary number: "..considered..here
                end
                num = tonumber(considered:sub(2), 2)
            elseif consider('$[0-9A-Fa-f]+') then
                -- $FF style hexadecimal
                num = tonumber(considered:sub(2), 16)
            elseif consider('0x[0-9A-Fa-f]+') then
                num = tonumber(considered:sub(3), 16)
            elseif consider('0o[0-9]+') then
                if considered:match('[89]') then
                    return "bad octal number: "..considered..here
                end
                num = tonumber(considered:sub(3), 8)
            elseif consider('0b[0-9]+') then
                if considered:match('[2-9]') then
                    return "bad binary number: "..considered..here
                end
                num = tonumber(considered:sub(3), 2)
            elseif consider('0[0-9]+') then
                -- leading zero means octal
                if considered:match('[89]') then
                    return "bad octal number: "..considered..here
                end
                num = tonumber(considered:sub(2), 8)
            elseif consider('#[0-9]+') then
                -- #123 style explicit decimal
                num = tonumber(considered:sub(2))
            elseif consider('[0-9]+') then
                num = tonumber(considered)
            end
            if num == nil then
                return "invalid number"..here
            end
            insert(tokens, {type='number', value=num})
            consume(#considered)
        elseif consider('[(]') then
            insert(tokens, {type='opening', value=considered})
            consume(#considered)
        elseif consider('[)]') then
            insert(tokens, {type='closing', value=considered})
            consume(#considered)
        elseif consider_operator() then
            insert(tokens, {type='operator', value=considered})
            consume(#considered)
        elseif consider('[%w_]+') then
            -- an identifier: substitute its value immediately
            local num = self.variables[considered]
            if num == nil then
                return 'undefined variable "'..considered..'"'
            end
            insert(tokens, {type='number', value=num})
            consume(#considered)
        else
            local chr = rest:sub(1, 1)
            return "unexpected character '"..chr.."'"..here
        end
        -- every branch must consume input, or the lexer would spin forever
        if pos == old_pos then
            error("Internal Error: expression parser is stuck")
        end
    end
end
|
||||
|
||||
function Expression:lex2(tokens)
    -- Second lexing pass: reclassify prefix operators as unary.  An
    -- operator token is unary unless the preceding token could end an
    -- operand (a number or a closing parenthesis).
    -- TODO: this is probably not the best way to do this
    local prev_ends_operand = false
    for _, token in ipairs(tokens) do
        if token.type == "operator" and not prev_ends_operand then
            token.type = "unary"
        end
        prev_ends_operand = token.type == 'number' or token.type == 'closing'
    end
end
|
||||
|
||||
-- Tokenize `str` into a flat token table.
-- Returns the tokens plus an error string (nil on success).
function Expression:lex(str)
    local tokens = {}
    -- FIX: `err` was assigned without `local`, leaking a global variable
    -- (and making concurrent/nested lexes able to stomp on each other).
    local err = self:lex1(str, tokens)
    if err then return tokens, err end
    err = self:lex2(tokens)
    return tokens, err
end
|
||||
|
||||
function Expression:shunt(tokens)
    -- Shunting-yard algorithm: reorder the infix token stream into
    -- reverse Polish notation.  Returns the reordered tokens plus an
    -- error string (nil on success).
    local output = {}
    local stack = {}

    -- move the top of the operator stack to the output queue
    local function pop_to_output()
        output[#output + 1] = stack[#stack]
        stack[#stack] = nil
    end

    local is_operator = {
        unary = true,
        operator = true,
    }

    for _, token in ipairs(tokens) do
        local kind = token.type
        if kind == 'number' then
            output[#output + 1] = token
        elseif kind == 'opening' then
            stack[#stack + 1] = token
        elseif kind == 'closing' then
            -- flush operators until the matching '(' surfaces
            while #stack > 0 and stack[#stack].type ~= 'opening' do
                pop_to_output()
            end
            if #stack == 0 then return output, 'missing opening parenthesis' end
            stack[#stack] = nil -- discard the '(' itself
        elseif is_operator[kind] then
            local name = token.value
            if kind == 'unary' then
                name = 'unary '..name
            end
            local pre = self.precedence[name]
            if pre == nil then return output, 'unknown operator' end
            -- precedence 40 is right-associative; bump it so that equal
            -- precedence operators already on the stack are kept there
            if pre == 40 then pre = pre + 1 end
            while #stack > 0 do
                local top = stack[#stack]
                if not is_operator[top.type] then break end
                if pre > self.precedence[top.value] then break end
                pop_to_output()
            end
            stack[#stack + 1] = token
        else
            error('Internal Error: unknown type of expression token')
        end
    end

    -- drain whatever operators remain
    while #stack > 0 do
        if stack[#stack].type == 'opening' then
            return output, 'missing closing parenthesis'
        end
        pop_to_output()
    end

    return output, nil
end
|
||||
|
||||
function Expression:parse(str)
    -- Lex `str` and shunt the result into RPN order.
    -- Returns the token table plus an error string (nil on success).
    local tokens, err = self:lex(str)
    if err then
        return tokens, err
    end
    --for i, v in ipairs(tokens) do print(i, v.type, v.value) end
    return self:shunt(tokens)
end
|
||||
|
||||
function Expression:eval(tokens_or_str)
    -- Evaluate an RPN token table (or parse a string into one first).
    -- Returns the result plus an error string; on failure the result
    -- is 0 and the error string describes the problem.
    local argtype = type(tokens_or_str)
    local tokens
    if argtype == 'string' then
        local err
        tokens, err = self:parse(tokens_or_str)
        if err then return 0, err end
    elseif argtype == 'table' then
        tokens = tokens_or_str
    else
        return 0, "eval(): argument is neither token table nor string"
    end

    local stack = {}
    -- pop and return the top value; nil signals underflow (pushed
    -- values are numbers or booleans, never nil)
    local function pop()
        local top = stack[#stack]
        stack[#stack] = nil
        return top
    end

    for _, token in ipairs(tokens) do
        local kind = token.type
        if kind == 'number' then
            stack[#stack + 1] = token.value
        elseif kind == 'unary' then
            local a = pop()
            if a == nil then return 0, "missing arguments for unary" end
            local op = self.unary_ops[token.value]
            if op == nil then return 0, "unknown unary" end
            stack[#stack + 1] = op(a)
        elseif kind == 'operator' then
            -- operands pop off in reverse order
            local b = pop()
            if b == nil then return 0, "missing arguments for operator" end
            local a = pop()
            if a == nil then return 0, "missing arguments for operator" end
            local op = self.binary_ops[token.value]
            if op == nil then return 0, "unknown operator" end
            stack[#stack + 1] = op(a, b)
        else
            return 0, "eval(): unknown token"
        end
    end

    -- a well-formed expression reduces to exactly one value
    if #stack > 1 then return 0, "too many arguments" end
    if #stack == 0 then return 0, "no arguments" end

    return stack[1], nil
end
|
||||
|
||||
return Expression
|
|
@ -341,38 +341,6 @@ function Lexer:lex_include_binary(_yield)
|
|||
_yield('EOF', self.EOF, self.fn, self.line)
|
||||
end
|
||||
|
||||
function Lexer:lex_expression(yield)
    -- Capture a parenthesized expression verbatim and yield it as a
    -- single EXPR token.  Nested parentheses are tracked so that only
    -- the outermost ')' terminates the capture; the outer parens are
    -- not included in the yielded string.
    if self.chr ~= '(' then
        self:error('expected opening parenthesis for expression')
    end
    self:nextc()

    local depth = 1
    local parts = {}
    while true do
        if self.chr == '\n' then
            self:error('unexpected newline; incomplete expression')
        elseif self.ord == self.EOF then
            self:nextc()
            self:error('unexpected EOF; incomplete expression')
        elseif self.chr == '(' then
            depth = depth + 1
            self:nextc()
            parts[#parts + 1] = '('
        elseif self.chr == ')' then
            depth = depth - 1
            self:nextc()
            if depth == 0 then break end
            parts[#parts + 1] = ')'
        else
            parts[#parts + 1] = self.chr
            self:nextc()
        end
    end

    -- concat once at the end rather than building the string char by char
    yield('EXPR', table.concat(parts))
end
|
||||
|
||||
function Lexer:lex(_yield)
|
||||
local function yield(tt, tok)
|
||||
return _yield(tt, tok, self.fn, self.line)
|
||||
|
@ -416,12 +384,6 @@ function Lexer:lex(_yield)
|
|||
self:nextc()
|
||||
yield('VAR', buff)
|
||||
self:read_spaces()
|
||||
if self.chr == '@' then
|
||||
-- old syntax; nothing to do here
|
||||
else
|
||||
buff = self:read_chars('[^;\n]')
|
||||
yield('EXPR', buff)
|
||||
end
|
||||
elseif self.chr == ']' then
|
||||
self:error('unmatched closing bracket')
|
||||
elseif self.chr == '(' then
|
||||
|
@ -467,8 +429,6 @@ function Lexer:lex(_yield)
|
|||
end
|
||||
elseif self.chr:find('[01]') then
|
||||
yield('NUM', self:read_binary())
|
||||
elseif self.chr == '(' then
|
||||
self:lex_expression(yield)
|
||||
else
|
||||
self:error('unknown % syntax')
|
||||
end
|
||||
|
|
|
@ -4,7 +4,6 @@ local insert = table.insert
|
|||
|
||||
local path = string.gsub(..., "[^.]+$", "")
|
||||
local Base = require(path.."Base")
|
||||
local Expression = require(path.."Expression")
|
||||
local util = require(path.."util")
|
||||
|
||||
local signs = util.signs
|
||||
|
@ -110,14 +109,6 @@ function Preproc:check(s, i, tt)
|
|||
end
|
||||
|
||||
-- Evaluate a token in place: EXPR tokens are run through the expression
-- evaluator and converted to NUM tokens, then the token is passed to
-- self:lookup for symbol resolution.
function Preproc:evaluate(t)
    if t.tt == 'EXPR' then
        local result, err = self.expr:eval(t.tok)
        if err then
            -- FIX: corrected typo "evaulate" -> "evaluate" in the
            -- user-facing error message
            self:error('failed to evaluate ('..t.tok..')', err)
        end
        t.tt = 'NUM'
        t.tok = result
    end
    self:lookup(t)
end
|
||||
|
||||
|
@ -125,7 +116,6 @@ function Preproc:process(statements)
|
|||
self.variables = {}
|
||||
self.plus_labels = {} -- constructed forwards
|
||||
self.minus_labels = {} -- constructed backwards
|
||||
self.expr = Expression(self.variables)
|
||||
|
||||
-- first pass: resolve variables and collect relative labels
|
||||
local new_statements = {}
|
||||
|
|
|
@ -18,7 +18,6 @@ end
|
|||
|
||||
TokenIter.arg_types = {
|
||||
NUM = true,
|
||||
EXPR = true,
|
||||
REG = true,
|
||||
VARSYM = true,
|
||||
LABELSYM = true,
|
||||
|
@ -157,7 +156,6 @@ end
|
|||
function TokenIter:const(relative, no_label)
|
||||
local good = {
|
||||
NUM = true,
|
||||
EXPR = true,
|
||||
VARSYM = true,
|
||||
LABELSYM = true,
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue