diff --git a/NOTES.md b/NOTES.md index 12d3306..42c0bae 100644 --- a/NOTES.md +++ b/NOTES.md @@ -63,12 +63,11 @@ currently there is: dumps the statements table after tokenizing and collecting into statements. this is after UNARY and RELLABELSYM tokens have been disambiguated. .debug_pre (default false) - dumps statements after basic preprocessing: - variable substitution, expression parsing, - relative label substitution, etc. + dumps statements after basic preprocessing, + e.g. variable substitution, relative label substitution, etc. .debug_post (default false) - dumps statements after expanding preprocessor commands: - pseudo-instructions, expression evaluation, etc. + dumps statements after expanding preprocessor commands, + e.g. pseudo-instructions. .debug_asm (default false) is arguably the least useful of states to dump in. this will dump statements after being reduced to @@ -155,9 +154,6 @@ the appropriate files are placed and tokenized inline, not unlike in C. the `HEX` directive is its own mini-language and thus has its own mini-lexer. -expressions are not parsed nor lexed here. -they are simply extracted as whole strings for later processing. - the `yield` closure wraps around the `_yield` function argument to pass error-handling metadata: the current filename and line number. @@ -177,7 +173,7 @@ there's a couple TODOs and FIXMEs in here. collects tokens into statements. statements are basically our form of an abstract syntax tree, -except statements don't need the depth of a tree (outside of expressions) +except statements don't need the depth of a tree so they're completely flat. most of this is just validation of the lexed tokens. @@ -202,7 +198,6 @@ preprocessing is split into two passes: ### pass 1 resolves variables by substitution, -parses and evaluates expressions, and collects relative labels. this pass starts by creating a new, empty table of statements to fill. @@ -211,8 +206,7 @@ statements are passed through, possibly modified, or read and left-out. the reason for the copying is that taking indexes into an array (statements) that you're removing elements from is A Bad Idea. -all expression tokens are evaluated, -and all variable tokens are substituted. +all variable tokens are substituted. variable-declaring statements (`!VAR`) are read to a dictionary table for future substitution of their keys with values. @@ -253,26 +247,7 @@ expansion is kinda messy. ## Expression -handles parsing and evaluation of simple (usually mathematical) expressions. - -this class is actually completely independent of the rest of lips, -besides the requirement of the `Base` class, which isn't specific to lips. - -### room for improvement - -right now, this is just a quick and dirty port of some -C++ code i wrote a while back. so basically, everything could be improved. - -bitwise operators need to be implemented. -possibly with LuaJIT and a Lua 5.1 fallback. -maybe that should be its own file? - -in the long term, -i'll need to move lexing expressions to the main Lexer class, -and do proper parsing to an AST in Collector. -this will unify the syntax, -and allow for inline expressions, e.g: -`lw s0, 5*4(sp)`. +expressions are not implemented in this branch. ## Dumper diff --git a/TODO b/TODO index c91cf90..cc4ef05 100644 --- a/TODO +++ b/TODO @@ -20,22 +20,10 @@ add basic command-line interface (patch.lua) improve writer performance (just copypaste what you did in patch.lua) -allow generation of shared object files (zelda overlays specifically) - -> http://wiki.cloudmodding.com/oot/Overlays#Relocation_Entry_Format - -macros: - add 'em - implement push/pop/jpop as macros - be able to point to specific args of push/pop using variables - procrastinating the heck over: - lex expressions in Lexer instead of its own separate lexer write tests for everything (try to focus on code paths and edge cases) - make breaking changes to syntax for the sake of macros, expressions etc. - (and keep a branch with the old syntax for good measure, feature-frozen) low priority: improve parser terminology add delay slot warnings externally document more stuff like syntax - add file-reading directives (e.g. for automatic hook injection macros) diff --git a/lips/Dumper.lua b/lips/Dumper.lua index 7a69553..82dd7ff 100644 --- a/lips/Dumper.lua +++ b/lips/Dumper.lua @@ -244,10 +244,6 @@ function Dumper:load(statements) self.fn = s.fn self.line = s.line if s.type:sub(1, 1) == '!' then - if s[1] and s[1].tt == 'EXPR' then - self:error('unevaluated expression') - end - if s.type == '!LABEL' then self.labels[s[1].tok] = self:pc() elseif s.type == '!DATA' then diff --git a/lips/Expression.lua b/lips/Expression.lua deleted file mode 100644 index 5634c3a..0000000 --- a/lips/Expression.lua +++ /dev/null @@ -1,341 +0,0 @@ -local insert = table.insert - -local path = string.gsub(..., "[^.]+$", "") -local Base = require(path.."Base") - -local Expression = Base:extend() -function Expression:init(variables) - self.variables = variables or {} -end - -Expression.precedence = { - -- python-ish precedence - [","] = -1, - ["or"] = 0, - ["||"] = 0, - ["xor"] = 1, - ["and"] = 2, - ["&&"] = 2, - ["unary not"] = 3, - ["=="] = 5, - ["!="] = 5, - ["<"] = 5, - [">"] = 5, - ["<="] = 5, - [">="] = 5, - ["|"] = 10, - ["^"] = 11, - ["&"] = 12, - ["<<"] = 13, - [">>"] = 13, - ["+"] = 20, - ["-"] = 20, - ["*"] = 21, - ["/"] = 21, - ["//"] = 21, - ["%"] = 21, - ["%%"] = 21, - ["unary !"] = 30, - ["unary ~"] = 30, - ["unary +"] = 30, - ["unary -"] = 30, - -- note: precedence of 40 is hardcoded for right-left association - -- TODO: also hardcode unary handling on right-hand side of operator - ["**"] = 40, -} - -Expression.unary_ops = { - ["not"] = function(a) return a == 0 end, - ["!"] = function(a) return a == 0 end, --- ["~"] = function(a) return F(~I(a)) end, - ["+"] = function(a) return a end, - ["-"] = function(a) return -a end, -} - -Expression.binary_ops = { - [","] = function(a, b) return b end, - ["or"] = function(a, b) return a or b end, - ["||"] = function(a, b) return a or b end, - ["xor"] = function(a, b) return (a or b) and not (a and b) end, - ["and"] = function(a, b) return a and b end, - ["&&"] = function(a, b) return a and b end, - ["=="] = function(a, b) return a == b end, - ["!="] = function(a, b) return a ~= b end, - ["<"] = function(a, b) return a < b end, - [">"] = function(a, b) return a > b end, - ["<="] = function(a, b) return a <= b end, - [">="] = function(a, b) return a >= b end, --- ["|"] = function(a, b) return F(I(a) | I(b)) end, --- ["^"] = function(a, b) return F(I(a) ^ I(b)) end, --- ["&"] = function(a, b) return F(I(a) & I(b)) end, --- ["<<"] = function(a, b) return F(I(a) << I(b)) end, --- [">>"] = function(a, b) return F(I(a) >> I(b)) end, - ["+"] = function(a, b) return a + b end, - ["-"] = function(a, b) return a - b end, - ["*"] = function(a, b) return a * b end, - ["/"] = function(a, b) return a / b end, --- ["//"] = function(a, b) return trunc(a / trunc(b)) end, --- ["%"] = function(a, b) return fmod(a, b) end, --- ["%%"] = function(a, b) return trunc(fmod(a, trunc(b))) end, - ["**"] = function(a, b) return a^b end, -} - -local operators = {} -local operators_maxlen = 0 -do - -- reorder operators so we can match the longest strings first - for k, v in pairs(Expression.precedence) do - if operators[#k] == nil then - operators[#k] = {} - end - local op = k:find('^unary ') and k:sub(#'unary ' + 1) or k - insert(operators[#k], op) - if #k > operators_maxlen then - operators_maxlen = #k - end - end -end - -local function match_operator(str) - -- returns the operator at the beginning of a string, or nil - for i=operators_maxlen, 1, -1 do - if operators[i] ~= nil then - local substr = str:sub(1, i) - for _, op in ipairs(operators[i]) do - if substr == op then - return substr - end - end - end - end -end - -function Expression:lex1(str, tokens) - local pos = 1 - local rest = str - local function consume(n) - pos = pos + n - rest = rest:sub(n + 1) - end - - local considered = '' - local function consider(pattern) - local start, stop = rest:find('^'..pattern) - if start == nil then - considered = '' - return false - end - considered = rest:sub(start, stop) - return true - end - - local function consider_operator() - local op = match_operator(rest) - if op == nil then - considered = '' - return false - end - considered = op - return true - end - - while pos <= #str do - local old_pos = pos - local here = " (#"..tostring(pos)..")" - if consider(' +') then - consume(#considered) - elseif consider('[0-9.]') or consider('[%%$#]') then - local num - if consider('((0|[1-9][0-9]*)%.[0-9]*|%.[0-9]+)(e0|e[1-9][0-9]*)?') then - num = tonumber(considered) - elseif consider('(0|[1-9][0-9]*)e(0|[1-9][0-9]*)') then - num = tonumber(considered) - elseif consider('%%[0-9]+') then - if considered:match('[2-9]') then - return "bad binary number: "..considered..here - end - num = tonumber(considered:sub(2), 2) - elseif consider('$[0-9A-Fa-f]+') then - num = tonumber(considered:sub(2), 16) - elseif consider('0x[0-9A-Fa-f]+') then - num = tonumber(considered:sub(3), 16) - elseif consider('0o[0-9]+') then - if considered:match('[89]') then - return "bad octal number: "..considered..here - end - num = tonumber(considered:sub(3), 8) - elseif consider('0b[0-9]+') then - if considered:match('[2-9]') then - return "bad binary number: "..considered..here - end - num = tonumber(considered:sub(3), 2) - elseif consider('0[0-9]+') then - if considered:match('[89]') then - return "bad octal number: "..considered..here - end - num = tonumber(considered:sub(2), 8) - elseif consider('#[0-9]+') then - num = tonumber(considered:sub(2)) - elseif consider('[0-9]+') then - num = tonumber(considered) - end - if num == nil then - return "invalid number"..here - end - insert(tokens, {type='number', value=num}) - consume(#considered) - elseif consider('[(]') then - insert(tokens, {type='opening', value=considered}) - consume(#considered) - elseif consider('[)]') then - insert(tokens, {type='closing', value=considered}) - consume(#considered) - elseif consider_operator() then - insert(tokens, {type='operator', value=considered}) - consume(#considered) - elseif consider('[%w_]+') then - local num = self.variables[considered] - if num == nil then - return 'undefined variable "'..considered..'"' - end - insert(tokens, {type='number', value=num}) - consume(#considered) - else - local chr = rest:sub(1, 1) - return "unexpected character '"..chr.."'"..here - end - if pos == old_pos then - error("Internal Error: expression parser is stuck") - end - end -end - -function Expression:lex2(tokens) - -- detect unary operators - -- TODO: this is probably not the best way to do this - local was_numeric = false - local was_closing = false - for i, t in ipairs(tokens) do - if t.type == "operator" and not was_numeric and not was_closing then - t.type = "unary"; - end - was_numeric = t.type == 'number' - was_closing = t.type == 'closing' - end -end - -function Expression:lex(str) - local tokens = {} - err = self:lex1(str, tokens) - if err then return tokens, err end - err = self:lex2(tokens) - return tokens, err -end - -function Expression:shunt(tokens) - -- shunting yard algorithm - local shunted = {} - local stack = {} - - local operator_types = { - unary = true, - operator = true, - } - - for _, t in ipairs(tokens) do - if t.type == 'number' then - insert(shunted, t) - elseif t.type == 'opening' then - insert(stack, t) - elseif t.type == 'closing' then - while #stack > 0 and stack[#stack].type ~= 'opening' do - insert(shunted, stack[#stack]) - stack[#stack] = nil - end - if #stack == 0 then return shunted, 'missing opening parenthesis' end - stack[#stack] = nil - elseif t.type == 'operator' or t.type == 'unary' then - local fullname = t.type == 'unary' and 'unary '..t.value or t.value - local pre = self.precedence[fullname] - if pre == nil then return shunted, 'unknown operator' end - if pre == 40 then pre = pre + 1 end -- right-associative hack - while #stack > 0 do - local tail = stack[#stack] - if not operator_types[tail.type] then break end - local dpre = pre - self.precedence[tail.value] - if dpre > 0 then break end - insert(shunted, tail) - stack[#stack] = nil - end - insert(stack, t) - else - error('Internal Error: unknown type of expression token') - end - end - - while #stack > 0 do - local t = stack[#stack] - if t.type == 'opening' then return shunted, 'missing closing parenthesis' end - insert(shunted, t) - stack[#stack] = nil - end - - return shunted, nil -end - -function Expression:parse(str) - local tokens, err = self:lex(str) - if err then return tokens, err end - tokens, err = self:shunt(tokens) - --for i, v in ipairs(tokens) do print(i, v.type, v.value) end - return tokens, err -end - -function Expression:eval(tokens_or_str) - local tokens, err - if type(tokens_or_str) == 'string' then - tokens, err = self:parse(tokens_or_str) - if err then return 0, err end - elseif type(tokens_or_str) == 'table' then - tokens = tokens_or_str - else - return 0, "eval(): argument is neither token table nor string" - end - - local stack = {} - local popped - local function pop() - if #stack == 0 then return true end - popped = stack[#stack] - stack[#stack] = nil - return false - end - - for i, t in ipairs(tokens) do - if t.type == 'number' then - insert(stack, t.value) - elseif t.type == 'unary' then - if pop() then return 0, "missing arguments for unary" end - local f = self.unary_ops[t.value] - if f == nil then return 0, "unknown unary" end - insert(stack, f(popped)) - elseif t.type == 'operator' then - if pop() then return 0, "missing arguments for operator" end - local b = popped - if pop() then return 0, "missing arguments for operator" end - local a = popped - local f = self.binary_ops[t.value] - if f == nil then return 0, "unknown operator" end - insert(stack, f(a, b)) - else - return 0, "eval(): unknown token" - end - end - - if #stack > 1 then return 0, "too many arguments" end - if #stack == 0 then return 0, "no arguments" end - - return stack[1], nil -end - -return Expression diff --git a/lips/Lexer.lua b/lips/Lexer.lua index a8617a6..a4f3cca 100644 --- a/lips/Lexer.lua +++ b/lips/Lexer.lua @@ -341,38 +341,6 @@ function Lexer:lex_include_binary(_yield) _yield('EOF', self.EOF, self.fn, self.line) end -function Lexer:lex_expression(yield) - if self.chr ~= '(' then - self:error('expected opening parenthesis for expression') - end - self:nextc() - - local expr = "" - local depth = 1 - while true do - if self.chr == '\n' then - self:error('unexpected newline; incomplete expression') - elseif self.ord == self.EOF then - self:nextc() - self:error('unexpected EOF; incomplete expression') - elseif self.chr == '(' then - depth = depth + 1 - self:nextc() - expr = expr..'(' - elseif self.chr == ')' then - depth = depth - 1 - self:nextc() - if depth == 0 then break end - expr = expr..')' - else - expr = expr..self.chr - self:nextc() - end - end - - yield('EXPR', expr) -end - function Lexer:lex(_yield) local function yield(tt, tok) return _yield(tt, tok, self.fn, self.line) @@ -416,12 +384,6 @@ function Lexer:lex(_yield) self:nextc() yield('VAR', buff) self:read_spaces() - if self.chr == '@' then - -- old syntax; nothing to do here - else - buff = self:read_chars('[^;\n]') - yield('EXPR', buff) - end elseif self.chr == ']' then self:error('unmatched closing bracket') elseif self.chr == '(' then @@ -467,8 +429,6 @@ function Lexer:lex(_yield) end elseif self.chr:find('[01]') then yield('NUM', self:read_binary()) - elseif self.chr == '(' then - self:lex_expression(yield) else self:error('unknown % syntax') end diff --git a/lips/Preproc.lua b/lips/Preproc.lua index 8c4c515..ed55066 100644 --- a/lips/Preproc.lua +++ b/lips/Preproc.lua @@ -4,7 +4,6 @@ local insert = table.insert local path = string.gsub(..., "[^.]+$", "") local Base = require(path.."Base") -local Expression = require(path.."Expression") local util = require(path.."util") local signs = util.signs @@ -110,14 +109,6 @@ function Preproc:check(s, i, tt) end function Preproc:evaluate(t) - if t.tt == 'EXPR' then - local result, err = self.expr:eval(t.tok) - if err then - self:error('failed to evaulate ('..t.tok..')', err) - end - t.tt = 'NUM' - t.tok = result - end self:lookup(t) end @@ -125,7 +116,6 @@ function Preproc:process(statements) self.variables = {} self.plus_labels = {} -- constructed forwards self.minus_labels = {} -- constructed backwards - self.expr = Expression(self.variables) -- first pass: resolve variables and collect relative labels local new_statements = {} diff --git a/lips/TokenIter.lua b/lips/TokenIter.lua index a63032f..67c8ede 100644 --- a/lips/TokenIter.lua +++ b/lips/TokenIter.lua @@ -18,7 +18,6 @@ end TokenIter.arg_types = { NUM = true, - EXPR = true, REG = true, VARSYM = true, LABELSYM = true, @@ -157,7 +156,6 @@ end function TokenIter:const(relative, no_label) local good = { NUM = true, - EXPR = true, VARSYM = true, LABELSYM = true, }