From cdc0f8edb2ef7c5cf9f0399d26b4a37a2a06dfee Mon Sep 17 00:00:00 2001
From: Connor Olding
Date: Fri, 14 Oct 2016 09:19:25 -0700
Subject: [PATCH] add a barebones expression parser

at the moment, this probably only works in directives.
some of the operators are still unimplemented,
and the errors are poor.
there will be support for accessing variables in the future.
---
note: lips/Expression.lua and one error message in lips/Preproc.lua were
revised after the original commit, so the commit id above and the blob ids
on the "index" lines no longer describe this content. indentation was
re-created with four spaces per level.

 lips/Dumper.lua     |   7 +
 lips/Expression.lua | 385 ++++++++++++++++++++++++++++++++++++++++++++
 lips/Lexer.lua      |  34 ++++
 lips/Muncher.lua    |   8 +-
 lips/Preproc.lua    |  21 ++-
 5 files changed, 453 insertions(+), 2 deletions(-)
 create mode 100644 lips/Expression.lua

diff --git a/lips/Dumper.lua b/lips/Dumper.lua
index 0347de6..195c9ba 100644
--- a/lips/Dumper.lua
+++ b/lips/Dumper.lua
@@ -243,6 +243,10 @@ function Dumper:load(statements)
         self.fn = s.fn
         self.line = s.line
         if s.type:sub(1, 1) == '!' then
+            if s[1] and s[1].tt == 'EXPR' then
+                self:error('unevaluated expression')
+            end
+
             if s.type == '!LABEL' then
                 self.labels[s[1].tok] = self:pc()
             elseif s.type == '!DATA' then
@@ -255,6 +259,7 @@ function Dumper:load(statements)
             elseif s.type == '!BASE' then
                 self.base = s[1].tok
                 insert(new_statements, s)
+
             elseif s.type == '!PUSH' or s.type == '!POP' then
                 local thistype = s.type:sub(2):lower()
                 for i, t in ipairs(s) do
@@ -300,6 +305,7 @@ function Dumper:load(statements)
                         insert(new_statements, s)
                     end
                 end
+
             elseif s.type == '!ALIGN' or s.type == '!SKIP' then
                 local length, content
                 if s.type == '!ALIGN' then
@@ -332,6 +338,7 @@ function Dumper:load(statements)
                 else
                     -- length is 0, noop
                 end
+
             else
                 error('Internal Error: unknown statement, got '..s.type)
             end
diff --git a/lips/Expression.lua b/lips/Expression.lua
new file mode 100644
index 0000000..4d4cfcb
--- /dev/null
+++ b/lips/Expression.lua
@@ -0,0 +1,385 @@
+local insert = table.insert
+
+local path = string.gsub(..., "[^.]+$", "")
+local Base = require(path.."Base")
+
+-- Expression evaluates small infix arithmetic expressions.
+-- lex() turns a string into tokens, shunt() reorders the tokens into
+-- postfix order, and eval() runs the postfix tokens on a value stack.
+local Expression = Base:extend()
+
+-- binding strength of every operator; a larger number binds tighter.
+-- prefix operators are keyed as 'unary ' followed by their symbol.
+Expression.precedence = {
+    -- python-ish precedence
+    [","] = -1,
+    ["or"] = 0,
+    ["||"] = 0,
+    ["xor"] = 1,
+    ["and"] = 2,
+    ["&&"] = 2,
+    ["unary not"] = 3,
+    ["=="] = 5,
+    ["!="] = 5,
+    ["<"] = 5,
+    [">"] = 5,
+    ["<="] = 5,
+    [">="] = 5,
+    ["|"] = 10,
+    ["^"] = 11,
+    ["&"] = 12,
+    ["<<"] = 13,
+    [">>"] = 13,
+    ["+"] = 20,
+    ["-"] = 20,
+    ["*"] = 21,
+    ["/"] = 21,
+    ["//"] = 21,
+    ["%"] = 21,
+    ["%%"] = 21,
+    ["unary !"] = 30,
+    ["unary ~"] = 30,
+    ["unary +"] = 30,
+    ["unary -"] = 30,
+    -- note: precedence of 40 is hardcoded for right-left association
+    ["**"] = 40,
+}
+
+-- implementations of the prefix operators, keyed by symbol.
+-- note: 'not' and '!' yield Lua booleans rather than numbers.
+Expression.unary_ops = {
+    ["not"] = function(a) return a == 0 end,
+    ["!"] = function(a) return a == 0 end,
+--  ["~"] = function(a) return F(~I(a)) end,
+    ["+"] = function(a) return a end,
+    ["-"] = function(a) return -a end,
+}
+
+-- implementations of the infix operators, keyed by symbol.
+-- the commented-out operators are still lexed; eval() reports them as unknown.
+Expression.binary_ops = {
+    [","] = function(a, b) return b end,
+    ["or"] = function(a, b) return a or b end,
+    ["||"] = function(a, b) return a or b end,
+    ["xor"] = function(a, b) return (a or b) and not (a and b) end,
+    ["and"] = function(a, b) return a and b end,
+    ["&&"] = function(a, b) return a and b end,
+    ["=="] = function(a, b) return a == b end,
+    ["!="] = function(a, b) return a ~= b end,
+    ["<"] = function(a, b) return a < b end,
+    [">"] = function(a, b) return a > b end,
+    ["<="] = function(a, b) return a <= b end,
+    [">="] = function(a, b) return a >= b end,
+--  ["|"] = function(a, b) return F(I(a) | I(b)) end,
+--  ["^"] = function(a, b) return F(I(a) ^ I(b)) end,
+--  ["&"] = function(a, b) return F(I(a) & I(b)) end,
+--  ["<<"] = function(a, b) return F(I(a) << I(b)) end,
+--  [">>"] = function(a, b) return F(I(a) >> I(b)) end,
+    ["+"] = function(a, b) return a + b end,
+    ["-"] = function(a, b) return a - b end,
+    ["*"] = function(a, b) return a * b end,
+    ["/"] = function(a, b) return a / b end,
+--  ["//"] = function(a, b) return trunc(a / trunc(b)) end,
+--  ["%"] = function(a, b) return fmod(a, b) end,
+--  ["%%"] = function(a, b) return trunc(fmod(a, trunc(b))) end,
+    ["**"] = function(a, b) return a^b end,
+}
+
+-- operator symbols grouped by length, so that the lexer can try the
+-- longest candidates first ('**' before '*', '!=' before '!').
+local operators = {}
+local operators_maxlen = 0
+do
+    local seen = {}
+    for k in pairs(Expression.precedence) do
+        -- only the bare symbol of a unary operator appears in the input,
+        -- so strip the prefix before measuring and storing it.
+        local op = k:find('^unary ') and k:sub(#'unary ' + 1) or k
+        if not seen[op] then -- '+' and '-' are both unary and binary
+            seen[op] = true
+            if operators[#op] == nil then
+                operators[#op] = {}
+            end
+            insert(operators[#op], op)
+            if #op > operators_maxlen then
+                operators_maxlen = #op
+            end
+        end
+    end
+end
+
+local function match_operator(str)
+    -- returns the operator at the beginning of a string, or nil
+    for i=operators_maxlen, 1, -1 do
+        if operators[i] ~= nil then
+            local substr = str:sub(1, i)
+            for _, op in ipairs(operators[i]) do
+                if substr == op then
+                    return substr
+                end
+            end
+        end
+    end
+end
+
+local function is_plain_integer(digits)
+    -- true for '0' and for digit strings without a leading zero
+    return digits == '0' or digits:find('^[1-9]%d*$') ~= nil
+end
+
+local function match_number(str)
+    -- returns the value and the length of the numeric literal at the
+    -- beginning of a string, or nil if there is no valid literal.
+    -- note: Lua patterns have no alternation or optional groups,
+    -- so each part of a literal is captured and checked separately.
+
+    -- hexadecimal, e.g. 0x1F
+    local digits = str:match('^0x(%x+)')
+    if digits then
+        return tonumber(digits, 16), #digits + 2
+    end
+
+    -- binary with a 'b' suffix, e.g. 1011b
+    digits = str:match('^([01]+)b')
+    if digits then
+        return tonumber(digits, 2), #digits + 1
+    end
+
+    -- decimal: integer part, optional fraction, optional exponent
+    local int, dot, frac = str:match('^(%d*)(%.?)(%d*)')
+    local mantissa = int..dot..frac
+    local exp = str:sub(#mantissa + 1):match('^e(%d+)')
+
+    if dot == '' and exp == nil then
+        if int:find('^0[0-7]+$') then
+            return tonumber(int, 8), #int -- a leading zero means octal
+        elseif is_plain_integer(int) then
+            return tonumber(int), #int
+        end
+        return nil
+    end
+
+    -- floating point: '1.5', '1.', '.5' and '1e3' are all accepted
+    if int == '' then
+        if frac == '' then return nil end -- a lone '.'
+    elseif not is_plain_integer(int) then
+        return nil
+    end
+    if exp ~= nil then
+        if not is_plain_integer(exp) then return nil end
+        mantissa = mantissa..'e'..exp
+    end
+    return tonumber(mantissa), #mantissa
+end
+
+function Expression:lex1(str, tokens)
+    -- splits str into tokens and appends them to the tokens table.
+    -- returns an error message on failure, or nothing on success.
+    local pos = 1
+    local rest = str
+    local function consume(n)
+        pos = pos + n
+        rest = rest:sub(n + 1)
+    end
+
+    local considered = ''
+    local function consider(pattern)
+        local start, stop = rest:find('^'..pattern)
+        if start == nil then
+            considered = ''
+            return false
+        end
+        considered = rest:sub(start, stop)
+        return true
+    end
+
+    local function consider_operator()
+        local op = match_operator(rest)
+        if op == nil then
+            considered = ''
+            return false
+        end
+        considered = op
+        return true
+    end
+
+    while pos <= #str do
+        local old_pos = pos
+        local here = " (#"..tostring(pos)..")"
+        if consider(' +') then
+            consume(#considered)
+        elseif consider('[0-9.]') then
+            local num, len = match_number(rest)
+            if num == nil then
+                return "invalid number"..here
+            end
+            insert(tokens, {type='number', value=num})
+            consume(len)
+        elseif consider('[(]') then
+            insert(tokens, {type='opening', value=considered})
+            consume(#considered)
+        elseif consider('[)]') then
+            insert(tokens, {type='closing', value=considered})
+            consume(#considered)
+        elseif consider_operator() then
+            insert(tokens, {type='operator', value=considered})
+            consume(#considered)
+        else
+            local chr = rest:sub(1, 1)
+            return "unexpected character '"..chr.."'"..here
+        end
+        if pos == old_pos then
+            error("Internal Error: expression parser is stuck")
+        end
+    end
+end
+
+function Expression:lex2(tokens)
+    -- detect unary operators: an operator is a prefix operator unless
+    -- it directly follows a number or a closing parenthesis.
+    -- TODO: this is probably not the best way to do this
+    local was_numeric = false
+    local was_closing = false
+    for i, t in ipairs(tokens) do
+        if t.type == "operator" and not was_numeric and not was_closing then
+            t.type = "unary"
+        end
+        was_numeric = t.type == 'number'
+        was_closing = t.type == 'closing'
+    end
+end
+
+function Expression:lex(str)
+    -- returns the tokens of an expression string, followed by
+    -- an error message if the string could not be lexed.
+    local tokens = {}
+    local err = self:lex1(str, tokens) -- local: don't leak a global
+    if err then return tokens, err end
+    err = self:lex2(tokens)
+    return tokens, err
+end
+
+function Expression:shunt(tokens)
+    -- shunting yard algorithm: reorders infix tokens into postfix order.
+    -- returns the reordered tokens, followed by an error message if any.
+    local shunted = {}
+    local stack = {}
+
+    local operator_types = {
+        unary = true,
+        operator = true,
+    }
+
+    local function precedence_of(t)
+        -- unary operators have their own entries in the precedence table
+        local fullname = t.type == 'unary' and 'unary '..t.value or t.value
+        return self.precedence[fullname]
+    end
+
+    for _, t in ipairs(tokens) do
+        if t.type == 'number' then
+            insert(shunted, t)
+        elseif t.type == 'opening' then
+            insert(stack, t)
+        elseif t.type == 'closing' then
+            while #stack > 0 and stack[#stack].type ~= 'opening' do
+                insert(shunted, stack[#stack])
+                stack[#stack] = nil
+            end
+            if #stack == 0 then return shunted, 'missing opening parenthesis' end
+            stack[#stack] = nil
+        elseif t.type == 'unary' then
+            -- a prefix operator has no left operand, so no pending operator
+            -- can be applied yet; it simply waits for its own operand.
+            if precedence_of(t) == nil then return shunted, 'unknown operator' end
+            insert(stack, t)
+        elseif t.type == 'operator' then
+            local pre = precedence_of(t)
+            if pre == nil then return shunted, 'unknown operator' end
+            if pre == 40 then pre = pre + 1 end -- right-associative hack
+            while #stack > 0 do
+                local tail = stack[#stack]
+                if not operator_types[tail.type] then break end
+                local dpre = pre - precedence_of(tail)
+                if dpre > 0 then break end
+                insert(shunted, tail)
+                stack[#stack] = nil
+            end
+            insert(stack, t)
+        else
+            error('Internal Error: unknown type of expression token')
+        end
+    end
+
+    while #stack > 0 do
+        local t = stack[#stack]
+        if t.type == 'opening' then return shunted, 'missing closing parenthesis' end
+        insert(shunted, t)
+        stack[#stack] = nil
+    end
+
+    return shunted, nil
+end
+
+function Expression:parse(str)
+    -- lexes and shunts an expression string.
+    -- returns postfix tokens, followed by an error message if any.
+    local tokens, err = self:lex(str)
+    if err then return tokens, err end
+    tokens, err = self:shunt(tokens)
+    --for i, v in ipairs(tokens) do print(i, v.type, v.value) end
+    return tokens, err
+end
+
+function Expression:eval(tokens_or_str)
+    -- evaluates an expression string, or postfix tokens from parse().
+    -- returns the result, or 0 followed by an error message on failure.
+    local tokens, err
+    if type(tokens_or_str) == 'string' then
+        tokens, err = self:parse(tokens_or_str)
+        if err then return 0, err end
+    elseif type(tokens_or_str) == 'table' then
+        tokens = tokens_or_str
+    else
+        return 0, "eval(): argument is neither token table nor string"
+    end
+
+    local stack = {}
+    local popped
+    local function pop()
+        -- moves the top of the stack into popped.
+        -- returns true if the stack was empty.
+        if #stack == 0 then return true end
+        popped = stack[#stack]
+        stack[#stack] = nil
+        return false
+    end
+
+    for i, t in ipairs(tokens) do
+        if t.type == 'number' then
+            insert(stack, t.value)
+        elseif t.type == 'unary' then
+            if pop() then return 0, "missing arguments for unary" end
+            local f = self.unary_ops[t.value]
+            if f == nil then return 0, "unknown unary" end
+            insert(stack, f(popped))
+        elseif t.type == 'operator' then
+            if pop() then return 0, "missing arguments for operator" end
+            local b = popped
+            if pop() then return 0, "missing arguments for operator" end
+            local a = popped
+            local f = self.binary_ops[t.value]
+            if f == nil then return 0, "unknown operator" end
+            insert(stack, f(a, b))
+        else
+            return 0, "eval(): unknown token"
+        end
+    end
+
+    if #stack > 1 then return 0, "too many arguments" end
+    if #stack == 0 then return 0, "no arguments" end
+
+    return stack[1], nil
+end
+
+return Expression
diff --git a/lips/Lexer.lua b/lips/Lexer.lua
index 91c8fb8..fe239e1 100644
--- a/lips/Lexer.lua
+++ b/lips/Lexer.lua
@@ -329,6 +329,38 @@ function Lexer:lex_include_binary(_yield)
     end
 end
 
+function Lexer:lex_expression(yield)
+    if self.chr ~= '(' then
+        self:error('expected opening parenthesis for expression')
+    end
+    self:nextc()
+
+    local expr = ""
+    local depth = 1
+    while true do
+        if self.chr == '\n' then
+            self:error('unexpected newline; incomplete expression')
+        elseif self.ord == self.EOF then
+            self:nextc()
+            self:error('unexpected EOF; incomplete expression')
+        elseif self.chr == '(' then
+            depth = depth + 1
+            self:nextc()
+            expr = expr..'('
+        elseif self.chr == ')' then
+            depth = depth - 1
+            self:nextc()
+            if depth == 0 then break end
+            expr = expr..')'
+        else
+            expr = expr..self.chr
+            self:nextc()
+        end
+    end
+
+    yield('EXPR', expr)
+end
+
 function Lexer:lex(_yield)
     local function yield(tt, tok)
         return _yield(tt, tok, self.fn, self.line)
@@ -410,6 +442,8 @@ function Lexer:lex(_yield)
                 end
             elseif self.chr:find('[01]') then
                 yield('NUM', self:read_binary())
+            elseif self.chr == '(' then
+                self:lex_expression(yield)
             else
                 self:error('unknown % syntax')
             end
diff --git a/lips/Muncher.lua b/lips/Muncher.lua
index a9794ed..984eeef 100644
--- a/lips/Muncher.lua
+++ b/lips/Muncher.lua
@@ -115,7 +115,13 @@ function Muncher:deref()
 end
 
 function Muncher:const(relative, no_label)
-    if self.tt ~= 'NUM' and self.tt ~= 'VARSYM' and self.tt ~= 'LABELSYM' then
+    local good = {
+        NUM = true,
+        EXPR = true,
+        VARSYM = true,
+        LABELSYM = true,
+    }
+    if not good[self.tt] then
         self:error('expected constant', self.tt)
     end
     if no_label and self.tt == 'LABELSYM' then
diff --git a/lips/Preproc.lua b/lips/Preproc.lua
index c38bf7f..3f5dd96 100644
--- a/lips/Preproc.lua
+++ b/lips/Preproc.lua
@@ -5,6 +5,7 @@ local data = require(path.."data")
 local overrides = require(path.."overrides")
 local Statement = require(path.."Statement")
 local Reader = require(path.."Reader")
+local Expression = require(path.."Expression")
 
 local abs = math.abs
 
@@ -172,6 +173,24 @@ function Preproc:process(statements)
         end
     end
 
+    -- third pass: evaluate constant expressions
+    for i=1, #new_statements do
+        local s = new_statements[i]
+        self.fn = s.fn
+        self.line = s.line
+        for j, t in ipairs(s) do
+            if t.tt == 'EXPR' then
+                local expr = Expression()
+                local result, err = expr:eval(t.tok)
+                if err then
+                    self:error('failed to evaluate ('..t.tok..')', err)
+                end
+                t.tt = 'NUM'
+                t.tok = result
+            end
+        end
+    end
+
     return new_statements
 end
 
@@ -214,7 +233,7 @@ function Preproc:pop(kind)
 end
 
 function Preproc:expand(statements)
-    -- third pass: expand pseudo-instructions and register arguments
+    -- fourth pass: expand pseudo-instructions and register arguments
     self.statements = {}
     for i=1, #statements do
         local s = statements[i]