From e1ae1fdd64be525f5efac383d693df118445669b Mon Sep 17 00:00:00 2001
From: Connor Olding <cloningdonor@gmail.com>
Date: Wed, 23 Nov 2016 12:12:37 -0800
Subject: [PATCH] optimize incbin via string passthru

---
 NOTES.md           |  2 +-
 lips/Collector.lua |  4 ++++
 lips/Dumper.lua    | 14 ++++++++++++++
 lips/Lexer.lua     |  9 ++-------
 4 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/NOTES.md b/NOTES.md
index 71f2cac..e2c8980 100644
--- a/NOTES.md
+++ b/NOTES.md
@@ -72,7 +72,7 @@ currently there is:
 .debug_asm (default false)
     is arguably the least useful of states to dump in.
     this will dump statements after being reduced to
-    !ORG and !DATA statements. anything else is a bug.
+    !ORG, !DATA, and !BIN statements. anything else is a bug.
     the values of the !BYTE statements are not printed.
 ```
 
diff --git a/lips/Collector.lua b/lips/Collector.lua
index b258186..4e1f470 100644
--- a/lips/Collector.lua
+++ b/lips/Collector.lua
@@ -33,6 +33,7 @@ function Collector:push_data(datum, size)
     }
     --]]
 
+    -- FIXME: optimize the hell out of this garbage, preferably in the lexer
     -- TODO: consider not scrunching data statements, just their tokens
 
     if type(datum) == 'number' then
@@ -116,6 +117,9 @@ function Collector:directive()
                 add(name, size, self:const(nil, 'no label'))
             end
         end
+    elseif name == 'BIN' then
+        -- FIXME: not a real directive, just a workaround
+        add(name, self:string())
     elseif name == 'BYTE' or name == 'HALFWORD' or name == 'WORD' then
         self:push_data(self:const(), name)
         while not self:is_EOL() do
diff --git a/lips/Dumper.lua b/lips/Dumper.lua
index 195c9ba..7a69553 100644
--- a/lips/Dumper.lua
+++ b/lips/Dumper.lua
@@ -1,3 +1,4 @@
+local byte = string.byte
 local floor = math.floor
 local format = string.format
 local insert = table.insert
@@ -253,6 +254,10 @@ function Dumper:load(statements)
                 s.length = util.measure_data(s) -- cache for next pass
                 self.pos = self.pos + s.length
                 insert(new_statements, s)
+            elseif s.type == '!BIN' then
+                s.length = #s[1].tok
+                self.pos = self.pos + s.length
+                insert(new_statements, s)
             elseif s.type == '!ORG' then
                 self.pos = s[1].tok
                 insert(new_statements, s)
@@ -378,6 +383,9 @@ function Dumper:load(statements)
             end
             self.pos = self.pos + (s.length or util.measure_data(s))
             insert(new_statements, s)
+        elseif s.type == '!BIN' then
+            self.pos = self.pos + s.length
+            insert(new_statements, s)
         elseif s.type == '!ORG' then
             self.pos = s[1].tok
             insert(new_statements, s)
@@ -423,6 +431,12 @@ function Dumper:dump()
                     error('Internal Error: unknown !DATA token')
                 end
             end
+        elseif s.type == '!BIN' then
+            local data = s[1].tok
+            for i=1, #data do
+                self.writer(self.pos, byte(data, i))
+                self.pos = self.pos + 1
+            end
         elseif s.type == '!ORG' then
             self.pos = s[1].tok
         else
diff --git a/lips/Lexer.lua b/lips/Lexer.lua
index fe239e1..f479fbb 100644
--- a/lips/Lexer.lua
+++ b/lips/Lexer.lua
@@ -320,13 +320,8 @@ function Lexer:lex_include_binary(_yield)
         fn = self.options.path..fn
     end
     local data = util.readfile(fn, true)
-
-    -- FIXME: this allocates a table for each byte.
-    --        this could easily cause performance issues on big files.
-    _yield('DIR', 'BYTE', fn, 0)
-    for b in string.gfind(data, '.') do
-        _yield('NUM', string.byte(b), fn, 0)
-    end
+    _yield('DIR', 'BIN', fn, 0)
+    _yield('STRING', data, fn, 0)
 end
 
 function Lexer:lex_expression(yield)