port compressor to C for speed
This commit is contained in:
parent
dd61083334
commit
e489f7cb3f
2 changed files with 221 additions and 4 deletions
193
compressor.c
Normal file
193
compressor.c
Normal file
|
@ -0,0 +1,193 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
//#include <assert.h>
|
||||
|
||||
/* Short aliases for the fixed-purpose integer types used in this file. */
typedef unsigned char u8;
typedef unsigned int u32;

/*
 * Small arithmetic helpers. These are macros, so each argument may be
 * evaluated more than once: do not pass expressions with side effects.
 */
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* Wraps (a) into [0, b) provided that (a) >= -(b). */
#define PMOD(a, b) (((a) + (b)) % (b))

/* Parameters of the sliding-window compressor. */
enum {
    BUF_LEN   = 1024,  /* size of the circular window, in bytes */
    MIN_LEN   = 3,     /* shortest match worth encoding as a back-reference */
    MAX_LEN   = 66,    /* longest match a single back-reference can encode */
    BUF_START = 0x3BE  /* window position at which writing begins */
};

/* argv[0], set by main(); used as the prefix for perror() messages. */
char *program_name;
|
||||
|
||||
long compress(const u8 *bufi, long size, u8 *bufo) {
|
||||
u8 buf[BUF_LEN] = {0};
|
||||
int buf_i = BUF_START;
|
||||
|
||||
int shift = 0;
|
||||
int shifted = 0;
|
||||
int last_shift_i = -1;
|
||||
long written = 0;
|
||||
|
||||
int i = 0;
|
||||
while (i < size) {
|
||||
const u8 *sub = bufi + i;
|
||||
int sub_len = MIN(MAX_LEN, size - i);
|
||||
int best_i = -1;
|
||||
int best_len = -1;
|
||||
|
||||
if (sub_len >= MIN_LEN) {
|
||||
for (int j = 0; j < BUF_LEN; j++) {
|
||||
int match_i = PMOD(buf_i - j, BUF_LEN);
|
||||
int match_len = 0;
|
||||
for (;;) {
|
||||
int buf_off = (match_i + match_len) % BUF_LEN;
|
||||
u8 b = buf[buf_off];
|
||||
if (b != sub[match_len]) { break; }
|
||||
// TODO: handle pseudo-writes to buffer.
|
||||
if (buf_off == buf_i) { break; }
|
||||
match_len++;
|
||||
if (match_len == MAX_LEN) { break; }
|
||||
if (match_len == sub_len) { break; }
|
||||
}
|
||||
|
||||
if (match_len < MIN_LEN) { continue; }
|
||||
if (match_len > best_len) {
|
||||
best_i = match_i;
|
||||
best_len = match_len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (last_shift_i < 0) {
|
||||
last_shift_i = written;
|
||||
bufo[written++] = 0;
|
||||
shift = 0;
|
||||
shifted = 0;
|
||||
}
|
||||
|
||||
if (best_i < 0 || best_len < 0) {
|
||||
shift >>= 1;
|
||||
shift |= 0x80;
|
||||
shifted++;
|
||||
|
||||
bufo[written++] = sub[0];
|
||||
buf[buf_i] = sub[0];
|
||||
buf_i = (buf_i + 1) % BUF_LEN;
|
||||
i++;
|
||||
} else {
|
||||
shift >>= 1;
|
||||
shifted++;
|
||||
|
||||
u8 a = best_i & 0xFF;
|
||||
u8 b = ((best_i & 0x300) >> 2) | (best_len - 3);
|
||||
bufo[written++] = a;
|
||||
bufo[written++] = b;
|
||||
|
||||
for (int j = 0; j < best_len; j++) {
|
||||
buf[buf_i] = sub[j];
|
||||
buf_i = (buf_i + 1) % BUF_LEN;
|
||||
}
|
||||
i += best_len;
|
||||
}
|
||||
|
||||
if (shifted >= 8) {
|
||||
//assert(last_shift_i != -1);
|
||||
bufo[last_shift_i] = shift;
|
||||
shift = 0;
|
||||
shifted = 0;
|
||||
last_shift_i = -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (last_shift_i >= 0) {
|
||||
bufo[last_shift_i] = shift >> (8 - shifted);
|
||||
}
|
||||
|
||||
//assert(i == size);
|
||||
|
||||
return written;
|
||||
}
|
||||
|
||||
int compress_file(const char *fp) {
|
||||
// TODO: better error handling.
|
||||
// functions here that can fail:
|
||||
// fclose
|
||||
// fopen
|
||||
// fseek
|
||||
// ftell
|
||||
// fwrite
|
||||
// malloc
|
||||
// calloc
|
||||
// free?
|
||||
|
||||
FILE *f = fopen(fp, "rb");
|
||||
if (f == NULL) {
|
||||
perror(program_name);
|
||||
return 1;
|
||||
}
|
||||
|
||||
fseek(f, 0, SEEK_END);
|
||||
long size = ftell(f);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
|
||||
if (size == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
u8 *bufi = (u8 *)malloc(size);
|
||||
if (bufi == NULL) {
|
||||
fprintf(stderr, "failed to malloc %li bytes\n", size);
|
||||
free(bufi);
|
||||
return 2;
|
||||
}
|
||||
fread(bufi, 1, size, f);
|
||||
fclose(f);
|
||||
|
||||
// allocate enough for the worst case scenario.
|
||||
size_t bufo_size = size * 9 / 8 + 8;
|
||||
u8 *bufo = (u8 *)malloc(bufo_size);
|
||||
|
||||
long new_size = compress(bufi, size, bufo);
|
||||
free(bufi);
|
||||
//assert(new_size > 0 && (size_t)new_size < bufo_size);
|
||||
|
||||
f = fopen(fp, "wb");
|
||||
if (f == NULL) {
|
||||
perror(program_name);
|
||||
free(bufo);
|
||||
return 3;
|
||||
}
|
||||
|
||||
char fs_buf[4] = {0};
|
||||
fs_buf[0] = (size >> 24) & 0xFF;
|
||||
fs_buf[1] = (size >> 16) & 0xFF;
|
||||
fs_buf[2] = (size >> 8) & 0xFF;
|
||||
fs_buf[3] = size & 0xFF;
|
||||
fwrite(fs_buf, 1, 4, f);
|
||||
|
||||
fwrite(bufo, 1, new_size, f);
|
||||
fclose(f);
|
||||
free(bufo);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc <= 0 || argv == NULL || argv[0] == NULL) {
|
||||
fprintf(stderr, "You've met with a terrible fate.\n");
|
||||
exit(1);
|
||||
}
|
||||
program_name = argv[0];
|
||||
|
||||
if (argc == 1) {
|
||||
fprintf(stderr, "usage: %s {file}\n", program_name);
|
||||
exit(2);
|
||||
} else if (argc == 2) {
|
||||
const char *fp = argv[1];
|
||||
int ret = compress_file(fp);
|
||||
if (ret != 0) {
|
||||
exit(ret + 3);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "too many arguments\n");
|
||||
exit(3);
|
||||
}
|
||||
}
|
32
fs.py
32
fs.py
|
@ -39,6 +39,28 @@ def hexdump(data):
|
|||
butts = data[i * 16:i * 16 + 16]
|
||||
print(("{:06X} |" + " {:02X}" * len(butts)).format(i * 16, *butts))
|
||||
|
||||
def compress_fast(data, mode="best"):
    """Compress `data` by running the external compressor executable.

    `data` is written to a temporary file, the executable is run with that
    file's path as its only argument, and the file's contents are read back
    and returned. The executable is expected next to this script (in
    `heresay`) and is named compressor64 or compressor32 according to
    platform.machine(), with an .exe suffix on Windows.

    Only mode "best" is accepted.

    A uniquely named temporary file is used and removed afterwards, so
    concurrent runs cannot clobber each other and no stale file is left in
    the working directory.
    """
    assert mode == "best", "only \"best\" mode is implemented for compress_fast"
    import tempfile
    from poopen import poopen
    from platform import machine

    exe = "compressor"
    exe += "64" if machine().endswith("64") else "32"
    exe += (".exe" if os.name == "nt" else "")
    exe = os.path.join(heresay, exe)
    assert os.path.isfile(exe), "missing executable: " + exe

    fd, tmp_fn = tempfile.mkstemp(prefix="compressing", suffix=".tmp")
    try:
        # Close our handle before the executable reopens the file.
        with os.fdopen(fd, "wb") as f:
            f.write(data)

        # NOTE(review): presumably poopen blocks until the process exits,
        # since the result is read back immediately; confirm.
        poopen([exe, tmp_fn])

        with open(tmp_fn, "rb") as f:
            new_data = f.read()
    finally:
        os.remove(tmp_fn)
    return new_data
|
||||
|
||||
def compress(data, mode="greedy"):
|
||||
assert mode in "worst greedy best".split(), f"unknown mode: {mode}"
|
||||
|
||||
|
@ -70,7 +92,6 @@ def compress(data, mode="greedy"):
|
|||
if match_len == len(sub):
|
||||
break
|
||||
if match_len < min_len:
|
||||
match_i, match_len = None, None
|
||||
continue
|
||||
if best_len is None or match_len > best_len:
|
||||
best_i = match_i
|
||||
|
@ -264,7 +285,8 @@ def create_rom(d):
|
|||
if fi == 0 and di != 14 or di == 14 and fi in skip_14:
|
||||
new_data = data
|
||||
else:
|
||||
new_data = compress(data, "best" if di == 14 else "greedy")
|
||||
#new_data = compress(data, "best" if di == 14 else "greedy")
|
||||
new_data = compress_fast(data)
|
||||
fmt = "compressed {:02}-{:03}.bin from {} bytes into {} ({:.2%})"
|
||||
percent = len(new_data) / len(data) if len(data) > 0 else 1
|
||||
print(fmt.format(di, fi, len(data), len(new_data), percent))
|
||||
|
@ -273,7 +295,8 @@ def create_rom(d):
|
|||
offset += size
|
||||
files[fi] = new_data
|
||||
|
||||
#if fi != 0: break # DEBUG
|
||||
if DEBUG and fi != 0:
|
||||
break
|
||||
|
||||
while f.tell() & 0xFFFF < 0x2008:
|
||||
f.write(W4(0xFFFFFFFF))
|
||||
|
@ -285,7 +308,8 @@ def create_rom(d):
|
|||
|
||||
assert f.tell() - block_offset < block_size
|
||||
|
||||
#break # DEBUG
|
||||
if DEBUG:
|
||||
break
|
||||
|
||||
def dump_files(f):
|
||||
# TODO:
|
||||
|
|
Loading…
Add table
Reference in a new issue