/*
 * compressor.c - in-place, sliding-window (LZSS-style) file compressor.
 *
 * Recovered from a whitespace-collapsed git patch ("diff --git a/compressor.c
 * b/compressor.c", new file mode 100644, index 0000000..a03fa00). The header
 * names of the #include lines were missing from the patch text and have been
 * restored from what the code actually uses.
 *
 * Output format written by compress_file():
 *   4 bytes   original size, big-endian
 *   N bytes   token stream produced by compress()
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
//#include <assert.h>

typedef unsigned char u8;
typedef unsigned int u32;

#define MIN(a, b) ((a) <= (b) ? (a) : (b))
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/* Modulo that stays non-negative for any a > -b. */
#define PMOD(a, b) (((a) + (b)) % (b))

#define BUF_LEN 1024    /* sliding window size; offsets are 10 bits */
#define MIN_LEN 3       /* shortest run worth encoding as a reference */
#define MAX_LEN 66      /* longest run: MIN_LEN + 63 (6-bit length field) */
#define BUF_START 0x3BE /* initial write position (BUF_LEN - MAX_LEN) */

char *program_name;

/*
 * Searches the sliding window for the longest run equal to the start of `sub`.
 *
 * window     the BUF_LEN-byte sliding window
 * write_pos  index in `window` where the next output byte will be stored;
 *            a match never extends onto or past this position
 * sub        input bytes still to be encoded
 * sub_len    number of usable bytes in `sub` (at most MAX_LEN)
 * match_pos  receives the window offset of the best match
 *
 * Candidates are tried from write_pos backwards and only a strictly longer
 * match replaces the current best, so ties resolve to the nearest candidate.
 *
 * Returns the match length (MIN_LEN..sub_len), or -1 when no match of at
 * least MIN_LEN bytes exists (*match_pos is then left untouched).
 */
static int find_longest_match(const u8 *window, int write_pos,
                              const u8 *sub, int sub_len, int *match_pos) {
    int best_len = -1;

    if (sub_len < MIN_LEN) {
        return -1;
    }

    for (int back = 0; back < BUF_LEN; back++) {
        int start = PMOD(write_pos - back, BUF_LEN);
        int len = 0;

        while (len < sub_len) {
            int off = (start + len) % BUF_LEN;
            if (window[off] != sub[len]) { break; }
            // TODO: handle pseudo-writes to buffer.
            if (off == write_pos) { break; }
            len++;
        }

        if (len >= MIN_LEN && len > best_len) {
            *match_pos = start;
            best_len = len;
            /* Nothing can beat a match that covers all of `sub`. */
            if (best_len == sub_len) { break; }
        }
    }

    return best_len;
}

/*
 * Compresses `size` bytes from `bufi` into `bufo` and returns the number of
 * bytes written.
 *
 * The stream is a sequence of groups of up to eight tokens, each group led by
 * a flag byte. Flag bit k (bit 0 first) describes token k of the group:
 *   1 = literal:   one raw byte
 *   0 = reference: two bytes, `a` then `b`, where the window offset is
 *                  a | ((b & 0xC0) << 2) and the run length is (b & 0x3F) + 3
 * The window starts zero-filled with the write position at BUF_START.
 *
 * `bufo` must have room for the all-literal worst case, i.e. at least
 * size + size / 8 + 1 bytes. A size <= 0 writes nothing and returns 0.
 */
long compress(const u8 *bufi, long size, u8 *bufo) {
    u8 buf[BUF_LEN] = {0};
    int buf_i = BUF_START;

    int shift = 0;          /* flag bits gathered for the current group */
    int shifted = 0;        /* number of tokens in the current group */
    long last_shift_i = -1; /* index of the pending flag byte, -1 if none */
    long written = 0;

    long i = 0;
    while (i < size) {
        const u8 *sub = bufi + i;
        int sub_len = (int)MIN((long)MAX_LEN, size - i);
        int best_i = -1;
        int best_len = find_longest_match(buf, buf_i, sub, sub_len, &best_i);
        int consumed;

        /* Reserve the flag byte of a new group; it is patched in later. */
        if (last_shift_i < 0) {
            last_shift_i = written;
            bufo[written++] = 0;
            shift = 0;
            shifted = 0;
        }

        if (best_len < 0) {
            shift = (shift >> 1) | 0x80;
            bufo[written++] = sub[0];
            consumed = 1;
        } else {
            shift >>= 1;
            bufo[written++] = (u8)(best_i & 0xFF);
            bufo[written++] = (u8)(((best_i & 0x300) >> 2) | (best_len - MIN_LEN));
            consumed = best_len;
        }
        shifted++;

        /* Mirror what the decompressor will have in its window. */
        for (int j = 0; j < consumed; j++) {
            buf[buf_i] = sub[j];
            buf_i = (buf_i + 1) % BUF_LEN;
        }
        i += consumed;

        if (shifted >= 8) {
            bufo[last_shift_i] = (u8)shift;
            shift = 0;
            shifted = 0;
            last_shift_i = -1;
        }
    }

    /* A partial final group: move its flags down so token 0 sits in bit 0. */
    if (last_shift_i >= 0) {
        bufo[last_shift_i] = (u8)(shift >> (8 - shifted));
    }

    return written;
}

/*
 * Compresses the file at `fp` in place: reads it fully, then overwrites it
 * with a 4-byte big-endian original size followed by the compressed stream.
 * An empty file is left untouched.
 *
 * Returns 0 on success, otherwise:
 *   1  the file could not be opened for reading
 *   2  a buffer could not be allocated
 *   3  the file could not be opened for writing
 *   4  the file could not be measured or read completely
 *   5  the compressed data could not be written completely
 *   6  the file is too large for the 32-bit size header
 * All resources are released on every path.
 */
int compress_file(const char *fp) {
    int ret = 0;
    FILE *f = NULL;
    u8 *bufi = NULL;
    u8 *bufo = NULL;
    long size = 0;
    long new_size = 0;
    size_t bufo_size = 0;
    u8 fs_buf[4] = {0};

    f = fopen(fp, "rb");
    if (f == NULL) {
        perror(program_name);
        return 1;
    }

    if (fseek(f, 0, SEEK_END) != 0) {
        perror(program_name);
        ret = 4;
        goto out;
    }
    size = ftell(f);
    if (size < 0 || fseek(f, 0, SEEK_SET) != 0) {
        perror(program_name);
        ret = 4;
        goto out;
    }

    if (size == 0) {
        goto out;
    }

    /* The header holds 32 bits, and the worst-case output must fit a long. */
    if ((unsigned long)size > 0xFFFFFFFFUL || size > LONG_MAX - size / 8 - 8) {
        fprintf(stderr, "file too large to compress: %li bytes\n", size);
        ret = 6;
        goto out;
    }

    bufi = malloc((size_t)size);
    if (bufi == NULL) {
        fprintf(stderr, "failed to malloc %li bytes\n", size);
        ret = 2;
        goto out;
    }
    if (fread(bufi, 1, (size_t)size, f) != (size_t)size) {
        fprintf(stderr, "failed to read %li bytes from %s\n", size, fp);
        ret = 4;
        goto out;
    }
    fclose(f);
    f = NULL;

    // allocate enough for the worst case scenario.
    // (every byte a literal: one flag byte per eight input bytes.)
    bufo_size = (size_t)size + (size_t)size / 8 + 8;
    bufo = malloc(bufo_size);
    if (bufo == NULL) {
        fprintf(stderr, "failed to malloc %lu bytes\n", (unsigned long)bufo_size);
        ret = 2;
        goto out;
    }

    new_size = compress(bufi, size, bufo);
    free(bufi);
    bufi = NULL;

    f = fopen(fp, "wb");
    if (f == NULL) {
        perror(program_name);
        ret = 3;
        goto out;
    }

    fs_buf[0] = (u8)((size >> 24) & 0xFF);
    fs_buf[1] = (u8)((size >> 16) & 0xFF);
    fs_buf[2] = (u8)((size >> 8) & 0xFF);
    fs_buf[3] = (u8)(size & 0xFF);

    if (fwrite(fs_buf, 1, sizeof fs_buf, f) != sizeof fs_buf ||
        fwrite(bufo, 1, (size_t)new_size, f) != (size_t)new_size) {
        perror(program_name);
        ret = 5;
    }
    /* fclose flushes, so its result decides whether the data really landed. */
    if (fclose(f) != 0 && ret == 0) {
        perror(program_name);
        ret = 5;
    }
    f = NULL;

out:
    if (f != NULL) {
        fclose(f);
    }
    free(bufo);
    free(bufi);
    return ret;
}

/*
 * Command-line entry point: `compressor {file}` compresses the file in place.
 * Exit status: 0 success, 1 broken argv, 2 missing argument, 3 too many
 * arguments, otherwise compress_file()'s error code plus 3.
 *
 * Define COMPRESSOR_NO_MAIN to leave main() out, e.g. when linking the
 * compressor into a test harness.
 */
#ifndef COMPRESSOR_NO_MAIN
int main(int argc, char *argv[]) {
    if (argc <= 0 || argv == NULL || argv[0] == NULL) {
        fprintf(stderr, "You've met with a terrible fate.\n");
        exit(1);
    }
    program_name = argv[0];

    if (argc == 1) {
        fprintf(stderr, "usage: %s {file}\n", program_name);
        exit(2);
    } else if (argc == 2) {
        const char *fp = argv[1];
        int ret = compress_file(fp);
        if (ret != 0) {
            exit(ret + 3);
        }
    } else {
        fprintf(stderr, "too many arguments\n");
        exit(3);
    }

    return 0;
}
#endif /* COMPRESSOR_NO_MAIN */

/*
 * NOTE(review): the same patch also modified fs.py. Those hunks are kept
 * below for reference only. Their line breaks and indentation were lost in
 * the collapsed patch text and have been reconstructed on a best-effort
 * basis, so verify them against the real patch before applying.
 *
 * diff --git a/fs.py b/fs.py
 * index fa002d9..9ba9314 100644
 * --- a/fs.py
 * +++ b/fs.py
 * @@ -39,6 +39,28 @@ def hexdump(data):
 *          butts = data[i * 16:i * 16 + 16]
 *          print(("{:06X} |" + " {:02X}" * len(butts)).format(i * 16, *butts))
 *
 * +def compress_fast(data, mode="best"):
 * +    assert mode == "best", "only \"best\" mode is implemented for compress_fast"
 * +    from poopen import poopen
 * +    from platform import machine
 * +
 * +    exe = "compressor"
 * +    exe += "64" if machine().endswith("64") else "32"
 * +    exe += (".exe" if os.name == "nt" else "")
 * +    exe = os.path.join(heresay, exe)
 * +    assert os.path.isfile(exe), "missing executable: " + exe
 * +
 * +    tmp_fn = "compressing.tmp"
 * +
 * +    with open(tmp_fn, "wb") as f:
 * +        f.write(data)
 * +
 * +    poopen([exe, tmp_fn])
 * +
 * +    with open(tmp_fn, "rb") as f:
 * +        new_data = f.read()
 * +    return new_data
 * +
 *  def compress(data, mode="greedy"):
 *      assert mode in "worst greedy best".split(), f"unknown mode: {mode}"
 *
 * @@ -70,7 +92,6 @@ def compress(data, mode="greedy"):
 *                      if match_len == len(sub): break
 *                  if match_len < min_len:
 * -                    match_i, match_len = None, None
 *                      continue
 *                  if best_len is None or match_len > best_len:
 *                      best_i = match_i
 * @@ -264,7 +285,8 @@ def create_rom(d):
 *              if fi == 0 and di != 14 or di == 14 and fi in skip_14:
 *                  new_data = data
 *              else:
 * -                new_data = compress(data, "best" if di == 14 else "greedy")
 * +                #new_data = compress(data, "best" if di == 14 else "greedy")
 * +                new_data = compress_fast(data)
 *              fmt = "compressed {:02}-{:03}.bin from {} bytes into {} ({:.2%})"
 *              percent = len(new_data) / len(data) if len(data) > 0 else 1
 *              print(fmt.format(di, fi, len(data), len(new_data), percent))
 * @@ -273,7 +295,8 @@ def create_rom(d):
 *              offset += size
 *              files[fi] = new_data
 *
 * -            #if fi != 0: break # DEBUG
 * +            if DEBUG and fi != 0:
 * +                break
 *
 *          while f.tell() & 0xFFFF < 0x2008:
 *              f.write(W4(0xFFFFFFFF))
 * @@ -285,7 +308,8 @@ def create_rom(d):
 *
 *          assert f.tell() - block_offset < block_size
 *
 * -        #break # DEBUG
 * +        if DEBUG:
 * +            break
 *
 *  def dump_files(f):
 *      # TODO:
 */