add 6502_name_codec
This commit is contained in:
parent
760e60f814
commit
5d16339d59
9 changed files with 1564 additions and 0 deletions
7
6502_name_codec/README.md
Normal file
7
6502_name_codec/README.md
Normal file
|
@ -0,0 +1,7 @@
|
|||
squeeze simple north-american names into fewer bytes.
|
||||
|
||||
i made this with NES/SNES games in mind, i.e. for player-given character names in JRPGs.
|
||||
you will need [m6502.h](https://github.com/floooh/chips/blob/c011ef1/chips/m6502.h)
|
||||
to compile the test program. as of writing, v1 is the fastest, but v6 is the smallest.
|
||||
|
||||
someday, i want to extend this to use a non-constant LUT. (hidden markov models?)
|
184
6502_name_codec/decode-idk.c
Normal file
184
6502_name_codec/decode-idk.c
Normal file
|
@ -0,0 +1,184 @@
|
|||
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
||||
|
||||
#define CHIPS_IMPL
|
||||
#include "m6502.h"
|
||||
|
||||
// print a printf-style diagnostic on stderr.
#define lament(...) fprintf(stderr, __VA_ARGS__)

// bail out of the enclosing function when `cond` is true OR errno is nonzero.
//
// prints the printf-style message given in the variadic arguments, then
// ": <strerror text>" and a newline, then jumps to a label named `error`
// that the enclosing function must provide.
//
// errno is captured BEFORE anything is printed: the first fprintf is allowed
// to modify errno, which would otherwise make the reported reason unreliable.
// callers are expected to set errno = 0 before the operation being checked,
// since a stale nonzero errno also triggers the bail-out.
#define error_when(cond, ...) do { \
        if ((cond) || errno) { \
            const int error_when_errno_ = errno; \
            lament(__VA_ARGS__); \
            lament(": %s\n", strerror(error_when_errno_)); \
            goto error; \
        } \
    } while (0)
|
||||
|
||||
// setup 64 kBytes of memory.
// this flat, zero-initialized array is the emulated machine's entire memory:
// it is filled from the image file by loadmem() and indexed directly by the
// 16-bit address taken from the CPU pins.
#define MEMSIZE 65536
static uint8_t mem[MEMSIZE] = {0};
|
||||
|
||||
// mnemonic for every opcode byte, including the undocumented ones.
// each entry is three letters plus a NUL terminator (4 bytes), so the name of
// opcode `op` is the C string at `instrnames + op * 4`. one source row covers
// sixteen opcodes ($x0..$xF). the last entry relies on the string literal's
// implicit terminator.
// NOTE: renamed KIL to JAM for consistency.
static const char instrnames[] =
    "BRK\0ORA\0JAM\0SLO\0NOP\0ORA\0ASL\0SLO\0PHP\0ORA\0ASL\0ANC\0NOP\0ORA\0ASL\0SLO\0"
    "BPL\0ORA\0JAM\0SLO\0NOP\0ORA\0ASL\0SLO\0CLC\0ORA\0NOP\0SLO\0NOP\0ORA\0ASL\0SLO\0"
    "JSR\0AND\0JAM\0RLA\0BIT\0AND\0ROL\0RLA\0PLP\0AND\0ROL\0ANC\0BIT\0AND\0ROL\0RLA\0"
    "BMI\0AND\0JAM\0RLA\0NOP\0AND\0ROL\0RLA\0SEC\0AND\0NOP\0RLA\0NOP\0AND\0ROL\0RLA\0"
    "RTI\0EOR\0JAM\0SRE\0NOP\0EOR\0LSR\0SRE\0PHA\0EOR\0LSR\0ALR\0JMP\0EOR\0LSR\0SRE\0"
    "BVC\0EOR\0JAM\0SRE\0NOP\0EOR\0LSR\0SRE\0CLI\0EOR\0NOP\0SRE\0NOP\0EOR\0LSR\0SRE\0"
    "RTS\0ADC\0JAM\0RRA\0NOP\0ADC\0ROR\0RRA\0PLA\0ADC\0ROR\0ARR\0JMP\0ADC\0ROR\0RRA\0"
    "BVS\0ADC\0JAM\0RRA\0NOP\0ADC\0ROR\0RRA\0SEI\0ADC\0NOP\0RRA\0NOP\0ADC\0ROR\0RRA\0"
    "NOP\0STA\0NOP\0SAX\0STY\0STA\0STX\0SAX\0DEY\0NOP\0TXA\0XAA\0STY\0STA\0STX\0SAX\0"
    "BCC\0STA\0JAM\0AHX\0STY\0STA\0STX\0SAX\0TYA\0STA\0TXS\0TAS\0SHY\0STA\0SHX\0AHX\0"
    "LDY\0LDA\0LDX\0LAX\0LDY\0LDA\0LDX\0LAX\0TAY\0LDA\0TAX\0LAX\0LDY\0LDA\0LDX\0LAX\0"
    "BCS\0LDA\0JAM\0LAX\0LDY\0LDA\0LDX\0LAX\0CLV\0LDA\0TSX\0LAS\0LDY\0LDA\0LDX\0LAX\0"
    "CPY\0CMP\0NOP\0DCP\0CPY\0CMP\0DEC\0DCP\0INY\0CMP\0DEX\0AXS\0CPY\0CMP\0DEC\0DCP\0"
    "BNE\0CMP\0JAM\0DCP\0NOP\0CMP\0DEC\0DCP\0CLD\0CMP\0NOP\0DCP\0NOP\0CMP\0DEC\0DCP\0"
    "CPX\0SBC\0NOP\0ISC\0CPX\0SBC\0INC\0ISC\0INX\0SBC\0NOP\0SBC\0CPX\0SBC\0INC\0ISC\0"
    "BEQ\0SBC\0JAM\0ISC\0NOP\0SBC\0INC\0ISC\0SED\0SBC\0NOP\0ISC\0NOP\0SBC\0INC\0ISC";
|
||||
|
||||
// one flag per opcode byte, laid out like instrnames (sixteen opcodes per row):
// 1 when the opcode is an official, documented instruction and 0 when it is
// not. memdebug() prints "..." for a 1 and "!!!" for a 0.
static const char documented[] = {
    1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0,
    1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
    1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
    1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
    1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
    1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
    1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
    1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
    0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0,
    1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0,
    1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
    1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
    1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
    1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0,
    1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
    1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0
};
|
||||
|
||||
static int loadmem(const char *fp) {
|
||||
FILE *f = NULL;
|
||||
long size = MEMSIZE;
|
||||
|
||||
errno = 0;
|
||||
f = fopen(fp, "rb");
|
||||
error_when(f == NULL, "Error opening file: %s", fp);
|
||||
error_when(fread(mem, 1, size, f) != (size_t)size, "Error reading %li bytes from file: %s", size, fp);
|
||||
error_when(fclose(f) != 0, "Error closing file: %s", fp);
|
||||
return 0;
|
||||
|
||||
error:
|
||||
//return 65; // EX_DATAERR
|
||||
return 66; // EX_NOINPUT
|
||||
}
|
||||
|
||||
static void memdebug(const m6502_t cpu, long instrs) {
|
||||
uint64_t pins = cpu.PINS;
|
||||
uint16_t pc = cpu.PC;
|
||||
uint8_t instr = cpu.IR >> 3;
|
||||
int ic = cpu.IR & 7;
|
||||
uint16_t addr = M6502_GET_ADDR(pins);
|
||||
const char *mode = (pins & M6502_RW) ? "READ " : "WRITE ";
|
||||
uint8_t value = (pins & M6502_RW) ? mem[addr] : M6502_GET_DATA(pins);
|
||||
const char *instrname = instrnames + instr * 4;
|
||||
const char *ok = documented[instr] ? "..." : "!!!";
|
||||
lament("[%4li.%i:$%04X:$%02X (%s %s)] %s mem[0x%04X]=0x%02X;\n",
|
||||
instrs, ic, pc, instr, instrname, ok, mode, addr, value);
|
||||
}
|
||||
|
||||
static void xxd(const uint8_t *start, int length) {
|
||||
while (length >= 16) {
|
||||
printf("%08x: ", (unsigned int)(start - mem));
|
||||
for (int i = 0; i < 16; i += 2) {
|
||||
printf("%02x%02x ", start[i], start[i + 1]);
|
||||
}
|
||||
printf(" ");
|
||||
for (int i = 0; i < 16; i++) {
|
||||
uint8_t value = start[i];
|
||||
if (value < 0x20 || value >= 0x7F) {
|
||||
value = '.';
|
||||
}
|
||||
printf("%c", value);
|
||||
}
|
||||
printf("\n");
|
||||
start += 16; // FIXME: can technically invoke undefined behavior on boundary.
|
||||
length -= 16;
|
||||
}
|
||||
// TODO: handle the remainder.
|
||||
//for (int i = 0; i < length; i += 2) {
|
||||
//}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
char *name = NULL;
|
||||
long cycle = 0;
|
||||
long oldcycle = 0;
|
||||
long instrs = 0;
|
||||
long instr_limit = 1000;
|
||||
uint64_t pins;
|
||||
m6502_t cpu;
|
||||
m6502_desc_t desc = {0};
|
||||
|
||||
if (argc <= 0 || argv == NULL || argv[0] == NULL) {
|
||||
lament("You've met with a terrible fate.\n");
|
||||
return 64; // EX_USAGE
|
||||
}
|
||||
|
||||
name = argv[0];
|
||||
if (argc != 2 && argc != 3) {
|
||||
lament("usage: %s {ram.bin} [instructions]\n", name);
|
||||
return 64; // EX_USAGE
|
||||
}
|
||||
|
||||
if (argc == 3) {
|
||||
instr_limit = strtol(argv[2], NULL, 0); // can be negative, i guess.
|
||||
if (errno) {
|
||||
lament("%s: failed to parse integer: %s\n", name, argv[2]);
|
||||
return 64; // EX_USAGE
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
int res = 0;
|
||||
if ((res = loadmem(argv[1]))) return res;
|
||||
}
|
||||
|
||||
// initialize the CPU
|
||||
desc.bcd_disabled = true; // TODO: do this from instructions?
|
||||
pins = m6502_init(&cpu, &desc);
|
||||
for (;; cycle++) {
|
||||
// run the CPU emulation for one tick
|
||||
pins = m6502_tick(&cpu, pins);
|
||||
// extract 16-bit address from pin mask
|
||||
const uint16_t addr = M6502_GET_ADDR(pins);
|
||||
// perform memory access
|
||||
if (pins & M6502_RW) {
|
||||
// a memory read
|
||||
uint8_t value = mem[addr];
|
||||
memdebug(cpu, instrs);
|
||||
M6502_SET_DATA(pins, value);
|
||||
} else {
|
||||
// a memory write
|
||||
uint8_t value = M6502_GET_DATA(pins);
|
||||
memdebug(cpu, instrs);
|
||||
mem[addr] = value;
|
||||
}
|
||||
if (cycle >= oldcycle + 8) {
|
||||
lament("CPU is locked up!\n");
|
||||
break;
|
||||
}
|
||||
if (pins & M6502_SYNC) {
|
||||
instrs++;
|
||||
oldcycle = cycle;
|
||||
if (instrs >= instr_limit) break;
|
||||
}
|
||||
}
|
||||
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
printf("cpu.PC=0x%04X, cpu.A=0x%02X, cpu.X=0x%02X, cpu.Y=0x%02X, cpu.S=0x%02X, cpu.P=0x%02X;\n",
|
||||
cpu.PC, cpu.A, cpu.X, cpu.Y, cpu.S, cpu.P);
|
||||
lament("exiting after %li instructions and %li cycles.\n", instrs, cycle + 1);
|
||||
xxd(mem, 0x100);
|
||||
return 0;
|
||||
}
|
145
6502_name_codec/decode.asm
Normal file
145
6502_name_codec/decode.asm
Normal file
|
@ -0,0 +1,145 @@
|
|||
arch nes.cpu
|
||||
|
||||
output "decode.bin", create
|
||||
fill 65536
|
||||
origin 0
|
||||
|
||||
// pad the output with zero bytes until pc() is a multiple of {size}.
// emits nothing when pc() is already aligned.
macro align(size) { // Align Byte Amount
  while (pc() % {size}) {
    db 0
  }
}
|
||||
|
||||
// pad the output with NOP instructions until pc() reaches {new_pc}.
// aborts assembly when pc() is already beyond {new_pc}, and prints one
// line per padding byte so the amount of padding shows up in the build
// output.
macro nops(new_pc) {
  if (pc() > {new_pc}) {
    error "PC is already past the point specified"
  }
  while (pc() < {new_pc}) {
    print "adding a byte of padding at "
    print pc()
    print "\n"
    nop
  }
}
|
||||
|
||||
define version(6)
|
||||
|
||||
fill 8, $02 // jams
|
||||
|
||||
start:
|
||||
cld // clear BCD flag
|
||||
clc // clear carry
|
||||
clv // clear overflow
|
||||
// TODO: what i actually should be doing is an RTI
|
||||
pla
|
||||
pla
|
||||
tax
|
||||
tay
|
||||
nop //php // push processor status just to advance stack a bit
|
||||
|
||||
// end-of-name / end-of-input check, expanded inside each decoder's
// "advance input" subroutine right after the input pointer was incremented.
//
// active variant (if 1): compares the low byte of the input pointer ($02)
// against the low bytes of the name1..name5 labels. reaching name1..name4
// jumps to decode_begin_next; reaching name5 jumps to decode_exit. both
// labels are provided by the included decode_vN.asm.
// clobbers A. when no comparison matches, execution falls through with the
// zero flag clear (the last cmp did not match).
//
// disabled variant (else): would start the next name when the byte at the
// output pointer ($00) is an ASCII space; see the FIXME below.
macro when_to_stop() {
  if 1 {
    lda $02
    cmp #(name1 & 0xFF)
    beq decode_begin_next
    cmp #(name2 & 0xFF)
    beq decode_begin_next
    cmp #(name3 & 0xFF)
    beq decode_begin_next
    cmp #(name4 & 0xFF)
    beq decode_begin_next
    cmp #(name5 & 0xFF)
    beq decode_exit
  } else {
    // FIXME: doesn't work because output has already advanced to a null byte.
    ldy #0
    lda #$20 // ascii space
    eor ($00),y // load from output
    beq decode_begin_next
    lda $02
    cmp #(name5 & 0xFF)
    beq decode_exit
  }
}
|
||||
|
||||
// decode subroutine arguments:
|
||||
lda #(names_out)
|
||||
sta $00
|
||||
lda #(names_out >> 8)
|
||||
sta $01
|
||||
lda #(names)
|
||||
sta $02
|
||||
lda #(names >> 8)
|
||||
sta $03
|
||||
jsr decode
|
||||
db $D2 // jam (D for Done, i guess)
|
||||
|
||||
align(16)
|
||||
// reorder() { python3 -c 's=__import__("sys").argv[1];print("".join(s[int(bin(len(s)|i)[-1:2:-1],2)] for i in range(len(s))))' "$@"; }
|
||||
decode_lut0xxx:
|
||||
db "ETAOINSH"
|
||||
//db "EIASTNOH"
|
||||
decode_lut10xx:
|
||||
db "RDLU"
|
||||
//db "RDLU"
|
||||
if {version} >= 3 && {version} <= 6 {
|
||||
db "????"
|
||||
}
|
||||
decode_lut11xxxx:
|
||||
db "CMFPGWYBVKXJQZ. "
|
||||
//db "CVGQFXY.MKWZPJB "
|
||||
|
||||
//origin 0x100 - 6 * 5 * 2
|
||||
align(16)
|
||||
names_out:
|
||||
fill 6 * 5 * 2
|
||||
|
||||
db $FF
|
||||
|
||||
origin 0x200
|
||||
names:
|
||||
name0:
|
||||
// FIXME: names are truncated unless they end in a padding byte!
|
||||
db $0A,$4F,$4B,$70,$17,$FF // Elizabeth
|
||||
name1:
|
||||
db $C4,$A3,$6F,$FF,$FF,$FF // Mary 110001 00.10 1000 11.0110
|
||||
name2:
|
||||
db $CC,$86,$13,$04,$2F,$FF // Patricia
|
||||
name3:
|
||||
db $EC,$B1,$06,$FF,$FF,$FF // James 111011 00.10 110001. 0000 0110.
|
||||
name4:
|
||||
db $83,$DC,$20,$7F,$FF,$FF // Robert
|
||||
name5:
|
||||
|
||||
origin 0x300
|
||||
if {version} == 1 { ; include "decode_v1.asm"
|
||||
} else if {version} == 2 { ;include "decode_v2.asm"
|
||||
} else if {version} == 3 { ;include "decode_v3.asm"
|
||||
} else if {version} == 4 { ;include "decode_v4.asm"
|
||||
} else if {version} == 5 { ;include "decode_v5.asm"
|
||||
} else if {version} == 6 { ;include "decode_v6.asm"
|
||||
}
|
||||
|
||||
align(16)
|
||||
db "DONE: "
|
||||
dw done
|
||||
|
||||
origin 0xFFFC
|
||||
db start
|
||||
db start >> 8
|
||||
|
||||
//macro revbit(variable x) {
|
||||
// evaluate lo(((x&(1<<7))>>7)|((x&(1<<6))>>5)|((x&(1<<5))>>3)|((x&(1<<4))>>1))
|
||||
// evaluate hi(((x&(1<<3))<<1)|((x&(1<<2))<<3)|((x&(1<<1))<<5)|((x&(1<<0))<<7))
|
||||
// db {lo}|{hi}
|
||||
//}
|
||||
//macro makename(variable a, variable b, variable c, variable d, variable e) {
|
||||
// revbit(a)
|
||||
// revbit(b)
|
||||
// revbit(c)
|
||||
// revbit(d)
|
||||
// revbit(e)
|
||||
//}
|
||||
//makename($01,$23,$45,$67,$89)
|
||||
|
||||
// vim:ft=snes_bass
|
240
6502_name_codec/decode_v1.asm
Normal file
240
6502_name_codec/decode_v1.asm
Normal file
|
@ -0,0 +1,240 @@
|
|||
// included by decode.asm
|
||||
// cycles on extra-padded "Elizabeth Mary Patricia James Robert":
|
||||
// 3485-55=3430 (includes jsr and rts)
|
||||
// program size: 0xFB
|
||||
// instructions: 1248-17=1231 (includes jsr and rts)
|
||||
|
||||
decode:
|
||||
// NOTE: output/input pointers cannot cross page boundaries.
|
||||
// that means the effective longest lengths of output/input are 256/192 bytes.
|
||||
|
||||
decode00: // decode from offset 0, unknown code length (READS A BYTE)
|
||||
ldy #$00
|
||||
lda ($02),y // load from input
|
||||
|
||||
tax // stash for after branch
|
||||
eor #$C0 // TODO: just use a cmp instruction?
|
||||
and #$C0
|
||||
beq decode06 // branch when mask is fully set
|
||||
// fallthru decode04
|
||||
|
||||
decode04: // decode from offset 0, 4-bit code (then 4 under)
|
||||
txa
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
tay
|
||||
lda decode_lut0xxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
|
||||
inc $00 // advance output
|
||||
//jsr decode_advance_output
|
||||
bne decode40 // always branch (unless page boundary)
|
||||
db $02 // jam
|
||||
|
||||
decode06: // decode from offset 0, 6-bit code (then 2 under)
|
||||
txa
|
||||
lsr
|
||||
lsr
|
||||
and #$0F
|
||||
tay
|
||||
lda decode_lut11xxxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
|
||||
inc $00 // advance output
|
||||
//jsr decode_advance_output
|
||||
bne decode60 // always branch (unless page boundary)
|
||||
db $02 // jam
|
||||
|
||||
decode20: // decode from offset 2, unknown code length
|
||||
txa
|
||||
eor #$30
|
||||
and #$30
|
||||
beq decode26 // branch when mask is fully set
|
||||
|
||||
decode24: // decode from offset 2, 4-bit code (then 2 under)
|
||||
txa
|
||||
lsr
|
||||
lsr
|
||||
and #$0F
|
||||
tay
|
||||
lda decode_lut0xxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
|
||||
inc $00 // advance output
|
||||
//jsr decode_advance_output
|
||||
bne decode60 // always branch (unless page boundary)
|
||||
db $02 // jam
|
||||
|
||||
decode26: // decode from offset 2, 6-bit code (then aligned)
|
||||
txa
|
||||
and #$0F
|
||||
tay
|
||||
lda decode_lut11xxxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
|
||||
//inc $02 // advance input
|
||||
//beq die // never branch (unless page boundary)
|
||||
jsr decode_advance_input
|
||||
|
||||
inc $00 // advance output
|
||||
//jsr decode_advance_output
|
||||
bne decode00 // always branch (unless page boundary)
|
||||
db $02 // jam
|
||||
|
||||
decode40: // decode from offset 4, unknown code length
|
||||
//inc $02 // advance input
|
||||
//beq die // never branch (unless page boundary)
|
||||
jsr decode_advance_input
|
||||
txa
|
||||
eor #$0C // TODO: just use a cmp instruction?
|
||||
and #$0C
|
||||
beq decode46 // branch when mask is fully set
|
||||
// fallthru decode44
|
||||
|
||||
decode44: // decode from offset 4, 4-bit code (then aligned)
|
||||
txa
|
||||
and #$0F
|
||||
tay
|
||||
lda decode_lut0xxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
|
||||
inc $00 // advance output
|
||||
//jsr decode_advance_output
|
||||
bne decode00 // always branch (unless page boundary)
|
||||
db $02 // jam
|
||||
|
||||
decode46: // decode from offset 4, 6-bit code (then 2 over) (READS A BYTE)
|
||||
txa
|
||||
and #$03
|
||||
asl
|
||||
asl
|
||||
sta $04
|
||||
ldy #$00
|
||||
lda ($02),y // load from input
|
||||
tax
|
||||
rol
|
||||
rol
|
||||
rol
|
||||
and #$03
|
||||
ora $04
|
||||
tay
|
||||
lda decode_lut11xxxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
|
||||
inc $00 // advance output
|
||||
//jsr decode_advance_output
|
||||
bne decode20 // always branch (unless page boundary)
|
||||
db $02 // jam
|
||||
|
||||
decode60: // decode from offset 6, unknown code length (READS A BYTE)
|
||||
//inc $02 // advance input
|
||||
//beq die // never branch (unless page boundary)
|
||||
jsr decode_advance_input
|
||||
ldy #$00
|
||||
lda ($02),y // load from input
|
||||
tay
|
||||
txa
|
||||
eor #$03
|
||||
and #$03
|
||||
beq decode66 // branch when mask is fully set
|
||||
// fallthru decode64
|
||||
|
||||
decode64: // decode from offset 6, 4-bit code (then 2 over)
|
||||
txa
|
||||
and #$03
|
||||
asl
|
||||
asl
|
||||
sta $04
|
||||
tya // load in (restore) the new input
|
||||
tax // and put it in X like the rest of the code expects
|
||||
rol
|
||||
rol
|
||||
rol
|
||||
and #$03
|
||||
ora $04
|
||||
tay
|
||||
lda decode_lut0xxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
|
||||
inc $00 // advance output
|
||||
//jsr decode_advance_output
|
||||
bne decode20_shim // always branch (unless page boundary)
|
||||
db $02 // jam
|
||||
|
||||
die:
|
||||
db $F2 // um lammer jammy
|
||||
|
||||
decode20_shim:
|
||||
jmp decode20
|
||||
|
||||
decode66: // decode from offset 6, 6-bit code (then 4 over)
|
||||
tya // load in (restore) the new input
|
||||
tax // and put it in X like the rest of the code expects
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
tay
|
||||
lda decode_lut11xxxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
|
||||
inc $00 // advance output
|
||||
//jsr decode_advance_output
|
||||
bne decode40 // always branch (unless page boundary)
|
||||
db $02 // jam
|
||||
|
||||
decode_advance_input:
|
||||
// do not modify X or Y here
|
||||
inc $02
|
||||
beq die // never branch (unless page boundary)
|
||||
when_to_stop()
|
||||
rts
|
||||
|
||||
if 0 {
|
||||
decode_advance_output:
|
||||
// do not modify X or Y here
|
||||
lda $02
|
||||
cmp #(name1 & 0xFF)
|
||||
beq decode_begin_next
|
||||
cmp #(name2 & 0xFF)
|
||||
beq decode_begin_next
|
||||
cmp #(name3 & 0xFF)
|
||||
beq decode_begin_next
|
||||
cmp #(name4 & 0xFF)
|
||||
beq decode_begin_next
|
||||
cmp #(name5 & 0xFF)
|
||||
beq decode_exit
|
||||
inc $00 // advance output
|
||||
//beq die // never branch (unless page boundary)
|
||||
rts
|
||||
}
|
||||
|
||||
decode_begin_next:
|
||||
//inc $00 // advance output
|
||||
//beq die // never branch (unless page boundary)
|
||||
pla
|
||||
pla
|
||||
jmp decode00
|
||||
|
||||
decode_exit:
|
||||
pla
|
||||
pla
|
||||
rts
|
||||
//db $D2 // jam (D is for Done, i guess)
|
||||
|
||||
// //
|
||||
done:
|
||||
db $02 // jam
|
||||
// //
|
||||
|
||||
// vim:ft=snes_bass
|
176
6502_name_codec/decode_v2.asm
Normal file
176
6502_name_codec/decode_v2.asm
Normal file
|
@ -0,0 +1,176 @@
|
|||
// included by decode.asm
|
||||
// cycles on extra-padded "Elizabeth Mary Patricia James Robert":
|
||||
// 4035-55=3980 (includes jsr and rts)
|
||||
// program size: 0xCF
|
||||
// instructions: 1358-17=1341 (includes jsr and rts)
|
||||
|
||||
decode_advance_input:
|
||||
// do not modify X or Y here
|
||||
inc $02
|
||||
beq die // never branch (unless page boundary)
|
||||
when_to_stop()
|
||||
rts
|
||||
|
||||
decode_exit:
|
||||
pla
|
||||
pla
|
||||
rts
|
||||
//db $D2 // jam (D is for Done, i guess)
|
||||
|
||||
decode_common:
|
||||
tay
|
||||
lda decode_lut0xxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
inc $00 // advance output
|
||||
beq die // never branch (unless page boundary)
|
||||
rts
|
||||
|
||||
decode_uncommon:
|
||||
tay
|
||||
lda decode_lut11xxxx,y
|
||||
ldy #$00
|
||||
sta ($00),y // write to output
|
||||
inc $00 // advance output
|
||||
beq die // never branch (unless page boundary)
|
||||
rts
|
||||
|
||||
die:
|
||||
db $F2 // um lammer jammy
|
||||
|
||||
decode_begin_next:
|
||||
//inc $00 // advance output
|
||||
//beq die // never branch (unless page boundary)
|
||||
pla
|
||||
pla
|
||||
|
||||
decode:
|
||||
// NOTE: output/input pointers cannot cross page boundaries.
|
||||
// that means the effective longest lengths of output/input are 256/192 bytes.
|
||||
|
||||
decode00: // decode from offset 0, unknown code length (READS A BYTE)
|
||||
ldy #$00
|
||||
lda ($02),y // load from input
|
||||
|
||||
tax // stash for after branch
|
||||
eor #$C0 // TODO: just use a cmp instruction?
|
||||
and #$C0
|
||||
beq decode06 // branch when mask is fully set
|
||||
// fallthru decode04
|
||||
|
||||
decode04: // decode from offset 0, 4-bit code (then 4 under)
|
||||
txa
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
jsr decode_common
|
||||
bne decode40 // always branch (unless page boundary)
|
||||
|
||||
decode06: // decode from offset 0, 6-bit code (then 2 under)
|
||||
txa
|
||||
lsr
|
||||
lsr
|
||||
and #$0F
|
||||
jsr decode_uncommon
|
||||
bne decode60 // always branch (unless page boundary)
|
||||
|
||||
decode20: // decode from offset 2, unknown code length
|
||||
txa
|
||||
eor #$30
|
||||
and #$30
|
||||
beq decode26 // branch when mask is fully set
|
||||
|
||||
decode24: // decode from offset 2, 4-bit code (then 2 under)
|
||||
txa
|
||||
lsr
|
||||
lsr
|
||||
and #$0F
|
||||
jsr decode_common
|
||||
bne decode60 // always branch (unless page boundary)
|
||||
|
||||
decode26: // decode from offset 2, 6-bit code (then aligned)
|
||||
jsr decode_advance_input
|
||||
txa
|
||||
and #$0F
|
||||
jsr decode_uncommon
|
||||
bne decode00 // always branch (unless page boundary)
|
||||
|
||||
decode40: // decode from offset 4, unknown code length
|
||||
//inc $02 // advance input
|
||||
//beq die // never branch (unless page boundary)
|
||||
jsr decode_advance_input
|
||||
txa
|
||||
eor #$0C // TODO: just use a cmp instruction?
|
||||
and #$0C
|
||||
beq decode46 // branch when mask is fully set
|
||||
// fallthru decode44
|
||||
|
||||
decode44: // decode from offset 4, 4-bit code (then aligned)
|
||||
txa
|
||||
and #$0F
|
||||
jsr decode_common
|
||||
bne decode00 // always branch (unless page boundary)
|
||||
|
||||
decode46: // decode from offset 4, 6-bit code (then 2 over) (READS A BYTE)
|
||||
txa
|
||||
and #$03
|
||||
asl
|
||||
asl
|
||||
sta $04
|
||||
ldy #$00
|
||||
lda ($02),y // load from input
|
||||
tax
|
||||
rol
|
||||
rol
|
||||
rol
|
||||
and #$03
|
||||
ora $04
|
||||
jsr decode_uncommon
|
||||
bne decode20 // always branch (unless page boundary)
|
||||
|
||||
decode60: // decode from offset 6, unknown code length (READS A BYTE)
|
||||
//inc $02 // advance input
|
||||
//beq die // never branch (unless page boundary)
|
||||
jsr decode_advance_input
|
||||
ldy #$00
|
||||
lda ($02),y // load from input
|
||||
tay
|
||||
txa
|
||||
eor #$03
|
||||
and #$03
|
||||
beq decode66 // branch when mask is fully set
|
||||
// fallthru decode64
|
||||
|
||||
decode64: // decode from offset 6, 4-bit code (then 2 over)
|
||||
txa
|
||||
and #$03
|
||||
asl
|
||||
asl
|
||||
sta $04
|
||||
tya // load in (restore) the new input
|
||||
tax // and put it in X like the rest of the code expects
|
||||
rol
|
||||
rol
|
||||
rol
|
||||
and #$03
|
||||
ora $04
|
||||
jsr decode_common
|
||||
bne decode20 // always branch (unless page boundary)
|
||||
|
||||
decode66: // decode from offset 6, 6-bit code (then 4 over)
|
||||
tya // load in (restore) the new input
|
||||
tax // and put it in X like the rest of the code expects
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
lsr
|
||||
jsr decode_uncommon
|
||||
bne decode40 // always branch (unless page boundary)
|
||||
|
||||
// //
|
||||
done:
|
||||
db $02 // jam
|
||||
// //
|
||||
|
||||
// vim:ft=snes_bass
|
210
6502_name_codec/decode_v3.asm
Normal file
210
6502_name_codec/decode_v3.asm
Normal file
|
@ -0,0 +1,210 @@
|
|||
// included by decode.asm
|
||||
// cycles on extra-padded "Elizabeth Mary Patricia James Robert":
|
||||
// with double-dec: (dec per bit)
|
||||
// 7359-55=7304 (includes jsr and rts)
|
||||
// program size: 0xE0
|
||||
// with single-dec: (dec per pair)
|
||||
// 6759-55=6704 (includes jsr and rts)
|
||||
// program size: 0xDA
|
||||
// with tax reduction:
|
||||
// program size: 0xD4
|
||||
// with single death:
|
||||
// program size: 0xD3
|
||||
// with TYA instead of LDA #0:
|
||||
// program size: 0xD2
|
||||
// with reduced ORA:
|
||||
// 6612-55=6557 (includes jsr and rts)
|
||||
// program size: 0xD0
|
||||
// with stuff crammed into decode_advance:
|
||||
// program size: 0xBE
|
||||
// without a shim to jump back to decode_xx from decode_nextbytew:
|
||||
// program size: 0xBD
|
||||
// without any JMPs:
|
||||
// 6564-55=6509 (includes jsr and rts)
|
||||
// program size: 0xBC
|
||||
// without any extraneous DBs:
|
||||
// program size: 0xBB
|
||||
// instructions: 2359-17=2342 (includes jsr and rts)
|
||||
// with sty instead of tya + sta:
|
||||
// cycles: 6466-55=6411
|
||||
// instrs: 2310-17=2293
|
||||
// program size: 0xBA (186, 22 of which are the end-of-string comparisons)
|
||||
|
||||
decode_advance:
|
||||
inc $02
|
||||
beq die // never branch (unless page boundary)
|
||||
|
||||
when_to_stop()
|
||||
|
||||
ldy #0
|
||||
lda ($02),y // load from input
|
||||
tax // stash for after branch
|
||||
lda #4 // pairs remaining
|
||||
sta $04 // write pairs remaining
|
||||
rts
|
||||
|
||||
decode_exit:
|
||||
pla
|
||||
pla
|
||||
rts
|
||||
|
||||
decode_begin_next:
|
||||
pla
|
||||
pla
|
||||
|
||||
decode:
|
||||
// NOTE: output/input pointers cannot cross page boundaries.
|
||||
// that means the effective longest lengths of output/input are 256/192 bytes.
|
||||
|
||||
lda #4 // pairs remaining
|
||||
sta $04 // write pairs remaining
|
||||
|
||||
ldy #0
|
||||
lda ($02),y // load from input
|
||||
tax // stash for after branch
|
||||
|
||||
decode_xx: // decode from offset 0, unknown code length
|
||||
//tya // lda #0
|
||||
sty $05 // write data so far (nothing)
|
||||
|
||||
txa
|
||||
asl
|
||||
bcs decode_1x
|
||||
|
||||
decode_0x:
|
||||
asl
|
||||
tax
|
||||
bcs decode_01
|
||||
// fallthru to decode_00
|
||||
|
||||
decode_00:
|
||||
lda #%00000000
|
||||
bpl decode_read2 // always branch
|
||||
|
||||
decode_01:
|
||||
lda #%00000100
|
||||
bpl decode_read2 // always branch
|
||||
|
||||
decode_1x:
|
||||
asl
|
||||
tax
|
||||
bcs decode_11
|
||||
// fallthru to decode_10
|
||||
|
||||
decode_10:
|
||||
lda #%00001000
|
||||
bpl decode_read2 // always branch
|
||||
|
||||
decode_11:
|
||||
lda #%00010000
|
||||
bpl decode_read4 // always branch
|
||||
|
||||
die:
|
||||
db $F2
|
||||
|
||||
decode_nextbytew:
|
||||
jsr decode_advance
|
||||
bpl decode_xx // always branch
|
||||
|
||||
decode_write:
|
||||
ora $05
|
||||
// decode_common stuff:
|
||||
tay
|
||||
lda decode_lut0xxx,y
|
||||
ldy #0
|
||||
sta ($00),y // write to output
|
||||
inc $00 // advance output
|
||||
beq die // never branch (unless page boundary)
|
||||
dec $04 // decrement pairs remaining
|
||||
bne decode_xx // branch if we're good, otherwise...
|
||||
beq decode_nextbytew // (always) branch if we need more pairs
|
||||
|
||||
decode_read2_and_ora:
|
||||
ora $05
|
||||
|
||||
decode_read2:
|
||||
sta $05
|
||||
dec $04 // decrement pairs remaining
|
||||
beq decode_nextbyte2
|
||||
|
||||
decode_read2_again:
|
||||
// we have at least one pair left to read from X
|
||||
txa
|
||||
asl
|
||||
bcs decode_read2_1x
|
||||
|
||||
decode_read2_0x:
|
||||
asl
|
||||
tax
|
||||
bcs decode_read2_01
|
||||
|
||||
decode_read2_00:
|
||||
lda #%00000000
|
||||
bpl decode_write // always branch
|
||||
|
||||
decode_read2_01:
|
||||
lda #%00000001
|
||||
bpl decode_write // always branch
|
||||
|
||||
decode_read2_1x:
|
||||
asl
|
||||
tax
|
||||
bcs decode_read2_11
|
||||
|
||||
decode_read2_10:
|
||||
lda #%00000010
|
||||
bpl decode_write // always branch
|
||||
|
||||
decode_read2_11:
|
||||
lda #%00000011
|
||||
bpl decode_write // always branch
|
||||
|
||||
decode_read4:
|
||||
sta $05
|
||||
dec $04 // decrement pairs remaining
|
||||
beq decode_nextbyte4
|
||||
|
||||
decode_read4_again:
|
||||
// we have at least one pair left to read from X
|
||||
txa
|
||||
asl
|
||||
bcs decode_read4_1x
|
||||
|
||||
decode_read4_0x:
|
||||
asl
|
||||
tax
|
||||
bcs decode_read4_01
|
||||
|
||||
decode_read4_00:
|
||||
lda #%00000000
|
||||
bpl decode_read2_and_ora // always branch
|
||||
|
||||
decode_read4_01:
|
||||
lda #%00000100
|
||||
bpl decode_read2_and_ora // always branch
|
||||
|
||||
decode_read4_1x:
|
||||
asl
|
||||
tax
|
||||
bcs decode_read4_11
|
||||
|
||||
decode_read4_10:
|
||||
lda #%00001000
|
||||
bpl decode_read2_and_ora // always branch
|
||||
|
||||
decode_read4_11:
|
||||
lda #%00001100
|
||||
bpl decode_read2_and_ora // always branch
|
||||
|
||||
decode_nextbyte2:
|
||||
jsr decode_advance
|
||||
bpl decode_read2_again // always branch
|
||||
|
||||
decode_nextbyte4:
|
||||
jsr decode_advance
|
||||
bpl decode_read4_again // always branch
|
||||
|
||||
done:
|
||||
db $F2
|
||||
|
||||
// vim:ft=snes_bass
|
191
6502_name_codec/decode_v4.asm
Normal file
191
6502_name_codec/decode_v4.asm
Normal file
|
@ -0,0 +1,191 @@
|
|||
// included by decode.asm
|
||||
// cycles on extra-padded "Elizabeth Mary Patricia James Robert":
|
||||
// without any interleaving of instructions and LUT:
|
||||
// cycles: 7305-55=7250
|
||||
// instrs: 2665-17=2648
|
||||
// program size: 0x96 (150, 22 of which are the end-of-string comparisons)
|
||||
// with JSRs branched-over instead of branched-to-and-back:
|
||||
// cycles: 7293-55=7238
|
||||
// instrs: 2649-17=2632
|
||||
// program size: 0x92 (146, 22 of which are the end-of-string comparisons)
|
||||
// with BPL instead of CLC+BCC:
|
||||
// cycles: 7151-55=7096
|
||||
// instrs: 2578-17=2561
|
||||
// program size: 0x90 (144, 22 of which are the end-of-string comparisons)
|
||||
// with interleaved instructions and LUT:
|
||||
// cycles: 7151-55=7096
|
||||
// instrs: 2578-17=2561
|
||||
// program size: 0xC3 (note that this will *always* be 0xC3 with this method)
|
||||
|
||||
// so like, same as v3, except
|
||||
// instead of branching based on the high bits (through asl or ror),
|
||||
// we mask out the two bits being used, and ORA them in through Absolute,X mode.
|
||||
|
||||
// so that means, at these memory positions (possibly offset by X, on another page),
|
||||
// we need to occupy a byte:
|
||||
// actually, wait, in v3, the bits we branch on are always the top two.
|
||||
// so what i could do is use X, both to offset to the end of the page,
|
||||
// and also for each decoding case.
|
||||
// X = A & %11000000
|
||||
// A = %00111100[X]
|
||||
|
||||
align(0x100)
|
||||
decode_ilut: // internal look-up table
|
||||
db $00,$00,$00
|
||||
|
||||
decode_advance:
|
||||
inc $02 // advance input
|
||||
beq die // never branch (unless page boundary)
|
||||
when_to_stop()
|
||||
ldy #0
|
||||
lda ($02),y // load from input
|
||||
tax // stash for after branch
|
||||
lda #4 // pairs remaining
|
||||
sta $04 // write pairs remaining
|
||||
rts
|
||||
|
||||
decode_exit:
|
||||
pla
|
||||
pla
|
||||
rts
|
||||
|
||||
nops(decode_ilut + 0x40)
|
||||
db $04,$01,$04
|
||||
|
||||
decode_begin_next:
|
||||
pla
|
||||
pla
|
||||
|
||||
decode:
|
||||
// NOTE: output/input pointers cannot cross page boundaries.
|
||||
// that means the effective longest lengths of output/input are 256/192 bytes.
|
||||
|
||||
lda #4 // pairs remaining
|
||||
sta $04 // write pairs remaining
|
||||
|
||||
ldy #0
|
||||
lda ($02),y // load from input
|
||||
tax // stash for after branch
|
||||
|
||||
decode_xx: // decode from offset 0, unknown code length
|
||||
// two things need to be done here:
|
||||
|
||||
// 1. zp[0x05] |= {%0, %100, %1000, %10000}[A >> 6]
|
||||
txa // TODO: unnecessary?
|
||||
and #$C0
|
||||
tay
|
||||
lda decode_ilut+0,y // would be ORA, but this is our first data point
|
||||
sta $05
|
||||
|
||||
// 2. branch to decode_read4 if both bits were set, decode_read2 otherwise.
|
||||
tya
|
||||
eor #$C0
|
||||
beq decode_read4
|
||||
bne decode_read2 // always branch
|
||||
|
||||
die:
|
||||
db $F2
|
||||
|
||||
decode_nextbytew:
|
||||
jsr decode_advance
|
||||
bpl decode_xx // always branch
|
||||
|
||||
decode_write:
|
||||
lda $05 // TODO: unnecessary?
|
||||
// decode_common stuff:
|
||||
tay
|
||||
lda decode_lut0xxx,y
|
||||
ldy #0
|
||||
sta ($00),y // write to output
|
||||
inc $00 // advance output
|
||||
beq die // never branch (unless page boundary)
|
||||
txa
|
||||
asl
|
||||
asl
|
||||
tax
|
||||
dec $04 // decrement pairs remaining
|
||||
bne decode_xx // branch if we're good, otherwise...
|
||||
beq decode_nextbytew // (always) branch if we need more pairs
|
||||
|
||||
nops(decode_ilut + 0x80)
|
||||
db $08,$02,$08
|
||||
|
||||
decode_read2:
|
||||
txa
|
||||
asl
|
||||
asl
|
||||
tax
|
||||
|
||||
dec $04 // decrement pairs remaining
|
||||
bne +
|
||||
jsr decode_advance
|
||||
+;
|
||||
|
||||
decode_read2_again:
|
||||
// we have at least one pair left to read from X
|
||||
|
||||
// zp[0x05] |= {%0, %1, %10, %11}[A >> 6]
|
||||
txa // TODO: unnecessary?
|
||||
and #$C0
|
||||
tay
|
||||
lda $05
|
||||
ora decode_ilut+1,y
|
||||
sta $05
|
||||
|
||||
bpl decode_write // always branch
|
||||
|
||||
decode_read4:
|
||||
txa
|
||||
asl
|
||||
asl
|
||||
tax
|
||||
|
||||
dec $04 // decrement pairs remaining
|
||||
bne +
|
||||
jsr decode_advance
|
||||
+;
|
||||
// we have at least one pair left to read from X
|
||||
|
||||
// zp[0x05] |= {%0, %100, %1000, %1100}[A >> 6]
|
||||
txa // TODO: unnecessary?
|
||||
and #$C0
|
||||
tay
|
||||
lda $05
|
||||
ora decode_ilut+2,y
|
||||
sta $05
|
||||
|
||||
bpl decode_read2 // always branch
|
||||
|
||||
nops(decode_ilut + 0xC0)
|
||||
db $10,$03,$0C
|
||||
|
||||
done:
|
||||
db $F2
|
||||
|
||||
if 0 {
|
||||
align(0x100)
|
||||
decode_ilut: // internal look-up table
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $04,$01, $04,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $08,$02, $08,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $10,$03, $0C,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
||||
}
|
||||
|
||||
// decode_ilut[0b00000000] = 0b00000000
|
||||
// decode_ilut[0b01000000] = 0b00000100
|
||||
// decode_ilut[0b10000000] = 0b00001000
|
||||
// decode_ilut[0b11000000] = 0b00010000
|
||||
|
||||
// vim:ft=snes_bass
|
264
6502_name_codec/decode_v5.asm
Normal file
264
6502_name_codec/decode_v5.asm
Normal file
|
@ -0,0 +1,264 @@
|
|||
// included by decode.asm
|
||||
// stats on extra-padded "Elizabeth Mary Patricia James Robert":
|
||||
// cycles: 7403-55=7348
|
||||
// instrs: 2702-17=2685
|
||||
// program size: 0xA3
|
||||
// with reorganization: (first and second program parts swapped, no need for long jump)
|
||||
// cycles: 7391-55=7336
|
||||
// instrs: 2698-17=2681
|
||||
// program size: 0xA0
|
||||
// with inlined decode_nextbytew:
|
||||
// 7358 2687 0xA0
|
||||
// with rearranged decode_write:
|
||||
// exiting after 2678 instructions and 7351 cycles. 0xA0
|
||||
// with removed txa from decode_xx:
|
||||
// exiting after 2629 instructions and 7253 cycles. 0xA0
|
||||
// with more txa/tax simplification:
|
||||
// exiting after 2613 instructions and 7221 cycles. 0x9F
|
||||
// without the lda $05 in decode_write:
|
||||
// exiting after 2567 instructions and 7083 cycles. 0x9F
|
||||
// without the sta $05 in decode_read2_again:
|
||||
// exiting after 2521 instructions and 6945 cycles. 0x9F
|
||||
// with one of the `txa; asl; asl; tax` blocks rearranged in branches:
|
||||
// exiting after 2483 instructions and 6879 cycles. 0x9F
|
||||
// with the other one too: (the one from decode_read4)
|
||||
// exiting after 2461 instructions and 6841 cycles. 0xA1
|
||||
// with redundant taxes removed after those rearrangements:
|
||||
// exiting after 2406 instructions and 6731 cycles. 0xA0
|
||||
// with merged decode_read2 and decode_read4:
|
||||
// exiting after 2381 instructions and 6706 cycles. 0x86
|
||||
// cycles: 6706-55=6651
|
||||
// instrs: 2381-17=2364
|
||||
// program size: 0x86 (134, 22 of which are the end-of-string comparisons)
|
||||
|
||||
constant decode_alt(1)
|
||||
constant very_alt(0) // see decode_v6.asm for an expansion of this idea
|
||||
|
||||
align(0x100)
|
||||
decode_ilut: // internal look-up table
|
||||
if very_alt {
|
||||
db $00
|
||||
} else {
|
||||
db $00,$00,$00
|
||||
}
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
decode_write: // emit one decoded character, then continue with the next pair
|
||||
// decode_common stuff:
|
||||
tay // A holds the assembled code; use it as the table index
|
||||
lda decode_lut0xxx,y // character table; not defined in this file
|
||||
ldy #0
|
||||
sta ($00),y // write to output
|
||||
inc $00 // advance output (low byte of the pointer only)
|
||||
beq die // never branch (unless page boundary)
|
||||
dec $04 // decrement pairs remaining
|
||||
beq + // branch if we need more pairs
|
||||
|
||||
txa // X = input byte with the pair just consumed in its top two bits
|
||||
asl
|
||||
asl // next pair is now in bits 7-6, as decode_xx expects in A
|
||||
bvc decode_xx // always branch, provided V is clear here (asl leaves V alone) -- NOTE(review): confirm nothing upstream sets V
|
||||
|
||||
+;
|
||||
jsr decode_advance
|
||||
bpl decode_xx // always branch: decode_advance returns A = *input with N clear (from its ldx #4)
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
if decode_alt {
|
||||
decode_exit:
|
||||
pla
|
||||
pla
|
||||
rts
|
||||
}
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
nops(decode_ilut + 0x20)
|
||||
if very_alt {
|
||||
db $04
|
||||
} else {
|
||||
db $04,$01,$04
|
||||
}
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
die:
|
||||
db $F2
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
decode_begin_next:
|
||||
pla
|
||||
pla
|
||||
|
||||
decode:
|
||||
// NOTE: output/input pointers cannot cross page boundaries.
|
||||
// that means the effective longest lengths of output/input are 256/192 bytes.
|
||||
|
||||
lda #4 // pairs remaining
|
||||
sta $04 // write pairs remaining
|
||||
|
||||
ldy #0
|
||||
lda ($02),y // load from input
|
||||
|
||||
decode_xx: // decode from offset 0, unknown code length
|
||||
// NOTE: Y is always 0 here, if that helps at all.
|
||||
tax // stash for after branch
|
||||
|
||||
// two things need to be done here:
|
||||
|
||||
// 1. zp[0x05] |= {%0, %100, %1000, %10000}[A >> 6]
|
||||
and #%11000000
|
||||
lsr
|
||||
tay
|
||||
lda decode_ilut+0,y // would be ORA, but this is our first data point
|
||||
|
||||
if decode_alt {
|
||||
bvc decode_read_either // always branch
|
||||
|
||||
decode_read4_after:
|
||||
if very_alt {
|
||||
and #%00000011
|
||||
asl
|
||||
asl
|
||||
ora #%00010000
|
||||
bpl decode_read_either // always branch
|
||||
} else {
|
||||
and #%01111111
|
||||
// read4
|
||||
ora decode_ilut+2,y
|
||||
bpl decode_read_either // always branch
|
||||
}
|
||||
|
||||
} else {
|
||||
sta $05 // still part of 1.
|
||||
|
||||
// 2. branch to decode_read4 if both bits were set, decode_read2 otherwise.
|
||||
tya
|
||||
eor #$60
|
||||
beq decode_read4
|
||||
bne decode_read2 // always branch
|
||||
}
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
nops(decode_ilut + 0x40)
|
||||
if very_alt {
|
||||
db $08
|
||||
} else {
|
||||
db $08,$02,$08
|
||||
}
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
macro decode_read_common() { // common between the subroutines, not WRT probability
|
||||
dec $04 // decrement pairs remaining
|
||||
bne +
|
||||
jsr decode_advance
|
||||
bpl ++ // always branch
|
||||
|
||||
// we have at least one pair left to read from X
|
||||
|
||||
+;
|
||||
txa
|
||||
asl
|
||||
asl
|
||||
|
||||
+; // decode_read2_again or decode_read4_again
|
||||
tax // TODO: unnecessary?
|
||||
if very_alt {
|
||||
// zp[0x05] |= {%0, %1, %10, %11}[A >> 6]
|
||||
rol
|
||||
rol
|
||||
rol
|
||||
and #%00000011
|
||||
ora $05
|
||||
// then elsewhere, ora $05, instead of the lda $05 normally
|
||||
// should be slightly faster for the read2 case
|
||||
} else {
|
||||
and #%11000000
|
||||
lsr
|
||||
tay
|
||||
lda $05
|
||||
}
|
||||
}
|
||||
|
||||
if decode_alt {
|
||||
decode_read_either:
|
||||
sta $05 // still part of 1.
|
||||
|
||||
decode_read_common()
|
||||
bmi decode_read4_after
|
||||
|
||||
// read2
|
||||
if very_alt {
|
||||
;
|
||||
} else {
|
||||
ora decode_ilut+1,y
|
||||
}
|
||||
bpl decode_write // always branch
|
||||
|
||||
} else {
|
||||
decode_read2:
|
||||
decode_read_common()
|
||||
ora decode_ilut+1,y
|
||||
bpl decode_write // always branch
|
||||
}
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
if decode_alt {
|
||||
; // nothing else to add
|
||||
} else {
|
||||
decode_exit:
|
||||
pla
|
||||
pla
|
||||
rts
|
||||
}
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
nops(decode_ilut + 0x60)
|
||||
if decode_alt {
|
||||
if very_alt {
|
||||
db $90
|
||||
} else {
|
||||
db $90,$03,$0C
|
||||
}
|
||||
} else {
|
||||
db $10,$03,$0C
|
||||
}
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
if decode_alt {
|
||||
; // nothing else to do
|
||||
|
||||
} else {
|
||||
decode_read4:
|
||||
decode_read_common()
|
||||
ora decode_ilut+2,y
|
||||
sta $05
|
||||
bpl decode_read2 // always branch
|
||||
}
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
decode_advance:
|
||||
inc $02 // advance input
|
||||
beq die // never branch (unless page boundary)
|
||||
when_to_stop()
|
||||
ldy #0
|
||||
lda ($02),y // load from input
|
||||
ldx #4 // pairs remaining
|
||||
stx $04 // write pairs remaining
|
||||
rts // A = *input; X = 4; Y = 0
|
||||
|
||||
// === BOUNDARY ===
|
||||
|
||||
done:
|
||||
db $F2 // === BOUNDARY ===
|
||||
|
||||
// vim:ft=snes_bass
|
147
6502_name_codec/decode_v6.asm
Normal file
147
6502_name_codec/decode_v6.asm
Normal file
|
@ -0,0 +1,147 @@
|
|||
// included by decode.asm
|
||||
// stats on extra-padded "Elizabeth Mary Patricia James Robert":
|
||||
// exiting after 2456 instructions and 6714 cycles. 0x84
|
||||
// with a bunch of optimization: (yeah i forgot)
|
||||
// exiting after 2483 instructions and 6599 cycles. 0x74
|
||||
// cycles: 6599-55=6544
|
||||
// instrs: 2483-17=2466
|
||||
// program size: 0x74 (116, 22 of which are the end-of-string comparisons)
|
||||
// with decode_read4_after moved further down:
|
||||
// exiting after 2459 instructions and 6527 cycles. 0x74
|
||||
// cycles: 6527-55=6472
|
||||
// instrs: 2459-17=2442
|
||||
// program size: 0x74 (116, 22 of which are the end-of-string comparisons)
|
||||
|
||||
constant decode_internalize(1)
|
||||
|
||||
if decode_internalize {
|
||||
decode_ilut:
|
||||
db "ETAOINSHRDLU????CMFPGWYBVKXJQZ. "
|
||||
}
|
||||
|
||||
decode_begin_next:
|
||||
pla
|
||||
pla
|
||||
|
||||
decode:
|
||||
// NOTE: output/input pointers cannot cross page boundaries.
|
||||
// that means the effective longest lengths of output/input are 256/192 bytes.
|
||||
|
||||
lda #4 // pairs remaining
|
||||
sta $04 // write pairs remaining
|
||||
|
||||
ldy #0
|
||||
lda ($02),y // load from input
|
||||
|
||||
decode_xx: // start a new code: A = input byte with the first pair in bits 7-6
|
||||
// NOTE: Y is always 0 here, if that helps at all.
|
||||
tax // stash for after branch
|
||||
|
||||
and #%11000000 // keep only the first pair
|
||||
lsr
|
||||
lsr
|
||||
lsr // pair now sits in bits 4-3: $00, $08, $10 or $18
|
||||
cmp #%00011000 // C = 1 only when both bits of the pair are set
|
||||
ror // A = $00, $04, $08 or $8C: pair in bits 3-2, bit 7 (from C) marks the both-bits-set case
|
||||
//bcc decode_read_either // always branch
|
||||
|
||||
decode_read_either:
|
||||
sta $05
|
||||
|
||||
dec $04 // decrement pairs remaining
|
||||
bne +
|
||||
jsr decode_advance
|
||||
bpl decode_read_again // always branch
|
||||
|
||||
// we have at least one pair left to read from X
|
||||
|
||||
+;
|
||||
txa
|
||||
asl
|
||||
asl
|
||||
|
||||
decode_read_again: // A = input byte with the pair to read in bits 7-6; merge it into the partial code at $05
|
||||
tax // keep the shifted byte so later pairs can be shifted up from it
|
||||
|
||||
rol
|
||||
rol
|
||||
rol // rotate through carry: old bits 7-6 land in bits 1-0, the stale carry in bit 2
|
||||
and #%00000011 // keep just the pair (this also discards the stale carry bit)
|
||||
ora $05 // combine with the partial code saved by decode_read_either
|
||||
|
||||
bmi decode_read4_after // bit 7 set means $05 carried the long-code marker from decode_xx
|
||||
//cmp #%00011100
|
||||
//bcs decode_read4_after
|
||||
|
||||
// read2, nothing left to do, so just write it
|
||||
//bpl decode_write // always branch
|
||||
|
||||
decode_write:
|
||||
// decode_common stuff:
|
||||
tay
|
||||
if decode_internalize {
|
||||
lda decode_ilut,y
|
||||
} else {
|
||||
lda decode_lut0xxx,y
|
||||
}
|
||||
ldy #0
|
||||
sta ($00),y // write to output
|
||||
inc $00 // advance output
|
||||
beq die // never branch (unless page boundary)
|
||||
dec $04 // decrement pairs remaining
|
||||
beq + // branch if we need more pairs
|
||||
|
||||
txa
|
||||
asl
|
||||
asl
|
||||
bvc decode_xx // always branch (FIXME: might depend on output address)
|
||||
|
||||
+;
|
||||
if 1 {
|
||||
jsr decode_advance
|
||||
bpl decode_xx // always branch
|
||||
} else {
|
||||
// 1 cycle(?) slower and 1 byte larger
|
||||
lda #(decode_xx - 1) >> 8
|
||||
pha
|
||||
lda #(decode_xx - 1) & 0xFF
|
||||
pha
|
||||
// fallthru to decode_advance
|
||||
}
|
||||
|
||||
decode_read4_after: // long code: A = marker bits | second pair; rebuild the partial code from the second pair
|
||||
and #%00000011 // drop the marker and the first pair, keep the pair just read
|
||||
asl
|
||||
asl // move that pair up to bits 3-2
|
||||
ora #%00010000 // set bit 4: long codes index entries 16-31 of the table
|
||||
bpl decode_read_either // always branch: A is at most $1C here, so N is clear
|
||||
|
||||
decode_advance:
|
||||
inc $02 // advance input
|
||||
beq die // never branch (unless page boundary)
|
||||
when_to_stop()
|
||||
ldy #0
|
||||
lda ($02),y // load from input
|
||||
ldx #4 // pairs remaining
|
||||
stx $04 // write pairs remaining
|
||||
rts // A = *input; X = 4; Y = 0
|
||||
|
||||
decode_exit:
|
||||
if 1 {
|
||||
// 4+4=8 cycles, 2 bytes:
|
||||
pla
|
||||
pla
|
||||
} else {
|
||||
// 3+2=5 cycles, 3 bytes, also penalty for having to set up $06 in the first place:
|
||||
ldx $06
|
||||
txs
|
||||
}
|
||||
rts
|
||||
|
||||
die:
|
||||
db $F2
|
||||
|
||||
done:
|
||||
db $D2
|
||||
|
||||
// vim:ft=snes_bass
|
Loading…
Reference in a new issue