191 lines
5 KiB
NASM
191 lines
5 KiB
NASM
// included by decode.asm
|
|
// cycles on extra-padded "Elizabeth Mary Patricia James Robert":
|
|
// without any interleaving of instructions and LUT:
|
|
// cycles: 7305-55=7250
|
|
// instrs: 2665-17=2648
|
|
// program size: 0x96 (150, 22 of which are the end-of-string comparisons)
|
|
// with JSRs branched-over instead of branched-to-and-back:
|
|
// cycles: 7293-55=7238
|
|
// instrs: 2649-17=2632
|
|
// program size: 0x92 (146, 22 of which are the end-of-string comparisons)
|
|
// with BPL instead of CLC+BCC:
|
|
// cycles: 7151-55=7096
|
|
// instrs: 2578-17=2561
|
|
// program size: 0x90 (144, 22 of which are the end-of-string comparisons)
|
|
// with interleaved instructions and LUT:
|
|
// cycles: 7151-55=7096
|
|
// instrs: 2578-17=2561
|
|
// program size: 0xC3 (note that this will *always* be 0xC3 with this method)
|
|
|
|
// so like, same as v3, except
|
|
// instead of branching based on the high bits (through asl or ror),
|
|
// we mask out the two bits being used, and ORA in a table entry indexed by them (the code below uses Absolute,Y mode, since X holds the input byte).
|
|
|
|
// so that means, at these memory positions (possibly offset by X, on another page),
|
|
// we need to occupy a byte:
|
|
// actually, wait, in v3, the bits we branch on are always the top two.
|
|
// so what i could do is use X, both to offset to the end of the page,
|
|
// and also for each decoding case.
|
|
// X = A & %11000000
|
|
// A = %00111100[X]
|
|
|
|
// decode_ilut: look-up table interleaved with the decoder's own code.
// It is only ever read as decode_ilut+{0,1,2},y with Y = (byte & $C0), so only
// the three bytes at offsets $00, $40, $80 and $C0 are table data; everything
// in between is instructions. This first row is the entry for top bits %00.
// NOTE(review): align() is defined elsewhere; presumably it aligns to a
// 256-byte page so the low byte of decode_ilut is $00 -- confirm.
align(0x100)
|
|
decode_ilut: // internal look-up table
|
|
db $00,$00,$00
|
|
|
|
// decode_advance: subroutine (called with jsr) that steps to the next input byte.
// In:  zp $02 = low byte of the input pointer read through ($02),y.
// Out: X = newly loaded input byte, A = 4, Y = 0, zp $04 = 4 (pairs remaining).
//      The last flag-setting instruction is "lda #4", so N and Z are clear on
//      return; callers use a following BPL as an unconditional branch.
decode_advance:
|
|
inc $02 // advance input
|
|
beq die // never branch (unless page boundary)
|
|
// when_to_stop() is a macro defined outside this file.
// NOTE(review): presumably the end-of-string comparison mentioned in the
// header notes, and presumably the user of decode_exit/decode_begin_next -- confirm.
when_to_stop()
|
|
ldy #0
|
|
lda ($02),y // load from input
|
|
tax // stash for after branch
|
|
lda #4 // pairs remaining
|
|
sta $04 // write pairs remaining
|
|
rts
|
|
|
|
// decode_exit: pulls two bytes off the stack (the size of one jsr return
// address) and then returns.
// NOTE(review): nothing in this file branches here; presumably when_to_stop()
// jumps here from inside decode_advance so that the rts returns to the caller
// of decode instead of back into the decoder -- confirm.
decode_exit:
|
|
pla
|
|
pla
|
|
rts
|
|
|
|
// Table row for top bits %01 (read as decode_ilut+{0,1,2},y with Y = $40).
// NOTE(review): nops() is defined elsewhere; presumably it pads with NOPs up
// to the given address so this row lands exactly at decode_ilut + $40 -- confirm.
nops(decode_ilut + 0x40)
|
|
db $04,$01,$04
|
|
|
|
// decode_begin_next: pulls two bytes off the stack (one jsr return address)
// and falls through into decode.
// NOTE(review): not referenced in this file; presumably entered from
// when_to_stop() to restart decoding -- confirm.
decode_begin_next:
|
|
pla
|
|
pla
|
|
|
|
// decode: entry point. Resets the pair counter ($04) to 4, loads the first
// input byte through ($02),y into A and X, then falls through into decode_xx.
// Zero-page usage in this file: $00 = output pointer low byte, $02 = input
// pointer low byte, $04 = 2-bit pairs left in the current byte, $05 = code
// index being assembled.
decode:
|
|
// NOTE: output/input pointers cannot cross page boundaries.
|
|
// that means the effective longest lengths of output/input are 256/192 bytes.
|
|
|
|
lda #4 // pairs remaining
|
|
sta $04 // write pairs remaining
|
|
|
|
ldy #0
|
|
lda ($02),y // load from input
|
|
tax // stash for after branch
|
|
|
|
// decode_xx: start of a new code. X holds the current input byte with the
// next unread pair in its top two bits. Stores the table's column-0 value for
// that pair into $05, then dispatches on whether the pair was %11.
decode_xx: // decode from offset 0, unknown code length
|
|
// two things need to be done here:
|
|
|
|
// 1. zp[0x05] = {%0, %100, %1000, %10000}[A >> 6]
|
|
// The txa is required when arriving from decode_nextbytew (decode_advance
// returns with A = 4); on the fall-through from decode and on the branch from
// decode_write, A already equals X.
txa // TODO: unnecessary?
|
|
and #$C0
|
|
tay
|
|
lda decode_ilut+0,y // would be ORA, but this is our first data point
|
|
sta $05
|
|
|
|
// 2. branch to decode_read4 if both bits were set, decode_read2 otherwise.
|
|
tya
|
|
// Y is one of $00/$40/$80/$C0, so the EOR result is zero only for %11.
eor #$C0
|
|
beq decode_read4
|
|
bne decode_read2 // always branch
|
|
|
|
// die: target of the "beq die" checks taken when the low byte of the input
// ($02) or output ($00) pointer wraps to zero.
// NOTE(review): $F2 is emitted as a raw opcode byte; presumably a deliberate
// halt/trap on the target CPU or emulator -- confirm.
die:
|
|
db $F2
|
|
|
|
// decode_nextbytew: reached from decode_write when the pair counter hit zero.
// Fetches the next input byte and starts a new code at decode_xx.
decode_nextbytew:
|
|
jsr decode_advance
|
|
// decode_advance returns right after "lda #4", so N is clear here.
bpl decode_xx // always branch
|
|
|
|
// decode_write: emits one output byte for the finished code index in $05,
// then moves on to the next pair of the input byte held in X.
decode_write:
|
|
// The only branch to decode_write in this file (from decode_read2_again)
// arrives with A already equal to $05.
// NOTE(review): other entry points may exist in the including file -- confirm
// before removing this load.
lda $05 // TODO: unnecessary?
|
|
// decode_common stuff:
|
|
tay
|
|
// decode_lut0xxx is defined outside this file; indexed by the code in $05.
lda decode_lut0xxx,y
|
|
ldy #0
|
|
sta ($00),y // write to output
|
|
inc $00 // advance output
|
|
beq die // never branch (unless page boundary)
|
|
// Shift the input byte left by two so the next pair sits in the top two bits.
txa
|
|
asl
|
|
asl
|
|
tax
|
|
dec $04 // decrement pairs remaining
|
|
bne decode_xx // branch if we're good, otherwise...
|
|
beq decode_nextbytew // (always) branch if we need more pairs
|
|
|
|
// Table row for top bits %10 (read as decode_ilut+{0,1,2},y with Y = $80).
// NOTE(review): nops() is defined elsewhere; presumably it pads up to the
// given address so this row lands exactly at decode_ilut + $80 -- confirm.
nops(decode_ilut + 0x80)
|
|
db $08,$02,$08
|
|
|
|
// decode_read2: consume the pair just examined (shift X left by two), fetch a
// new input byte if the current one is exhausted, then OR the next pair's
// column-1 table value into $05 and go write the output byte.
decode_read2:
|
|
txa
|
|
asl
|
|
asl
|
|
tax
|
|
|
|
dec $04 // decrement pairs remaining
|
|
// Skip the fetch while pairs remain in the current byte.
bne +
|
|
jsr decode_advance
|
|
+;
|
|
|
|
decode_read2_again:
|
|
// we have at least one pair left to read from X
|
|
|
|
// zp[0x05] |= {%0, %1, %10, %11}[A >> 6]
|
|
// The txa is required after the jsr decode_advance path (A = 4 there); when
// the "bne +" was taken, A still equals X.
txa // TODO: unnecessary?
|
|
and #$C0
|
|
tay
|
|
lda $05
|
|
ora decode_ilut+1,y
|
|
sta $05
|
|
|
|
// A holds the new $05 value; every value the visible paths can produce is at
// most $1F, so bit 7 is clear and BPL is taken.
bpl decode_write // always branch
|
|
|
|
// decode_read4: taken from decode_xx when the first pair was %11 ($05 = $10).
// Consumes that pair, fetches a new input byte if needed, ORs the next pair's
// column-2 table value into $05, then continues in decode_read2 for the final
// pair.
decode_read4:
|
|
txa
|
|
asl
|
|
asl
|
|
tax
|
|
|
|
dec $04 // decrement pairs remaining
|
|
// Skip the fetch while pairs remain in the current byte.
bne +
|
|
jsr decode_advance
|
|
+;
|
|
// we have at least one pair left to read from X
|
|
|
|
// zp[0x05] |= {%0, %100, %1000, %1100}[A >> 6]
|
|
// The txa is required after the jsr decode_advance path (A = 4 there); when
// the "bne +" was taken, A still equals X.
txa // TODO: unnecessary?
|
|
and #$C0
|
|
tay
|
|
lda $05
|
|
ora decode_ilut+2,y
|
|
sta $05
|
|
|
|
// A holds the new $05 value ($10 OR'd with at most $0C), so bit 7 is clear.
bpl decode_read2 // always branch
|
|
|
|
// Table row for top bits %11 (read as decode_ilut+{0,1,2},y with Y = $C0).
// NOTE(review): nops() is defined elsewhere; presumably it pads up to the
// given address so this row lands exactly at decode_ilut + $C0 -- confirm.
nops(decode_ilut + 0xC0)
|
|
db $10,$03,$0C
|
|
|
|
// done: a single raw $F2 byte directly after the last table row.
// NOTE(review): not referenced in this file; presumably a halt/trap marker
// like "die" -- confirm.
done:
|
|
db $F2
|
|
|
|
// Disabled ("if 0") stand-alone version of the table: 16 rows of 16 bytes,
// non-zero only in the first three bytes of the rows at offsets $40, $80 and
// $C0. Those values match the rows interleaved with the code above. It reuses
// the label decode_ilut, so it cannot be enabled alongside the live table.
if 0 {
|
|
align(0x100)
|
|
decode_ilut: // internal look-up table
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
// offset $40: top bits %01
db $04,$01, $04,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
// offset $80: top bits %10
db $08,$02, $08,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
// offset $C0: top bits %11
db $10,$03, $0C,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
db $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00, $00,$00
|
|
}
|
|
|
|
// decode_ilut[0b00000000] = 0b00000000
|
|
// decode_ilut[0b01000000] = 0b00000100
|
|
// decode_ilut[0b10000000] = 0b00001000
|
|
// decode_ilut[0b11000000] = 0b00010000
|
|
|
|
// vim:ft=snes_bass
|