148 lines
2.7 KiB
NASM
148 lines
2.7 KiB
NASM
|
// included by decode.asm
|
||
|
// stats on extra-padded "Elizabeth Mary Patricia James Robert":
|
||
|
// exiting after 2456 instructions and 6714 cycles. 0x84
|
||
|
// with a bunch of optimizations: (yeah, i forgot which)
|
||
|
// exiting after 2483 instructions and 6599 cycles. 0x74
|
||
|
// cycles: 6599-55=6544
|
||
|
// instrs: 2483-17=2466
|
||
|
// program size: 0x74 (116, 22 of which are the end-of-string comparisons)
|
||
|
// with decode_read4_after moved further down:
|
||
|
// exiting after 2459 instructions and 6527 cycles. 0x74
|
||
|
// cycles: 6527-55=6472
|
||
|
// instrs: 2459-17=2442
|
||
|
// program size: 0x74 (116, 22 of which are the end-of-string comparisons)
|
||
|
|
||
|
// build-time switch: when non-zero, the character table is assembled right
// here as decode_ilut. when zero, decode_write reads decode_lut0xxx instead,
// which then has to be provided by whoever includes this file.
constant decode_internalize(1)

if decode_internalize {
decode_ilut:
    // 32 entries, indexed by the value decode_write ends up with in Y.
    // entries 12-15 ("????") are never looked up: a first pair of %11
    // always takes the decode_read4_after route, which yields 16-31.
    db "ETAOINSHRDLU????CMFPGWYBVKXJQZ. "
}
|
||
|
|
||
|
decode_begin_next:
    // alternate entry: throw away two bytes from the stack, then fall
    // straight through into decode.
    // NOTE(review): presumably this discards the return address left by
    // "jsr decode_advance" when when_to_stop() jumps here -- confirm
    // against the macro, which is defined elsewhere.
    pla
    pla
|
||
|
|
||
|
decode:
    // main entry point.
    // zero page: pointer at $02 = input, $04 = pairs left in the current
    // input byte.
    // NOTE: output/input pointers cannot cross page boundaries.
    // that means the effective longest lengths of output/input are 256/192 bytes.

    ldy #0

    lda #4 // pairs remaining (four 2-bit pairs per input byte)
    sta $04 // write pairs remaining

    lda ($02),y // load from input
    // falls through into decode_xx with A = first input byte, Y = 0
|
||
|
|
||
|
decode_xx:
    // start of a new symbol. A = input bits, next unread pair in bits 7-6.
    // NOTE: Y is always 0 here, if that helps at all.
    tax // stash for after branch

    // isolate the pair and slide it down to bits 4-3
    and #%11000000
    lsr
    lsr
    lsr
    // carry = 1 only when the pair is %11 (A == %00011000)
    cmp #%00011000
    // pair ends up in bits 3-2, and the "pair was %11" carry lands in
    // bit 7, which decode_read_again later tests with bmi
    ror
    //bcc decode_read_either // always branch
    // (no branch needed: decode_read_either is the next label)
|
||
|
|
||
|
decode_read_either:
    // A = partial table index built from the pair(s) read so far.
    // park it in $05, then get the next pair into bits 7-6 of A.
    sta $05

    dec $04 // decrement pairs remaining
    bne +
    // current input byte is used up: decode_advance returns the next
    // byte in A and resets $04
    jsr decode_advance
    bpl decode_read_again // always branch

    // we have at least one pair left to read from X

+;
    // drop the pair that was just consumed
    txa
    asl
    asl
    // falls through into decode_read_again
|
||
|
|
||
|
decode_read_again:
    // A = input bits with the pair to read in bits 7-6; $05 = partial index.
    tax // keep the unshifted bits; later code drops this pair from X

    // three rotates through carry bring bits 7-6 around into bits 1-0.
    // the stale carry ends up in bit 2 and is masked off below.
    rol
    rol
    rol
    and #%00000011
    ora $05 // merge with the partial index

    // bit 7 comes from $05: set only when the symbol's first pair was %11,
    // i.e. one more pair is needed
    bmi decode_read4_after
    //cmp #%00011100
    //bcs decode_read4_after

    // read2, nothing left to do, so just write it
    //bpl decode_write // always branch
|
||
|
|
||
|
decode_write:
    // emit one decoded character, then carry on with the next symbol.
    // on entry: A = table index 0-31 (bit 7 clear), X = input bits with
    // the pair that was just consumed still sitting in bits 7-6.
    tay
if decode_internalize {
    lda decode_ilut,y
} else {
    lda decode_lut0xxx,y
}
    ldy #0
    sta ($00),y // write to output
    inc $00 // advance output
    beq die // never branch (unless page boundary)

    // drop the consumed pair from the input bits *before* counting it off.
    // DEC does not touch A, so A is already what decode_xx expects, and the
    // Z flag from DEC is a branch condition this code actually controls.
    // (looping back with bvc is not safe here: nothing in this routine
    // defines V, and with V set execution would fall into decode_advance
    // and skip the rest of the current input byte.)
    txa
    asl
    asl
    dec $04 // decrement pairs remaining
    bne decode_xx // branch while this input byte still has pairs

    // out of pairs: fetch the next input byte (this reloads A)
if 1 {
    jsr decode_advance
    bpl decode_xx // always branch (decode_advance's last flag-setter is ldx #4)
} else {
    // 1 cycle(?) slower and 1 byte larger
    // NOTE: only valid if decode_advance is placed directly after this block.
    lda #(decode_xx - 1) >> 8
    pha
    lda #(decode_xx - 1) & 0xFF
    pha
    // fallthru to decode_advance
}
|
||
|
|
||
|
decode_read4_after:
    // reached when the symbol's first pair was %11: the pair just read
    // becomes bits 3-2 of the index, bit 4 is forced on, and one more
    // pair is fetched via decode_read_either.
    and #%00000011 // keep only the newest pair (also clears the bit 7 marker)
    asl
    asl
    ora #%00010000 // index range 16-31
    bpl decode_read_either // always branch
|
||
|
|
||
|
decode_advance:
    // subroutine: step to the next input byte and reset the pair counter.
    // returns A = *input; X = 4; Y = 0, with N clear (callers follow the
    // jsr with an "always" bpl, so ldx #4 must stay the last flag-setter).
    inc $02 // advance input
    beq die // never branch (unless page boundary)

    // end-of-input check; macro defined elsewhere.
    // NOTE(review): presumably this is what jumps to decode_exit /
    // decode_begin_next / done -- confirm against its definition.
    when_to_stop()

    ldy #0
    lda ($02),y // load from input
    ldx #4 // pairs remaining
    stx $04 // write pairs remaining
    rts
|
||
|
|
||
|
decode_exit:
    // leave the decoder: drop two bytes from the stack, then rts.
    // NOTE(review): presumably the two bytes are the return address of
    // "jsr decode_advance", so the rts returns to decode's own caller --
    // confirm against when_to_stop().
if 1 {
    // 4+4=8 cycles, 2 bytes:
    pla
    pla
} else {
    // 3+2=5 cycles, 3 bytes, also penalty for having to set up $06 in the first place:
    ldx $06
    txs
}
    rts
|
||
|
|
||
|
die:
    // target of the "page boundary" beq checks in decode_write and
    // decode_advance. a single raw opcode byte, not a normal instruction.
    // NOTE(review): presumably $F2 is an opcode the test emulator treats as
    // an abnormal stop -- confirm against the harness.
    db $F2

done:
    // not referenced in this file; presumably the normal-termination trap
    // used by when_to_stop() -- confirm.
    db $D2
|
||
|
|
||
|
// vim:ft=snes_bass
|