backyard/6502_name_codec/decode_v5.asm

265 lines
5.3 KiB
NASM

// included by decode.asm
// stats on extra-padded "Elizabeth Mary Patricia James Robert":
// cycles: 7403-55=7348
// instrs: 2702-17=2685
// program size: 0xA3
// with reorganization: (first and second program parts swapped, no need for long jump)
// cycles: 7391-55=7336
// instrs: 2698-17=2681
// program size: 0xA0
// with inlined decode_nextbytew:
// 7358 2687 0xA0
// with rearranged decode_write:
// exiting after 2678 instructions and 7351 cycles. 0xA0
// with removed txa from decode_xx:
// exiting after 2629 instructions and 7253 cycles. 0xA0
// with more txa/tax simplification:
// exiting after 2613 instructions and 7221 cycles. 0x9F
// without the lda $05 in decode_write:
// exiting after 2567 instructions and 7083 cycles. 0x9F
// without the sta $05 in decode_read2_again:
// exiting after 2521 instructions and 6945 cycles. 0x9F
// with one of the `txa; asl; asl; tax` blocks rearranged in branches:
// exiting after 2483 instructions and 6879 cycles. 0x9F
// with the other one too: (the one from decode_read4)
// exiting after 2461 instructions and 6841 cycles. 0xA1
// with redundant taxes removed after those rearrangements:
// exiting after 2406 instructions and 6731 cycles. 0xA0
// with merged decode_read2 and decode_read4:
// exiting after 2381 instructions and 6706 cycles. 0x86
// cycles: 6706-55=6651
// instrs: 2381-17=2364
// program size: 0x86 (134, 22 of which are the end-of-string comparisons)
// Build-time switches for this file:
//   decode_alt = 1 selects the merged read path (decode_read_either) below.
//   very_alt   = 0 keeps the one-byte-per-row table experiment disabled.
constant decode_alt(1)
constant very_alt(0) // see decode_v6.asm for an expansion of this idea
// NOTE(review): align(0x100) presumably starts the table on a 256-byte page -- confirm.
align(0x100)
// decode_ilut is indexed with Y = (A & %11000000) >> 1, i.e. Y is one of
// $00, $20, $40, $60 (see decode_xx). Each row holds up to three columns,
// read as decode_ilut+0,y / decode_ilut+1,y / decode_ilut+2,y.
// Only the first bytes of each $20-byte row are table data; the rest of each
// row is occupied by code, so the size of the code between rows matters.
// This is row 0 (top two bits = %00).
decode_ilut: // internal look-up table
if very_alt {
db $00
} else {
db $00,$00,$00
}
// === BOUNDARY ===
// decode_write: emit one decoded byte, then continue with the next code.
// In:  A = index into decode_lut0xxx (defined outside this file section)
//      X = current input byte, already shifted by the pairs consumed so far
// Uses zero page: ($00) output pointer, $04 pairs remaining in the input byte.
decode_write:
// decode_common stuff:
tay
lda decode_lut0xxx,y
ldy #0
sta ($00),y // write to output
inc $00 // advance output
beq die // never branch (unless page boundary)
dec $04 // decrement pairs remaining
beq + // branch if we need more pairs
// pairs remain in X: move the next pair into bits 7..6 of A
txa
asl
asl
// relies on V being clear; no instruction in this block modifies V.
// NOTE(review): V is assumed clear on entry to the decoder -- confirm with the caller.
bvc decode_xx // always branch
+;
// input byte exhausted: decode_advance returns A = next byte, X = 4, Y = 0.
// Its last flag-setting instruction is ldx #4, so N is clear on return.
jsr decode_advance
bpl decode_xx // always branch
// === BOUNDARY ===
// decode_exit (decode_alt placement): drop two bytes from the stack (one
// return address) and return to the next caller up.
// NOTE(review): presumably jumped to from when_to_stop() inside decode_advance,
// so the discarded address is the jsr decode_advance return -- confirm.
if decode_alt {
decode_exit:
pla
pla
rts
}
// === BOUNDARY ===
// decode_ilut row for Y = $20 (top two bits = %01).
// NOTE(review): nops() is defined elsewhere; presumably it pads up to the
// given address so this row lands exactly at decode_ilut + $20 -- confirm.
nops(decode_ilut + 0x20)
if very_alt {
db $04
} else {
db $04,$01,$04
}
// === BOUNDARY ===
// die: target of the "pointer low byte wrapped to zero" branches in
// decode_write and decode_advance. Emits a raw $F2 byte to be executed.
// NOTE(review): $F2 is an undocumented halting (JAM/KIL) opcode on NMOS 6502;
// presumably used here as a deliberate trap -- confirm for the target CPU/emulator.
die:
db $F2
// === BOUNDARY ===
// decode_begin_next: discard two bytes from the stack (one return address),
// then fall through into decode to start again from the current input pointer.
// NOTE(review): not referenced in this file section; presumably a target of
// when_to_stop() reached from inside a jsr decode_advance -- confirm.
decode_begin_next:
pla
pla
// decode: entry point. Loads the first input byte and falls through to decode_xx.
// Zero page: ($00) = output pointer, ($02) = input pointer,
//            $04 = pairs (2-bit groups) remaining in the current input byte.
// Out (to decode_xx): A = first input byte, Y = 0.
decode:
// NOTE: output/input pointers cannot cross page boundaries.
// that means the effective longest lengths of output/input are 256/192 bytes.
lda #4 // pairs remaining
sta $04 // write pairs remaining
ldy #0
lda ($02),y // load from input
// decode_xx: start decoding a new code whose first pair is in bits 7..6 of A.
// In:  A = input byte with the next unread pair in the top two bits.
// The pair selects a decode_ilut row (Y = $00/$20/$40/$60); column 0 of that
// row seeds the accumulator that ends up in zp $05.
decode_xx: // decode from offset 0, unknown code length
// NOTE: Y is always 0 here, if that helps at all.
tax // stash for after branch
// two things need to be done here:
// 1. zp[0x05] |= {%0, %100, %1000, %10000}[A >> 6]
and #%11000000
lsr
tay
lda decode_ilut+0,y // would be ORA, but this is our first data point
if decode_alt {
// relies on V being clear (nothing in this file section modifies V).
bvc decode_read_either // always branch
// decode_read4_after: reached by bmi from decode_read_either, with A = zp $05
// (bit 7 set marks a code that needs a third pair) and Y = row of the pair just read.
decode_read4_after:
if very_alt {
and #%00000011
asl
asl
ora #%00010000
bpl decode_read_either // always branch
} else {
// clear the "needs a third pair" flag, then merge column 2 of the second pair.
and #%01111111
// read4
ora decode_ilut+2,y
// bit 7 was just cleared and column 2 values are at most $0C, so N is clear.
bpl decode_read_either // always branch
}
} else {
sta $05 // still part of 1.
// 2. branch to decode_read4 if both bits were set, decode_read2 otherwise.
// Y is $60 only when both top bits were set, so the eor yields zero only then.
tya
eor #$60
beq decode_read4
bne decode_read2 // always branch
}
// === BOUNDARY ===
// decode_ilut row for Y = $40 (top two bits = %10).
// NOTE(review): nops() is defined elsewhere; presumably it pads up to the
// given address so this row lands exactly at decode_ilut + $40 -- confirm.
nops(decode_ilut + 0x40)
if very_alt {
db $08
} else {
db $08,$02,$08
}
// === BOUNDARY ===
// decode_read_common: fetch the next pair of the current code.
// In:  X = input byte whose top pair has already been consumed,
//      $04 = pairs remaining, $05 = accumulator built so far.
// Out (normal build): X = input byte with the new pair in bits 7..6,
//      Y = decode_ilut row offset for that pair ($00/$20/$40/$60),
//      A = zp $05, with N/Z reflecting it (last instruction is lda $05).
// Out (very_alt build): A = (new pair as 0..3) | zp $05; Y is not reloaded.
macro decode_read_common() { // common between the subroutines, not WRT probability
dec $04 // decrement pairs remaining
bne +
// no pairs left: decode_advance returns A = next input byte, X = 4, Y = 0,
// with N clear from its ldx #4.
jsr decode_advance
bpl ++ // always branch
// we have at least one pair left to read from X
+;
txa
asl
asl
+; // decode_read2_again or decode_read4_again
// required on both paths as written: after decode_advance X holds 4, not the
// input byte; after the shift, X must hold the shifted byte for the next txa.
tax
if very_alt {
// zp[0x05] |= {%0, %1, %10, %11}[A >> 6]
// three rotates bring the original bits 7..6 down to bits 1..0;
// the and discards whatever was rotated in from carry.
rol
rol
rol
and #%00000011
ora $05
// then elsewhere, ora $05, instead of the lda $05 normally
// should be slightly faster for the read2 case
} else {
and #%11000000
lsr
tay
lda $05
}
}
// Read the remaining pair(s) of a code and hand the finished index to decode_write.
if decode_alt {
// decode_read_either: merged two-pair / three-pair path.
// In:  A = accumulator so far (column 0 of the first pair, or the value built
//      by decode_read4_after). Bit 7 set means one more pair is needed after
//      the next one.
// With the normal table the index passed to decode_write is 0..11 for
// two-pair codes and $10..$1F for three-pair codes.
decode_read_either:
sta $05 // still part of 1.
decode_read_common()
// the macro leaves A = zp $05, so N reflects the "needs a third pair" flag.
bmi decode_read4_after
// read2
if very_alt {
;
} else {
ora decode_ilut+1,y
}
// bit 7 is clear here (bmi not taken) and column 1 values are at most $03.
bpl decode_write // always branch
} else {
// decode_read2: read the final pair and merge column 1 into zp $05's value.
decode_read2:
decode_read_common()
ora decode_ilut+1,y
bpl decode_write // always branch
}
// === BOUNDARY ===
// decode_exit (non-alt placement): same three instructions as the decode_alt
// copy earlier in the file; only one of the two is assembled.
// Drops two bytes from the stack (one return address) and returns.
if decode_alt {
; // nothing else to add
} else {
decode_exit:
pla
pla
rts
}
// === BOUNDARY ===
// decode_ilut row for Y = $60 (top two bits = %11).
// In the decode_alt build column 0 is $90 instead of $10: bit 7 is the
// "needs a third pair" flag tested by bmi in decode_read_either and cleared
// by decode_read4_after.
// NOTE(review): nops() is defined elsewhere; presumably it pads up to the
// given address so this row lands exactly at decode_ilut + $60 -- confirm.
nops(decode_ilut + 0x60)
if decode_alt {
if very_alt {
db $90
} else {
db $90,$03,$0C
}
} else {
db $10,$03,$0C
}
// === BOUNDARY ===
// decode_read4 (non-alt build only): middle pair of a three-pair code.
// Reads the next pair, merges column 2 of its row into the accumulator,
// stores it back to zp $05, then continues in decode_read2 for the last pair.
if decode_alt {
; // nothing else to do
} else {
decode_read4:
decode_read_common()
ora decode_ilut+2,y
sta $05
// flags still come from the ora; the merged value has bit 7 clear
// ($10 from column 0 plus at most $0C from column 2).
bpl decode_read2 // always branch
}
// === BOUNDARY ===
// decode_advance: step the input pointer and load the next input byte.
// Out: A = new input byte, X = 4, Y = 0, zp $04 = 4.
//      The last flag-setting instruction is ldx #4, so N and Z are both clear
//      on return; callers use bpl as an always-taken branch.
// Branches to die if the low byte of the input pointer wraps to zero.
// NOTE(review): when_to_stop() is a macro defined elsewhere; presumably it
// holds the end-of-string checks and may leave via decode_exit / done /
// decode_begin_next instead of falling through -- confirm.
decode_advance:
inc $02 // advance input
beq die // never branch (unless page boundary)
when_to_stop()
ldy #0
lda ($02),y // load from input
ldx #4 // pairs remaining
stx $04 // write pairs remaining
rts // A = *input; X = 4; Y = 0
// === BOUNDARY ===
// done: emits the same raw $F2 byte as die.
// NOTE(review): not referenced in this file section; presumably the normal
// end-of-run stop point reached from when_to_stop() -- confirm.
done:
db $F2 // === BOUNDARY ===
// vim:ft=snes_bass