// included by decode.asm
//
// Decoder for a stream of variable-length codes packed as 2-bit pairs,
// four pairs per input byte, most significant pair first.
// A code is either two pairs long (first pair is %00, %01 or %10) or three
// pairs long (first pair is %11). The pairs are combined into an index and
// the output byte is fetched from decode_lut0xxx (not defined in this file).
//
// Zero-page usage, as read and written by the code below:
//   $00 = low byte of the output pointer (written through ($00),y, then incremented)
//   $02 = low byte of the input pointer (read through ($02),y, then incremented)
//   $04 = number of pairs still unread in the current input byte (reloaded with 4)
//   $05 = partially assembled index into decode_lut0xxx
//
// Layout: decode_ilut is aligned to 0x100 and has four rows, at offsets
// 0x00, 0x20, 0x40 and 0x60, selected with Y = (A & %11000000) >> 1.
// Column 0 is the contribution of the first pair of a code, column 1 that of
// the last pair, column 2 that of the middle pair of a three-pair code.
// The code itself is packed into the gaps between rows, which is why the
// rows appear interleaved with routines below.
// nops(addr) is a macro defined elsewhere; presumably it pads up to addr -- confirm.

// stats on extra-padded "Elizabeth Mary Patricia James Robert":
// cycles: 7403-55=7348
// instrs: 2702-17=2685
// program size: 0xA3
// with reorganization: (first and second program parts swapped, no need for long jump)
// cycles: 7391-55=7336
// instrs: 2698-17=2681
// program size: 0xA0
// with inlined decode_nextbytew:
// 7358 2687 0xA0
// with rearranged decode_write:
// exiting after 2678 instructions and 7351 cycles. 0xA0
// with removed txa from decode_xx:
// exiting after 2629 instructions and 7253 cycles. 0xA0
// with more txa/tax simplification:
// exiting after 2613 instructions and 7221 cycles. 0x9F
// without the lda $05 in decode_write:
// exiting after 2567 instructions and 7083 cycles. 0x9F
// without the sta $05 in decode_read2_again:
// exiting after 2521 instructions and 6945 cycles. 0x9F
// with one of the `txa; asl; asl; tax` blocks rearranged in branches:
// exiting after 2483 instructions and 6879 cycles. 0x9F
// with the other one too: (the one from decode_read4)
// exiting after 2461 instructions and 6841 cycles. 0xA1
// with redundant taxes removed after those rearrangements:
// exiting after 2406 instructions and 6731 cycles. 0xA0
// with merged decode_read2 and decode_read4:
// exiting after 2381 instructions and 6706 cycles. 0x86
// cycles: 6706-55=6651
// instrs: 2381-17=2364
// program size: 0x86 (134, 22 of which are the end-of-string comparisons)

// decode_alt selects the merged read2/read4 implementation (decode_read_either).
constant decode_alt(1)
constant very_alt(0) // see decode_v6.asm for an expansion of this idea

align(0x100)
decode_ilut: // internal look-up table
// row 0 (offset 0x00): pair value %00
if very_alt {
    db $00
} else {
    db $00,$00,$00
}

// === BOUNDARY ===

// decode_write: A holds the finished index. Emits one output byte, then
// continues with the next code, either from the bits left in X or from a
// freshly loaded input byte.
decode_write:
    // decode_common stuff:
    tay
    lda decode_lut0xxx,y
    ldy #0
    sta ($00),y // write to output
    inc $00 // advance output
    beq die // never branch (unless page boundary)

    dec $04 // decrement pairs remaining
    beq + // branch if we need more pairs
    txa // X holds the current byte with the pair just consumed on top
    asl
    asl // the next pair is now in bits 7-6
    // NOTE(review): "always" relies on V being clear. Nothing visible in this
    // file sets V, but when_to_stop() and the caller are not shown -- confirm.
    bvc decode_xx // always branch
+;  jsr decode_advance
    // decode_advance ends with ldx #4 (stx leaves flags alone), so N is clear.
    bpl decode_xx // always branch

// === BOUNDARY ===

// decode_exit: discards one return address (two bytes) from the stack and
// returns one level further up. No reference to it is visible in this file.
if decode_alt {
decode_exit:
    pla
    pla
    rts
}

// === BOUNDARY ===

// row 1 (offset 0x20): pair value %01
nops(decode_ilut + 0x20)
if very_alt {
    db $04
} else {
    db $04,$01,$04
}

// === BOUNDARY ===

// die: reached when the output or input pointer's low byte wraps to zero.
// NOTE(review): $F2 is presumably an invalid opcode used to halt the CPU or
// emulator -- confirm against the target.
die:
    db $F2

// === BOUNDARY ===

// decode_begin_next: same as decode, but first discards one return address
// (two bytes) from the stack.
decode_begin_next:
    pla
    pla
decode:
    // NOTE: output/input pointers cannot cross page boundaries.
    // that means the effective longest lengths of output/input are 256/192 bytes.
    lda #4 // pairs remaining
    sta $04 // write pairs remaining
    ldy #0
    lda ($02),y // load from input
decode_xx: // decode from offset 0, unknown code length
    // Entry: A = current byte with the first pair of the code in bits 7-6.
    // NOTE: Y is always 0 here, if that helps at all.
    tax // stash for after branch
    // two things need to be done here:
    // 1. zp[0x05] |= {%0, %100, %1000, %10000}[A >> 6]
    and #%11000000
    lsr // A = row offset: $00, $20, $40 or $60
    tay
    lda decode_ilut+0,y // would be ORA, but this is our first data point
if decode_alt {
    // With decode_alt, row 3 column 0 is $90: bit 7 marks a three-pair code
    // and is tested by the bmi in decode_read_either.
    bvc decode_read_either // always branch
decode_read4_after:
    // Reached with A = zp[$05] (bit 7 set) and Y = row offset of the middle pair.
    if very_alt {
        and #%00000011
        asl
        asl
        ora #%00010000
        bpl decode_read_either // always branch
    } else {
        and #%01111111 // drop the marker bit, leaving $10
        // read4
        ora decode_ilut+2,y // column 2: $00, $04, $08 or $0C
        bpl decode_read_either // always branch
    }
} else {
    sta $05 // still part of 1.
    // 2. branch to decode_read4 if both bits were set, decode_read2 otherwise.
    tya
    eor #$60
    beq decode_read4
    bne decode_read2 // always branch
}

// === BOUNDARY ===

// row 2 (offset 0x40): pair value %10
nops(decode_ilut + 0x40)
if very_alt {
    db $08
} else {
    db $08,$02,$08
}

// === BOUNDARY ===

// decode_read_common: fetch the next pair of the current code.
// Exit (very_alt = 0): X = current byte with the new pair in bits 7-6,
// Y = row offset of the new pair, A = zp[$05] with N and Z set from it.
macro decode_read_common() {
    // common between the subroutines, not WRT probability
    dec $04 // decrement pairs remaining
    bne +
    jsr decode_advance
    bpl ++ // always branch
    // we have at least one pair left to read from X
+;  txa
    asl
    asl
+;  // decode_read2_again or decode_read4_again
    // NOTE(review): this tax looks required on both paths. After
    // decode_advance X is 4 rather than the byte, and after the shift X still
    // holds the unshifted byte that later txa/asl/asl sequences rely on.
    tax // TODO: unnecessary?
    if very_alt {
        // zp[0x05] |= {%0, %1, %10, %11}[A >> 6]
        rol
        rol
        rol
        and #%00000011
        ora $05
        // then elsewhere, ora $05, instead of the lda $05 normally
        // should be slightly faster for the read2 case
    } else {
        and #%11000000
        lsr
        tay
        lda $05
    }
}

if decode_alt {
decode_read_either:
    // Entry: A = index assembled so far (bit 7 set if a middle pair is still due).
    sta $05 // still part of 1.
    decode_read_common()
    bmi decode_read4_after // bit 7 of zp[$05] set: merge the middle pair first
    // read2
    if very_alt {
        ;
    } else {
        ora decode_ilut+1,y // column 1: $00, $01, $02 or $03
    }
    bpl decode_write // always branch
} else {
decode_read2:
    decode_read_common()
    ora decode_ilut+1,y
    bpl decode_write // always branch
}

// === BOUNDARY ===

if decode_alt {
    ; // nothing else to add
} else {
decode_exit:
    pla
    pla
    rts
}

// === BOUNDARY ===

// row 3 (offset 0x60): pair value %11, the prefix of a three-pair code
nops(decode_ilut + 0x60)
if decode_alt {
    if very_alt {
        db $90
    } else {
        db $90,$03,$0C
    }
} else {
    db $10,$03,$0C
}

// === BOUNDARY ===

if decode_alt {
    ; // nothing else to do
} else {
decode_read4:
    decode_read_common()
    ora decode_ilut+2,y
    sta $05
    bpl decode_read2 // always branch
}

// === BOUNDARY ===

// decode_advance: step to the next input byte and reload the pair counter.
// when_to_stop() is a macro defined elsewhere; presumably it holds the
// end-of-string comparisons mentioned in the stats above -- confirm.
decode_advance:
    inc $02 // advance input
    beq die // never branch (unless page boundary)
    when_to_stop()
    ldy #0
    lda ($02),y // load from input
    ldx #4 // pairs remaining
    stx $04 // write pairs remaining
    rts // A = *input; X = 4; Y = 0

// === BOUNDARY ===

// done: same byte as die. No reference to it is visible in this file;
// presumably a target for when_to_stop() -- confirm.
done:
    db $F2

// === BOUNDARY ===

// vim:ft=snes_bass