From a612461e8707e2e235471144906468936a8810cd Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Mon, 20 Aug 2018 02:13:11 +0200 Subject: [PATCH] major work towards getting F3DZEX working --- inc/kernel.inc | 5 +- inc/main.inc | 15 +++-- inc/n64.inc | 61 +++++++++++++++++++ kernel.asm | 147 ++++++++++++++++++++++++++++++++++++++++---- main.asm | 162 +++++++++++++++++++++++++++++++++++++++++++++---- 5 files changed, 359 insertions(+), 31 deletions(-) diff --git a/inc/kernel.inc b/inc/kernel.inc index 69d8ca7..30c1896 100644 --- a/inc/kernel.inc +++ b/inc/kernel.inc @@ -29,19 +29,23 @@ constant K_INT_CACHE_ERROR(3) constant K_INT_OTHER(4) macro KDumpString(name) { +if K_DEBUG { // does not include error/console-checking! // note: this first instruction must be okay to be in a delay slot. la a2, {name} jal Drive64WriteDirect lli a3, {name}X - {name} } +} macro KMaybeDumpString(str) { +if K_DEBUG { lw t1, K_CONSOLE_AVAILABLE(k0) beqz t1,+ KDumpString({str}) + } +} macro KS(name, str) { align(16) @@ -58,4 +62,3 @@ macro KSL(name, str) { align(16) {name}X: } - diff --git a/inc/main.inc b/inc/main.inc index b15a209..31398e9 100644 --- a/inc/main.inc +++ b/inc/main.inc @@ -10,10 +10,15 @@ constant BLAH_COUNTS(0x0010) constant BLAH_SP_TASK(0x0040) constant BLAH_DLIST_JUMPER(0x0080) constant BLAH_XXD(0x0100) +constant BLAH_DLIST(0x1000) -constant VIDEO_BUFFER(0x80100000) -constant VIDEO_BUFFER_SIZE(640 * 480 * 4) -constant VIDEO_STACK(VIDEO_BUFFER + VIDEO_BUFFER_SIZE) +constant VIDEO_C_BUFFER(0x80100000) +constant VIDEO_C_BUFFER_SIZE(640 * 480 * 4) +constant VIDEO_Z_BUFFER(VIDEO_C_BUFFER + VIDEO_C_BUFFER_SIZE) +constant VIDEO_Z_BUFFER_SIZE(640 * 480 * 2) +constant VIDEO_SOMETHING(VIDEO_Z_BUFFER + VIDEO_Z_BUFFER_SIZE) +constant VIDEO_SOMETHING_SIZE(0x18000) +constant VIDEO_STACK(VIDEO_SOMETHING + VIDEO_SOMETHING_SIZE) constant VIDEO_STACK_SIZE(0x400) constant VIDEO_YIELD(VIDEO_STACK + VIDEO_STACK_SIZE) constant VIDEO_YIELD_SIZE(0xC00) @@ -30,10 +35,10 @@ macro PI_WAIT() { lw 
t0, PI_STATUS(t5) andi t0, t0, 3 bnez t0,- - nop // delay slot + nop } -macro SP_DMA_WAIT() { // external +macro SP_DMA_WAIT() { // from CPU lui t5, SP_BASE - lw t0, SP_DMA_FULL(t5) diff --git a/inc/n64.inc b/inc/n64.inc index 12e5ef7..9758d02 100644 --- a/inc/n64.inc +++ b/inc/n64.inc @@ -234,6 +234,50 @@ constant SP_PC_BASE($A408) // $04080000..$04080007 SP PC Base Register constant SP_PC($00) // $04080000..$04080003 SP: PC Register constant SP_IBIST_REG($04) // $04080004..$04080007 SP: IMEM BIST Register +// SP_STATUS Read Flags: +constant RSP_HLT($0001) // Halt +constant RSP_BRK($0002) // Break +constant RSP_BSY($0004) // DMA Busy +constant RSP_FUL($0008) // DMA Full +constant RSP_IOF($0010) // IO Full +constant RSP_STP($0020) // Single Step +constant RSP_IOB($0040) // Interrupt On Break +constant RSP_SG0($0080) // Signal 0 Set +constant RSP_SG1($0100) // Signal 1 Set +constant RSP_SG2($0200) // Signal 2 Set +constant RSP_SG3($0400) // Signal 3 Set +constant RSP_SG4($0800) // Signal 4 Set +constant RSP_SG5($1000) // Signal 5 Set +constant RSP_SG6($2000) // Signal 6 Set +constant RSP_SG7($4000) // Signal 7 Set + +// SP_STATUS Write Flags: +constant CLR_HLT($00000001) // Clear Halt +constant SET_HLT($00000002) // Set Halt +constant CLR_BRK($00000004) // Clear Broke +constant CLR_INT($00000008) // Clear Interrupt +constant SET_INT($00000010) // Set Interrupt +constant CLR_STP($00000020) // Clear Single Step +constant SET_STP($00000040) // Set Single Step +constant CLR_IOB($00000080) // Clear Interrupt On Break +constant SET_IOB($00000100) // Set Interrupt On Break +constant CLR_SG0($00000200) // Clear Signal 0 +constant SET_SG0($00000400) // Set Signal 0 +constant CLR_SG1($00000800) // Clear Signal 1 +constant SET_SG1($00001000) // Set Signal 1 +constant CLR_SG2($00002000) // Clear Signal 2 +constant SET_SG2($00004000) // Set Signal 2 +constant CLR_SG3($00008000) // Clear Signal 3 +constant SET_SG3($00010000) // Set Signal 3 +constant CLR_SG4($00020000) // Clear 
Signal 4 +constant SET_SG4($00040000) // Set Signal 4 +constant CLR_SG5($00080000) // Clear Signal 5 +constant SET_SG5($00100000) // Set Signal 5 +constant CLR_SG6($00200000) // Clear Signal 6 +constant SET_SG6($00400000) // Set Signal 6 +constant CLR_SG7($00800000) // Clear Signal 7 +constant SET_SG7($01000000) // Set Signal 7 + constant DPC_BASE($A410) // $04100000..$0410001F DP Command (DPC) Base Register constant DPC_START($00) // $04100000..$04100003 DPC: CMD DMA Start Register constant DPC_END($04) // $04100004..$04100007 DPC: CMD DMA End Register @@ -256,6 +300,23 @@ constant MI_VERSION($04) // $04300004..$04300007 MI: Version Register constant MI_INTR($08) // $04300008..$0430000B MI: Interrupt Register constant MI_INTR_MASK($0C) // $0430000C..$0430000F MI: Interrupt Mask Register +constant MI_INTR_SP($01) +constant MI_INTR_SI($02) +constant MI_INTR_AI($04) +constant MI_INTR_VI($08) +constant MI_INTR_PI($10) +constant MI_INTR_DP($20) +constant MI_INTR_ALL($3F) + +// TODO: SET and CLR rather than just MASK +constant MI_INTR_MASK_SP($002) +constant MI_INTR_MASK_SI($008) +constant MI_INTR_MASK_AI($020) +constant MI_INTR_MASK_VI($080) +constant MI_INTR_MASK_PI($200) +constant MI_INTR_MASK_DP($800) +constant MI_INTR_MASK_ALL($AAA) + constant VI_BASE($A440) // $04400000..$04400037 Video Interface (VI) Base Register constant VI_STATUS($00) // $04400000..$04400003 VI: Status/Control Register constant VI_ORIGIN($04) // $04400004..$04400007 VI: Origin Register diff --git a/kernel.asm b/kernel.asm index 82b5861..74e49ea 100644 --- a/kernel.asm +++ b/kernel.asm @@ -35,12 +35,34 @@ Start: // enable even more interrupts. lui t2, MI_BASE ori t2, t2, MI_INTR_MASK - lli t0, 0xAAA // LSB to MSB: SP, SI, AI, VI, PI, DP - // by the way, use 0x555 to disable +// lli t0, MI_INTR_MASK_ALL + // i don't have code to handle all the interrupts. in the meantime... 
+ lli t0, MI_INTR_MASK_SP sw t0, 0(t2) - // it looks like i should be initializing PI_BSD_DOM1_* from - // the ROM header at this point, but i don't know what even does does. + // set BSD DOM1 stuff, whatever that is. + lui v1, CART_DOM1_ADDR2 + lw v0, 0(v1) + srl t8, v0, 16 + srl t4, v0, 20 + andi t9, t8, 0xF // t9=$07 + andi t5, t4, 0xF // t5=$03 + srl t7, v0, 8 + // + andi t7, 0xFF // t7=$12 + andi v0, 0xFF // v0=$40 + // wait for PI + lui t2, PI_BASE +- + lw t0, PI_STATUS(t2) + andi t0, t0, 3 + bnez t0,- + nop + // + sw v0, PI_BSD_DOM1_LAT(t2) // $40 + sw t9, PI_BSD_DOM1_PGS(t2) // $07 + sw t5, PI_BSD_DOM1_RLS(t2) // $03 + sw t7, PI_BSD_DOM1_PWD(t2) // $12 // SP defaults to RSP instruction memory: 0xA4001FF0 // we can do better than that. @@ -95,6 +117,20 @@ Drive64CheckConsole: Drive64Done: + // zero out RDRAM from 1 MiB to 4 MiB + // NOTE: this might overwrite the last 4 KiB of ROM that's loaded by 6102? + li t0, 0x80100000 + li t1, 0x80400000 +- +define x(0) +while {x} < 0x100 { + sd r0, {x}(t0) +evaluate x({x} + 8) +} + addiu t0, 0x100 + bne t0, t1,- + nop + // delay to empty pipeline nop nop @@ -257,11 +293,9 @@ InterruptHandler: mfc0 k1, CP0_BadVAddr sw k1, K_BADVADDR(k0) -if K_DEBUG { - // prevent recursive interrupts if IHMain somehow causes an interrupt - lw t1, K_IN_MAIN(k0) - bnez t1, IHExit +// lw t1, K_IN_MAIN(k0) +// bnez t1, IHExit // TODO: reimplement properly lli t0, 1 sw t0, K_IN_MAIN(k0) @@ -269,6 +303,8 @@ if K_DEBUG { ori sp, k0, K_STACK IHMain: // free to modify any GPR from here to IHExit + +if K_DEBUG { KMaybeDumpString(KSNewline) KMaybeDumpString(KSHandling) @@ -306,6 +342,7 @@ IHMain: // free to modify any GPR from here to IHExit KMaybeDumpString(KSNewline) KMaybeDumpString(KSCode) +} // switch-case on the cause code: // conveniently, the ExcCode in Cause is already shifted left by 2. 
@@ -317,12 +354,11 @@ IHMain: // free to modify any GPR from here to IHExit jr t4 nop KCodeDone: + KMaybeDumpString(KSNewline) IHExit: sw r0, K_IN_MAIN(k0) -} - lui k0, K_BASE ld t0, K_DUMP+0x100(k0) ld t1, K_DUMP+0x108(k0) @@ -360,7 +396,8 @@ IHExit: ld ra, K_DUMP+0xF8(k0) lw k1, K_CAUSE(k0) - xori k1, k1, 13 << 2 // check if this was a trap exception + andi k1, k1, CP0_CAUSE_CODE + xori k1, k1, CP0_CODE_TR << 2 // check if this was a trap exception bnez k1, ReturnFromInterrupt mfc0 k0, CP0_EPC @@ -389,7 +426,84 @@ ReturnFromInterrupt: include "debug.asm" -KCode0:; KMaybeDumpString(KSCode0); j KCodeDone; nop +KCode0: + KMaybeDumpString(KSCode0) + + lui a0, MI_BASE + lw t0, MI_INTR(a0) + lw t1, MI_INTR_MASK(a0) + and s0, t0, t1 // if we don't care about an interrupt, get rid of it + +KMILoop: + beqz s0,+ + + andi t3, s0, MI_INTR_SP // delay slot + bnez t3, KMISP + + andi t4, s0, MI_INTR_SI // delay slot + bnez t4, KMISI + + andi t3, s0, MI_INTR_AI // delay slot + bnez t3, KMIAI + + andi t4, s0, MI_INTR_VI // delay slot + bnez t4, KMIVI + + andi t3, s0, MI_INTR_PI // delay slot + bnez t3, KMIPI + + andi t4, s0, MI_INTR_DP // delay slot + bnez t4, KMIDP + nop ++ + + // can't do this apparently: +// lui a0, MI_BASE +// sw s0, MI_INTR(a0) + j KCodeDone + nop + +// FIXME: camelcase sucks +KMISP: + KMaybeDumpString(KSMISP) + + lui a1, SP_BASE + lw t1, SP_STATUS(a1) + + andi t2, t1, RSP_BRK + beqz t2,+ + li t0, CLR_SG3 | CLR_INT // delay slot + sw t0, SP_STATUS(a1) ++ + + j KMILoop + andi s0, ~MI_INTR_SP + +KMISI: + KMaybeDumpString(KSMISI) + j KMILoop + andi s0, ~MI_INTR_SI + +KMIAI: + KMaybeDumpString(KSMIAI) + j KMILoop + andi s0, ~MI_INTR_AI + +KMIVI: + KMaybeDumpString(KSMIVI) + j KMILoop + andi s0, ~MI_INTR_VI + +KMIPI: + KMaybeDumpString(KSMIPI) + j KMILoop + andi s0, ~MI_INTR_PI + +KMIDP: + KMaybeDumpString(KSMIDP) + j KMILoop + andi s0, ~MI_INTR_DP + KCode1:; KMaybeDumpString(KSCode1); j KCodeDone; nop KCode2:; KMaybeDumpString(KSCode2); j KCodeDone; nop KCode3:; 
KMaybeDumpString(KSCode3); j KCodeDone; nop @@ -432,6 +546,7 @@ dw KCode20, KCode21, KCode22, KCode23 dw KCode24, KCode25, KCode26, KCode27 dw KCode28, KCode29, KCode30, KCode31 +if K_DEBUG { KS(KSNewline, 10) KSL(KSConsoleConfirmed, "USB debug console detected") KSL(KSHandling, " ~~ Handling Interrupt ~~") @@ -471,5 +586,13 @@ KSL(KSCode29, "RESERVED 29") KSL(KSCode30, "RESERVED 30") KSL(KSCode31, "RESERVED 31") +KSL(KSMISP, " Signal Processor Interrupt") +KSL(KSMISI, " Serial Interface Interrupt") +KSL(KSMIAI, " Audio Interface Interrupt") +KSL(KSMIVI, " Video Interface Interrupt") +KSL(KSMIPI, " Peripheral Interface Interrupt") +KSL(KSMIDP, " Display Processor Interrupt") +} + align(4) nops((K_BASE << 16) + 0x10000) diff --git a/main.asm b/main.asm index fe68512..d5941d4 100644 --- a/main.asm +++ b/main.asm @@ -38,10 +38,10 @@ Main: la a0, LZ_BAKU + 4 lw a3, -4(a0) // load uncompressed size from the file itself li a1, LZ_BAKU.size - 4 - li a2, VIDEO_BUFFER | 0x80000000 + li a2, VIDEO_C_BUFFER jal LzDecomp nop - // TODO: flush cache on video buffer + // TODO: flush cache on color buffer mfc0 t0, CP0_Count nop; nop; nop; nop @@ -51,6 +51,8 @@ Main: subu t1, t0, t1 sw t1, BLAH_COUNTS+0xC(s0) + // FIXME: this is triggering a PI interrupt somehow, + // which is causing the IH debug output to be repeated instead! 
lui a0, BLAH_BASE lli a1, 0x20 ori a2, a0, BLAH_XXD @@ -71,13 +73,137 @@ InitVideo: mfc0 t0, CP0_Count sw t0, BLAH_COUNTS+0xC(s0) +TestRDP: +if 0 { + // take a peek at the stuff at the Task data we wrote + lui a0, BLAH_BASE + ori a0, a0, BLAH_SP_TASK + lli a1, 0x80 + ori a2, a0, BLAH_XXD + jal DumpAndWrite + lli a3, 0x80 * 4 +} + + // write the jump to our actual instructions + lui a0, BLAH_BASE + lui t0, 0xDE01 // jump (no push) + sw t0, BLAH_DLIST_JUMPER+0(a0) + ori t1, a0, BLAH_DLIST + sw t1, BLAH_DLIST_JUMPER+4(a0) + +define dpos(BLAH_DLIST) +macro WriteDL(evaluate L, evaluate R) { + lui t0, ({L} >> 16) & 0xFFFF + lui t1, ({R} >> 16) & 0xFFFF + ori t0, {L} & 0xFFFF + ori t1, {R} & 0xFFFF + sw t0, {dpos}+0(a0) + sw t1, {dpos}+4(a0) +global evaluate dpos({dpos}+8) +if {dpos} >= 0x8000 { + error "much too much" + // FIXME: just add dpos to a0 and set dpos to 0 when this happens +} +} + + // write some F3DZEX instructions + +{ + // G_RDPPIPESYNC + WriteDL(0xE7000000, 0) + + // G_TEXTURE (disable tile descriptor; dummy second argument) + WriteDL(0xD7000000, 0xFFFFFFFF) + + // G_SETCOMBINE (too complicated to explain here...) + WriteDL(0xFCFFFFFF, 0xFFFE793C) + + // G_RDPSETOTHERMODE (set higher flags, clear all lower flags) + // 0011 1000 0010 1100 0011 0000 + // G_AD_DISABLE | G_CD_MAGICSQ | G_TC_FILT | G_TF_BILERP | + // G_TT_NONE | G_TL_TILE | G_TD_CLAMP | G_MDSFT_TEXTPERSP | + // G_CYC_FILL | G_PM_NPRIMITIVE + WriteDL(0xEF382C30, 0x00000000) + + // G_GEOMETRYMODE + // set some bits (TODO: which?), clear none + WriteDL(0xD9000000, 0x00220405) + + // G_SETSCISSOR coordinate order: (left, top), (right, bottom) + WriteDL(0xED000000 | (0 << 14) | (0 << 2), (320 << 14) | (240 << 2)) + + // G_SETBLENDCOLOR + // sets alpha component to 8, everything else to 0 + WriteDL(0xF9000000, 0x00000008) + + // sets near-far plane clipping? maybe?
+ // G_MOVEWORD, sets G_MW_CLIP+$0004 + WriteDL(0xDB040004, 2) + // G_MOVEWORD, sets G_MW_CLIP+$000C + WriteDL(0xDB04000C, 2) + // G_MOVEWORD, sets G_MW_CLIP+$0014 + WriteDL(0xDB040014, 0x10000 - 2) + // G_MOVEWORD, sets G_MW_CLIP+$001C + WriteDL(0xDB04001C, 0x10000 - 2) + + // G_ENDDL: absent since we're not jumping to this routine +} + + // G_SETCIMG, set our color buffer (fmt 0, bit size %10, width) + WriteDL(0xFF100000 | (640 - 1), VIDEO_C_BUFFER) + + // G_SETZIMG, set our z buffer (fmt 0, bit size %00, width) + WriteDL(0xFE000000, VIDEO_Z_BUFFER) + + // G_SETFILLCOLOR + WriteDL(0xF7000000, 0xFFFFFFFF) + + // G_FILLRECT coordinate order: (right, bottom), (left, top) + // note that the coordinates are all inclusive! + WriteDL(0xF6000000 | (199 << 14) | (199 << 2), (100 << 14) | (100 << 2)) + + // G_RDPPIPESYNC + WriteDL(0xE7000000, 0) + + // always finish it off by telling RDP to stop! + // G_RDPFULLSYNC, G_ENDDL + WriteDL(0xE9000000, 0); WriteDL(0xDF000000, 0) + + // take a peek at the display list we wrote + lui a0, BLAH_BASE + ori a0, BLAH_DLIST + lli a1, 0x80 + ori a2, a0, BLAH_XXD + jal DumpAndWrite + lli a3, 0x80 * 4 + + // stuff i'm borrowing from zelda: + lui a0, SP_BASE + lli t0, CLR_SG2 | CLR_SG1 | CLR_SG0 | SET_IOB + sw t0, SP_STATUS(a0) + + // NOTE: we should be asserting here that SP_STATUS & 1 != 0 + // set RSP PC to IMEM+$0 + lui a0, SP_PC_BASE + li t0, 0x04001000 + sw t0, SP_PC(a0) + + // tell RSP to run by clearing flags + lui a0, SP_BASE + lli t0, SET_IOB | CLR_STP | CLR_BRK | CLR_HLT + sw t0, SP_STATUS(a0) + nop + + // also one thing i noticed in zelda is they set VI_V_INTR to 2 + // so they get interrupts with scanlines (unlike us, who just wait) + MainLoop: // borrowing code from krom for now: WaitScanline(0x1E0) // Wait For Scanline To Reach Vertical Blank WaitScanline(0x1E2) // WaitScanline sets a0 - ori t0, r0, 0x00000800 // Even Field + li t0, 0x00000800 // Even Field sw t0, VI_Y_SCALE(a0) WaitScanline(0x1E0) // Wait For Scanline To
Reach Vertical Blank @@ -91,8 +217,8 @@ MainLoop: nop // delay slot SetupScreen: - // NTSC: 640x480, 32BPP, Interlace, Resample Only, DRAM Origin VIDEO_BUFFER - ScreenNTSC(640, 480, BPP32|INTERLACE|AA_MODE_2, VIDEO_BUFFER | UNCACHED) + // NTSC: 640x480, 32BPP, Interlace, Resample Only, DRAM Origin VIDEO_C_BUFFER + ScreenNTSC(640, 480, BPP32|INTERLACE|AA_MODE_2, VIDEO_C_BUFFER | UNCACHED) jr ra nop @@ -101,7 +227,8 @@ LoadRSPBoot: li t3, F3DZEX_BOOT.size subiu t3, t3, 1 // DMA quirk SP_DMA_WAIT() // clobbers t0, t5 - ori t1, t5, 0x1000 +// ori t1, t5, 0x1000 + la t1, 0xA4001000 sw t1, SP_MEM_ADDR(t5) sw t2, SP_DRAM_ADDR(t5) sw t3, SP_RD_LEN(t5) // pull data from RDRAM into DMEM/IMEM @@ -109,13 +236,13 @@ LoadRSPBoot: nop PushVideoTask: - // a0: Task RDRAM Pointer (size: 0x40) + // a0: Task RDRAM Pointer (size: 0x40) (should probably be row-aligned) subiu sp, sp, 0x18 sw ra, 0x10(sp) lli t0, 1 // mode: video lli t1, 4 // flags: ??? - li t2, F3DZEX_BOOT + li t2, F3DZEX_BOOT // does not need masking for some reason li t3, F3DZEX_BOOT.size li t4, F3DZEX_IMEM & ADDR_MASK li t5, F3DZEX_IMEM.size // note: Zelda uses 0x1000 for some reason (0x80 too big). @@ -131,8 +258,8 @@ PushVideoTask: sw t7, 0x1C(a0) li t0, VIDEO_STACK & ADDR_MASK // used for DList calls and returns? li t1, VIDEO_STACK_SIZE - li t2, VIDEO_BUFFER & ADDR_MASK - li t3, (VIDEO_BUFFER & ADDR_MASK) + VIDEO_BUFFER_SIZE // end pointer (not size!) + li t2, VIDEO_SOMETHING & ADDR_MASK + li t3, (VIDEO_SOMETHING & ADDR_MASK) + VIDEO_SOMETHING_SIZE // end pointer (not size!) 
li t4, ((BLAH_BASE << 16) | BLAH_DLIST_JUMPER) & ADDR_MASK // initial DList lli t5, 8 // size of one jump command li t6, VIDEO_YIELD & ADDR_MASK @@ -145,8 +272,16 @@ PushVideoTask: sw t5, 0x34(a0) sw t6, 0x38(a0) sw t7, 0x3C(a0) - jal PushRSPTask // a0 passthru - nop + + // tell data cache to write itself out + cache 0x19, 0x00(a0) + cache 0x19, 0x10(a0) + cache 0x19, 0x20(a0) + cache 0x19, 0x30(a0) + + li t9, ADDR_MASK + jal PushRSPTask + and a0, a0, t9 lw ra, 0x10(sp) jr ra @@ -155,7 +290,8 @@ PushVideoTask: PushRSPTask: lli t3, 0x40 - 1 // DMA quirk SP_DMA_WAIT() // clobbers t0, t5 - ori t1, t5, 0xFC0 +// ori t1, t5, 0xFC0 + la t1, 0xA4000FC0 sw t1, SP_MEM_ADDR(t5) sw a0, SP_DRAM_ADDR(t5) sw t3, SP_RD_LEN(t5) // pull data from RDRAM into DMEM/IMEM