major work towards getting F3DZEX working

This commit is contained in:
Connor Olding 2018-08-20 02:13:11 +02:00
parent 6d56e1fe42
commit a612461e87
5 changed files with 359 additions and 31 deletions

View File

@ -29,19 +29,23 @@ constant K_INT_CACHE_ERROR(3)
constant K_INT_OTHER(4)
// KDumpString(name): emit a call to Drive64WriteDirect for the string `name`.
// Assembles to nothing unless K_DEBUG is set.
// Arguments passed: a2 = address of {name}, a3 = {name}X - {name}, i.e. the
// distance from the string's label to its matching {name}X end label
// (presumably the byte length to write). a3 is loaded in the jal delay slot.
// Clobbers a2, a3 and ra, plus whatever Drive64WriteDirect itself clobbers.
macro KDumpString(name) {
if K_DEBUG {
// does not include error/console-checking!
// note: this first instruction must be okay to be in a delay slot.
la a2, {name}
jal Drive64WriteDirect
lli a3, {name}X - {name}
}
}
// KMaybeDumpString(str): like KDumpString, but the call is skipped at run time
// when the word at K_CONSOLE_AVAILABLE(k0) is zero.
// Assembles to nothing unless K_DEBUG is set.
// Clobbers t1 in addition to everything KDumpString clobbers.
macro KMaybeDumpString(str) {
if K_DEBUG {
lw t1, K_CONSOLE_AVAILABLE(k0)
beqz t1,+
// no explicit delay slot: the first instruction emitted by KDumpString
// fills the beqz delay slot, so it runs even when the branch is taken.
KDumpString({str})
+
}
}
macro KS(name, str) {
align(16)
@ -58,4 +62,3 @@ macro KSL(name, str) {
align(16)
{name}X:
}

View File

@ -10,10 +10,15 @@ constant BLAH_COUNTS(0x0010)
constant BLAH_SP_TASK(0x0040)
constant BLAH_DLIST_JUMPER(0x0080)
constant BLAH_XXD(0x0100)
constant BLAH_DLIST(0x1000)
constant VIDEO_BUFFER(0x80100000)
constant VIDEO_BUFFER_SIZE(640 * 480 * 4)
constant VIDEO_STACK(VIDEO_BUFFER + VIDEO_BUFFER_SIZE)
// RDRAM layout for the video buffers. Each region starts where the previous
// one ends, so changing any *_SIZE shifts every region after it.
// color buffer: 640 * 480 pixels at 4 bytes each
constant VIDEO_C_BUFFER(0x80100000)
constant VIDEO_C_BUFFER_SIZE(640 * 480 * 4)
// z buffer: 640 * 480 entries at 2 bytes each
constant VIDEO_Z_BUFFER(VIDEO_C_BUFFER + VIDEO_C_BUFFER_SIZE)
constant VIDEO_Z_BUFFER_SIZE(640 * 480 * 2)
// NOTE(review): the purpose of this region is not documented; PushVideoTask
// passes its start and end addresses in the task structure -- confirm what it holds.
constant VIDEO_SOMETHING(VIDEO_Z_BUFFER + VIDEO_Z_BUFFER_SIZE)
constant VIDEO_SOMETHING_SIZE(0x18000)
// passed to the video task by PushVideoTask (presumably the DList call/return stack)
constant VIDEO_STACK(VIDEO_SOMETHING + VIDEO_SOMETHING_SIZE)
constant VIDEO_STACK_SIZE(0x400)
// passed to the video task by PushVideoTask as VIDEO_YIELD
constant VIDEO_YIELD(VIDEO_STACK + VIDEO_STACK_SIZE)
constant VIDEO_YIELD_SIZE(0xC00)
@ -30,10 +35,10 @@ macro PI_WAIT() {
lw t0, PI_STATUS(t5)
andi t0, t0, 3
bnez t0,-
nop // delay slot
nop
}
macro SP_DMA_WAIT() { // external
macro SP_DMA_WAIT() { // from CPU
lui t5, SP_BASE
-
lw t0, SP_DMA_FULL(t5)

View File

@ -234,6 +234,50 @@ constant SP_PC_BASE($A408) // $04080000..$04080007 SP PC Base Register
constant SP_PC($00) // $04080000..$04080003 SP: PC Register
constant SP_IBIST_REG($04) // $04080004..$04080007 SP: IMEM BIST Register
// SP_STATUS Read Flags:
// bit meanings when SP_STATUS is read back (e.g. the SP interrupt handler
// tests RSP_BRK after loading SP_STATUS).
constant RSP_HLT($0001) // Halt
constant RSP_BRK($0002) // Break
constant RSP_BSY($0004) // DMA Busy
constant RSP_FUL($0008) // DMA Full
constant RSP_IOF($0010) // IO Full
constant RSP_STP($0020) // Single Step
constant RSP_IOB($0040) // Interrupt On Break
constant RSP_SG0($0080) // Signal 0 Set
constant RSP_SG1($0100) // Signal 1 Set
constant RSP_SG2($0200) // Signal 2 Set
constant RSP_SG3($0400) // Signal 3 Set
constant RSP_SG4($0800) // Signal 4 Set
constant RSP_SG5($1000) // Signal 5 Set
constant RSP_SG6($2000) // Signal 6 Set
constant RSP_SG7($4000) // Signal 7 Set
// SP_STATUS Write Flags:
// these use a different bit layout from the read flags: every controllable
// state has its own CLR_ and SET_ bit, and several can be OR'd into one store.
// note: SET_SG3 and everything after it is wider than 16 bits.
constant CLR_HLT($00000001) // Clear Halt
constant SET_HLT($00000002) // Set Halt
constant CLR_BRK($00000004) // Clear Broke
constant CLR_INT($00000008) // Clear Interrupt
constant SET_INT($00000010) // Set Interrupt
constant CLR_STP($00000020) // Clear Single Step
constant SET_STP($00000040) // Set Single Step
constant CLR_IOB($00000080) // Clear Interrupt On Break
constant SET_IOB($00000100) // Set Interrupt On Break
constant CLR_SG0($00000200) // Clear Signal 0
constant SET_SG0($00000400) // Set Signal 0
constant CLR_SG1($00000800) // Clear Signal 1
constant SET_SG1($00001000) // Set Signal 1
constant CLR_SG2($00002000) // Clear Signal 2
constant SET_SG2($00004000) // Set Signal 2
constant CLR_SG3($00008000) // Clear Signal 3
constant SET_SG3($00010000) // Set Signal 3
constant CLR_SG4($00020000) // Clear Signal 4
constant SET_SG4($00040000) // Set Signal 4
constant CLR_SG5($00080000) // Clear Signal 5
constant SET_SG5($00100000) // Set Signal 5
constant CLR_SG6($00200000) // Clear Signal 6
constant SET_SG6($00400000) // Set Signal 6
constant CLR_SG7($00800000) // Clear Signal 7
constant SET_SG7($01000000) // Set Signal 7
constant DPC_BASE($A410) // $04100000..$0410001F DP Command (DPC) Base Register
constant DPC_START($00) // $04100000..$04100003 DPC: CMD DMA Start Register
constant DPC_END($04) // $04100004..$04100007 DPC: CMD DMA End Register
@ -256,6 +300,23 @@ constant MI_VERSION($04) // $04300004..$04300007 MI: Version Register
constant MI_INTR($08) // $04300008..$0430000B MI: Interrupt Register
constant MI_INTR_MASK($0C) // $0430000C..$0430000F MI: Interrupt Mask Register
// MI_INTR bits, one per interrupt source. The interrupt handler ANDs the
// value read from MI_INTR with the value read from MI_INTR_MASK and then
// tests these bits, so both registers read back in this layout.
constant MI_INTR_SP($01)
constant MI_INTR_SI($02)
constant MI_INTR_AI($04)
constant MI_INTR_VI($08)
constant MI_INTR_PI($10)
constant MI_INTR_DP($20)
constant MI_INTR_ALL($3F)
// TODO: SET and CLR rather than just MASK
// values to *write* to MI_INTR_MASK to enable a source. These are the odd
// bits of each two-bit pair ($AAA enables everything); per the note in Start,
// $555 is the matching "disable everything" pattern.
constant MI_INTR_MASK_SP($002)
constant MI_INTR_MASK_SI($008)
constant MI_INTR_MASK_AI($020)
constant MI_INTR_MASK_VI($080)
constant MI_INTR_MASK_PI($200)
constant MI_INTR_MASK_DP($800)
constant MI_INTR_MASK_ALL($AAA)
constant VI_BASE($A440) // $04400000..$04400037 Video Interface (VI) Base Register
constant VI_STATUS($00) // $04400000..$04400003 VI: Status/Control Register
constant VI_ORIGIN($04) // $04400004..$04400007 VI: Origin Register

View File

@ -35,12 +35,34 @@ Start:
// enable even more interrupts.
lui t2, MI_BASE
ori t2, t2, MI_INTR_MASK
lli t0, 0xAAA // LSB to MSB: SP, SI, AI, VI, PI, DP
// by the way, use 0x555 to disable
// lli t0, MI_INTR_MASK_ALL
// i don't have code to handle all the interrupts. in the meantime...
lli t0, MI_INTR_MASK_SP
sw t0, 0(t2)
// it looks like i should be initializing PI_BSD_DOM1_* from
// the ROM header at this point, but i don't know what that even does.
// set BSD DOM1 stuff, whatever that is.
lui v1, CART_DOM1_ADDR2
lw v0, 0(v1)
srl t8, v0, 16
srl t4, v0, 20
andi t9, t8, 0xF // t9=$07
andi t5, t4, 0xF // t5=$03
srl t7, v0, 8
//
andi t7, 0xFF // t7=$12
andi v0, 0xFF // v0=$40
// wait for PI
lui t2, PI_BASE
-
lw t0, PI_STATUS(t2)
andi t0, t0, 3
bnez t0,-
nop
//
sw v0, PI_BSD_DOM1_LAT(t2) // $40
sw t9, PI_BSD_DOM1_PGS(t2) // $07
sw t5, PI_BSD_DOM1_RLS(t2) // $03
sw t7, PI_BSD_DOM1_PWD(t2) // $12
// SP defaults to RSP instruction memory: 0xA4001FF0
// we can do better than that.
@ -95,6 +117,20 @@ Drive64CheckConsole:
Drive64Done:
// zero out RDRAM from 1 MiB to 4 MiB
// NOTE: this might overwrite the last 4 KiB of ROM that's loaded by 6102?
// t0 = current address, t1 = end address (exclusive)
li t0, 0x80100000
li t1, 0x80400000
-
// assemble-time unrolling: the while loop emits one `sd r0` per 8 bytes,
// i.e. 0x100 / 8 = 32 stores covering offsets 0x00..0xF8 from t0.
define x(0)
while {x} < 0x100 {
sd r0, {x}(t0)
evaluate x({x} + 8)
}
// advance past the 0x100 bytes just cleared and repeat until t0 == t1.
// the range (0x300000 bytes) is a multiple of 0x100, so t0 lands on t1 exactly.
addiu t0, 0x100
bne t0, t1,-
nop
// delay to empty pipeline
nop
nop
@ -257,11 +293,9 @@ InterruptHandler:
mfc0 k1, CP0_BadVAddr
sw k1, K_BADVADDR(k0)
if K_DEBUG {
// prevent recursive interrupts if IHMain somehow causes an interrupt
lw t1, K_IN_MAIN(k0)
bnez t1, IHExit
// lw t1, K_IN_MAIN(k0)
// bnez t1, IHExit // TODO: reimplement properly
lli t0, 1
sw t0, K_IN_MAIN(k0)
@ -269,6 +303,8 @@ if K_DEBUG {
ori sp, k0, K_STACK
IHMain: // free to modify any GPR from here to IHExit
if K_DEBUG {
KMaybeDumpString(KSNewline)
KMaybeDumpString(KSHandling)
@ -306,6 +342,7 @@ IHMain: // free to modify any GPR from here to IHExit
KMaybeDumpString(KSNewline)
KMaybeDumpString(KSCode)
}
// switch-case on the cause code:
// conveniently, the ExcCode in Cause is already shifted left by 2.
@ -317,12 +354,11 @@ IHMain: // free to modify any GPR from here to IHExit
jr t4
nop
KCodeDone:
KMaybeDumpString(KSNewline)
IHExit:
sw r0, K_IN_MAIN(k0)
}
lui k0, K_BASE
ld t0, K_DUMP+0x100(k0)
ld t1, K_DUMP+0x108(k0)
@ -360,7 +396,8 @@ IHExit:
ld ra, K_DUMP+0xF8(k0)
lw k1, K_CAUSE(k0)
xori k1, k1, 13 << 2 // check if this was a trap exception
andi k1, k1, CP0_CAUSE_CODE
xori k1, k1, CP0_CODE_TR << 2 // check if this was a trap exception
bnez k1, ReturnFromInterrupt
mfc0 k0, CP0_EPC
@ -389,7 +426,84 @@ ReturnFromInterrupt:
include "debug.asm"
KCode0:; KMaybeDumpString(KSCode0); j KCodeDone; nop
// KCode0: handler for cause code 0.
// Reads the pending MI interrupts, keeps only those enabled in the mask, and
// services them one at a time via KMILoop. Leaves through KCodeDone.
// s0 holds the set of interrupts still to be serviced.
KCode0:
KMaybeDumpString(KSCode0)
lui a0, MI_BASE
lw t0, MI_INTR(a0)
lw t1, MI_INTR_MASK(a0)
and s0, t0, t1 // if we don't care about an interrupt, get rid of it
// KMILoop: dispatch on the lowest-order pending source in s0.
// Each KMI* handler clears its own bit in s0 and jumps back here, so the
// loop ends once s0 reaches zero. Every delay slot below computes the test
// value for the *next* branch, alternating between t3 and t4.
KMILoop:
beqz s0,+
andi t3, s0, MI_INTR_SP // delay slot
bnez t3, KMISP
andi t4, s0, MI_INTR_SI // delay slot
bnez t4, KMISI
andi t3, s0, MI_INTR_AI // delay slot
bnez t3, KMIAI
andi t4, s0, MI_INTR_VI // delay slot
bnez t4, KMIVI
andi t3, s0, MI_INTR_PI // delay slot
bnez t3, KMIPI
andi t4, s0, MI_INTR_DP // delay slot
bnez t4, KMIDP
nop
+
// nothing (recognized) left to service
// can't do this apparently:
// lui a0, MI_BASE
// sw s0, MI_INTR(a0)
j KCodeDone
nop
// FIXME: camelcase sucks
// KMISP: SP interrupt handler, entered from KMILoop.
// If SP_STATUS has RSP_BRK set, writes CLR_SG3 | CLR_INT to SP_STATUS;
// otherwise the store is skipped. Then clears the SP bit in s0 and re-enters
// KMILoop.
// Clobbers a1, t0, t1, t2 (plus whatever KMaybeDumpString clobbers).
KMISP:
KMaybeDumpString(KSMISP)
lui a1, SP_BASE
lw t1, SP_STATUS(a1)
andi t2, t1, RSP_BRK
beqz t2,+
// t0 is loaded whether or not the branch is taken; only the sw is skipped.
// NOTE(review): if `li` expands to more than one instruction, only the first
// one sits in the delay slot -- harmless here since t0 is only used by the sw.
li t0, CLR_SG3 | CLR_INT // delay slot
sw t0, SP_STATUS(a1)
+
j KMILoop
// delay slot: mark the SP interrupt as handled
andi s0, ~MI_INTR_SP
// KMISI / KMIAI / KMIVI / KMIPI / KMIDP: placeholder handlers entered from
// KMILoop. Each one only (maybe) prints its name, then clears its own bit in
// s0 in the delay slot of the jump back to KMILoop. None of them touches the
// interrupting device.
KMISI:
KMaybeDumpString(KSMISI)
j KMILoop
andi s0, ~MI_INTR_SI
KMIAI:
KMaybeDumpString(KSMIAI)
j KMILoop
andi s0, ~MI_INTR_AI
KMIVI:
KMaybeDumpString(KSMIVI)
j KMILoop
andi s0, ~MI_INTR_VI
KMIPI:
KMaybeDumpString(KSMIPI)
j KMILoop
andi s0, ~MI_INTR_PI
KMIDP:
KMaybeDumpString(KSMIDP)
j KMILoop
andi s0, ~MI_INTR_DP
KCode1:; KMaybeDumpString(KSCode1); j KCodeDone; nop
KCode2:; KMaybeDumpString(KSCode2); j KCodeDone; nop
KCode3:; KMaybeDumpString(KSCode3); j KCodeDone; nop
@ -432,6 +546,7 @@ dw KCode20, KCode21, KCode22, KCode23
dw KCode24, KCode25, KCode26, KCode27
dw KCode28, KCode29, KCode30, KCode31
if K_DEBUG {
KS(KSNewline, 10)
KSL(KSConsoleConfirmed, "USB debug console detected")
KSL(KSHandling, " ~~ Handling Interrupt ~~")
@ -471,5 +586,13 @@ KSL(KSCode29, "RESERVED 29")
KSL(KSCode30, "RESERVED 30")
KSL(KSCode31, "RESERVED 31")
KSL(KSMISP, " Signal Processor Interrupt")
KSL(KSMISI, " Serial Interface Interrupt")
KSL(KSMIAI, " Audio Interface Interrupt")
KSL(KSMIVI, " Video Interface Interrupt")
KSL(KSMIPI, " Peripheral Interface Interrupt")
KSL(KSMIDP, " Display Processor Interrupt")
}
align(4)
nops((K_BASE << 16) + 0x10000)

162
main.asm
View File

@ -38,10 +38,10 @@ Main:
la a0, LZ_BAKU + 4
lw a3, -4(a0) // load uncompressed size from the file itself
li a1, LZ_BAKU.size - 4
li a2, VIDEO_BUFFER | 0x80000000
li a2, VIDEO_C_BUFFER
jal LzDecomp
nop
// TODO: flush cache on video buffer
// TODO: flush cache on color buffer
mfc0 t0, CP0_Count
nop; nop; nop; nop
@ -51,6 +51,8 @@ Main:
subu t1, t0, t1
sw t1, BLAH_COUNTS+0xC(s0)
// FIXME: this is triggering a PI interrupt somehow,
// which is causing the IH debug output to be repeated instead!
lui a0, BLAH_BASE
lli a1, 0x20
ori a2, a0, BLAH_XXD
@ -71,13 +73,137 @@ InitVideo:
mfc0 t0, CP0_Count
sw t0, BLAH_COUNTS+0xC(s0)
// TestRDP: builds a small display list in memory and then starts the RSP.
TestRDP:
// debug dump of the task data; disabled at assembly time by `if 0`.
if 0 {
// take a peek at the stuff at the Task data we wrote
lui a0, BLAH_BASE
ori a0, a0, BLAH_SP_TASK
lli a1, 0x80
ori a2, a0, BLAH_XXD
jal DumpAndWrite
lli a3, 0x80 * 4
}
// write the jump to our actual instructions
// the 8-byte command at BLAH_DLIST_JUMPER is what PushVideoTask passes as the
// initial DList; its second word is the address of the list at BLAH_DLIST.
// a0 = BLAH_BASE << 16 must stay intact: WriteDL stores relative to it.
lui a0, BLAH_BASE
lui t0, 0xDE01 // jump (no push)
sw t0, BLAH_DLIST_JUMPER+0(a0)
ori t1, a0, BLAH_DLIST
sw t1, BLAH_DLIST_JUMPER+4(a0)
// dpos: assemble-time write cursor, an offset relative to a0. Starts at
// BLAH_DLIST and is advanced by 8 after every WriteDL.
define dpos(BLAH_DLIST)
// WriteDL(L, R): emit code that stores one 8-byte display list command
// (upper word L, lower word R) at {dpos}(a0), then advance dpos by 8.
// Requires a0 to hold the base the offsets are relative to (BLAH_BASE << 16
// in TestRDP). Clobbers t0 and t1. Always emits 6 instructions; both halves
// of each word are loaded explicitly, even when they are zero.
macro WriteDL(evaluate L, evaluate R) {
lui t0, ({L} >> 16) & 0xFFFF
lui t1, ({R} >> 16) & 0xFFFF
ori t0, {L} & 0xFFFF
ori t1, {R} & 0xFFFF
sw t0, {dpos}+0(a0)
sw t1, {dpos}+4(a0)
global evaluate dpos({dpos}+8)
// the cursor is used directly as a load/store offset, so it is capped below 0x8000.
if {dpos} >= 0x8000 {
error "much too much"
// FIXME: just add dpos to a0 and set dpos to 0 when this happens
}
}
// write some F3DZEX instructions
// every WriteDL below stores relative to a0 (still BLAH_BASE << 16 from above)
// and clobbers t0/t1; commands land in memory in the order written here.
{
// G_RDPPIPESYNC
WriteDL(0xE7000000, 0)
// G_TEXTURE (disable tile descriptor; dummy second argument)
WriteDL(0xD7000000, 0xFFFFFFFF)
// G_SETCOMBINE (too complicated to explain here...)
WriteDL(0xFCFFFFFF, 0xFFFE793C)
// G_RDPSETOTHERMODE (set higher flags, clear all lower flags)
// 0011 1000 0010 1100 0011 0000
// G_AD_DISABLE | G_CD_MAGICSQ | G_TC_FILT | G_TF_BILERP |
// G_TT_NONE | G_TL_TILE | G_TD_CLAMP | G_MDSFT_TEXTPERSP |
// G_CYC_FILL | G_PM_NPRIMITIVE
WriteDL(0xEF382C30, 0x00000000)
// G_GEOMETRYMODE
// set some bits (TODO: which?), clear none
WriteDL(0xD9000000, 0x00220405)
// G_SETSCISSOR coordinate order: (top, left), (right, bottom)
WriteDL(0xED000000 | (0 << 14) | (0 << 2), (320 << 14) | (240 << 2))
// G_SETBLENDCOLOR
// sets alpha component to 8, everything else to 0
WriteDL(0xF9000000, 0x00000008)
// sets near-far plane clipping? maybe?
// G_MOVEWORD, sets G_MW_CLIP+$0004
WriteDL(0xDB040004, 2)
// G_MOVEWORD, sets G_MW_CLIP+$000C
WriteDL(0xDB04000C, 2)
// G_MOVEWORD, sets G_MW_CLIP+$0014
WriteDL(0xDB040014, 0x10000 - 2)
// G_MOVEWORD, sets G_MW_CLIP+$001C
WriteDL(0xDB04001C, 0x10000 - 2)
// G_ENDDL: absent since we're not jumping to this routine
}
// G_SETCIMG, set our color buffer (fmt 0, bit size %10, width)
// NOTE(review): VIDEO_C_BUFFER is sized at 4 bytes per pixel and displayed as
// 32BPP by SetupScreen -- confirm that bit size %10 is the intended setting.
WriteDL(0xFF100000 | (640 - 1), VIDEO_C_BUFFER)
// G_SETZIMG, set our z buffer (fmt 0, bit size %00, width)
WriteDL(0xFE000000, VIDEO_Z_BUFFER)
// G_SETFILLCOLOR
WriteDL(0xF7000000, 0xFFFFFFFF)
// G_FILLRECT coordinate order: (right, bottom), (top, left)
// note that the coordinates are all inclusive!
WriteDL(0xF6000000 | (199 << 14) | (199 << 2), (100 << 14) | (100 << 2))
// G_RDPPIPESYNC
WriteDL(0xE7000000, 0)
// always finish it off by telling RDP to stop!
// G_RDPFULLSYNC, G_ENDDL
WriteDL(0xE9000000, 0); WriteDL(0xDF000000, 0)
// take a peek at the display list we wrote
// a0 = address of the display list; a2 = BLAH_XXD offset OR'd into that address.
// NOTE(review): the meaning of a1..a3 depends on DumpAndWrite, defined elsewhere.
lui a0, BLAH_BASE
ori a0, BLAH_DLIST
lli a1, 0x80
ori a2, a0, BLAH_XXD
jal DumpAndWrite
lli a3, 0x80 * 4
// stuff i'm borrowing from zelda:
// first SP_STATUS write: clear signals 0..2 and enable interrupt-on-break.
lui a0, SP_BASE
lli t0, CLR_SG2 | CLR_SG1 | CLR_SG0 | SET_IOB
sw t0, SP_STATUS(a0)
// NOTE: we should be asserting here that SP_STATUS & 1 != 0
// set RSP PC to IMEM+$0
lui a0, SP_PC_BASE
li t0, 0x04001000
sw t0, SP_PC(a0)
// tell RSP to run by clearing flags
// (clears halt, broke and single-step; keeps interrupt-on-break enabled)
lui a0, SP_BASE
lli t0, SET_IOB | CLR_STP | CLR_BRK | CLR_HLT
sw t0, SP_STATUS(a0)
nop
// also one thing i noticed in zelda is they set VI_V_INTR to 2
// so they get interrupts with scanlines (unlike us who just waits)
MainLoop:
// borrowing code from krom for now:
WaitScanline(0x1E0) // Wait For Scanline To Reach Vertical Blank
WaitScanline(0x1E2)
// WaitScanline sets a0
ori t0, r0, 0x00000800 // Even Field
li t0, 0x00000800 // Even Field
sw t0, VI_Y_SCALE(a0)
WaitScanline(0x1E0) // Wait For Scanline To Reach Vertical Blank
@ -91,8 +217,8 @@ MainLoop:
nop // delay slot
SetupScreen:
// NTSC: 640x480, 32BPP, Interlace, Resample Only, DRAM Origin VIDEO_BUFFER
ScreenNTSC(640, 480, BPP32|INTERLACE|AA_MODE_2, VIDEO_BUFFER | UNCACHED)
// NTSC: 640x480, 32BPP, Interlace, Resample Only, DRAM Origin VIDEO_C_BUFFER
ScreenNTSC(640, 480, BPP32|INTERLACE|AA_MODE_2, VIDEO_C_BUFFER | UNCACHED)
jr ra
nop
@ -101,7 +227,8 @@ LoadRSPBoot:
li t3, F3DZEX_BOOT.size
subiu t3, t3, 1 // DMA quirk
SP_DMA_WAIT() // clobbers t0, t5
ori t1, t5, 0x1000
// ori t1, t5, 0x1000
la t1, 0xA4001000
sw t1, SP_MEM_ADDR(t5)
sw t2, SP_DRAM_ADDR(t5)
sw t3, SP_RD_LEN(t5) // pull data from RDRAM into DMEM/IMEM
@ -109,13 +236,13 @@ LoadRSPBoot:
nop
PushVideoTask:
// a0: Task RDRAM Pointer (size: 0x40)
// a0: Task RDRAM Pointer (size: 0x40) (should probably be row-aligned)
subiu sp, sp, 0x18
sw ra, 0x10(sp)
lli t0, 1 // mode: video
lli t1, 4 // flags: ???
li t2, F3DZEX_BOOT
li t2, F3DZEX_BOOT // does not need masking for some reason
li t3, F3DZEX_BOOT.size
li t4, F3DZEX_IMEM & ADDR_MASK
li t5, F3DZEX_IMEM.size // note: Zelda uses 0x1000 for some reason (0x80 too big).
@ -131,8 +258,8 @@ PushVideoTask:
sw t7, 0x1C(a0)
li t0, VIDEO_STACK & ADDR_MASK // used for DList calls and returns?
li t1, VIDEO_STACK_SIZE
li t2, VIDEO_BUFFER & ADDR_MASK
li t3, (VIDEO_BUFFER & ADDR_MASK) + VIDEO_BUFFER_SIZE // end pointer (not size!)
li t2, VIDEO_SOMETHING & ADDR_MASK
li t3, (VIDEO_SOMETHING & ADDR_MASK) + VIDEO_SOMETHING_SIZE // end pointer (not size!)
li t4, ((BLAH_BASE << 16) | BLAH_DLIST_JUMPER) & ADDR_MASK // initial DList
lli t5, 8 // size of one jump command
li t6, VIDEO_YIELD & ADDR_MASK
@ -145,8 +272,16 @@ PushVideoTask:
sw t5, 0x34(a0)
sw t6, 0x38(a0)
sw t7, 0x3C(a0)
jal PushRSPTask // a0 passthru
nop
// tell data cache to write itself out
cache 0x19, 0x00(a0)
cache 0x19, 0x10(a0)
cache 0x19, 0x20(a0)
cache 0x19, 0x30(a0)
li t9, ADDR_MASK
jal PushRSPTask
and a0, a0, t9
lw ra, 0x10(sp)
jr ra
@ -155,7 +290,8 @@ PushVideoTask:
PushRSPTask:
lli t3, 0x40 - 1 // DMA quirk
SP_DMA_WAIT() // clobbers t0, t5
ori t1, t5, 0xFC0
// ori t1, t5, 0xFC0
la t1, 0xA4000FC0
sw t1, SP_MEM_ADDR(t5)
sw a0, SP_DRAM_ADDR(t5)
sw t3, SP_RD_LEN(t5) // pull data from RDRAM into DMEM/IMEM