add cache-testing program and software font rendering

This commit is contained in:
Connor Olding 2018-10-02 05:23:45 +02:00
parent 498f4b7fb9
commit 187b5c4cc2
2 changed files with 354 additions and 0 deletions

245
cachetest.asm Normal file
View file

@ -0,0 +1,245 @@
arch n64.cpu
endian msb
include "inc/util.inc"
include "inc/n64.inc"
include "inc/64drive.inc"
output "cache.z64", create
fill 1052672 // ROM size
origin 0
base 0x80000000
// N64 header:
dw $80371240 // PI_BSB_DOM1
dw $F // Initial Clock Rate
dw Start // Boot Address Offset
dw $1444 // Release Offset
db "CRC1" // CRC1: COMPLEMENT CHECK
db "CRC2" // CRC2: CHECKSUM
dd 0 // unused
db "Cache tests "
// "123456789012345678901234567"
db $00 // Developer ID Code
db $00 // Cartridge ID Code
db 0 // unused
db $00 // Country Code
db 0 // unused
insert "bin/6102.bin"
if origin() != 0x1000 {
error "bad header or bootcode; combined size should be exactly 0x1000"
}
constant K_DEBUG(0)
constant K_BASE(0x8000)
constant K_CONSOLE_AVAILABLE(0x0000)
constant K_CI_BASE(0x0004)
constant K_DUMP(0x0020)
constant MAIN_BASE(0x8001)
constant MAIN_FROM(0x0000)
constant MAIN_TO(0x0080)
constant MAIN_FONT(0x4000)
constant WIDTH(640)
constant HEIGHT(480)
constant DEPTH(2)
constant VIDEO_BUFFER(0x80400000 - WIDTH * HEIGHT * DEPTH)
constant VIDEO_MODE(BPP16 | INTERLACE | AA_MODE_2 | DIVOT_EN | PIXEL_ADV_3 | DITHER_FILTER_EN)
constant INIT_STACK(VIDEO_BUFFER - 0x10) // remember, it grows backwards.
macro AsciiNybble(out, reg) {
sltiu at, {reg}, 0xA
bnez at,+
addiu {out}, {reg}, 0x30 // delay slot
addiu {out}, {reg}, 0x41 - 0xA
+
}
Start:
// initialize the N64 so it doesn't immediately die.
lui a0, PIF_BASE
lli t0, 8
sw t0, PIF_RAM+0x3C(a0)
// no console for this test, just drawing on the screen.
lui a0, K_BASE
sw r0, K_CONSOLE_AVAILABLE(a0)
// set up the stack so we can actually call some functions later.
la sp, INIT_STACK
sw r0, 0x0(sp)
sw r0, 0x4(sp)
sw r0, 0x8(sp)
sw r0, 0xC(sp)
// clear the screen.
la a0, VIDEO_BUFFER
la a1, VIDEO_BUFFER + WIDTH * HEIGHT * DEPTH
-
addiu a0, 4
sw r0, -4(a0)
bne a0, a1,-
// write some dummy data to play with.
li t1, 0xDEADBEEF
li t2, 0xCAFEBABE
li t3, 0xABAD1DEA
li t4, 0x12345678
lui a0, MAIN_BASE
// spaced out a bit just to see what happens.
sw t1, 0x00(a0)
sw r0, 0x04(a0)
sw t2, 0x08(a0)
sw r0, 0x0C(a0)
sw t3, 0x10(a0)
sw r0, 0x14(a0)
sw t4, 0x18(a0)
sw r0, 0x1C(a0)
// let's try a DMA despite not having invalidated the writeback cache.
SP_DMA_WAIT() // clobbers t0,a0
la t5, (SP_MEM_BASE << 16) | SP_DMEM
la t6, ((MAIN_BASE << 16) | MAIN_FROM) & ADDR_MASK
li t7, 0x20 - 1 // DMA transfers always take one less.
lui a0, SP_BASE
sw t5, SP_MEM_ADDR(a0)
sw t6, SP_DRAM_ADDR(a0)
sw t7, SP_RD_LEN(a0) // pull data from RDRAM into DMEM/IMEM
SP_DMA_WAIT() // clobbers t0,a0
// and back out again, in a different spot.
la t6, ((MAIN_BASE << 16) | MAIN_TO) & ADDR_MASK
sw t5, SP_MEM_ADDR(a0)
sw t6, SP_DRAM_ADDR(a0)
sw t7, SP_WR_LEN(a0) // pull data from DMEM/IMEM into RDRAM
SP_DMA_WAIT() // clobbers t0,a0
if 0 {
// load results into registers.
lui a0, MAIN_BASE
lw t1, 0x80(a0)
lw t2, 0x88(a0)
lw t3, 0x90(a0)
lw t4, 0x98(a0)
}
lui a0, MAIN_BASE
jal LoadFont16
ori a0, MAIN_FONT
// show our results on-screen.
lli s0, 64 // s0: X
lli s1, 48 // s1: Y
lli s2, 0x20 / 4 // s2: number of words to draw
lui s3, MAIN_BASE
ori s3, MAIN_TO // s3: start of data to dump
// ori s3, MAIN_FROM
MainHexDumpLoop:
lw s4, 0(s3) // s4: current word being drawn
addiu s0, 8 * 8
lli s5, 8 // s5: inner loop iteration count
MainHexDumpInnerLoop:
andi t0, s4, 0x0F
subiu s0, 8
lui a0, MAIN_BASE
ori a0, MAIN_FONT
AsciiNybble(a1, t0)
sll a2, s1, 16
or a2, s0
la a3, VIDEO_BUFFER
jal DrawChar16
nop
subiu s5, 1
bnez s5, MainHexDumpInnerLoop
srl s4, 4
subiu s2, 1
addiu s1, 12
bnez s2, MainHexDumpLoop
addiu s3, 4
// use our old cache-poking utility for now.
jal PokeDataCache
nop
if 0 {
ScreenNTSC2(WIDTH, HEIGHT, VIDEO_MODE, VIDEO_BUFFER | UNCACHED)
} else {
lui a0, VI_BASE
li t1, VIDEO_MODE
li t2, VIDEO_BUFFER | UNCACHED
li t3, 0x00000280
li t4, 0 // 0x00000200
li t5, 0x00000000
li t6, 0x03E52239
li t7, 0x0000020C
sw t1, 4 * 0(a0)
sw t2, 4 * 1(a0)
sw t3, 4 * 2(a0)
sw t4, 4 * 3(a0)
sw t5, 4 * 4(a0)
sw t6, 4 * 5(a0)
sw t7, 4 * 6(a0)
li t1, 0x00000C15
li t2, 0x0C150C15
li t3, 0x006C02EC
li t4, 0x002301FD
li t5, 0x000E0204
li t6, 0x00000400
li t7, 0x02000800
sw t1, 4 * 7(a0)
sw t2, 4 * 8(a0)
sw t3, 4 * 9(a0)
sw t4, 4 * 10(a0)
sw t5, 4 * 11(a0)
sw t6, 4 * 12(a0)
sw t7, 4 * 13(a0)
}
VideoLoop:
WaitScanline(2)
j VideoLoop
nop
Die:
j Die
nop
PokeDataCache:
lui a0, 0x8000
ori a1, a0, 8 * 1024 // cache size
-
cache 1, 0x00(a0)
cache 1, 0x10(a0)
cache 1, 0x20(a0)
cache 1, 0x30(a0)
cache 1, 0x40(a0)
cache 1, 0x50(a0)
cache 1, 0x60(a0)
cache 1, 0x70(a0)
addiu a0, 0x80
bne a0, a1,-
nop
jr ra
nop
include "font.asm"

109
font.asm Normal file
View file

@ -0,0 +1,109 @@
constant FONT_SIZE16(8 * 12 * 2 * 256) // 0xC000
LoadFont16:
// loads a 256-character, 8x12 font
// as an RGB5A1 (16-bpp) image to the specified address.
// a0: address to load font to (size: 0xC000)
li t9, FONT_SIZE16
addu a1, a0, t9 // a1: end of output (exclusive)
la a2, FONT // a2: start of input
la a3, FONT + FONT.size // a3: end of input (exclusive)
LoadFont16Loop:
lbu t9, 0(a2)
addiu a2, 1
// sign-extend every pixel to get our blacks and whites.
sll t1, t9, 24
sll t2, t9, 25
sll t3, t9, 26
sll t4, t9, 27
sll t5, t9, 28
sll t6, t9, 29
sll t7, t9, 30
sll t8, t9, 31
//
sra t1, 31
sra t2, 31
sra t3, 31
sra t4, 31
sra t5, 31
sra t6, 31
sra t7, 31
sra t8, 31
sh t1, 0x0(a0)
sh t2, 0x2(a0)
sh t3, 0x4(a0)
sh t4, 0x6(a0)
sh t5, 0x8(a0)
sh t6, 0xA(a0)
sh t7, 0xC(a0)
sh t8, 0xE(a0)
bne a2, a3, LoadFont16Loop
addiu a0, 0x10
jr ra
nop
DrawChar16:
// draws a 16-bpp character on-screen at the specified coordinates.
// a0: font data address (same argument as LoadFont16)
// a1: character (range: 0 to 255 inclusive)
// a2: X, Y coordinate in pixels: X | Y << 16
// a3: output image address
lli t9, 8 * 12 * 2
multu a1, t9
mflo t9
addu a0, t9 // a0: character data address
andi a1, a2, 0xFFFF // a1: X
srl a2, 16 // a2: Y
sll t0, a1, 1
lli t9, WIDTH * 2
multu a2, t9
mflo t9
addu a3, t0 // offset output by X
addu a3, t9 // offset output by Y
lli t9, 12 // t9: rows remaining (character height)
DrawChar16Loop:
// character width hardcoded for 8.
lhu t1, 0x0(a0)
lhu t2, 0x2(a0)
lhu t3, 0x4(a0)
lhu t4, 0x6(a0)
lhu t5, 0x8(a0)
lhu t6, 0xA(a0)
lhu t7, 0xC(a0)
lhu t8, 0xE(a0)
sh t1, 0x0(a3)
sh t2, 0x2(a3)
sh t3, 0x4(a3)
sh t4, 0x6(a3)
sh t5, 0x8(a3)
sh t6, 0xA(a3)
sh t7, 0xC(a3)
sh t8, 0xE(a3)
addiu a0, 0x10
subiu t9, 1
bnez t9, DrawChar16Loop
addiu a3, WIDTH * 2
jr ra
nop
FontLUT16:
dh 0x0000, 0xFFFF
FontLUT32:
dw 0x00000000, 0xFFFFFFFF
align(16); insert FONT, "res/dwarf.1bpp"