ref: 1f8985903a326287ee4c5098316a6caa3bdd9467
parent: 38c6960e62514a0ec67bc48661b911999ef513e5
author: Snesrev <[email protected]>
date: Tue Sep 6 12:36:25 EDT 2022
Avoid extra memcpy for drawing
--- a/main.c
+++ b/main.c
@@ -20,7 +20,6 @@
#include "zelda_rtl.h"
#include "config.h"
-extern Ppu *GetPpuForRendering();
extern Dsp *GetDspForRendering();
extern Snes *g_snes;
extern uint8 g_emulated_ram[0x20000];
@@ -125,6 +124,24 @@
kSnesSamplesPerBlock = (534 * kSampleRate) / 32000,
};
+static void RenderScreenWithPerf(uint32 *pixel_buffer) {  // Draws one frame into pixel_buffer via ZeldaDrawPpuFrame; when g_display_perf is set, also times the draw and updates g_curr_fps.
+ if (g_display_perf) {
+ static float history[64], average;  // history: the last 64 rate samples; average: running SUM of those samples (scaled by 1/64 below)
+ static int history_pos;  // ring-buffer index of the oldest sample, i.e. the next slot to overwrite
+ uint64 before = SDL_GetPerformanceCounter();
+ ZeldaDrawPpuFrame(pixel_buffer);
+ uint64 after = SDL_GetPerformanceCounter();
+ float v = (double)SDL_GetPerformanceFrequency() / (after - before);  // draws per second implied by this frame: counter frequency / elapsed ticks. NOTE(review): no guard for after == before (zero divisor) - confirm this cannot happen in practice
+ average += v - history[history_pos];  // swap the oldest sample's contribution for the new one in the running sum
+ history[history_pos] = v;
+ history_pos = (history_pos + 1) & 63;  // advance the ring index, wrapping at 64
+ g_curr_fps = average * (1.0f / 64);  // mean of the 64 stored samples
+ } else {
+ ZeldaDrawPpuFrame(pixel_buffer);  // perf display off: draw without timing
+ }
+}
+
+
#undef main
int main(int argc, char** argv) {
ParseConfigFile();
@@ -258,21 +275,6 @@
if (is_turbo)
continue;
- if (g_display_perf) {
- static float history[64], average;
- static int history_pos;
- uint64 before = SDL_GetPerformanceCounter();
- ZeldaDrawPpuFrame();
- uint64 after = SDL_GetPerformanceCounter();
- float v = (double)SDL_GetPerformanceFrequency() / (after - before);
- average += v - history[history_pos];
- history[history_pos] = v;
- history_pos = (history_pos + 1) & 63;
- g_curr_fps = average * (1.0f / 64);
- } else {
- ZeldaDrawPpuFrame();
- }
-
PlayAudio(snes_run, device, audioBuffer);
RenderScreen(window, renderer, texture, (g_win_flags & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0);
@@ -369,7 +371,7 @@
printf("Failed to lock texture: %s\n", SDL_GetError());
return;
}
- ppu_putPixels(GetPpuForRendering(), (uint8_t*) pixels);
+ RenderScreenWithPerf((uint32 *)pixels);
if (g_display_perf)
RenderNumber((uint32 *)pixels + 512*2, g_curr_fps);
SDL_UnlockTexture(texture);
--- a/snes/ppu.c
+++ b/snes/ppu.c
@@ -22,6 +22,7 @@
static bool ppu_getWindowState(Ppu* ppu, int layer, int x);
static bool ppu_evaluateSprites(Ppu* ppu, int line);
static uint16_t ppu_getVramRemap(Ppu* ppu);
+static void PpuDrawWholeLine(Ppu *ppu, uint y);
Ppu* ppu_init(Snes* snes) {
Ppu* ppu = (Ppu * )malloc(sizeof(Ppu));
@@ -130,7 +131,6 @@
ppu->countersLatched = false;
ppu->ppu1openBus = 0;
ppu->ppu2openBus = 0;
- memset(ppu->pixelBuffer, 0, sizeof(ppu->pixelBuffer));
}
void ppu_saveload(Ppu *ppu, SaveLoadFunc *func, void *ctx) {
@@ -137,30 +137,16 @@
func(ctx, &ppu->vram, offsetof(Ppu, mosaicModulo) - offsetof(Ppu, vram));
}
-void ppu_handleVblank(Ppu* ppu) {
- // called either right after ppu_checkOverscan at (0,225), or at (0,240)
- if(!ppu->forcedBlank) {
- ppu->oamAdr = ppu->oamAdrWritten;
- ppu->oamInHigh = ppu->oamInHighWritten;
- ppu->oamSecondWrite = false;
- }
- ppu->frameInterlace_always_zero = ppu->interlace_always_zero; // set if we have a interlaced frame
+void PpuBeginDrawing(Ppu *ppu, uint32_t *pixels) {  // Points the PPU's render target at the caller-supplied frame buffer and blanks that buffer's top and bottom borders.
+ ppu->renderBuffer = pixels + 512 * 16;  // rendered output starts after a 16-row top border (512 pixels per row)
+
+ // clear top 16 and last 16 lines (each line is 512 pixels of 4 bytes)
+ memset(pixels, 0, 512 * 16 * 4);
+ memset(pixels + (464 * 512), 0, 512 * 16 * 4);  // bottom border: 16 rows starting at row 464, so the buffer must span at least 480 rows
}
-static void PpuDrawWholeLine(Ppu *ppu, uint y);
-
-void ppu_runLine(Ppu* ppu, int line) {
+void ppu_runLine(Ppu *ppu, int line) {
if(line == 0) {
-
- // Ensure all window layer fields are just 0 or 1
- for (int i = 0; i < 6; i++) {
- WindowLayer *wl = &ppu->windowLayer[i];
- wl->window1enabled = (wl->window1enabled != 0);
- wl->window2enabled = (wl->window2enabled != 0);
- wl->window1inversed = (wl->window1inversed != 0);
- wl->window2inversed = (wl->window2inversed != 0);
- }
-
ppu->rangeOver = false;
ppu->timeOver = false;
ppu->evenFrame = !ppu->evenFrame;
@@ -188,6 +174,10 @@
ppu_handlePixel(ppu, x, line);
}
}
+
+ // Duplicate each line
+ uint32 *dst = &ppu->renderBuffer[(line - 1) * 1024];
+ memcpy(dst + 512, dst, 512 * 4);
}
}
@@ -726,12 +716,9 @@
static NOINLINE void PpuDrawWholeLine(Ppu *ppu, uint y) {
if (ppu->forcedBlank) {
- int row = (y - 1) + (ppu->evenFrame ? 0 : 239);
- uint8_t *dst = &ppu->pixelBuffer[row * 2048];
- for (int i = 0; i < 256; i++, dst += 8) {
- dst[1] = dst[5] = 0;
- dst[2] = dst[6] = 0;
- dst[3] = dst[7] = 0;
+ uint32 *dst = &ppu->renderBuffer[(y - 1) * 1024];
+ for (int i = 0; i < 256; i++, dst += 2) {
+ dst[1] = dst[0] = 0;
}
return;
}
@@ -780,8 +767,7 @@
uint32 cw_clip_math = ((cwin.bits & kCwBitsMod[ppu->clipMode]) ^ kCwBitsMod[ppu->clipMode + 4]) |
((cwin.bits & kCwBitsMod[ppu->preventMathMode]) ^ kCwBitsMod[ppu->preventMathMode + 4]) << 8;
- int row = (y - 1) + (ppu->evenFrame ? 0 : 239);
- uint32 *dst = (uint32*)&ppu->pixelBuffer[row * 2048];
+ uint32 *dst = &ppu->renderBuffer[(y - 1) * 1024];
uint32 windex = 0;
do {
@@ -890,13 +876,16 @@
r2 = r; g2 = g; b2 = b;
}
}
- int row = (y - 1) + (ppu->evenFrame ? 0 : 239);
- ppu->pixelBuffer[row * 2048 + x * 8 + 1] = ((b2 << 3) | (b2 >> 2)) * ppu->brightness / 15;
- ppu->pixelBuffer[row * 2048 + x * 8 + 2] = ((g2 << 3) | (g2 >> 2)) * ppu->brightness / 15;
- ppu->pixelBuffer[row * 2048 + x * 8 + 3] = ((r2 << 3) | (r2 >> 2)) * ppu->brightness / 15;
- ppu->pixelBuffer[row * 2048 + x * 8 + 5] = ((b << 3) | (b >> 2)) * ppu->brightness / 15;
- ppu->pixelBuffer[row * 2048 + x * 8 + 6] = ((g << 3) | (g >> 2)) * ppu->brightness / 15;
- ppu->pixelBuffer[row * 2048 + x * 8 + 7] = ((r << 3) | (r >> 2)) * ppu->brightness / 15;
+ int row = y - 1;
+ uint8 *pixelBuffer = (uint8*) &ppu->renderBuffer[row * 1024 + x * 2];
+ pixelBuffer[0] = 0;
+ pixelBuffer[1] = ((b2 << 3) | (b2 >> 2)) * ppu->brightness / 15;
+ pixelBuffer[2] = ((g2 << 3) | (g2 >> 2)) * ppu->brightness / 15;
+ pixelBuffer[3] = ((r2 << 3) | (r2 >> 2)) * ppu->brightness / 15;
+ pixelBuffer[4] = 0;
+ pixelBuffer[5] = ((b << 3) | (b >> 2)) * ppu->brightness / 15;
+ pixelBuffer[6] = ((g << 3) | (g >> 2)) * ppu->brightness / 15;
+ pixelBuffer[7] = ((r << 3) | (r >> 2)) * ppu->brightness / 15;
}
static const int bitDepthsPerMode[10][4] = {
@@ -1636,14 +1625,3 @@
}
}
-void ppu_putPixels(Ppu* ppu, uint8_t* pixels) {
- for(int y = 0; y < 224; y++) {
- int dest = y * 2 + 16;
- int y1 = y + (ppu->evenFrame ? 0 : 239);
- memcpy(pixels + (dest * 2048), &ppu->pixelBuffer[y1 * 2048], 2048);
- memcpy(pixels + ((dest + 1) * 2048), &ppu->pixelBuffer[y1 * 2048], 2048);
- }
- // clear top 16 and last 16 lines
- memset(pixels, 0, 2048 * 16);
- memset(pixels + (464 * 2048), 0, 2048 * 16);
-}
--- a/snes/ppu.h
+++ b/snes/ppu.h
@@ -46,6 +46,7 @@
bool lineHasSprites;
uint8_t lastBrightnessMult;
uint8_t lastMosaicModulo;
+ uint32_t *renderBuffer;
Snes* snes;
// store 31 extra entries to remove the need for clamp
uint8_t brightnessMult[32 + 31];
@@ -138,10 +139,6 @@
uint8_t ppu2openBus;
uint8_t mosaicModulo[256];
-
- // pixel buffer (xbgr)
- // times 2 for even and odd frame
- uint8_t pixelBuffer[512 * 4 * 239 * 2];
};
Ppu* ppu_init(Snes* snes);
@@ -151,7 +148,7 @@
void ppu_runLine(Ppu* ppu, int line);
uint8_t ppu_read(Ppu* ppu, uint8_t adr);
void ppu_write(Ppu* ppu, uint8_t adr, uint8_t val);
-void ppu_putPixels(Ppu* ppu, uint8_t* pixels);
void ppu_saveload(Ppu *ppu, SaveLoadFunc *func, void *ctx);
+void PpuBeginDrawing(Ppu *ppu, uint32_t *buffer);
#endif
--- a/snes/snes.h
+++ b/snes/snes.h
@@ -83,9 +83,6 @@
// snes_other.c functions:
bool snes_loadRom(Snes* snes, uint8_t* data, int length);
-void snes_setButtonState(Snes* snes, int player, int button, bool pressed);
-void snes_setPixels(Snes* snes, uint8_t* pixelData);
-void snes_setSamples(Snes* snes, int16_t* sampleData, int samplesPerFrame);
void snes_saveload(Snes *snes, SaveLoadFunc *func, void *ctx);
enum {
--- a/snes/snes_other.c
+++ b/snes/snes_other.c
@@ -104,34 +104,6 @@
return true;
}
-void snes_setButtonState(Snes* snes, int player, int button, bool pressed) {
- // set key in constroller
- if(player == 1) {
- if(pressed) {
- snes->input1->currentState |= 1 << button;
- } else {
- snes->input1->currentState &= ~(1 << button);
- }
- } else {
- if(pressed) {
- snes->input2->currentState |= 1 << button;
- } else {
- snes->input2->currentState &= ~(1 << button);
- }
- }
-}
-
-void snes_setPixels(Snes* snes, uint8_t* pixelData) {
- // size is 4 (rgba) * 512 (w) * 480 (h)
- ppu_putPixels(snes->ppu, pixelData);
-}
-
-void snes_setSamples(Snes* snes, int16_t* sampleData, int samplesPerFrame) {
- // size is 2 (int16) * 2 (stereo) * samplesPerFrame
- // sets samples in the sampleData
- dsp_getSamples(snes->apu->dsp, sampleData, samplesPerFrame);
-}
-
static void readHeader(uint8_t* data, int location, CartHeader* header) {
// read name, TODO: non-ASCII names?
for(int i = 0; i < 21; i++) {
--- a/zelda_cpu_infra.c
+++ b/zelda_cpu_infra.c
@@ -310,10 +310,6 @@
RunOrigAsmCodeOneLoop(snes);
}
-struct Ppu *GetPpuForRendering() {
- return g_zenv.ppu;
-}
-
Dsp *GetDspForRendering() {
SpcPlayer_GenerateSamples(g_zenv.player);
return g_zenv.player->dsp;
--- a/zelda_rtl.c
+++ b/zelda_rtl.c
@@ -161,8 +161,10 @@
c->rep_count--;
}
-void ZeldaDrawPpuFrame() {
+void ZeldaDrawPpuFrame(uint32 *pixel_buffer) {
SimpleHdma hdma_chans[2];
+
+ PpuBeginDrawing(g_zenv.ppu, pixel_buffer);
dma_startDma(g_zenv.dma, HDMAEN_copy, true);
--- a/zelda_rtl.h
+++ b/zelda_rtl.h
@@ -184,7 +184,8 @@
void zelda_ppu_write_word(uint32_t adr, uint16_t val);
void zelda_apu_runcycles();
const uint8 *SimpleHdma_GetPtr(uint32 p);
-void ZeldaDrawPpuFrame();
+// pixel_buffer must point to a 512x480 buffer of 32-bit pixels
+void ZeldaDrawPpuFrame(uint32 *pixel_buffer);
void HdmaSetup(uint32 addr6, uint32 addr7, uint8 transfer_unit, uint8 reg6, uint8 reg7, uint8 indirect_bank);
void ZeldaInitializationCode();
void ZeldaRunGameLoop();