shithub: zelda3

Download patch

ref: 1f8985903a326287ee4c5098316a6caa3bdd9467
parent: 38c6960e62514a0ec67bc48661b911999ef513e5
author: Snesrev <[email protected]>
date: Tue Sep 6 12:36:25 EDT 2022

Avoid extra memcpy for drawing

--- a/main.c
+++ b/main.c
@@ -20,7 +20,6 @@
 #include "zelda_rtl.h"
 #include "config.h"
 
-extern Ppu *GetPpuForRendering();
 extern Dsp *GetDspForRendering();
 extern Snes *g_snes;
 extern uint8 g_emulated_ram[0x20000];
@@ -125,6 +124,24 @@
   kSnesSamplesPerBlock = (534 * kSampleRate) / 32000,
 };
 
+// Draws one PPU frame into pixel_buffer; when g_display_perf is set, also refreshes g_curr_fps.
+static void RenderScreenWithPerf(uint32 *pixel_buffer) {
+  if (g_display_perf) {
+    static float history[64], average;  // ring of the last 64 samples; 'average' holds their running sum
+    static int history_pos;
+    uint64 before = SDL_GetPerformanceCounter();
+    ZeldaDrawPpuFrame(pixel_buffer);
+    uint64 after = SDL_GetPerformanceCounter();
+    float v = (double)SDL_GetPerformanceFrequency() / (after - before);  // draws per second; NOTE(review): unguarded if after == before
+    average += v - history[history_pos];  // swap the oldest sample out of the running sum
+    history[history_pos] = v;
+    history_pos = (history_pos + 1) & 63;  // wrap within the 64 slots
+    g_curr_fps = average * (1.0f / 64);
+  } else {
+    ZeldaDrawPpuFrame(pixel_buffer);
+  }
+}
+
 #undef main
 int main(int argc, char** argv) {
   ParseConfigFile();
@@ -258,21 +275,6 @@
     if (is_turbo)
       continue;
 
-    if (g_display_perf) {
-      static float history[64], average;
-      static int history_pos;
-      uint64 before = SDL_GetPerformanceCounter();
-      ZeldaDrawPpuFrame();
-      uint64 after = SDL_GetPerformanceCounter();
-      float v = (double)SDL_GetPerformanceFrequency() / (after - before);
-      average += v - history[history_pos];
-      history[history_pos] = v;
-      history_pos = (history_pos + 1) & 63;
-      g_curr_fps = average * (1.0f / 64);
-    } else {
-      ZeldaDrawPpuFrame();
-    }
-
     PlayAudio(snes_run, device, audioBuffer);
     RenderScreen(window, renderer, texture, (g_win_flags & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0);
 
@@ -369,7 +371,7 @@
     printf("Failed to lock texture: %s\n", SDL_GetError());
     return;
   }
-  ppu_putPixels(GetPpuForRendering(), (uint8_t*) pixels);
+  RenderScreenWithPerf((uint32 *)pixels);
   if (g_display_perf)
     RenderNumber((uint32 *)pixels + 512*2, g_curr_fps);
   SDL_UnlockTexture(texture);
--- a/snes/ppu.c
+++ b/snes/ppu.c
@@ -22,6 +22,7 @@
 static bool ppu_getWindowState(Ppu* ppu, int layer, int x);
 static bool ppu_evaluateSprites(Ppu* ppu, int line);
 static uint16_t ppu_getVramRemap(Ppu* ppu);
+static void PpuDrawWholeLine(Ppu *ppu, uint y);
 
 Ppu* ppu_init(Snes* snes) {
   Ppu* ppu = (Ppu * )malloc(sizeof(Ppu));
@@ -130,7 +131,6 @@
   ppu->countersLatched = false;
   ppu->ppu1openBus = 0;
   ppu->ppu2openBus = 0;
-  memset(ppu->pixelBuffer, 0, sizeof(ppu->pixelBuffer));
 }
 
 void ppu_saveload(Ppu *ppu, SaveLoadFunc *func, void *ctx) {
@@ -137,30 +137,16 @@
   func(ctx, &ppu->vram, offsetof(Ppu, mosaicModulo) - offsetof(Ppu, vram));
 }
 
-void ppu_handleVblank(Ppu* ppu) {
-  // called either right after ppu_checkOverscan at (0,225), or at (0,240)
-  if(!ppu->forcedBlank) {
-    ppu->oamAdr = ppu->oamAdrWritten;
-    ppu->oamInHigh = ppu->oamInHighWritten;
-    ppu->oamSecondWrite = false;
-  }
-  ppu->frameInterlace_always_zero = ppu->interlace_always_zero; // set if we have a interlaced frame
+// Targets `pixels` for this frame; it must hold at least 480 rows of 512 32-bit pixels.
+void PpuBeginDrawing(Ppu *ppu, uint32_t *pixels) {
+  ppu->renderBuffer = pixels + 512 * 16;  // line output starts at row 16, just below the cleared top band
+  // clear top 16 and last 16 lines
+  memset(pixels, 0, 512 * 16 * 4);
+  memset(pixels + (464 * 512), 0, 512 * 16 * 4);
 }
 
-static void PpuDrawWholeLine(Ppu *ppu, uint y);
-
-void ppu_runLine(Ppu* ppu, int line) {
+void ppu_runLine(Ppu *ppu, int line) {
   if(line == 0) {
-
-    // Ensure all window layer fields are just 0 or 1
-    for (int i = 0; i < 6; i++) {
-      WindowLayer *wl = &ppu->windowLayer[i];
-      wl->window1enabled = (wl->window1enabled != 0);
-      wl->window2enabled = (wl->window2enabled != 0);
-      wl->window1inversed = (wl->window1inversed != 0);
-      wl->window2inversed = (wl->window2inversed != 0);
-    }
-
     ppu->rangeOver = false;
     ppu->timeOver = false;
     ppu->evenFrame = !ppu->evenFrame;
@@ -188,6 +174,10 @@
         ppu_handlePixel(ppu, x, line);
       }
     }
+
+    // Duplicate each line
+    uint32 *dst = &ppu->renderBuffer[(line - 1) * 1024];
+    memcpy(dst + 512, dst, 512 * 4);
   }
 }
 
@@ -726,12 +716,9 @@
 
 static NOINLINE void PpuDrawWholeLine(Ppu *ppu, uint y) {
   if (ppu->forcedBlank) {
-    int row = (y - 1) + (ppu->evenFrame ? 0 : 239);
-    uint8_t *dst = &ppu->pixelBuffer[row * 2048];
-    for (int i = 0; i < 256; i++, dst += 8) {
-      dst[1] = dst[5] = 0;
-      dst[2] = dst[6] = 0;
-      dst[3] = dst[7] = 0;
+    uint32 *dst = &ppu->renderBuffer[(y - 1) * 1024];
+    for (int i = 0; i < 256; i++, dst += 2) {
+      dst[1] = dst[0] = 0;
     }
     return;
   }
@@ -780,8 +767,7 @@
   uint32 cw_clip_math = ((cwin.bits & kCwBitsMod[ppu->clipMode]) ^ kCwBitsMod[ppu->clipMode + 4]) |
                         ((cwin.bits & kCwBitsMod[ppu->preventMathMode]) ^ kCwBitsMod[ppu->preventMathMode + 4]) << 8;
 
-  int row = (y - 1) + (ppu->evenFrame ? 0 : 239);
-  uint32 *dst = (uint32*)&ppu->pixelBuffer[row * 2048];
+  uint32 *dst = &ppu->renderBuffer[(y - 1) * 1024];
   
   uint32 windex = 0;
   do {
@@ -890,13 +876,16 @@
       r2 = r; g2 = g; b2 = b;
     }
   }
-  int row = (y - 1) + (ppu->evenFrame ? 0 : 239);
-  ppu->pixelBuffer[row * 2048 + x * 8 + 1] = ((b2 << 3) | (b2 >> 2)) * ppu->brightness / 15;
-  ppu->pixelBuffer[row * 2048 + x * 8 + 2] = ((g2 << 3) | (g2 >> 2)) * ppu->brightness / 15;
-  ppu->pixelBuffer[row * 2048 + x * 8 + 3] = ((r2 << 3) | (r2 >> 2)) * ppu->brightness / 15;
-  ppu->pixelBuffer[row * 2048 + x * 8 + 5] = ((b << 3) | (b >> 2)) * ppu->brightness / 15;
-  ppu->pixelBuffer[row * 2048 + x * 8 + 6] = ((g << 3) | (g >> 2)) * ppu->brightness / 15;
-  ppu->pixelBuffer[row * 2048 + x * 8 + 7] = ((r << 3) | (r >> 2)) * ppu->brightness / 15;
+  int row = y - 1;
+  uint8 *pixelBuffer = (uint8*) &ppu->renderBuffer[row * 1024 + x * 2];
+  pixelBuffer[0] = 0;
+  pixelBuffer[1] = ((b2 << 3) | (b2 >> 2)) * ppu->brightness / 15;
+  pixelBuffer[2] = ((g2 << 3) | (g2 >> 2)) * ppu->brightness / 15;
+  pixelBuffer[3] = ((r2 << 3) | (r2 >> 2)) * ppu->brightness / 15;
+  pixelBuffer[4] = 0;
+  pixelBuffer[5] = ((b << 3) | (b >> 2)) * ppu->brightness / 15;
+  pixelBuffer[6] = ((g << 3) | (g >> 2)) * ppu->brightness / 15;
+  pixelBuffer[7] = ((r << 3) | (r >> 2)) * ppu->brightness / 15;
 }
 
 static const int bitDepthsPerMode[10][4] = {
@@ -1636,14 +1625,3 @@
   }
 }
 
-void ppu_putPixels(Ppu* ppu, uint8_t* pixels) {
-  for(int y = 0; y < 224; y++) {
-    int dest = y * 2 + 16;
-    int y1 = y + (ppu->evenFrame ? 0 : 239);
-    memcpy(pixels + (dest * 2048), &ppu->pixelBuffer[y1 * 2048], 2048);
-    memcpy(pixels + ((dest + 1) * 2048), &ppu->pixelBuffer[y1 * 2048], 2048);
-  }
-  // clear top 16 and last 16 lines
-  memset(pixels, 0, 2048 * 16);
-  memset(pixels + (464 * 2048), 0, 2048 * 16);
-}
--- a/snes/ppu.h
+++ b/snes/ppu.h
@@ -46,6 +46,7 @@
   bool lineHasSprites;
   uint8_t lastBrightnessMult;
   uint8_t lastMosaicModulo;
+  uint32_t *renderBuffer;
   Snes* snes;
   // store 31 extra entries to remove the need for clamp
   uint8_t brightnessMult[32 + 31]; 
@@ -138,10 +139,6 @@
   uint8_t ppu2openBus;
 
   uint8_t mosaicModulo[256];
-
-  // pixel buffer (xbgr)
-  // times 2 for even and odd frame
-  uint8_t pixelBuffer[512 * 4 * 239 * 2];
 };
 
 Ppu* ppu_init(Snes* snes);
@@ -151,7 +148,7 @@
 void ppu_runLine(Ppu* ppu, int line);
 uint8_t ppu_read(Ppu* ppu, uint8_t adr);
 void ppu_write(Ppu* ppu, uint8_t adr, uint8_t val);
-void ppu_putPixels(Ppu* ppu, uint8_t* pixels);
 void ppu_saveload(Ppu *ppu, SaveLoadFunc *func, void *ctx);
+void PpuBeginDrawing(Ppu *ppu, uint32_t *buffer);
 
 #endif
--- a/snes/snes.h
+++ b/snes/snes.h
@@ -83,9 +83,6 @@
 // snes_other.c functions:
 
 bool snes_loadRom(Snes* snes, uint8_t* data, int length);
-void snes_setButtonState(Snes* snes, int player, int button, bool pressed);
-void snes_setPixels(Snes* snes, uint8_t* pixelData);
-void snes_setSamples(Snes* snes, int16_t* sampleData, int samplesPerFrame);
 void snes_saveload(Snes *snes, SaveLoadFunc *func, void *ctx);
 
 enum {
--- a/snes/snes_other.c
+++ b/snes/snes_other.c
@@ -104,34 +104,6 @@
   return true;
 }
 
-void snes_setButtonState(Snes* snes, int player, int button, bool pressed) {
-  // set key in constroller
-  if(player == 1) {
-    if(pressed) {
-      snes->input1->currentState |= 1 << button;
-    } else {
-      snes->input1->currentState &= ~(1 << button);
-    }
-  } else {
-    if(pressed) {
-      snes->input2->currentState |= 1 << button;
-    } else {
-      snes->input2->currentState &= ~(1 << button);
-    }
-  }
-}
-
-void snes_setPixels(Snes* snes, uint8_t* pixelData) {
-  // size is 4 (rgba) * 512 (w) * 480 (h)
-  ppu_putPixels(snes->ppu, pixelData);
-}
-
-void snes_setSamples(Snes* snes, int16_t* sampleData, int samplesPerFrame) {
-  // size is 2 (int16) * 2 (stereo) * samplesPerFrame
-  // sets samples in the sampleData
-  dsp_getSamples(snes->apu->dsp, sampleData, samplesPerFrame);
-}
-
 static void readHeader(uint8_t* data, int location, CartHeader* header) {
   // read name, TODO: non-ASCII names?
   for(int i = 0; i < 21; i++) {
--- a/zelda_cpu_infra.c
+++ b/zelda_cpu_infra.c
@@ -310,10 +310,6 @@
   RunOrigAsmCodeOneLoop(snes);
 }
 
-struct Ppu *GetPpuForRendering() {
-  return g_zenv.ppu;
-}
-
 Dsp *GetDspForRendering() {
   SpcPlayer_GenerateSamples(g_zenv.player);
   return g_zenv.player->dsp;
--- a/zelda_rtl.c
+++ b/zelda_rtl.c
@@ -161,8 +161,10 @@
   c->rep_count--;
 }
 
-void ZeldaDrawPpuFrame() {
+void ZeldaDrawPpuFrame(uint32 *pixel_buffer) {
   SimpleHdma hdma_chans[2];
+
+  PpuBeginDrawing(g_zenv.ppu, pixel_buffer);
 
   dma_startDma(g_zenv.dma, HDMAEN_copy, true);
 
--- a/zelda_rtl.h
+++ b/zelda_rtl.h
@@ -184,7 +184,8 @@
 void zelda_ppu_write_word(uint32_t adr, uint16_t val);
 void zelda_apu_runcycles();
 const uint8 *SimpleHdma_GetPtr(uint32 p);
-void ZeldaDrawPpuFrame();
+// pixel_buffer must hold 512x480 32-bit pixels; the frame is drawn directly into it.
+void ZeldaDrawPpuFrame(uint32 *pixel_buffer);
 void HdmaSetup(uint32 addr6, uint32 addr7, uint8 transfer_unit, uint8 reg6, uint8 reg7, uint8 indirect_bank);
 void ZeldaInitializationCode();
 void ZeldaRunGameLoop();