shithub: zelda3

Download patch

ref: 0184245d3352001a72bbc12386d7882cc23eb778
parent: dfc9377fca976ca112ec9d906b69c8cb179c40f6
author: Snesrev <[email protected]>
date: Sun Sep 11 10:37:14 EDT 2022

Switch the streaming texture pixel format from RGBX8888 to ARGB8888 to improve performance

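SDL_UnlockTexture was really slow on some computers when the texture used the RGBX8888 format, so the texture and the PPU's pixel output now use ARGB8888.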

--- a/main.c
+++ b/main.c
@@ -194,10 +194,18 @@
     printf("Failed to create renderer: %s\n", SDL_GetError());
     return 1;
   }
+
+  SDL_RendererInfo renderer_info;
+  SDL_GetRendererInfo(renderer, &renderer_info);
+  printf("Supported texture formats:");
+  for (int i = 0; i < renderer_info.num_texture_formats; i++)
+    printf(" %s", SDL_GetPixelFormatName(renderer_info.texture_formats[i]));
+  printf("\n");
+
   g_renderer = renderer;
   if (!g_config.ignore_aspect_ratio)
     SDL_RenderSetLogicalSize(renderer, kRenderWidth, kRenderHeight);
-  SDL_Texture* texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_RGBX8888, SDL_TEXTUREACCESS_STREAMING, kRenderWidth * 2, kRenderHeight * 2);
+  SDL_Texture* texture = SDL_CreateTexture(renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, kRenderWidth * 2, kRenderHeight * 2);
   if(texture == NULL) {
     printf("Failed to create texture: %s\n", SDL_GetError());
     return 1;
@@ -303,15 +311,32 @@
     if (g_input1_state & 0xf0)
       g_gamepad_buttons = 0;
 
+    uint64 t0 = SDL_GetPerformanceCounter();
+
     bool is_turbo = RunOneFrame(snes_run, g_input1_state | g_gamepad_buttons, (frameCtr++ & 0x7f) != 0 && g_turbo);
 
     if (is_turbo)
       continue;
 
+
+    uint64 t1 = SDL_GetPerformanceCounter();
     PlayAudio(snes_run, device, have.channels, audioBuffer);
+    uint64 t2 = SDL_GetPerformanceCounter();
+
     RenderScreen(window, renderer, texture, (g_win_flags & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0);
+    uint64 t3 = SDL_GetPerformanceCounter();
+    SDL_RenderPresent(renderer); // vsyncs to 60 FPS?
+    uint64 t4 = SDL_GetPerformanceCounter();
 
-    SDL_RenderPresent(renderer); // vsyncs to 60 FPS
+    double f = 1e3 / (double)SDL_GetPerformanceFrequency();
+    if (0) printf("Perf %6.2f %6.2f %6.2f %6.2f\n", 
+      (t1 - t0) * f,
+      (t2 - t1) * f,
+      (t3 - t2) * f,
+      (t4 - t3) * f
+      );
+
+
     // if vsync isn't working, delay manually
     curTick = SDL_GetTicks();
 
@@ -406,26 +431,43 @@
   int i;
   sprintf(buf, "%d", n);
   for (s = buf, i = 2 * 4; *s; s++, i += 8 * 4)
-    RenderDigit(dst + ((pitch + i + 4) << big), pitch, *s - '0', 0x40404000, big);
+    RenderDigit(dst + ((pitch + i + 4) << big), pitch, *s - '0', 0x404040, big);
   for (s = buf, i = 2 * 4; *s; s++, i += 8 * 4)
-    RenderDigit(dst + (i << big), pitch, *s - '0', 0xffffff00, big);
+    RenderDigit(dst + (i << big), pitch, *s - '0', 0xffffff, big);
 }
 
 static void RenderScreen(SDL_Window *window, SDL_Renderer *renderer, SDL_Texture *texture, bool fullscreen) {
   uint8* pixels = NULL;
   int pitch = 0;
+  uint64 t0 = SDL_GetPerformanceCounter();
   if(SDL_LockTexture(texture, NULL, (void**)&pixels, &pitch) != 0) {
     printf("Failed to lock texture: %s\n", SDL_GetError());
     return;
   }
+  uint64 t1 = SDL_GetPerformanceCounter();
   bool hq = RenderScreenWithPerf(pixels, pitch, g_ppu_render_flags);
   if (g_display_perf)
     RenderNumber(pixels + (pitch*2<<hq), pitch, g_curr_fps, hq);
+
+  uint64 t2 = SDL_GetPerformanceCounter();
   SDL_UnlockTexture(texture);
+  uint64 t3 = SDL_GetPerformanceCounter();
   SDL_RenderClear(renderer);
-
+  uint64 t4 = SDL_GetPerformanceCounter();
   SDL_Rect src_rect = { 0, 0, kRenderWidth, kRenderHeight };
   SDL_RenderCopy(renderer, texture, hq ? NULL : &src_rect, NULL);
+  uint64 t5 = SDL_GetPerformanceCounter();
+
+  double f = 1e3 / (double)SDL_GetPerformanceFrequency();
+  if (0) printf("RenderPerf %6.2f %6.2f %6.2f %6.2f %6.2f\n",
+    (t1 - t0) * f,
+    (t2 - t1) * f,
+    (t3 - t2) * f,
+    (t4 - t3) * f,
+    (t5 - t4) * f
+  );
+
+
 }
 
 static void HandleCommand(uint32 j, bool pressed) {
--- a/snes/ppu.c
+++ b/snes/ppu.c
@@ -157,7 +157,7 @@
   if (hq) {
     for (int i = 0; i < 256; i++) {
       uint32 color = ppu->cgram[i];
-      ppu->colorMapRgb[i] = ppu->brightnessMult[color & 0x1f] << 24 | ppu->brightnessMult[(color >> 5) & 0x1f] << 16 | ppu->brightnessMult[(color >> 10) & 0x1f] << 8;
+      ppu->colorMapRgb[i] = ppu->brightnessMult[color & 0x1f] << 16 | ppu->brightnessMult[(color >> 5) & 0x1f] << 8 | ppu->brightnessMult[(color >> 10) & 0x1f];
     }
   }
 
@@ -876,9 +876,9 @@
       uint32 i = left;
       do {
         uint32 color = ppu->cgram[ppu->bgBuffers[0].pixel[i]];
-        dst[1] = dst[0] = ppu->brightnessMult[color & clip_color_mask] << 24 |
-                          ppu->brightnessMult[(color >> 5) & clip_color_mask] << 16 |
-                          ppu->brightnessMult[(color >> 10) & clip_color_mask] << 8;
+        dst[1] = dst[0] = ppu->brightnessMult[color & clip_color_mask] << 16 |
+                          ppu->brightnessMult[(color >> 5) & clip_color_mask] << 8 |
+                          ppu->brightnessMult[(color >> 10) & clip_color_mask];
       } while (dst += 2, ++i < right);
     } else {
       uint8 *half_color_map = ppu->halfColor ? ppu->brightnessMultHalf : ppu->brightnessMult;
@@ -913,7 +913,7 @@
             b += b2;
           }
         }
-        dst[0] = dst[1] = color_map[r] << 24 | color_map[g] << 16 | color_map[b] << 8;
+        dst[0] = dst[1] = color_map[b] | color_map[g] << 8 | color_map[r] << 16;
       } while (dst += 2, ++i < right);
     }
   } while (cw_clip_math >>= 1, ++windex < cwin.nr);
@@ -977,14 +977,14 @@
   }
   int row = y - 1;
   uint8 *pixelBuffer = (uint8*) &ppu->renderBuffer[row * 2 * ppu->renderPitch + x * 8];
-  pixelBuffer[0] = 0;
-  pixelBuffer[1] = ((b2 << 3) | (b2 >> 2)) * ppu->brightness / 15;
-  pixelBuffer[2] = ((g2 << 3) | (g2 >> 2)) * ppu->brightness / 15;
-  pixelBuffer[3] = ((r2 << 3) | (r2 >> 2)) * ppu->brightness / 15;
-  pixelBuffer[4] = 0;
-  pixelBuffer[5] = ((b << 3) | (b >> 2)) * ppu->brightness / 15;
-  pixelBuffer[6] = ((g << 3) | (g >> 2)) * ppu->brightness / 15;
-  pixelBuffer[7] = ((r << 3) | (r >> 2)) * ppu->brightness / 15;
+  pixelBuffer[0] = ((b2 << 3) | (b2 >> 2)) * ppu->brightness / 15;
+  pixelBuffer[1] = ((g2 << 3) | (g2 >> 2)) * ppu->brightness / 15;
+  pixelBuffer[2] = ((r2 << 3) | (r2 >> 2)) * ppu->brightness / 15;
+  pixelBuffer[3] = 0;
+  pixelBuffer[4] = ((b << 3) | (b >> 2)) * ppu->brightness / 15;
+  pixelBuffer[5] = ((g << 3) | (g >> 2)) * ppu->brightness / 15;
+  pixelBuffer[6] = ((r << 3) | (r >> 2)) * ppu->brightness / 15;
+  pixelBuffer[7] = 0;
 }
 
 static const int bitDepthsPerMode[10][4] = {