shithub: ft²

Download patch

ref: 48da1fbedb5db4c173875542774f13b93b7b44a1
parent: 584ded059ff4b7c88d4950fae0fe675c04a090c1
author: Olav Sørensen <[email protected]>
date: Sun Feb 27 09:16:52 EST 2022

Code refactoring: Use new ft2_hpc.c file for HPC timers

--- a/src/ft2_events.c
+++ b/src/ft2_events.c
@@ -48,43 +48,42 @@
 static HWND hWnd;
 static HANDLE oneInstHandle, hMapFile;
 static LPCTSTR sharedMemBuf;
-
-// used for Windows usleep() implementation
-static NTSTATUS (__stdcall *NtDelayExecution)(BOOL Alertable, PLARGE_INTEGER DelayInterval);
 #endif
 
 static void handleInput(void);
 
-// usleep() implementation for Windows (Warning: This might not be future-safe!)
-#ifdef _WIN32
-void usleep(uint32_t usec)
+#ifdef _WIN32 // Windows usleep() implementation
+
+static NTSTATUS (__stdcall *NtDelayExecution)(BOOL Alertable, PLARGE_INTEGER DelayInterval);
+static NTSTATUS (__stdcall *NtQueryTimerResolution)(PULONG MinimumResolution, PULONG MaximumResolution, PULONG ActualResolution);
+static NTSTATUS (__stdcall *NtSetTimerResolution)(ULONG DesiredResolution, BOOLEAN SetResolution, PULONG CurrentResolution);
+
+static void (*usleep)(int32_t usec);
+
+static void usleepGood(int32_t usec)
 {
-	LARGE_INTEGER lpDueTime;
+	LARGE_INTEGER delayInterval;
 
-	if (NtDelayExecution == NULL)
-	{
-		// NtDelayExecution() is not available (shouldn't happen), use regular sleep()
-		Sleep(usec / 1000);
-	}
-	else
-	{
-		// this prevents a 64-bit MUL (will not overflow with the ranges we use anyway)
-		lpDueTime.HighPart = UINT32_MAX;
-		lpDueTime.LowPart = (DWORD)(-10 * (int32_t)usec);
+	// NtDelayExecution() delays in 100ns-units, and negative value = delay from current time
+	usec *= -10;
 
-		NtDelayExecution(false, &lpDueTime);
-	}
+	delayInterval.HighPart = 0xFFFFFFFF;
+	delayInterval.LowPart = usec;
+	NtDelayExecution(false, &delayInterval);
 }
 
-void setupWin32Usleep(void)
+static void usleepWeak(int32_t usec) // fallback if no NtDelayExecution()
 {
-	NtDelayExecution = (NTSTATUS (__stdcall *)(BOOL, PLARGE_INTEGER))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtDelayExecution");
-	timeBeginPeriod(0); // enter highest timer resolution
+	Sleep((usec + 500) / 1000);
 }
 
-void freeWin32Usleep(void)
+void windowsSetupUsleep(void)
 {
-	timeEndPeriod(0); // exit highest timer resolution
+	NtDelayExecution = (NTSTATUS (__stdcall *)(BOOL, PLARGE_INTEGER))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtDelayExecution");
+	NtQueryTimerResolution = (NTSTATUS (__stdcall *)(PULONG, PULONG, PULONG))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtQueryTimerResolution");
+	NtSetTimerResolution = (NTSTATUS (__stdcall *)(ULONG, BOOLEAN, PULONG))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtSetTimerResolution");
+
+	usleep = (NtDelayExecution != NULL) ? usleepGood : usleepWeak;
 }
 #endif
 
@@ -402,17 +401,9 @@
 		else if (event->window.event == SDL_WINDOWEVENT_SHOWN)
 			video.windowHidden = false;
 
-		if (video.vsync60HzPresent)
-		{
-			/* If we minimize the window and vsync is present, vsync is temporarily turned off.
-			** recalc waitVBL() vars so that it can sleep properly in said mode.
-			*/
-			if (event->window.event == SDL_WINDOWEVENT_MINIMIZED ||
-				event->window.event == SDL_WINDOWEVENT_FOCUS_LOST)
-			{
-				setupWaitVBL();
-			}
-		}
+		// reset vblank end time if we minimize window
+		if (event->window.event == SDL_WINDOWEVENT_MINIMIZED || event->window.event == SDL_WINDOWEVENT_FOCUS_LOST)
+			hpc_ResetEndTime(&video.vblankHpc);
 	}
 }
 
--- a/src/ft2_events.h
+++ b/src/ft2_events.h
@@ -20,7 +20,4 @@
 #ifdef _WIN32
 bool handleSingleInstancing(int32_t argc, char **argv);
 void closeSingleInstancing(void);
-void usleep(uint32_t usec);
-void setupWin32Usleep(void);
-void freeWin32Usleep(void);
 #endif
--- /dev/null
+++ b/src/ft2_hpc.c
@@ -1,0 +1,127 @@
+/*
+** Hardware Performance Counter delay routines
+*/
+
+#ifdef _WIN32
+#define WIN32_MEAN_AND_LEAN // NOTE(review): windows.h documents WIN32_LEAN_AND_MEAN; this spelling looks like a no-op typo. Confirm before "fixing": the real macro trims headers this file may rely on (e.g. for NTSTATUS)
+#include <windows.h>
+#else
+#include <unistd.h> // presumably for usleep() on non-Windows targets
+#endif
+#include <SDL2/SDL.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include "ft2_hpc.h"
+
+#define FRAC_BITS 53 // number of fractional bits in the fixed-point duration/end time values
+#define FRAC_SCALE (1ULL << FRAC_BITS) // fixed-point value of 1.0 (one whole counter tick)
+#define FRAC_MASK (FRAC_SCALE-1) // keeps only the fractional bits
+
+hpcFreq_t hpcFreq; // performance counter frequency info, filled in by hpc_Init()
+
+#ifdef _WIN32 // Windows usleep() implementation
+
+static NTSTATUS (__stdcall *NtDelayExecution)(BOOL Alertable, PLARGE_INTEGER DelayInterval); // looked up in ntdll.dll by windowsSetupUsleep()
+static NTSTATUS (__stdcall *NtQueryTimerResolution)(PULONG MinimumResolution, PULONG MaximumResolution, PULONG ActualResolution); // same; checked against NULL before use
+static NTSTATUS (__stdcall *NtSetTimerResolution)(ULONG DesiredResolution, BOOLEAN SetResolution, PULONG CurrentResolution); // same; checked against NULL before use
+
+static void (*usleep)(int32_t usec); // set to usleepGood() or usleepWeak() by windowsSetupUsleep()
+
+static void usleepGood(int32_t usec) // sleeps for usec microseconds through NtDelayExecution()
+{
+	LARGE_INTEGER delayInterval;
+
+	/* NtDelayExecution() delays in 100ns-units, and negative value = delay from current time.
+	** The scaling is done in 64-bit: usec*10 can overflow an int32_t, and forcing HighPart to
+	** 0xFFFFFFFF made usec=0 a relative delay of 2^32 units (~429 seconds) instead of zero.
+	*/
+	delayInterval.QuadPart = -10 * (int64_t)usec;
+	NtDelayExecution(false, &delayInterval);
+}
+
+static void usleepWeak(int32_t usec) // fallback if no NtDelayExecution(): uses the millisecond-granular Sleep()
+{
+	Sleep((usec + 500) / 1000); // microseconds -> milliseconds, rounded to nearest (anything below 500us becomes Sleep(0))
+}
+
+static void windowsSetupUsleep(void) // resolves the ntdll.dll entry points and picks the usleep() backend
+{
+	const HMODULE ntdll = GetModuleHandle("ntdll.dll");
+	NtDelayExecution = (NTSTATUS (__stdcall *)(BOOL, PLARGE_INTEGER))GetProcAddress(ntdll, "NtDelayExecution");
+	NtQueryTimerResolution = (NTSTATUS (__stdcall *)(PULONG, PULONG, PULONG))GetProcAddress(ntdll, "NtQueryTimerResolution");
+	NtSetTimerResolution = (NTSTATUS (__stdcall *)(ULONG, BOOLEAN, PULONG))GetProcAddress(ntdll, "NtSetTimerResolution");
+	usleep = (NtDelayExecution == NULL) ? usleepWeak : usleepGood; // Sleep()-based fallback if the lookup failed
+}
+#endif
+
+void hpc_Init(void) // one-time setup: fills in hpcFreq (and the Windows usleep() backend) used by the other hpc_*() routines
+{
+#ifdef _WIN32
+	windowsSetupUsleep();
+#endif
+	const uint64_t counterFreq = SDL_GetPerformanceFrequency();
+	const double dCounterFreq = (double)counterFreq;
+	hpcFreq = (hpcFreq_t){ .freq64 = counterFreq, .dFreq = dCounterFreq, .dFreqMulMicro = (1000.0 * 1000.0) / dCounterFreq };
+}
+
+void hpc_SetDurationInHz(hpc_t *hpc, const double dHz) // sets the period of *hpc to 1/dHz seconds (needs hpc_Init() first)
+{
+	// one period expressed in performance counter ticks (generally not a whole number)
+	const double dTicksPerPeriod = hpcFreq.dFreq / dHz;
+
+	double dWholeTicks; // receives the integer part from modf()
+	const double dTickFraction = modf(dTicksPerPeriod, &dWholeTicks);
+
+	// store as whole ticks + the leftover fraction scaled to FRAC_BITS bits of fixed-point
+	hpc->duration64Frac = (uint64_t)round(dTickFraction * FRAC_SCALE);
+	hpc->duration64Int = (uint64_t)dWholeTicks;
+}
+
+void hpc_ResetEndTime(hpc_t *hpc) // restarts the schedule: the next end time becomes one period from now
+{
+	hpc->endTime64Frac = hpc->duration64Frac;
+	hpc->endTime64Int = hpc->duration64Int + SDL_GetPerformanceCounter();
+}
+
+void hpc_Wait(hpc_t *hpc) // sleeps until the current end time of *hpc (if not yet reached), then advances it by one period
+{
+#ifdef _WIN32 // set resolution to 0.5ms (safest minimum) - this is confirmed to improve NtDelayExecution() and Sleep()
+	ULONG originalTimerResolution, minRes, maxRes, curRes; // originalTimerResolution receives the ActualResolution output of the query
+	// both ntdll functions are optional: the whole step is skipped if either lookup failed
+	if (NtQueryTimerResolution != NULL && NtSetTimerResolution != NULL)
+	{
+		if (!NtQueryTimerResolution(&minRes, &maxRes, &originalTimerResolution)) // only continue if the query returned 0
+		{
+			if (originalTimerResolution != 5000 && maxRes <= 5000) // not already at 5000, and the reported maxRes allows it
+				NtSetTimerResolution(5000, TRUE, &curRes); // set to 0.5ms (safest minimum)
+		}
+	}
+#endif
+
+	const uint64_t currTime64 = SDL_GetPerformanceCounter(); // "now", in performance counter ticks
+	if (currTime64 < hpc->endTime64Int) // no sleeping at all if the end time has already passed
+	{
+		uint64_t timeLeft64 = hpc->endTime64Int - currTime64;
+
+		// clamp so that it fits in int32_t (for fast SSE2 SIMD usage later on)
+		if (timeLeft64 > INT32_MAX)
+			timeLeft64 = INT32_MAX;
+
+		const int32_t timeLeft32 = (int32_t)timeLeft64;
+
+		int32_t microSecsLeft = (int32_t)((timeLeft32 * hpcFreq.dFreqMulMicro) + 0.5); // ticks -> microseconds, rounded
+		if (microSecsLeft > 0)
+			usleep(microSecsLeft);
+	}
+
+	// set next end time
+	// (always advanced by exactly one period, even if we were late; hpc_ResetEndTime() is what re-syncs to "now")
+	hpc->endTime64Int += hpc->duration64Int;
+
+	hpc->endTime64Frac += hpc->duration64Frac;
+	if (hpc->endTime64Frac >= FRAC_SCALE) // accumulated fraction reached one whole tick: carry it into the integer part
+	{
+		hpc->endTime64Frac &= FRAC_MASK;
+		hpc->endTime64Int++;
+	}
+}
--- /dev/null
+++ b/src/ft2_hpc.h
@@ -1,0 +1,23 @@
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+
+typedef struct // performance counter frequency in the forms the hpc routines need (filled in by hpc_Init())
+{
+	uint64_t freq64; // value returned by SDL_GetPerformanceFrequency()
+	double dFreq, dFreqMulMicro; // freq64 as a double, and 1000000/dFreq (multiply a tick count by this to get microseconds)
+} hpcFreq_t;
+
+typedef struct // state of one periodic delay timer
+{
+	uint64_t duration64Int, duration64Frac; // period in counter ticks: whole ticks + fraction of a tick (fixed-point, scaled in ft2_hpc.c)
+	uint64_t endTime64Int, endTime64Frac; // next end time: counter value to wait for + accumulated fraction (same scaling)
+} hpc_t;
+
+extern hpcFreq_t hpcFreq; // defined in ft2_hpc.c, valid after hpc_Init()
+
+void hpc_Init(void); // reads the performance counter frequency (and sets up usleep() on Windows)
+void hpc_SetDurationInHz(hpc_t *hpc, double dHz); // sets the period of *hpc from a rate in Hz
+void hpc_ResetEndTime(hpc_t *hpc); // next end time = current counter value + one period
+void hpc_Wait(hpc_t *hpc); // sleeps until the end time (if still ahead), then advances it by one period
--- a/src/ft2_main.c
+++ b/src/ft2_main.c
@@ -33,12 +33,12 @@
 #include "ft2_events.h"
 #include "ft2_bmp.h"
 #include "ft2_structs.h"
+#include "ft2_hpc.h"
 
 #ifdef HAS_MIDI
 static SDL_Thread *initMidiThread;
 #endif
 
-static void setupPerfFreq(void);
 static void initializeVars(void);
 static void cleanUpAndExit(void); // never call this inside the main loop
 #ifdef __APPLE__
@@ -117,7 +117,6 @@
 		return 0;
 	}
 
-	setupWin32Usleep();
 	disableWasapi(); // disable problematic WASAPI SDL2 audio driver on Windows (causes clicks/pops sometimes...)
 	                 // 13.03.2020: This is still needed with SDL 2.0.12...
 #endif
@@ -144,6 +143,9 @@
 	*/
 	SDL_StopTextInput();
 
+	hpc_Init();
+	hpc_SetDurationInHz(&video.vblankHpc, VBLANK_HZ);
+
 #ifdef __APPLE__
 	osxSetDirToProgramDirFromArgs(argv);
 #endif
@@ -178,8 +180,6 @@
 	audio.currOutputDevice = getAudioOutputDeviceFromConfig();
 	audio.currInputDevice = getAudioInputDeviceFromConfig();
 
-	setupPerfFreq();
-
 	if (!setupAudio(CONFIG_HIDE_ERRORS)) // can we open the audio device?
 	{
 		// nope, try with the default audio device
@@ -232,11 +232,11 @@
 	SDL_DetachThread(initMidiThread); // don't wait for this thread, let it clean up when done
 #endif
 
-	setupWaitVBL(); // this is needed for potential okBox() calls in handleModuleLoadFromArg()
+	hpc_ResetEndTime(&video.vblankHpc); // this is needed for potential okBox() calls in handleModuleLoadFromArg()
 	handleModuleLoadFromArg(argc, argv);
 
 	editor.mainLoopOngoing = true;
-	setupWaitVBL(); // this must be the very last thing done before entering the main loop
+	hpc_ResetEndTime(&video.vblankHpc); // this must be the very last thing done before entering the main loop
 
 	while (editor.programRunning)
 	{
@@ -377,7 +377,6 @@
 	}
 
 #ifdef _WIN32
-	freeWin32Usleep();
 	closeSingleInstancing();
 #endif
 
@@ -416,28 +415,6 @@
 	}
 }
 #endif
-
-static void setupPerfFreq(void)
-{
-	double dInt;
-
-	const uint64_t perfFreq64 = SDL_GetPerformanceFrequency();
-	assert(perfFreq64 != 0);
-
-	editor.dPerfFreq = (double)perfFreq64;
-	editor.dPerfFreqMulMicro = 1000000.0 / editor.dPerfFreq;
-	editor.dPerfFreqMulMs = 1.0 / (editor.dPerfFreq / 1000.0);
-
-	// calculate vblank time for performance counters and split into int/frac
-	double dFrac = modf(editor.dPerfFreq / VBLANK_HZ, &dInt);
-
-	// integer part
-	video.vblankTimeLen = (int32_t)dInt;
-
-	// fractional part scaled to 0..2^32-1
-	dFrac *= UINT32_MAX+1.0;
-	video.vblankTimeLenFrac = (uint32_t)dFrac;
-}
 
 #ifdef _WIN32
 static void disableWasapi(void)
--- a/src/ft2_video.c
+++ b/src/ft2_video.c
@@ -48,7 +48,6 @@
 
 static bool songIsModified;
 static char wndTitle[256];
-static uint64_t timeNext64, timeNext64Frac;
 static sprite_t sprites[SPRITE_NUM];
 
 // for FPS counter
@@ -180,7 +179,8 @@
 
 	if (!video.vsync60HzPresent)
 	{
-		waitVBL(); // we have no VSync, do crude thread sleeping to sync to ~60Hz
+		// we have no VSync, do crude thread sleeping to sync to ~60Hz
+		hpc_Wait(&video.vblankHpc);
 	}
 	else
 	{
@@ -190,14 +190,14 @@
 #ifdef __APPLE__
 		// macOS: VSync gets disabled if the window is 100% covered by another window. Let's add a (crude) fix:
 		if (minimized || !(windowFlags & SDL_WINDOW_INPUT_FOCUS))
-			waitVBL();
+			hpc_Wait(&video.vblankHpc);
 #elif __unix__
 		// *NIX: VSync gets disabled in fullscreen mode (at least on some distros/systems). Let's add a fix:
 		if (minimized || video.fullscreen)
-			waitVBL();
+			hpc_Wait(&video.vblankHpc);
 #else
 		if (minimized)
-			waitVBL();
+			hpc_Wait(&video.vblankHpc);
 #endif
 	}
 
@@ -719,44 +719,6 @@
 			clr32 += srcPitch;
 			dst32 += dstPitch;
 		}
-	}
-}
-
-void setupWaitVBL(void)
-{
-	// set next frame time
-	timeNext64 = SDL_GetPerformanceCounter() + video.vblankTimeLen;
-	timeNext64Frac = video.vblankTimeLenFrac;
-}
-
-void waitVBL(void)
-{
-	// this routine almost never delays if we have 60Hz vsync, but it's still needed in some occasions
-
-	uint64_t time64 = SDL_GetPerformanceCounter();
-	if (time64 < timeNext64)
-	{
-		time64 = timeNext64 - time64;
-		if (time64 > INT32_MAX)
-			time64 = INT32_MAX;
-
-		const int32_t diff32 = (int32_t)time64;
-
-		// convert and round to microseconds
-		const int32_t time32 = (int32_t)((diff32 * editor.dPerfFreqMulMicro) + 0.5);
-
-		// delay until we have reached the next frame
-		if (time32 > 0)
-			usleep(time32);
-	}
-
-	// update next frame time
-	timeNext64 += video.vblankTimeLen;
-	timeNext64Frac += video.vblankTimeLenFrac;
-	if (timeNext64Frac > UINT32_MAX)
-	{
-		timeNext64Frac &= UINT32_MAX;
-		timeNext64++;
 	}
 }
 
--- a/src/ft2_video.h
+++ b/src/ft2_video.h
@@ -5,6 +5,7 @@
 #include "ft2_header.h"
 #include "ft2_palette.h"
 #include "ft2_audio.h"
+#include "ft2_hpc.h"
 
 enum
 {
@@ -20,10 +21,11 @@
 {
 	bool fullscreen, showFPSCounter, useDesktopMouseCoords;
 	uint32_t xScale, yScale;
-	uint32_t *frameBuffer, palette[PAL_NUM], vblankTimeLen, vblankTimeLenFrac;
+	uint32_t *frameBuffer, palette[PAL_NUM];
 #ifdef _WIN32
 	HWND hWnd;
 #endif
+	hpc_t vblankHpc;
 	SDL_Window *window;
 	double dMonitorRefreshRate;
 	float fMouseXMul, fMouseYMul;
@@ -74,5 +76,3 @@
 void setWindowSizeFromConfig(bool updateRenderer);
 bool recreateTexture(void);
 void toggleFullScreen(void);
-void setupWaitVBL(void);
-void waitVBL(void);
--- a/src/scopes/ft2_scopes.c
+++ b/src/scopes/ft2_scopes.c
@@ -20,12 +20,12 @@
 #include "../ft2_video.h"
 #include "../ft2_tables.h"
 #include "../ft2_structs.h"
+#include "../ft2_hpc.h"
 #include "ft2_scopes.h"
 #include "ft2_scopedraw.h"
 
 static volatile bool scopesUpdatingFlag, scopesDisplayingFlag;
-static uint32_t scopeTimeLen, scopeTimeLenFrac;
-static uint64_t timeNext64, timeNext64Frac;
+static hpc_t scopeHpc;
 static volatile scope_t scope[MAX_CHANNELS];
 static SDL_Thread *scopeThread;
 
@@ -520,9 +520,8 @@
 	// this is needed for scope stability (confirmed)
 	SDL_SetThreadPriority(SDL_THREAD_PRIORITY_HIGH);
 
-	// set next frame time
-	timeNext64 = SDL_GetPerformanceCounter() + scopeTimeLen;
-	timeNext64Frac = scopeTimeLenFrac;
+	hpc_SetDurationInHz(&scopeHpc, SCOPE_HZ);
+	hpc_ResetEndTime(&scopeHpc);
 
 	while (editor.programRunning)
 	{
@@ -530,31 +529,7 @@
 		updateScopes();
 		editor.scopeThreadBusy = false;
 
-		uint64_t time64 = SDL_GetPerformanceCounter();
-		if (time64 < timeNext64)
-		{
-			time64 = timeNext64 - time64;
-			if (time64 > INT32_MAX)
-				time64 = INT32_MAX;
-
-			const int32_t diff32 = (int32_t)time64;
-
-			// convert and round to microseconds
-			const int32_t time32 = (int32_t)((diff32 * editor.dPerfFreqMulMicro) + 0.5);
-
-			// delay until we have reached the next frame
-			if (time32 > 0)
-				usleep(time32);
-		}
-
-		// update next tick time
-		timeNext64 += scopeTimeLen;
-		timeNext64Frac += scopeTimeLenFrac;
-		if (timeNext64Frac > UINT32_MAX)
-		{
-			timeNext64Frac &= UINT32_MAX;
-			timeNext64++;
-		}
+		hpc_Wait(&scopeHpc);
 	}
 
 	(void)ptr;
@@ -563,18 +538,6 @@
 
 bool initScopes(void)
 {
-	double dInt;
-
-	// calculate scope time for performance counters and split into int/frac
-	double dFrac = modf(editor.dPerfFreq / SCOPE_HZ, &dInt);
-
-	// integer part
-	scopeTimeLen = (int32_t)dInt;
-
-	// fractional part (scaled to 0..2^32-1)
-	dFrac *= UINT32_MAX+1.0;
-	scopeTimeLenFrac = (uint32_t)dFrac;
-
 	scopeThread = SDL_CreateThread(scopeThreadFunc, NULL, NULL);
 	if (scopeThread == NULL)
 	{
--- a/vs2019_project/ft2-clone/ft2-clone.vcxproj
+++ b/vs2019_project/ft2-clone/ft2-clone.vcxproj
@@ -299,6 +299,7 @@
     <ClCompile Include="..\..\src\ft2_events.c" />
     <ClCompile Include="..\..\src\ft2_gui.c" />
     <ClCompile Include="..\..\src\ft2_help.c" />
+    <ClCompile Include="..\..\src\ft2_hpc.c" />
     <ClCompile Include="..\..\src\ft2_inst_ed.c" />
     <ClCompile Include="..\..\src\ft2_keyboard.c" />
     <ClCompile Include="..\..\src\ft2_main.c" />
@@ -394,6 +395,7 @@
     <ClInclude Include="..\..\src\ft2_gui.h" />
     <ClInclude Include="..\..\src\ft2_header.h" />
     <ClInclude Include="..\..\src\ft2_help.h" />
+    <ClInclude Include="..\..\src\ft2_hpc.h" />
     <ClInclude Include="..\..\src\ft2_inst_ed.h" />
     <ClInclude Include="..\..\src\ft2_keyboard.h" />
     <ClInclude Include="..\..\src\ft2_midi.h" />
--- a/vs2019_project/ft2-clone/ft2-clone.vcxproj.filters
+++ b/vs2019_project/ft2-clone/ft2-clone.vcxproj.filters
@@ -163,6 +163,7 @@
     <ClCompile Include="..\..\src\scopes\ft2_scopes.c">
       <Filter>scopes</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\ft2_hpc.c" />
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="..\..\src\rtmidi\RtMidi.h">
@@ -319,6 +320,9 @@
       <Filter>scopes</Filter>
     </ClInclude>
     <ClInclude Include="..\..\src\ft2_cpu.h">
+      <Filter>headers</Filter>
+    </ClInclude>
+    <ClInclude Include="..\..\src\ft2_hpc.h">
       <Filter>headers</Filter>
     </ClInclude>
   </ItemGroup>