shithub: pt2-clone

Download patch

ref: aadb24ea9df19ef1443af2a407503954cd8adfa8
parent: 5f89c018a1fec42311abce9be1528d233d883886
author: Olav Sørensen <[email protected]>
date: Tue Jan 24 17:46:34 EST 2023

Fixing of frame delay/wait routines

--- a/src/pt2_askbox.c
+++ b/src/pt2_askbox.c
@@ -321,7 +321,7 @@
 
 				// reset vblank end time if we minimize window
 				if (event.window.event == SDL_WINDOWEVENT_MINIMIZED || event.window.event == SDL_WINDOWEVENT_FOCUS_LOST)
-					hpc_ResetEndTime(&video.vblankHpc);
+					hpc_ResetCounters(&video.vblankHpc);
 			}
 			else if (event.type == SDL_KEYDOWN)
 			{
--- a/src/pt2_audio.c
+++ b/src/pt2_audio.c
@@ -432,7 +432,7 @@
 
 		// BPM -> Hz -> tick length for performance counter (syncing visuals to audio)
 		double dTimeInt;
-		double dTimeFrac = modf(hpcFreq.dFreq / dHz, &dTimeInt);
+		double dTimeFrac = modf((double)hpcFreq.freq64 / dHz, &dTimeInt);
 		const int32_t timeInt = (int32_t)dTimeInt;
 	
 		dTimeFrac = floor((dTimeFrac * (UINT32_MAX+1.0)) + 0.5); // fractional part (scaled to 0..2^32-1)
--- a/src/pt2_hpc.c
+++ b/src/pt2_hpc.c
@@ -1,4 +1,6 @@
-// Hardware Performance Counter delay routines, by 8bitbubsy
+/*
+** Hardware Performance Counter delay routines
+*/
 
 #ifdef _WIN32
 #define WIN32_MEAN_AND_LEAN
@@ -11,8 +13,7 @@
 #include <stdbool.h>
 #include "pt2_hpc.h"
 
-// more bits than this makes little sense (double -> uint64_t precision)
-#define FRAC_BITS 53
+#define FRAC_BITS 63
 #define FRAC_SCALE (1ULL << FRAC_BITS)
 #define FRAC_MASK (FRAC_SCALE-1)
 
@@ -20,6 +21,10 @@
 
 #ifdef _WIN32 // Windows usleep() implementation
 
+#define STATUS_SUCCESS 0
+
+static bool canAdjustTimerResolution;
+
 static NTSTATUS (__stdcall *NtDelayExecution)(BOOL Alertable, PLARGE_INTEGER DelayInterval);
 static NTSTATUS (__stdcall *NtQueryTimerResolution)(PULONG MinimumResolution, PULONG MaximumResolution, PULONG ActualResolution);
 static NTSTATUS (__stdcall *NtSetTimerResolution)(ULONG DesiredResolution, BOOLEAN SetResolution, PULONG CurrentResolution);
@@ -30,15 +35,15 @@
 {
 	LARGE_INTEGER delayInterval;
 
-	// NtDelayExecution() delays in 100ns-units, and negative value = delay from current time
+	// NtDelayExecution() delays in 100ns-units, and a negative value means to delay from current time
 	usec *= -10;
 
-	delayInterval.HighPart = 0xFFFFFFFF;
+	delayInterval.HighPart = 0xFFFFFFFF; // negative 64-bit value, we only set the lower dword
 	delayInterval.LowPart = usec;
 	NtDelayExecution(false, &delayInterval);
 }
 
-static void usleepWeak(int32_t usec) // fallback if no NtDelayExecution()
+static void usleepPoor(int32_t usec) // fallback if no NtDelayExecution()
 {
 	Sleep((usec + 500) / 1000);
 }
@@ -46,10 +51,11 @@
 static void windowsSetupUsleep(void)
 {
 	NtDelayExecution = (NTSTATUS (__stdcall *)(BOOL, PLARGE_INTEGER))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtDelayExecution");
+	usleep = (NtDelayExecution != NULL) ? usleepGood : usleepPoor;
+
 	NtQueryTimerResolution = (NTSTATUS (__stdcall *)(PULONG, PULONG, PULONG))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtQueryTimerResolution");
 	NtSetTimerResolution = (NTSTATUS (__stdcall *)(ULONG, BOOLEAN, PULONG))GetProcAddress(GetModuleHandle("ntdll.dll"), "NtSetTimerResolution");
-
-	usleep = (NtDelayExecution != NULL) ? usleepGood : usleepWeak;
+	canAdjustTimerResolution = (NtQueryTimerResolution != NULL && NtSetTimerResolution != NULL);
 }
 #endif
 
@@ -59,50 +65,69 @@
 	windowsSetupUsleep();
 #endif
 	hpcFreq.freq64 = SDL_GetPerformanceFrequency();
-	hpcFreq.dFreq = (double)hpcFreq.freq64;
-	hpcFreq.dFreqMulMicro = (1000.0 * 1000.0) / hpcFreq.dFreq;
+
+	double dFreq = (double)hpcFreq.freq64;
+
+	hpcFreq.dFreqMulMs = 1000.0 / dFreq;
+	hpcFreq.dFreqMulMicro = (1000.0 * 1000.0) / dFreq;
 }
 
-void hpc_SetDurationInHz(hpc_t *hpc, const double dHz)
+// returns the fractional part of (dividend / divisor) as a 0.64 fixed-point value (0 if divisor is 0)
+static uint64_t getFrac64FromU64DivU32(uint64_t dividend, uint32_t divisor)
 {
-	const double dDuration = hpcFreq.dFreq / dHz;
+	if (dividend == 0 || divisor == 0)
+		return 0;
 
-	// break down duration into integer and frac parts
-	double dDurationInt;
-	double dDurationFrac = modf(dDuration, &dDurationInt);
+	dividend %= divisor; // drop the integer part; dividend is now < divisor, so it fits in 32 bits
 
-	// set 64:53fp values
-	hpc->duration64Int = (uint64_t)dDurationInt;
-	hpc->duration64Frac = (uint64_t)round(dDurationFrac * FRAC_SCALE);
+	if (dividend == 0)
+		return 0;
+
+	const uint32_t quotient  = (uint32_t)((dividend << 32) / divisor); // upper 32 fraction bits
+	const uint32_t remainder = (uint32_t)((dividend << 32) % divisor); // carried into the lower 32 bits
+
+	const uint32_t resultHi = quotient;
+	const uint32_t resultLo = (uint32_t)(((uint64_t)remainder << 32) / divisor);
+
+	return ((uint64_t)resultHi << 32) | resultLo;
 }
 
-void hpc_ResetEndTime(hpc_t *hpc)
+void hpc_SetDurationInHz(hpc_t *hpc, uint32_t hz)
 {
-	hpc->endTime64Int = SDL_GetPerformanceCounter() + hpc->duration64Int;
-	hpc->endTime64Frac = hpc->duration64Frac;
+	// set 64:63fp value (hz must be non-zero, it is used as a divisor)
+	hpc->durationInt = hpcFreq.freq64 / hz;
+	hpc->durationFrac = getFrac64FromU64DivU32(hpcFreq.freq64, hz) >> 1;
+
+	// reset counters every hour; widen first so hz * 3600 can't wrap in 32 bits
+	hpc->resetFrame = (uint64_t)hz * 3600;
 }
 
-void hpc_Wait(hpc_t *hpc)
+void hpc_ResetCounters(hpc_t *hpc)
 {
-#ifdef _WIN32 // set resolution to 0.5ms (safest minium) - this is confirmed to improve NtDelayExecution() and Sleep()
-	ULONG originalTimerResolution, minRes, maxRes, curRes;
+	hpc->endTimeInt = SDL_GetPerformanceCounter() + hpc->durationInt;
+	hpc->endTimeFrac = hpc->durationFrac;
+}
 
-	if (NtQueryTimerResolution != NULL && NtSetTimerResolution != NULL)
+void hpc_Wait(hpc_t *hpc)
+{
+#ifdef _WIN32
+	/* Make sure resolution is set to 0.5ms (safest minimum) - this is confirmed to improve
+	** NtDelayExecution() and Sleep(). This will only be changed when needed, not per frame.
+	*/
+	ULONG curRes, minRes, maxRes, junk;
+	if (canAdjustTimerResolution && NtQueryTimerResolution(&minRes, &maxRes, &curRes) == STATUS_SUCCESS)
 	{
-		if (!NtQueryTimerResolution(&minRes, &maxRes, &originalTimerResolution))
-		{
-			if (originalTimerResolution != 5000 && maxRes <= 5000)
-				NtSetTimerResolution(5000, TRUE, &curRes); // set to 0.5ms (safest minimum)
-		}
+		if (curRes != 5000 && maxRes <= 5000)
+			NtSetTimerResolution(5000, TRUE, &junk); // 0.5ms
 	}
 #endif
 
 	const uint64_t currTime64 = SDL_GetPerformanceCounter();
-	if (currTime64 < hpc->endTime64Int)
+	if (currTime64 < hpc->endTimeInt)
 	{
-		uint64_t timeLeft64 = hpc->endTime64Int - currTime64;
+		uint64_t timeLeft64 = hpc->endTimeInt - currTime64;
 
-		// limit (and cast to) int32_t for fast SSE2 SIMD usage
+		// convert to int32_t for fast SSE2 SIMD usage later on
 		if (timeLeft64 > INT32_MAX)
 			timeLeft64 = INT32_MAX;
 
@@ -115,12 +140,25 @@
 
 	// set next end time
 
-	hpc->endTime64Int += hpc->duration64Int;
+	hpc->endTimeInt += hpc->durationInt;
 
-	hpc->endTime64Frac += hpc->duration64Frac;
-	if (hpc->endTime64Frac >= FRAC_SCALE)
+	// handle fractional part
+	hpc->endTimeFrac += hpc->durationFrac;
+	if (hpc->endTimeFrac >= FRAC_SCALE)
 	{
-		hpc->endTime64Frac &= FRAC_MASK;
-		hpc->endTime64Int++;
+		hpc->endTimeFrac &= FRAC_MASK;
+		hpc->endTimeInt++;
+	}
+
+	/* The counter ("endTimeInt") can accumulate major errors after a couple of hours,
+	** since each frame is not happening at perfect intervals.
+	** To fix this, reset the counter's int & frac once every hour. We should only get
+	** up to one frame of stutter while they are resetting, then it's back to normal.
+	*/
+	hpc->frameCounter++;
+	if (hpc->frameCounter >= hpc->resetFrame)
+	{
+		hpc->frameCounter = 0;
+		hpc_ResetCounters(hpc);
 	}
 }
--- a/src/pt2_hpc.h
+++ b/src/pt2_hpc.h
@@ -6,18 +6,19 @@
 typedef struct
 {
 	uint64_t freq64;
-	double dFreq, dFreqMulMicro;
+	double dFreqMulMicro, dFreqMulMs;
 } hpcFreq_t;
 
 typedef struct
 {
-	uint64_t duration64Int, duration64Frac;
-	uint64_t endTime64Int, endTime64Frac;
+	uint64_t durationInt, durationFrac;
+	uint64_t endTimeInt, endTimeFrac;
+	uint64_t frameCounter, resetFrame;
 } hpc_t;
 
+extern hpcFreq_t hpcFreq;
+
 void hpc_Init(void);
-void hpc_SetDurationInHz(hpc_t *hpc, double dHz);
-void hpc_ResetEndTime(hpc_t *hpc);
+void hpc_SetDurationInHz(hpc_t *hpc, uint32_t hz);
+void hpc_ResetCounters(hpc_t *hpc);
 void hpc_Wait(hpc_t *hpc);
-
-extern hpcFreq_t hpcFreq;
--- a/src/pt2_main.c
+++ b/src/pt2_main.c
@@ -336,7 +336,7 @@
 	SDL_EventState(SDL_DROPFILE, SDL_ENABLE);
 
 	editor.mainLoopOngoing = true;
-	hpc_ResetEndTime(&video.vblankHpc); // this must be the very last thing done before entering the main loop
+	hpc_ResetCounters(&video.vblankHpc); // this must be the last thing we do before entering the main loop
 
 	// XXX: if you change anything in the main loop, make sure it goes in the askBox()(pt2_askbox.c) loop too, if needed
 	while (editor.programRunning)
@@ -377,7 +377,7 @@
 
 			// reset vblank end time if we minimize window
 			if (event.window.event == SDL_WINDOWEVENT_MINIMIZED || event.window.event == SDL_WINDOWEVENT_FOCUS_LOST)
-				hpc_ResetEndTime(&video.vblankHpc);
+				hpc_ResetCounters(&video.vblankHpc);
 		}
 
 #ifdef _WIN32
--- a/src/pt2_sampling.c
+++ b/src/pt2_sampling.c
@@ -475,7 +475,7 @@
 {
 	changeStatusText("PLEASE WAIT ...");
 	flipFrame();
-	hpc_ResetEndTime(&video.vblankHpc);
+	hpc_ResetCounters(&video.vblankHpc);
 
 	editor.sampleZero = false;
 	editor.blockMarkFlag = false;
--- a/src/pt2_scopes.c
+++ b/src/pt2_scopes.c
@@ -286,7 +286,7 @@
 	SDL_SetThreadPriority(SDL_THREAD_PRIORITY_HIGH);
 
 	hpc_SetDurationInHz(&scopeHpc, SCOPE_HZ);
-	hpc_ResetEndTime(&scopeHpc);
+	hpc_ResetCounters(&scopeHpc);
 
 	while (editor.programRunning)
 	{
--- a/src/pt2_visuals_sync.c
+++ b/src/pt2_visuals_sync.c
@@ -146,7 +146,7 @@
 
 	const double dAudioLatencySecs = audioBufferSize / (double)audioFreq;
 
-	dFrac = modf(dAudioLatencySecs * hpcFreq.dFreq, &dInt);
+	dFrac = modf(dAudioLatencySecs * (double)hpcFreq.freq64, &dInt);
 
 	// integer part
 	audLatencyPerfValInt = (uint32_t)dInt;