From 1be668e68a1937f2af239e2707ab914286018892 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Thu, 30 Nov 2023 10:39:42 -0800 Subject: [PATCH] HLE: Add OS-specific precise sleep methods to reduce spinwaiting (#5948) * feat: add nanosleep for linux and macos * Add Windows 0.5ms sleep - Imprecise waits for longer waits with clock alignment - 1/4 the spin time on vsync timer * Remove old experiment * Fix event leak * Tweaking for MacOS * Linux tweaks, nanosleep vsync improvement * Fix overbias * Cleanup * Fix realignment * Add some docs and some cleanup NanosleepPool needs more documentation; Nanosleep has some benchmark code that needs to be removed. * Rename "Microsleep" to "PreciseSleep" Might have been confused with "microseconds", a unit in which no measurement is performed. * Remove nanosleep measurement * Remove unused debug logging * Nanosleep Pool Documentation * More cleanup * Whitespace * Formatting * Address Feedback * Allow SleepUntilTimePoint to take EventWaitHandle * Remove `_chrono` stopwatch in SurfaceFlinger * Move spinwaiting logic to PreciseSleepHelper Technically, these achieve different things, but having them here makes them easier to reuse or tune. 
--- .../PreciseSleep/IPreciseSleepEvent.cs | 38 +++ src/Ryujinx.Common/PreciseSleep/Nanosleep.cs | 160 ++++++++++++ .../PreciseSleep/NanosleepEvent.cs | 84 +++++++ .../PreciseSleep/NanosleepPool.cs | 228 ++++++++++++++++++ .../PreciseSleep/PreciseSleepHelper.cs | 104 ++++++++ src/Ryujinx.Common/PreciseSleep/SleepEvent.cs | 51 ++++ .../PreciseSleep/WindowsGranularTimer.cs | 220 +++++++++++++++++ .../PreciseSleep/WindowsSleepEvent.cs | 92 +++++++ .../HOS/Kernel/Common/KTimeManager.cs | 39 +-- .../Services/SurfaceFlinger/SurfaceFlinger.cs | 33 +-- 10 files changed, 1000 insertions(+), 49 deletions(-) create mode 100644 src/Ryujinx.Common/PreciseSleep/IPreciseSleepEvent.cs create mode 100644 src/Ryujinx.Common/PreciseSleep/Nanosleep.cs create mode 100644 src/Ryujinx.Common/PreciseSleep/NanosleepEvent.cs create mode 100644 src/Ryujinx.Common/PreciseSleep/NanosleepPool.cs create mode 100644 src/Ryujinx.Common/PreciseSleep/PreciseSleepHelper.cs create mode 100644 src/Ryujinx.Common/PreciseSleep/SleepEvent.cs create mode 100644 src/Ryujinx.Common/PreciseSleep/WindowsGranularTimer.cs create mode 100644 src/Ryujinx.Common/PreciseSleep/WindowsSleepEvent.cs diff --git a/src/Ryujinx.Common/PreciseSleep/IPreciseSleepEvent.cs b/src/Ryujinx.Common/PreciseSleep/IPreciseSleepEvent.cs new file mode 100644 index 000000000..26b5ab685 --- /dev/null +++ b/src/Ryujinx.Common/PreciseSleep/IPreciseSleepEvent.cs @@ -0,0 +1,38 @@ +using System; + +namespace Ryujinx.Common.PreciseSleep +{ + /// + /// An event which works similarly to an AutoResetEvent, but is backed by a + /// more precise timer that allows waits of less than a millisecond. + /// + public interface IPreciseSleepEvent : IDisposable + { + /// + /// Adjust a timepoint to better fit the host clock. + /// When no adjustment is made, the input timepoint will be returned. 
+ /// + /// Timepoint to adjust + /// Requested timeout in nanoseconds + /// Adjusted timepoint + long AdjustTimePoint(long timePoint, long timeoutNs); + + /// + /// Sleep until a timepoint, or a signal is received. + /// Given no signal, may wake considerably before, or slightly after the timeout. + /// + /// Timepoint to sleep until + /// True if signalled or waited, false if a wait could not be performed + bool SleepUntil(long timePoint); + + /// + /// Sleep until a signal is received. + /// + void Sleep(); + + /// + /// Signal the event, waking any sleeping thread or the next attempted sleep. + /// + void Signal(); + } +} diff --git a/src/Ryujinx.Common/PreciseSleep/Nanosleep.cs b/src/Ryujinx.Common/PreciseSleep/Nanosleep.cs new file mode 100644 index 000000000..67f067ae2 --- /dev/null +++ b/src/Ryujinx.Common/PreciseSleep/Nanosleep.cs @@ -0,0 +1,160 @@ +using System; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; + +namespace Ryujinx.Common.PreciseSleep +{ + /// + /// Access to Linux/MacOS nanosleep, with platform specific bias to improve precision. 
+ /// + [SupportedOSPlatform("macos")] + [SupportedOSPlatform("linux")] + [SupportedOSPlatform("android")] + [SupportedOSPlatform("ios")] + internal static partial class Nanosleep + { + private const long LinuxBaseNanosleepBias = 50000; // 0.05ms + + // Penalty for max allowed sleep duration + private const long LinuxNanosleepAccuracyPenaltyThreshold = 200000; // 0.2ms + private const long LinuxNanosleepAccuracyPenalty = 30000; // 0.03ms + + // Penalty for base sleep duration + private const long LinuxNanosleepBasePenaltyThreshold = 500000; // 0.5ms + private const long LinuxNanosleepBasePenalty = 30000; // 0.03ms + private const long LinuxNanosleepPenaltyPerMillisecond = 18000; // 0.018ms + private const long LinuxNanosleepPenaltyCap = 18000; // 0.018ms + + private const long LinuxStrictBiasOffset = 150_000; // 0.15ms + + // Nanosleep duration is biased depending on the requested timeout on MacOS. + // These match the results when measuring on an M1 processor at AboveNormal priority. + private const long MacosBaseNanosleepBias = 5000; // 0.005ms + private const long MacosBiasPerMillisecond = 140000; // 0.14ms + private const long MacosBiasMaxNanoseconds = 20_000_000; // 20ms + private const long MacosStrictBiasOffset = 150_000; // 0.15ms + + public static long Bias { get; } + + /// + /// Get bias for a given nanosecond timeout. + /// Some platforms calculate their bias differently, this method can be used to counteract it. 
+ /// + /// Nanosecond timeout + /// Bias in nanoseconds + public static long GetBias(long timeoutNs) + { + if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS()) + { + long biasNs = Math.Min(timeoutNs, MacosBiasMaxNanoseconds); + return MacosBaseNanosleepBias + biasNs * MacosBiasPerMillisecond / 1_000_000; + } + else + { + long bias = LinuxBaseNanosleepBias; + + if (timeoutNs > LinuxNanosleepBasePenaltyThreshold) + { + long penalty = (timeoutNs - LinuxNanosleepBasePenaltyThreshold) * LinuxNanosleepPenaltyPerMillisecond / 1_000_000; + bias += LinuxNanosleepBasePenalty + Math.Min(LinuxNanosleepPenaltyCap, penalty); + } + + return bias; + } + } + + /// + /// Get a stricter bias for a given nanosecond timeout, + /// which can improve the chances the sleep completes before the timeout. + /// Some platforms calculate their bias differently, this method can be used to counteract it. + /// + /// Nanosecond timeout + /// Strict bias in nanoseconds + public static long GetStrictBias(long timeoutNs) + { + if (OperatingSystem.IsMacOS() || OperatingSystem.IsIOS()) + { + return GetBias(timeoutNs) + MacosStrictBiasOffset; + } + else + { + long bias = GetBias(timeoutNs) + LinuxStrictBiasOffset; + + if (timeoutNs > LinuxNanosleepAccuracyPenaltyThreshold) + { + bias += LinuxNanosleepAccuracyPenalty; + } + + return bias; + } + } + + static Nanosleep() + { + Bias = GetBias(0); + } + + [StructLayout(LayoutKind.Sequential)] + private struct Timespec + { + public long tv_sec; // Seconds + public long tv_nsec; // Nanoseconds + } + + [LibraryImport("libc", SetLastError = true)] + private static partial int nanosleep(ref Timespec req, ref Timespec rem); + + /// + /// Convert a timeout in nanoseconds to a timespec for nanosleep. 
+ /// + /// Timeout in nanoseconds + /// Timespec for nanosleep + private static Timespec GetTimespecFromNanoseconds(ulong nanoseconds) + { + return new Timespec + { + tv_sec = (long)(nanoseconds / 1_000_000_000), + tv_nsec = (long)(nanoseconds % 1_000_000_000) + }; + } + + /// + /// Sleep for approximately a given time period in nanoseconds. + /// + /// Time to sleep for in nanoseconds + public static void Sleep(long nanoseconds) + { + nanoseconds -= GetBias(nanoseconds); + + if (nanoseconds >= 0) + { + Timespec req = GetTimespecFromNanoseconds((ulong)nanoseconds); + Timespec rem = new(); + + nanosleep(ref req, ref rem); + } + } + + /// + /// Sleep for at most a given time period in nanoseconds. + /// Uses a stricter bias to wake before the requested duration. + /// + /// + /// Due to OS scheduling behaviour, this timeframe may still be missed. + /// + /// Maximum allowed time for sleep + public static void SleepAtMost(long nanoseconds) + { + // Stricter bias to ensure we wake before the timepoint. + nanoseconds -= GetStrictBias(nanoseconds); + + if (nanoseconds >= 0) + { + Timespec req = GetTimespecFromNanoseconds((ulong)nanoseconds); + Timespec rem = new(); + + nanosleep(ref req, ref rem); + } + } + } +} diff --git a/src/Ryujinx.Common/PreciseSleep/NanosleepEvent.cs b/src/Ryujinx.Common/PreciseSleep/NanosleepEvent.cs new file mode 100644 index 000000000..f54fb09c1 --- /dev/null +++ b/src/Ryujinx.Common/PreciseSleep/NanosleepEvent.cs @@ -0,0 +1,84 @@ +using System; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Common.PreciseSleep +{ + /// + /// A precise sleep event for linux and macos that uses nanosleep for more precise timeouts. 
+ /// + [SupportedOSPlatform("macos")] + [SupportedOSPlatform("linux")] + [SupportedOSPlatform("android")] + [SupportedOSPlatform("ios")] + internal class NanosleepEvent : IPreciseSleepEvent + { + private readonly AutoResetEvent _waitEvent = new(false); + private readonly NanosleepPool _pool; + + public NanosleepEvent() + { + _pool = new NanosleepPool(_waitEvent); + } + + public long AdjustTimePoint(long timePoint, long timeoutNs) + { + // No adjustment + return timePoint; + } + + public bool SleepUntil(long timePoint) + { + long now = PerformanceCounter.ElapsedTicks; + long delta = (timePoint - now); + long ms = Math.Min(delta / PerformanceCounter.TicksPerMillisecond, int.MaxValue); + long ns = (delta * 1_000_000) / PerformanceCounter.TicksPerMillisecond; + + if (ms > 0) + { + _waitEvent.WaitOne((int)ms); + + return true; + } + else if (ns - Nanosleep.Bias > 0) + { + // Don't bother starting a sleep if there's already a signal active. + if (_waitEvent.WaitOne(0)) + { + return true; + } + + // The 1ms wait will be interrupted by the nanosleep timeout if it completes. + if (!_pool.SleepAndSignal(ns, timePoint)) + { + // Too many threads on the pool. 
+ return false; + } + _waitEvent.WaitOne(1); + _pool.IgnoreSignal(); + + return true; + } + + return false; + } + + public void Sleep() + { + _waitEvent.WaitOne(); + } + + public void Signal() + { + _waitEvent.Set(); + } + + public void Dispose() + { + GC.SuppressFinalize(this); + + _pool.Dispose(); + _waitEvent.Dispose(); + } + } +} diff --git a/src/Ryujinx.Common/PreciseSleep/NanosleepPool.cs b/src/Ryujinx.Common/PreciseSleep/NanosleepPool.cs new file mode 100644 index 000000000..c0973dcb3 --- /dev/null +++ b/src/Ryujinx.Common/PreciseSleep/NanosleepPool.cs @@ -0,0 +1,228 @@ +using System; +using System.Collections.Generic; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Common.PreciseSleep +{ + /// + /// A pool of threads used to allow "interruptable" nanosleep for a single target event. + /// + [SupportedOSPlatform("macos")] + [SupportedOSPlatform("linux")] + [SupportedOSPlatform("android")] + [SupportedOSPlatform("ios")] + internal class NanosleepPool : IDisposable + { + public const int MaxThreads = 8; + + /// + /// A thread that nanosleeps and may signal an event on wake. + /// When a thread is assigned a nanosleep to perform, it also gets a signal ID. + /// The pool's target event is only signalled if this ID matches the latest dispatched one. + /// + private class NanosleepThread : IDisposable + { + private static readonly long _timePointEpsilon; + + static NanosleepThread() + { + _timePointEpsilon = PerformanceCounter.TicksPerMillisecond / 100; // 0.01ms + } + + private readonly Thread _thread; + private readonly NanosleepPool _parent; + private readonly AutoResetEvent _newWaitEvent; + private bool _running = true; + + private long _signalId; + private long _nanoseconds; + private long _timePoint; + + public long SignalId => _signalId; + + /// + /// Creates a new NanosleepThread for a parent pool, with a specified thread ID. 
+ /// + /// Parent NanosleepPool + /// Thread ID + public NanosleepThread(NanosleepPool parent, int id) + { + _parent = parent; + _newWaitEvent = new(false); + + _thread = new Thread(Loop) + { + Name = $"Common.Nanosleep.{id}", + Priority = ThreadPriority.AboveNormal, + IsBackground = true + }; + + _thread.Start(); + } + + /// + /// Service requests to perform a nanosleep, signal parent pool when complete. + /// + private void Loop() + { + _newWaitEvent.WaitOne(); + + while (_running) + { + Nanosleep.Sleep(_nanoseconds); + + _parent.Signal(this); + _newWaitEvent.WaitOne(); + } + + _newWaitEvent.Dispose(); + } + + /// + /// Assign a nanosleep for this thread to perform, then signal at the end. + /// + /// Nanoseconds to sleep + /// Signal ID + /// Target timepoint + public void SleepAndSignal(long nanoseconds, long signalId, long timePoint) + { + _signalId = signalId; + _nanoseconds = nanoseconds; + _timePoint = timePoint; + _newWaitEvent.Set(); + } + + /// + /// Resurrect an active nanosleep's signal if its target timepoint is a close enough match. + /// + /// New signal id to assign the nanosleep + /// Target timepoint + /// True if resurrected, false otherwise + public bool Resurrect(long signalId, long timePoint) + { + if (Math.Abs(timePoint - _timePoint) < _timePointEpsilon) + { + _signalId = signalId; + + return true; + } + + return false; + } + + /// + /// Dispose the NanosleepThread, interrupting its worker loop. + /// + public void Dispose() + { + if (_running) + { + _running = false; + _newWaitEvent.Set(); + } + } + } + + private readonly object _lock = new(); + private readonly List _threads = new(); + private readonly List _active = new(); + private readonly Stack _free = new(); + private readonly AutoResetEvent _signalTarget; + + private long _signalId; + + /// + /// Creates a new NanosleepPool with a target event to signal when a nanosleep completes. 
+ /// + /// Event to signal when nanosleeps complete + public NanosleepPool(AutoResetEvent signalTarget) + { + _signalTarget = signalTarget; + } + + /// + /// Signal the target event (if the source sleep has not been superseded) + /// and free the nanosleep thread. + /// + /// Nanosleep thread that completed + private void Signal(NanosleepThread thread) + { + lock (_lock) + { + _active.Remove(thread); + _free.Push(thread); + + if (thread.SignalId == _signalId) + { + _signalTarget.Set(); + } + } + } + + /// + /// Sleep for the given number of nanoseconds and signal the target event. + /// This does not block the caller thread. + /// + /// Nanoseconds to sleep + /// Target timepoint + /// True if the signal will be set, false otherwise + public bool SleepAndSignal(long nanoseconds, long timePoint) + { + lock (_lock) + { + _signalId++; + + // Check active sleeps, if any line up with the requested timepoint then resurrect that nanosleep. + foreach (NanosleepThread existing in _active) + { + if (existing.Resurrect(_signalId, timePoint)) + { + return true; + } + } + + if (!_free.TryPop(out NanosleepThread thread)) + { + if (_threads.Count >= MaxThreads) + { + return false; + } + + thread = new NanosleepThread(this, _threads.Count); + + _threads.Add(thread); + } + + _active.Add(thread); + + thread.SleepAndSignal(nanoseconds, _signalId, timePoint); + + return true; + } + } + + /// + /// Ignore the latest nanosleep. + /// + public void IgnoreSignal() + { + _signalId++; + } + + /// + /// Dispose the NanosleepPool, disposing all of its active threads. 
+ /// + public void Dispose() + { + GC.SuppressFinalize(this); + + foreach (NanosleepThread thread in _threads) + { + thread.Dispose(); + } + + _threads.Clear(); + } + } +} diff --git a/src/Ryujinx.Common/PreciseSleep/PreciseSleepHelper.cs b/src/Ryujinx.Common/PreciseSleep/PreciseSleepHelper.cs new file mode 100644 index 000000000..3c30a7f60 --- /dev/null +++ b/src/Ryujinx.Common/PreciseSleep/PreciseSleepHelper.cs @@ -0,0 +1,104 @@ +using Ryujinx.Common.SystemInterop; +using System; +using System.Threading; + +namespace Ryujinx.Common.PreciseSleep +{ + public static class PreciseSleepHelper + { + /// + /// Create a precise sleep event for the current platform. + /// + /// A precise sleep event + public static IPreciseSleepEvent CreateEvent() + { + if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS() || OperatingSystem.IsIOS() || OperatingSystem.IsAndroid()) + { + return new NanosleepEvent(); + } + else if (OperatingSystem.IsWindows()) + { + return new WindowsSleepEvent(); + } + else + { + return new SleepEvent(); + } + } + + /// + /// Sleeps up to the closest point to the timepoint that the OS reasonably allows. + /// The provided event is used by the timer to wake the current thread, and should not be signalled from any other source. + /// + /// Event used to wake this thread + /// Target timepoint in host ticks + public static void SleepUntilTimePoint(EventWaitHandle evt, long timePoint) + { + if (OperatingSystem.IsWindows()) + { + WindowsGranularTimer.Instance.SleepUntilTimePointWithoutExternalSignal(evt, timePoint); + } + else + { + // Events might oversleep by a little, depending on OS. + // We don't want to miss the timepoint, so bias the wait to be lower. + // Nanosleep can possibly handle it better, too. 
+ long accuracyBias = PerformanceCounter.TicksPerMillisecond / 2; + long now = PerformanceCounter.ElapsedTicks + accuracyBias; + long ms = Math.Min((timePoint - now) / PerformanceCounter.TicksPerMillisecond, int.MaxValue); + + if (ms > 0) + { + evt.WaitOne((int)ms); + } + + if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS() || OperatingSystem.IsIOS() || OperatingSystem.IsAndroid()) + { + // Do a nanosleep. + now = PerformanceCounter.ElapsedTicks; + long ns = ((timePoint - now) * 1_000_000) / PerformanceCounter.TicksPerMillisecond; + + Nanosleep.SleepAtMost(ns); + } + } + } + + /// + /// Spinwait until the given timepoint. If wakeSignal is or becomes 1, return early. + /// Thread is allowed to yield. + /// + /// Target timepoint in host ticks + /// Returns early if this is set to 1 + public static void SpinWaitUntilTimePoint(long timePoint, ref long wakeSignal) + { + SpinWait spinWait = new(); + + while (Interlocked.Read(ref wakeSignal) != 1 && PerformanceCounter.ElapsedTicks < timePoint) + { + // Our time is close - don't let SpinWait go off and potentially Thread.Sleep(). + if (spinWait.NextSpinWillYield) + { + Thread.Yield(); + + spinWait.Reset(); + } + else + { + spinWait.SpinOnce(); + } + } + } + + /// + /// Spinwait until the given timepoint, with no opportunity to wake early. + /// + /// Target timepoint in host ticks + public static void SpinWaitUntilTimePoint(long timePoint) + { + while (PerformanceCounter.ElapsedTicks < timePoint) + { + Thread.SpinWait(5); + } + } + } +} diff --git a/src/Ryujinx.Common/PreciseSleep/SleepEvent.cs b/src/Ryujinx.Common/PreciseSleep/SleepEvent.cs new file mode 100644 index 000000000..f0769d1e4 --- /dev/null +++ b/src/Ryujinx.Common/PreciseSleep/SleepEvent.cs @@ -0,0 +1,51 @@ +using System; +using System.Threading; + +namespace Ryujinx.Common.PreciseSleep +{ + /// + /// A cross-platform precise sleep event that has millisecond granularity. 
+ /// + internal class SleepEvent : IPreciseSleepEvent + { + private readonly AutoResetEvent _waitEvent = new(false); + + public long AdjustTimePoint(long timePoint, long timeoutNs) + { + // No adjustment + return timePoint; + } + + public bool SleepUntil(long timePoint) + { + long now = PerformanceCounter.ElapsedTicks; + long ms = Math.Min((timePoint - now) / PerformanceCounter.TicksPerMillisecond, int.MaxValue); + + if (ms > 0) + { + _waitEvent.WaitOne((int)ms); + + return true; + } + + return false; + } + + public void Sleep() + { + _waitEvent.WaitOne(); + } + + public void Signal() + { + _waitEvent.Set(); + } + + public void Dispose() + { + GC.SuppressFinalize(this); + + _waitEvent.Dispose(); + } + } +} diff --git a/src/Ryujinx.Common/PreciseSleep/WindowsGranularTimer.cs b/src/Ryujinx.Common/PreciseSleep/WindowsGranularTimer.cs new file mode 100644 index 000000000..a0de16341 --- /dev/null +++ b/src/Ryujinx.Common/PreciseSleep/WindowsGranularTimer.cs @@ -0,0 +1,220 @@ +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Common.SystemInterop +{ + /// + /// Timer that attempts to align with the hardware timer interrupt, + /// and can alert listeners on ticks. 
+ /// + [SupportedOSPlatform("windows")] + internal partial class WindowsGranularTimer + { + private const int MinimumGranularity = 5000; + + private static readonly WindowsGranularTimer _instance = new(); + public static WindowsGranularTimer Instance => _instance; + + private readonly struct WaitingObject + { + public readonly long Id; + public readonly EventWaitHandle Signal; + public readonly long TimePoint; + + public WaitingObject(long id, EventWaitHandle signal, long timePoint) + { + Id = id; + Signal = signal; + TimePoint = timePoint; + } + } + + [LibraryImport("ntdll.dll", SetLastError = true)] + private static partial int NtSetTimerResolution(int DesiredResolution, [MarshalAs(UnmanagedType.Bool)] bool SetResolution, out int CurrentResolution); + + [LibraryImport("ntdll.dll", SetLastError = true)] + private static partial int NtQueryTimerResolution(out int MaximumResolution, out int MinimumResolution, out int CurrentResolution); + + [LibraryImport("ntdll.dll", SetLastError = true)] + private static partial uint NtDelayExecution([MarshalAs(UnmanagedType.Bool)] bool Alertable, ref long DelayInterval); + + public long GranularityNs => _granularityNs; + public long GranularityTicks => _granularityTicks; + + private readonly Thread _timerThread; + private long _granularityNs = MinimumGranularity * 100L; + private long _granularityTicks; + private long _lastTicks = PerformanceCounter.ElapsedTicks; + private long _lastId; + + private readonly object _lock = new(); + private readonly List _waitingObjects = new(); + + private WindowsGranularTimer() + { + _timerThread = new Thread(Loop) + { + IsBackground = true, + Name = "Common.WindowsTimer", + Priority = ThreadPriority.Highest + }; + + _timerThread.Start(); + } + + /// + /// Measure and initialize the timer's target granularity. 
+ /// + private void Initialize() + { + NtQueryTimerResolution(out _, out int min, out int curr); + + if (min > 0) + { + min = Math.Max(min, MinimumGranularity); + + _granularityNs = min * 100L; + NtSetTimerResolution(min, true, out _); + } + else + { + _granularityNs = curr * 100L; + } + + _granularityTicks = (_granularityNs * PerformanceCounter.TicksPerMillisecond) / 1_000_000; + } + + /// + /// Main loop for the timer thread. Wakes every clock tick and signals any listeners, + /// as well as keeping track of clock alignment. + /// + private void Loop() + { + Initialize(); + while (true) + { + long delayInterval = -1; // Next tick + NtSetTimerResolution((int)(_granularityNs / 100), true, out _); + NtDelayExecution(false, ref delayInterval); + + long newTicks = PerformanceCounter.ElapsedTicks; + long nextTicks = newTicks + _granularityTicks; + + lock (_lock) + { + for (int i = 0; i < _waitingObjects.Count; i++) + { + if (nextTicks > _waitingObjects[i].TimePoint) + { + // The next clock tick will be after the timepoint, we need to signal now. + _waitingObjects[i].Signal.Set(); + + _waitingObjects.RemoveAt(i--); + } + } + + _lastTicks = newTicks; + } + } + } + + /// + /// Sleep until a timepoint. + /// + /// Reset event to use to be awoken by the clock tick, or an external signal + /// Target timepoint + /// True if waited or signalled, false otherwise + public bool SleepUntilTimePoint(AutoResetEvent evt, long timePoint) + { + if (evt.WaitOne(0)) + { + return true; + } + + long id; + + lock (_lock) + { + // Return immediately if the next tick is after the requested timepoint. 
+ long nextTicks = _lastTicks + _granularityTicks; + + if (nextTicks > timePoint) + { + return false; + } + + id = ++_lastId; + + _waitingObjects.Add(new WaitingObject(id, evt, timePoint)); + } + + evt.WaitOne(); + + lock (_lock) + { + for (int i = 0; i < _waitingObjects.Count; i++) + { + if (id == _waitingObjects[i].Id) + { + _waitingObjects.RemoveAt(i--); + break; + } + } + } + + return true; + } + + /// + /// Sleep until a timepoint, but don't expect any external signals. + /// + /// + /// Saves some effort compared to the sleep that expects to be signalled. + /// + /// Reset event to use to be awoken by the clock tick + /// Target timepoint + /// True if waited, false otherwise + public bool SleepUntilTimePointWithoutExternalSignal(EventWaitHandle evt, long timePoint) + { + long id; + + lock (_lock) + { + // Return immediately if the next tick is after the requested timepoint. + long nextTicks = _lastTicks + _granularityTicks; + + if (nextTicks > timePoint) + { + return false; + } + + id = ++_lastId; + + _waitingObjects.Add(new WaitingObject(id, evt, timePoint)); + } + + evt.WaitOne(); + + return true; + } + + /// + /// Returns the two nearest clock ticks for a given timepoint. 
+ /// + /// Target timepoint + /// The nearest clock ticks before and after the given timepoint + public (long, long) ReturnNearestTicks(long timePoint) + { + long last = _lastTicks; + long delta = timePoint - last; + + long lowTicks = delta / _granularityTicks; + long highTicks = (delta + _granularityTicks - 1) / _granularityTicks; + + return (last + lowTicks * _granularityTicks, last + highTicks * _granularityTicks); + } + } +} diff --git a/src/Ryujinx.Common/PreciseSleep/WindowsSleepEvent.cs b/src/Ryujinx.Common/PreciseSleep/WindowsSleepEvent.cs new file mode 100644 index 000000000..87c10d18e --- /dev/null +++ b/src/Ryujinx.Common/PreciseSleep/WindowsSleepEvent.cs @@ -0,0 +1,92 @@ +using Ryujinx.Common.SystemInterop; +using System; +using System.Runtime.Versioning; +using System.Threading; + +namespace Ryujinx.Common.PreciseSleep +{ + /// + /// A precise sleep event that uses Windows specific methods to increase clock resolution beyond 1ms, + /// use the clock's phase for more precise waits, and potentially align timepoints with it. + /// + [SupportedOSPlatform("windows")] + internal class WindowsSleepEvent : IPreciseSleepEvent + { + /// + /// The clock can drift a bit, so add this to encourage the clock to still wait if the next tick is forecasted slightly before it. + /// + private const long ErrorBias = 50000; + + /// + /// Allowed to be 0.05ms away from the clock granularity to reduce precision. + /// + private const long ClockAlignedBias = 50000; + + /// + /// The fraction of clock granularity above the timepoint that will align it down to the lower timepoint. + /// Currently set to the lower 1/4, so for 0.5ms granularity: 0.1ms would be rounded down, 0.2 ms would be rounded up. + /// + private const long ReverseTimePointFraction = 4; + + private readonly AutoResetEvent _waitEvent = new(false); + private readonly WindowsGranularTimer _timer = WindowsGranularTimer.Instance; + + /// + /// Set to true to disable timepoint realignment. 
+ /// + public bool Precise { get; set; } = false; + + public long AdjustTimePoint(long timePoint, long timeoutNs) + { + if (Precise || timePoint == long.MaxValue) + { + return timePoint; + } + + // Does the timeout align with the host clock? + + long granularity = _timer.GranularityNs; + long misalignment = timeoutNs % granularity; + + if ((misalignment < ClockAlignedBias || misalignment > granularity - ClockAlignedBias) && timeoutNs > ClockAlignedBias) + { + // Inaccurate sleep for 0.5ms increments, typically. + + (long low, long high) = _timer.ReturnNearestTicks(timePoint); + + if (timePoint - low < _timer.GranularityTicks / ReverseTimePointFraction) + { + timePoint = low; + } + else + { + timePoint = high; + } + } + + return timePoint; + } + + public bool SleepUntil(long timePoint) + { + return _timer.SleepUntilTimePoint(_waitEvent, timePoint + (ErrorBias * PerformanceCounter.TicksPerMillisecond) / 1_000_000); + } + + public void Sleep() + { + _waitEvent.WaitOne(); + } + + public void Signal() + { + _waitEvent.Set(); + } + + public void Dispose() + { + GC.SuppressFinalize(this); + + _waitEvent.Dispose(); + } + } +} diff --git a/src/Ryujinx.HLE/HOS/Kernel/Common/KTimeManager.cs b/src/Ryujinx.HLE/HOS/Kernel/Common/KTimeManager.cs index 499bc2c61..3c5fa067f 100644 --- a/src/Ryujinx.HLE/HOS/Kernel/Common/KTimeManager.cs +++ b/src/Ryujinx.HLE/HOS/Kernel/Common/KTimeManager.cs @@ -1,4 +1,5 @@ using Ryujinx.Common; +using Ryujinx.Common.PreciseSleep; using System; using System.Collections.Generic; using System.Threading; @@ -23,7 +24,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common private readonly KernelContext _context; private readonly List _waitingObjects; - private AutoResetEvent _waitEvent; + private IPreciseSleepEvent _waitEvent; private bool _keepRunning; private long _enforceWakeupFromSpinWait; @@ -54,6 +55,8 @@ namespace Ryujinx.HLE.HOS.Kernel.Common timePoint = long.MaxValue; } + timePoint = _waitEvent.AdjustTimePoint(timePoint, timeout); + lock 
(_context.CriticalSection.Lock) { _waitingObjects.Add(new WaitingObject(schedulerObj, timePoint)); @@ -64,7 +67,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common } } - _waitEvent.Set(); + _waitEvent.Signal(); } public void UnscheduleFutureInvocation(IKFutureSchedulerObject schedulerObj) @@ -83,10 +86,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Common private void WaitAndCheckScheduledObjects() { - SpinWait spinWait = new(); WaitingObject next; - using (_waitEvent = new AutoResetEvent(false)) + using (_waitEvent = PreciseSleepHelper.CreateEvent()) { while (_keepRunning) { @@ -103,30 +105,9 @@ namespace Ryujinx.HLE.HOS.Kernel.Common if (next.TimePoint > timePoint) { - long ms = Math.Min((next.TimePoint - timePoint) / PerformanceCounter.TicksPerMillisecond, int.MaxValue); - - if (ms > 0) + if (!_waitEvent.SleepUntil(next.TimePoint)) { - _waitEvent.WaitOne((int)ms); - } - else - { - while (Interlocked.Read(ref _enforceWakeupFromSpinWait) != 1 && PerformanceCounter.ElapsedTicks < next.TimePoint) - { - // Our time is close - don't let SpinWait go off and potentially Thread.Sleep(). 
- if (spinWait.NextSpinWillYield) - { - Thread.Yield(); - - spinWait.Reset(); - } - else - { - spinWait.SpinOnce(); - } - } - - spinWait.Reset(); + PreciseSleepHelper.SpinWaitUntilTimePoint(next.TimePoint, ref _enforceWakeupFromSpinWait); } } @@ -145,7 +126,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common } else { - _waitEvent.WaitOne(); + _waitEvent.Sleep(); } } } @@ -212,7 +193,7 @@ namespace Ryujinx.HLE.HOS.Kernel.Common public void Dispose() { _keepRunning = false; - _waitEvent?.Set(); + _waitEvent?.Signal(); } } } diff --git a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs index d3d9dc030..712d640c2 100644 --- a/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs +++ b/src/Ryujinx.HLE/HOS/Services/SurfaceFlinger/SurfaceFlinger.cs @@ -1,5 +1,7 @@ -using Ryujinx.Common.Configuration; +using Ryujinx.Common; +using Ryujinx.Common.Configuration; using Ryujinx.Common.Logging; +using Ryujinx.Common.PreciseSleep; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu; using Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvMap; @@ -23,9 +25,7 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger private readonly Thread _composerThread; - private readonly Stopwatch _chrono; - - private readonly ManualResetEvent _event = new(false); + private readonly AutoResetEvent _event = new(false); private readonly AutoResetEvent _nextFrameEvent = new(true); private long _ticks; private long _ticksPerFrame; @@ -64,11 +64,9 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger _composerThread = new Thread(HandleComposition) { Name = "SurfaceFlinger.Composer", + Priority = ThreadPriority.AboveNormal }; - _chrono = new Stopwatch(); - _chrono.Start(); - _ticks = 0; _spinTicks = Stopwatch.Frequency / 500; _1msTicks = Stopwatch.Frequency / 1000; @@ -299,11 +297,11 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger { _isRunning = true; - long lastTicks = _chrono.ElapsedTicks; + long lastTicks = 
PerformanceCounter.ElapsedTicks; while (_isRunning) { - long ticks = _chrono.ElapsedTicks; + long ticks = PerformanceCounter.ElapsedTicks; if (_swapInterval == 0) { @@ -336,21 +334,16 @@ namespace Ryujinx.HLE.HOS.Services.SurfaceFlinger } // Sleep if possible. If the time til the next frame is too low, spin wait instead. - long diff = _ticksPerFrame - (_ticks + _chrono.ElapsedTicks - ticks); + long diff = _ticksPerFrame - (_ticks + PerformanceCounter.ElapsedTicks - ticks); if (diff > 0) { + PreciseSleepHelper.SleepUntilTimePoint(_event, PerformanceCounter.ElapsedTicks + diff); + + diff = _ticksPerFrame - (_ticks + PerformanceCounter.ElapsedTicks - ticks); + if (diff < _spinTicks) { - do - { - // SpinWait is a little more HT/SMT friendly than aggressively updating/checking ticks. - // The value of 5 still gives us quite a bit of precision (~0.0003ms variance at worst) while waiting a reasonable amount of time. - Thread.SpinWait(5); - - ticks = _chrono.ElapsedTicks; - _ticks += ticks - lastTicks; - lastTicks = ticks; - } while (_ticks < _ticksPerFrame); + PreciseSleepHelper.SpinWaitUntilTimePoint(PerformanceCounter.ElapsedTicks + diff); } else {