nvservices: mitigate abort with heavy load on the GPU processing thread (#1173)

* nvservices: mitigate abort with heavy load on the GPU processing thread. This should fix Mario Tennis and LM3 regressions with syncpoints. NOTE: Mario Tennis seems to have another issue related to the texture cache that happens randomly when starting a match. PS: Also add a debug logger for all known ioctl call to facilitate debugging and add a missing UpdateMin in EventSignal. * Address LDj3SNuD's comment * Address gdkchan's comment
2020-05-01 23:18:42 +02:00 · 2020-05-01 23:18:42 +02:00 · 81cba3c3df
commit 81cba3c3df
parent 71dbb38b9a
3 changed files with 90 additions and 16 deletions
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvDeviceFile.cs
@ -1,7 +1,9 @@
-using Ryujinx.HLE.HOS.Kernel.Memory;
+using Ryujinx.Common.Logging;
 using Ryujinx.HLE.HOS.Kernel.Memory;
 using Ryujinx.HLE.HOS.Kernel.Process;
 using System;
 using System.Diagnostics;
 using System.Reflection;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
@ -50,11 +52,18 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices
        protected delegate NvInternalResult IoctlProcessorInline<T, T1>(ref T arguments, ref T1 inlineData);
        protected delegate NvInternalResult IoctlProcessorInlineSpan<T, T1>(ref T arguments, Span<T1> inlineData);
        private static NvInternalResult PrintResult(MethodInfo info, NvInternalResult result)
        {
            Logger.PrintDebug(LogClass.ServiceNv, $"{info.Name} returned result {result}");
            return result;
        }
        protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessor<T> callback, Span<byte> arguments) where T : struct
        {
            Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
-            return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0]);
+            return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0]));
        }
        protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInline<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
@ -62,19 +71,19 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices
            Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
            Debug.Assert(inlineBuffer.Length == Unsafe.SizeOf<T1>());
-            return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], ref MemoryMarshal.Cast<byte, T1>(inlineBuffer)[0]);
+            return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], ref MemoryMarshal.Cast<byte, T1>(inlineBuffer)[0]));
        }
        protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessorSpan<T> callback, Span<byte> arguments) where T : struct
        {
-            return callback(MemoryMarshal.Cast<byte, T>(arguments));
+            return PrintResult(callback.Method, callback(MemoryMarshal.Cast<byte, T>(arguments)));
        }
        protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInlineSpan<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
        {
            Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
-            return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], MemoryMarshal.Cast<byte, T1>(inlineBuffer));
+            return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], MemoryMarshal.Cast<byte, T1>(inlineBuffer)));
        }
        public abstract void Close();
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/NvHostCtrlDeviceFile.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/NvHostCtrlDeviceFile.cs
@ -310,6 +310,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
            hostEvent.State = NvHostEventState.Cancelled;
            _device.System.HostSyncpoint.UpdateMin(hostEvent.Fence.Id);
            return NvInternalResult.Success;
        }
@ -398,8 +400,10 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
                hostEvent.State == NvHostEventState.Signaled  ||
                hostEvent.State == NvHostEventState.Cancelled))
            {
-                hostEvent.Wait(_device.Gpu, fence);
+                bool timedOut = hostEvent.Wait(_device.Gpu, fence);
                if (timedOut)
                {
                    if (isWaitEventCmd)
                    {
                        value = ((fence.Id & 0xfff) << 16) | 0x10000000;
@ -414,6 +418,13 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
                    result = NvInternalResult.TryAgain;
                }
                else
                {
                    value = fence.Value;
                    return NvInternalResult.Success;
                }
            }
            else
            {
                Logger.PrintError(LogClass.ServiceNv, $"Invalid Event at index {eventIndex} (isWaitEventAsyncCmd: {isWaitEventAsyncCmd}, isWaitEventCmd: {isWaitEventCmd})");
--- a/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostEvent.cs
+++ b/Ryujinx.HLE/HOS/Services/Nv/NvDrvServices/NvHostCtrl/Types/NvHostEvent.cs
@ -1,8 +1,10 @@
 using Ryujinx.Common.Logging;
 using Ryujinx.Graphics.Gpu;
 using Ryujinx.Graphics.Gpu.Synchronization;
 using Ryujinx.HLE.HOS.Kernel.Threading;
 using Ryujinx.HLE.HOS.Services.Nv.Types;
 using System;
 using System.Threading;
 namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
 {
@ -16,6 +18,15 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
        private NvHostSyncpt          _syncpointManager;
        private SyncpointWaiterHandle _waiterInformation;
        private NvFence _previousFailingFence;
        private uint    _failingCount;
        /// <summary>
        /// Max failing count until waiting on CPU.
        /// FIXME: This seems enough for most of the cases, reduce if needed.
        /// </summary>
        private const uint FailingCountMax = 2;
        public NvHostEvent(NvHostSyncpt syncpointManager, uint eventId, Horizon system)
        {
            Fence.Id = 0;
@ -27,11 +38,20 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
            _eventId = eventId;
            _syncpointManager = syncpointManager;
            ResetFailingState();
        }
        private void ResetFailingState()
        {
            _previousFailingFence.Id    = NvFence.InvalidSyncPointId;
            _previousFailingFence.Value = 0;
            _failingCount               = 0;
        }
        public void Reset()
        {
-            Fence.Id    = NvFence.InvalidSyncPointId;
+            Fence.Id    = 0;
            Fence.Value = 0;
            State       = NvHostEventState.Available;
        }
@ -52,6 +72,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
        private void GpuSignaled()
        {
            ResetFailingState();
            Signal();
        }
@ -61,18 +83,50 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
            {
                gpuContext.Synchronization.UnregisterCallback(Fence.Id, _waiterInformation);
                if (_previousFailingFence.Id == Fence.Id && _previousFailingFence.Value == Fence.Value)
                {
                    _failingCount++;
                }
                else
                {
                    _failingCount = 1;
                    _previousFailingFence = Fence;
                }
                Signal();
            }
            Event.WritableEvent.Clear();
        }
-        public void Wait(GpuContext gpuContext, NvFence fence)
+        public bool Wait(GpuContext gpuContext, NvFence fence)
        {
            Fence = fence;
            State = NvHostEventState.Waiting;
            // NOTE: nvservices code should always wait on the GPU side.
            //       If we do this, we may get an abort or undefined behaviour when the GPU processing thread is blocked for a long period (for example, during shader compilation).
            //       The reason for this is that the NVN code will try to wait until giving up.
            //       This is done by trying to wait and signal multiple times until aborting after you are past the timeout.
            //       As such, if it fails too many time, we enforce a wait on the CPU side indefinitely.
            //       This allows to keep GPU and CPU in sync when we are slow.
            if (_failingCount == FailingCountMax)
            {
                Logger.PrintWarning(LogClass.ServiceNv, "GPU processing thread is too slow, waiting on CPU...");
                bool timedOut = Fence.Wait(gpuContext, Timeout.InfiniteTimeSpan);
                GpuSignaled();
                return timedOut;
            }
            else
            {
                _waiterInformation = gpuContext.Synchronization.RegisterCallbackOnSyncpoint(Fence.Id, Fence.Value, GpuSignaled);
                return true;
            }
        }
        public string DumpState(GpuContext gpuContext)