nvservices: mitigate abort with heavy load on the GPU processing thread (#1173)

* nvservices: mitigate abort with heavy load on the GPU processing thread.

This should fix Mario Tennis and LM3 regressions with syncpoints.

NOTE: Mario Tennis seems to have another issue related to the texture
cache that happens randomly when starting a match.

PS: Also add a debug logger for all known ioctl calls to facilitate
debugging, and add a missing UpdateMin call in EventSignal.

* Address LDj3SNuD's comment

* Address gdkchan's comment
This commit is contained in:
Thog 2020-05-01 23:18:42 +02:00 committed by GitHub
parent 71dbb38b9a
commit 81cba3c3df
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 90 additions and 16 deletions

View file

@ -1,7 +1,9 @@
using Ryujinx.HLE.HOS.Kernel.Memory; using Ryujinx.Common.Logging;
using Ryujinx.HLE.HOS.Kernel.Memory;
using Ryujinx.HLE.HOS.Kernel.Process; using Ryujinx.HLE.HOS.Kernel.Process;
using System; using System;
using System.Diagnostics; using System.Diagnostics;
using System.Reflection;
using System.Runtime.CompilerServices; using System.Runtime.CompilerServices;
using System.Runtime.InteropServices; using System.Runtime.InteropServices;
@ -50,11 +52,18 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices
protected delegate NvInternalResult IoctlProcessorInline<T, T1>(ref T arguments, ref T1 inlineData); protected delegate NvInternalResult IoctlProcessorInline<T, T1>(ref T arguments, ref T1 inlineData);
protected delegate NvInternalResult IoctlProcessorInlineSpan<T, T1>(ref T arguments, Span<T1> inlineData); protected delegate NvInternalResult IoctlProcessorInlineSpan<T, T1>(ref T arguments, Span<T1> inlineData);
/// <summary>
/// Writes a debug log entry containing the name of the given method and the result it produced,
/// then hands the result back unchanged so the call can be wrapped around a return expression.
/// </summary>
/// <param name="info">Reflection metadata of the method that produced the result; only its name is logged.</param>
/// <param name="result">Result value to log and pass through.</param>
/// <returns><paramref name="result"/>, unmodified.</returns>
private static NvInternalResult PrintResult(MethodInfo info, NvInternalResult result)
{
    string message = $"{info.Name} returned result {result}";

    Logger.PrintDebug(LogClass.ServiceNv, message);

    return result;
}
// NOTE(review): each row below shows the pre-change text followed by the post-change text of the same line.
/// <summary>
/// Reinterprets the raw argument bytes as a single <typeparamref name="T"/> and passes it by reference to <paramref name="callback"/>.
/// The post-change version additionally routes the callback's result through PrintResult before returning it.
/// </summary>
/// <typeparam name="T">Structure type the argument bytes are cast to.</typeparam>
/// <param name="callback">Handler invoked with a reference to the first <typeparamref name="T"/> element of <paramref name="arguments"/>.</param>
/// <param name="arguments">Raw argument bytes; their length is asserted to equal the size of <typeparamref name="T"/>.</param>
/// <returns>The result produced by <paramref name="callback"/>.</returns>
protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessor<T> callback, Span<byte> arguments) where T : struct protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessor<T> callback, Span<byte> arguments) where T : struct
{ {
// Debug.Assert is compiled out of non-DEBUG builds, so this size check is a development aid only.
Debug.Assert(arguments.Length == Unsafe.SizeOf<T>()); Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0]); return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0]));
} }
protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInline<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInline<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
@ -62,19 +71,19 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices
Debug.Assert(arguments.Length == Unsafe.SizeOf<T>()); Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
Debug.Assert(inlineBuffer.Length == Unsafe.SizeOf<T1>()); Debug.Assert(inlineBuffer.Length == Unsafe.SizeOf<T1>());
return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], ref MemoryMarshal.Cast<byte, T1>(inlineBuffer)[0]); return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], ref MemoryMarshal.Cast<byte, T1>(inlineBuffer)[0]));
} }
// NOTE(review): each row below shows the pre-change text followed by the post-change text of the same line.
/// <summary>
/// Reinterprets the raw argument bytes as a span of <typeparamref name="T"/> and passes that span to <paramref name="callback"/>.
/// The post-change version additionally routes the callback's result through PrintResult before returning it.
/// No length assertion is performed in this overload.
/// </summary>
/// <typeparam name="T">Structure type the argument bytes are cast to.</typeparam>
/// <param name="callback">Handler invoked with the reinterpreted span.</param>
/// <param name="arguments">Raw argument bytes.</param>
/// <returns>The result produced by <paramref name="callback"/>.</returns>
protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessorSpan<T> callback, Span<byte> arguments) where T : struct protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessorSpan<T> callback, Span<byte> arguments) where T : struct
{ {
return callback(MemoryMarshal.Cast<byte, T>(arguments)); return PrintResult(callback.Method, callback(MemoryMarshal.Cast<byte, T>(arguments)));
} }
// NOTE(review): each row below shows the pre-change text followed by the post-change text of the same line.
/// <summary>
/// Reinterprets the raw argument bytes as a single <typeparamref name="T"/> (passed by reference) and the inline buffer
/// as a span of <typeparamref name="T1"/>, then invokes <paramref name="callback"/> with both.
/// The post-change version additionally routes the callback's result through PrintResult before returning it.
/// </summary>
/// <typeparam name="T">Structure type the argument bytes are cast to.</typeparam>
/// <typeparam name="T1">Element type the inline buffer bytes are cast to.</typeparam>
/// <param name="callback">Handler invoked with the argument structure and the inline data span.</param>
/// <param name="arguments">Raw argument bytes; their length is asserted to equal the size of <typeparamref name="T"/>.</param>
/// <param name="inlineBuffer">Raw inline data bytes; no length assertion is applied to this buffer.</param>
/// <returns>The result produced by <paramref name="callback"/>.</returns>
protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInlineSpan<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInlineSpan<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
{ {
// Debug.Assert is compiled out of non-DEBUG builds, so this size check is a development aid only.
Debug.Assert(arguments.Length == Unsafe.SizeOf<T>()); Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], MemoryMarshal.Cast<byte, T1>(inlineBuffer)); return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], MemoryMarshal.Cast<byte, T1>(inlineBuffer)));
} }
public abstract void Close(); public abstract void Close();

View file

@ -310,6 +310,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
hostEvent.State = NvHostEventState.Cancelled; hostEvent.State = NvHostEventState.Cancelled;
_device.System.HostSyncpoint.UpdateMin(hostEvent.Fence.Id);
return NvInternalResult.Success; return NvInternalResult.Success;
} }
@ -398,8 +400,10 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
hostEvent.State == NvHostEventState.Signaled || hostEvent.State == NvHostEventState.Signaled ||
hostEvent.State == NvHostEventState.Cancelled)) hostEvent.State == NvHostEventState.Cancelled))
{ {
hostEvent.Wait(_device.Gpu, fence); bool timedOut = hostEvent.Wait(_device.Gpu, fence);
if (timedOut)
{
if (isWaitEventCmd) if (isWaitEventCmd)
{ {
value = ((fence.Id & 0xfff) << 16) | 0x10000000; value = ((fence.Id & 0xfff) << 16) | 0x10000000;
@ -414,6 +418,13 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
result = NvInternalResult.TryAgain; result = NvInternalResult.TryAgain;
} }
else else
{
value = fence.Value;
return NvInternalResult.Success;
}
}
else
{ {
Logger.PrintError(LogClass.ServiceNv, $"Invalid Event at index {eventIndex} (isWaitEventAsyncCmd: {isWaitEventAsyncCmd}, isWaitEventCmd: {isWaitEventCmd})"); Logger.PrintError(LogClass.ServiceNv, $"Invalid Event at index {eventIndex} (isWaitEventAsyncCmd: {isWaitEventAsyncCmd}, isWaitEventCmd: {isWaitEventCmd})");

View file

@ -1,8 +1,10 @@
using Ryujinx.Common.Logging;
using Ryujinx.Graphics.Gpu; using Ryujinx.Graphics.Gpu;
using Ryujinx.Graphics.Gpu.Synchronization; using Ryujinx.Graphics.Gpu.Synchronization;
using Ryujinx.HLE.HOS.Kernel.Threading; using Ryujinx.HLE.HOS.Kernel.Threading;
using Ryujinx.HLE.HOS.Services.Nv.Types; using Ryujinx.HLE.HOS.Services.Nv.Types;
using System; using System;
using System.Threading;
namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
{ {
@ -16,6 +18,15 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
private NvHostSyncpt _syncpointManager; private NvHostSyncpt _syncpointManager;
private SyncpointWaiterHandle _waiterInformation; private SyncpointWaiterHandle _waiterInformation;
private NvFence _previousFailingFence;
private uint _failingCount;
/// <summary>
/// Max failing count until waiting on CPU.
/// FIXME: This seems enough for most of the cases, reduce if needed.
/// </summary>
private const uint FailingCountMax = 2;
public NvHostEvent(NvHostSyncpt syncpointManager, uint eventId, Horizon system) public NvHostEvent(NvHostSyncpt syncpointManager, uint eventId, Horizon system)
{ {
Fence.Id = 0; Fence.Id = 0;
@ -27,11 +38,20 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
_eventId = eventId; _eventId = eventId;
_syncpointManager = syncpointManager; _syncpointManager = syncpointManager;
ResetFailingState();
}
/// <summary>
/// Clears the failure tracking state: the remembered failing fence is set to the invalid syncpoint id
/// with a value of zero, and the failing counter is set back to zero.
/// </summary>
private void ResetFailingState()
{
_previousFailingFence.Id = NvFence.InvalidSyncPointId;
_previousFailingFence.Value = 0;
_failingCount = 0;
// NOTE(review): the closing row below is fused with the pre-change column's closing brace.
} }
// NOTE(review): each row below shows the pre-change text followed by the post-change text of the same line.
/// <summary>
/// Returns the event to its idle state: the fence value is zeroed and the state becomes Available.
/// The fence id is set to NvFence.InvalidSyncPointId in the pre-change version and to 0 in the post-change version.
/// </summary>
public void Reset() public void Reset()
{ {
Fence.Id = NvFence.InvalidSyncPointId; Fence.Id = 0;
Fence.Value = 0; Fence.Value = 0;
State = NvHostEventState.Available; State = NvHostEventState.Available;
} }
@ -52,6 +72,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
// NOTE(review): each row below shows the pre-change text followed by the post-change text of the same line.
/// <summary>
/// Signals the event. The post-change version first clears the failure tracking state via ResetFailingState().
/// </summary>
private void GpuSignaled() private void GpuSignaled()
{ {
// Added by this change: clear the failure tracking state before signaling.
ResetFailingState();
Signal(); Signal();
} }
@ -61,18 +83,50 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
{ {
gpuContext.Synchronization.UnregisterCallback(Fence.Id, _waiterInformation); gpuContext.Synchronization.UnregisterCallback(Fence.Id, _waiterInformation);
if (_previousFailingFence.Id == Fence.Id && _previousFailingFence.Value == Fence.Value)
{
_failingCount++;
}
else
{
_failingCount = 1;
_previousFailingFence = Fence;
}
Signal(); Signal();
} }
Event.WritableEvent.Clear(); Event.WritableEvent.Clear();
} }
// NOTE(review): each fused row below shows the pre-change text followed by the post-change text of the same line.
/// <summary>
/// Starts waiting on <paramref name="fence"/>: stores it as the current fence and moves the event to the Waiting state.
/// Normally a callback is registered so that GpuSignaled runs when the syncpoint reaches the fence value.
/// Once the failing counter equals FailingCountMax, the wait is instead performed synchronously on the calling thread
/// with an infinite timeout, after which GpuSignaled is invoked directly.
/// </summary>
/// <param name="gpuContext">GPU context providing the synchronization manager used for the wait or the callback registration.</param>
/// <param name="fence">Fence (syncpoint id and value) to wait on.</param>
/// <returns>
/// Post-change version only (the pre-change version returns nothing): the result of the synchronous fence wait when
/// the CPU-side path is taken, or <c>true</c> when only a callback was registered.
/// </returns>
public void Wait(GpuContext gpuContext, NvFence fence) public bool Wait(GpuContext gpuContext, NvFence fence)
{ {
Fence = fence; Fence = fence;
State = NvHostEventState.Waiting; State = NvHostEventState.Waiting;
// NOTE: nvservices code should normally always wait on the GPU side.
// However, doing so can lead to an abort or undefined behaviour when the GPU processing thread is blocked
// for a long period (for example, during shader compilation).
// The reason is that the NVN code retries the wait until it gives up: it waits and signals repeatedly,
// and aborts once the timeout has elapsed.
// As such, if the wait fails too many times, we enforce an indefinite wait on the CPU side.
// This keeps the GPU and the CPU in sync when we are slow.
// NOTE(review): the equality check assumes the counter never exceeds FailingCountMax — confirm, or consider >=.
if (_failingCount == FailingCountMax)
{
Logger.PrintWarning(LogClass.ServiceNv, "GPU processing thread is too slow, waiting on CPU...");
// Blocks the calling thread with no timeout.
bool timedOut = Fence.Wait(gpuContext, Timeout.InfiniteTimeSpan);
// Clears the failure tracking state and signals the event.
GpuSignaled();
return timedOut;
}
else
{
_waiterInformation = gpuContext.Synchronization.RegisterCallbackOnSyncpoint(Fence.Id, Fence.Value, GpuSignaled); _waiterInformation = gpuContext.Synchronization.RegisterCallbackOnSyncpoint(Fence.Id, Fence.Value, GpuSignaled);
// The fence has not been waited on synchronously here; the result is reported as true.
return true;
}
} }
public string DumpState(GpuContext gpuContext) public string DumpState(GpuContext gpuContext)