nvservices: mitigate abort with heavy load on the GPU processing thread (#1173)
* nvservices: mitigate abort with heavy load on the GPU processing thread. This should fix Mario Tennis and LM3 regressions with syncpoints. NOTE: Mario Tennis seems to have another issue related to the texture cache that happens randomly when starting a match. PS: Also add a debug logger for all known ioctl call to facilitate debugging and add a missing UpdateMin in EventSignal. * Address LDj3SNuD's comment * Address gdkchan's comment
This commit is contained in:
parent
71dbb38b9a
commit
81cba3c3df
3 changed files with 90 additions and 16 deletions
|
@ -1,7 +1,9 @@
|
||||||
using Ryujinx.HLE.HOS.Kernel.Memory;
|
using Ryujinx.Common.Logging;
|
||||||
|
using Ryujinx.HLE.HOS.Kernel.Memory;
|
||||||
using Ryujinx.HLE.HOS.Kernel.Process;
|
using Ryujinx.HLE.HOS.Kernel.Process;
|
||||||
using System;
|
using System;
|
||||||
using System.Diagnostics;
|
using System.Diagnostics;
|
||||||
|
using System.Reflection;
|
||||||
using System.Runtime.CompilerServices;
|
using System.Runtime.CompilerServices;
|
||||||
using System.Runtime.InteropServices;
|
using System.Runtime.InteropServices;
|
||||||
|
|
||||||
|
@ -50,11 +52,18 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices
|
||||||
protected delegate NvInternalResult IoctlProcessorInline<T, T1>(ref T arguments, ref T1 inlineData);
|
protected delegate NvInternalResult IoctlProcessorInline<T, T1>(ref T arguments, ref T1 inlineData);
|
||||||
protected delegate NvInternalResult IoctlProcessorInlineSpan<T, T1>(ref T arguments, Span<T1> inlineData);
|
protected delegate NvInternalResult IoctlProcessorInlineSpan<T, T1>(ref T arguments, Span<T1> inlineData);
|
||||||
|
|
||||||
|
private static NvInternalResult PrintResult(MethodInfo info, NvInternalResult result)
|
||||||
|
{
|
||||||
|
Logger.PrintDebug(LogClass.ServiceNv, $"{info.Name} returned result {result}");
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessor<T> callback, Span<byte> arguments) where T : struct
|
protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessor<T> callback, Span<byte> arguments) where T : struct
|
||||||
{
|
{
|
||||||
Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
|
Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
|
||||||
|
|
||||||
return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0]);
|
return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInline<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
|
protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInline<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
|
||||||
|
@ -62,19 +71,19 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices
|
||||||
Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
|
Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
|
||||||
Debug.Assert(inlineBuffer.Length == Unsafe.SizeOf<T1>());
|
Debug.Assert(inlineBuffer.Length == Unsafe.SizeOf<T1>());
|
||||||
|
|
||||||
return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], ref MemoryMarshal.Cast<byte, T1>(inlineBuffer)[0]);
|
return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], ref MemoryMarshal.Cast<byte, T1>(inlineBuffer)[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessorSpan<T> callback, Span<byte> arguments) where T : struct
|
protected static NvInternalResult CallIoctlMethod<T>(IoctlProcessorSpan<T> callback, Span<byte> arguments) where T : struct
|
||||||
{
|
{
|
||||||
return callback(MemoryMarshal.Cast<byte, T>(arguments));
|
return PrintResult(callback.Method, callback(MemoryMarshal.Cast<byte, T>(arguments)));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInlineSpan<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
|
protected static NvInternalResult CallIoctlMethod<T, T1>(IoctlProcessorInlineSpan<T, T1> callback, Span<byte> arguments, Span<byte> inlineBuffer) where T : struct where T1 : struct
|
||||||
{
|
{
|
||||||
Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
|
Debug.Assert(arguments.Length == Unsafe.SizeOf<T>());
|
||||||
|
|
||||||
return callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], MemoryMarshal.Cast<byte, T1>(inlineBuffer));
|
return PrintResult(callback.Method, callback(ref MemoryMarshal.Cast<byte, T>(arguments)[0], MemoryMarshal.Cast<byte, T1>(inlineBuffer)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract void Close();
|
public abstract void Close();
|
||||||
|
|
|
@ -310,6 +310,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
|
||||||
|
|
||||||
hostEvent.State = NvHostEventState.Cancelled;
|
hostEvent.State = NvHostEventState.Cancelled;
|
||||||
|
|
||||||
|
_device.System.HostSyncpoint.UpdateMin(hostEvent.Fence.Id);
|
||||||
|
|
||||||
return NvInternalResult.Success;
|
return NvInternalResult.Success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -398,20 +400,29 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
|
||||||
hostEvent.State == NvHostEventState.Signaled ||
|
hostEvent.State == NvHostEventState.Signaled ||
|
||||||
hostEvent.State == NvHostEventState.Cancelled))
|
hostEvent.State == NvHostEventState.Cancelled))
|
||||||
{
|
{
|
||||||
hostEvent.Wait(_device.Gpu, fence);
|
bool timedOut = hostEvent.Wait(_device.Gpu, fence);
|
||||||
|
|
||||||
if (isWaitEventCmd)
|
if (timedOut)
|
||||||
{
|
{
|
||||||
value = ((fence.Id & 0xfff) << 16) | 0x10000000;
|
if (isWaitEventCmd)
|
||||||
|
{
|
||||||
|
value = ((fence.Id & 0xfff) << 16) | 0x10000000;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
value = fence.Id << 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
value |= eventIndex;
|
||||||
|
|
||||||
|
result = NvInternalResult.TryAgain;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
value = fence.Id << 4;
|
value = fence.Value;
|
||||||
|
|
||||||
|
return NvInternalResult.Success;
|
||||||
}
|
}
|
||||||
|
|
||||||
value |= eventIndex;
|
|
||||||
|
|
||||||
result = NvInternalResult.TryAgain;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
|
using Ryujinx.Common.Logging;
|
||||||
using Ryujinx.Graphics.Gpu;
|
using Ryujinx.Graphics.Gpu;
|
||||||
using Ryujinx.Graphics.Gpu.Synchronization;
|
using Ryujinx.Graphics.Gpu.Synchronization;
|
||||||
using Ryujinx.HLE.HOS.Kernel.Threading;
|
using Ryujinx.HLE.HOS.Kernel.Threading;
|
||||||
using Ryujinx.HLE.HOS.Services.Nv.Types;
|
using Ryujinx.HLE.HOS.Services.Nv.Types;
|
||||||
using System;
|
using System;
|
||||||
|
using System.Threading;
|
||||||
|
|
||||||
namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
|
namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
|
||||||
{
|
{
|
||||||
|
@ -16,6 +18,15 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
|
||||||
private NvHostSyncpt _syncpointManager;
|
private NvHostSyncpt _syncpointManager;
|
||||||
private SyncpointWaiterHandle _waiterInformation;
|
private SyncpointWaiterHandle _waiterInformation;
|
||||||
|
|
||||||
|
private NvFence _previousFailingFence;
|
||||||
|
private uint _failingCount;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Max failing count until waiting on CPU.
|
||||||
|
/// FIXME: This seems enough for most of the cases, reduce if needed.
|
||||||
|
/// </summary>
|
||||||
|
private const uint FailingCountMax = 2;
|
||||||
|
|
||||||
public NvHostEvent(NvHostSyncpt syncpointManager, uint eventId, Horizon system)
|
public NvHostEvent(NvHostSyncpt syncpointManager, uint eventId, Horizon system)
|
||||||
{
|
{
|
||||||
Fence.Id = 0;
|
Fence.Id = 0;
|
||||||
|
@ -27,11 +38,20 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
|
||||||
_eventId = eventId;
|
_eventId = eventId;
|
||||||
|
|
||||||
_syncpointManager = syncpointManager;
|
_syncpointManager = syncpointManager;
|
||||||
|
|
||||||
|
ResetFailingState();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ResetFailingState()
|
||||||
|
{
|
||||||
|
_previousFailingFence.Id = NvFence.InvalidSyncPointId;
|
||||||
|
_previousFailingFence.Value = 0;
|
||||||
|
_failingCount = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void Reset()
|
public void Reset()
|
||||||
{
|
{
|
||||||
Fence.Id = NvFence.InvalidSyncPointId;
|
Fence.Id = 0;
|
||||||
Fence.Value = 0;
|
Fence.Value = 0;
|
||||||
State = NvHostEventState.Available;
|
State = NvHostEventState.Available;
|
||||||
}
|
}
|
||||||
|
@ -52,6 +72,8 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
|
||||||
|
|
||||||
private void GpuSignaled()
|
private void GpuSignaled()
|
||||||
{
|
{
|
||||||
|
ResetFailingState();
|
||||||
|
|
||||||
Signal();
|
Signal();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -61,18 +83,50 @@ namespace Ryujinx.HLE.HOS.Services.Nv.NvDrvServices.NvHostCtrl
|
||||||
{
|
{
|
||||||
gpuContext.Synchronization.UnregisterCallback(Fence.Id, _waiterInformation);
|
gpuContext.Synchronization.UnregisterCallback(Fence.Id, _waiterInformation);
|
||||||
|
|
||||||
|
if (_previousFailingFence.Id == Fence.Id && _previousFailingFence.Value == Fence.Value)
|
||||||
|
{
|
||||||
|
_failingCount++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_failingCount = 1;
|
||||||
|
|
||||||
|
_previousFailingFence = Fence;
|
||||||
|
}
|
||||||
|
|
||||||
Signal();
|
Signal();
|
||||||
}
|
}
|
||||||
|
|
||||||
Event.WritableEvent.Clear();
|
Event.WritableEvent.Clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void Wait(GpuContext gpuContext, NvFence fence)
|
public bool Wait(GpuContext gpuContext, NvFence fence)
|
||||||
{
|
{
|
||||||
Fence = fence;
|
Fence = fence;
|
||||||
State = NvHostEventState.Waiting;
|
State = NvHostEventState.Waiting;
|
||||||
|
|
||||||
_waiterInformation = gpuContext.Synchronization.RegisterCallbackOnSyncpoint(Fence.Id, Fence.Value, GpuSignaled);
|
// NOTE: nvservices code should always wait on the GPU side.
|
||||||
|
// If we do this, we may get an abort or undefined behaviour when the GPU processing thread is blocked for a long period (for example, during shader compilation).
|
||||||
|
// The reason for this is that the NVN code will try to wait until giving up.
|
||||||
|
// This is done by trying to wait and signal multiple times until aborting after you are past the timeout.
|
||||||
|
// As such, if it fails too many time, we enforce a wait on the CPU side indefinitely.
|
||||||
|
// This allows to keep GPU and CPU in sync when we are slow.
|
||||||
|
if (_failingCount == FailingCountMax)
|
||||||
|
{
|
||||||
|
Logger.PrintWarning(LogClass.ServiceNv, "GPU processing thread is too slow, waiting on CPU...");
|
||||||
|
|
||||||
|
bool timedOut = Fence.Wait(gpuContext, Timeout.InfiniteTimeSpan);
|
||||||
|
|
||||||
|
GpuSignaled();
|
||||||
|
|
||||||
|
return timedOut;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_waiterInformation = gpuContext.Synchronization.RegisterCallbackOnSyncpoint(Fence.Id, Fence.Value, GpuSignaled);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public string DumpState(GpuContext gpuContext)
|
public string DumpState(GpuContext gpuContext)
|
||||||
|
|
Reference in a new issue