From 8e1adb95cf7f67b976f105f4cac26d3ff2986057 Mon Sep 17 00:00:00 2001 From: mpnico Date: Thu, 26 Aug 2021 23:50:28 +0200 Subject: [PATCH] Add support for HLE macros and accelerate MultiDrawElementsIndirectCount #2 (#2557) * Add support for HLE macros and accelerate MultiDrawElementsIndirectCount * Add missing barrier * Fix index buffer count * Add support check for each macro hle before use * Add missing xml doc Co-authored-by: gdkchan --- Ryujinx.Common/Hash128.cs | 6 + Ryujinx.Graphics.GAL/Capabilities.cs | 3 + Ryujinx.Graphics.GAL/IPipeline.cs | 5 + .../Engine/GPFifo/GPFifoClass.cs | 9 +- .../Engine/GPFifo/GPFifoDevice.cs | 30 +++- .../Engine/GPFifo/GPFifoProcessor.cs | 19 ++- Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs | 29 +++- Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs | 51 +++++-- Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs | 142 ++++++++++++++++++ .../Engine/MME/MacroHLEFunctionName.cs | 11 ++ .../Engine/MME/MacroHLETable.cs | 89 +++++++++++ .../Engine/MME/MacroInterpreter.cs | 8 +- Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs | 2 +- .../Engine/MME/MacroJitContext.cs | 8 +- .../Engine/Threed/DrawManager.cs | 59 ++++++++ .../Engine/Threed/ThreedClass.cs | 21 +++ Ryujinx.Graphics.Gpu/GraphicsConfig.cs | 5 + Ryujinx.Graphics.Gpu/Memory/BufferCache.cs | 12 ++ Ryujinx.Graphics.OpenGL/HwCapabilities.cs | 2 + Ryujinx.Graphics.OpenGL/Pipeline.cs | 58 ++++++- Ryujinx.Graphics.OpenGL/Renderer.cs | 1 + Ryujinx.Graphics.OpenGL/VertexArray.cs | 22 ++- 22 files changed, 552 insertions(+), 40 deletions(-) create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs create mode 100644 Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs diff --git a/Ryujinx.Common/Hash128.cs b/Ryujinx.Common/Hash128.cs index 99cd015c..04457bd0 100644 --- a/Ryujinx.Common/Hash128.cs +++ b/Ryujinx.Common/Hash128.cs @@ -9,6 +9,12 @@ namespace Ryujinx.Common public ulong Low; public ulong High; + public Hash128(ulong low, 
ulong high) + { + Low = low; + High = high; + } + public override string ToString() { return $"{High:x16}{Low:x16}"; diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs index 937c3f5b..78a99554 100644 --- a/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/Ryujinx.Graphics.GAL/Capabilities.cs @@ -11,6 +11,7 @@ namespace Ryujinx.Graphics.GAL public bool SupportsNonConstantTextureOffset { get; } public bool SupportsTextureShadowLod { get; } public bool SupportsViewportSwizzle { get; } + public bool SupportsIndirectParameters { get; } public int MaximumComputeSharedMemorySize { get; } public float MaximumSupportedAnisotropy { get; } @@ -25,6 +26,7 @@ namespace Ryujinx.Graphics.GAL bool supportsNonConstantTextureOffset, bool supportsTextureShadowLod, bool supportsViewportSwizzle, + bool supportsIndirectParameters, int maximumComputeSharedMemorySize, float maximumSupportedAnisotropy, int storageBufferOffsetAlignment) @@ -37,6 +39,7 @@ namespace Ryujinx.Graphics.GAL SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset; SupportsTextureShadowLod = supportsTextureShadowLod; SupportsViewportSwizzle = supportsViewportSwizzle; + SupportsIndirectParameters = supportsIndirectParameters; MaximumComputeSharedMemorySize = maximumComputeSharedMemorySize; MaximumSupportedAnisotropy = maximumSupportedAnisotropy; StorageBufferOffsetAlignment = storageBufferOffsetAlignment; diff --git a/Ryujinx.Graphics.GAL/IPipeline.cs b/Ryujinx.Graphics.GAL/IPipeline.cs index b2f9d5cb..a5af6391 100644 --- a/Ryujinx.Graphics.GAL/IPipeline.cs +++ b/Ryujinx.Graphics.GAL/IPipeline.cs @@ -19,6 +19,8 @@ namespace Ryujinx.Graphics.GAL int stencilValue, int stencilMask); + void CommandBufferBarrier(); + void CopyBuffer(BufferHandle source, BufferHandle destination, int srcOffset, int dstOffset, int size); void DispatchCompute(int groupsX, int groupsY, int groupsZ); @@ -33,6 +35,9 @@ namespace Ryujinx.Graphics.GAL void EndTransformFeedback(); + void 
MultiDrawIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride); + void MultiDrawIndexedIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride); + void SetAlphaTest(bool enable, float reference, CompareOp op); void SetBlendState(int index, BlendDescriptor blend); diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs index 28822f4e..fe49b0f2 100644 --- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs @@ -161,6 +161,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo /// Method call argument public void SetReference(int argument) { + _context.Renderer.Pipeline.CommandBufferBarrier(); + _context.CreateHostSyncIfNeeded(); } @@ -195,10 +197,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo /// Pushes an argument to a macro. /// /// Index of the macro + /// GPU virtual address where the command word is located /// Argument to be pushed to the macro - public void MmePushArgument(int index, int argument) + public void MmePushArgument(int index, ulong gpuVa, int argument) { - _macros[index].PushArgument(argument); + _macros[index].PushArgument(gpuVa, argument); } /// @@ -208,7 +211,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo /// Initial argument passed to the macro public void MmeStart(int index, int argument) { - _macros[index].StartExecution(argument); + _macros[index].StartExecution(_context, _parent, _macroCode, argument); } /// diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs index ada3bc4b..b3de738d 100644 --- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs +++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoDevice.cs @@ -54,11 +54,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo /// /// Fetch the command buffer. 
/// - public void Fetch(MemoryManager memoryManager) + /// If true, flushes potential GPU written data before reading the command buffer + public void Fetch(MemoryManager memoryManager, bool flush = true) { if (Words == null) { - Words = MemoryMarshal.Cast(memoryManager.GetSpan(EntryAddress, (int)EntryCount * 4, true)).ToArray(); + Words = MemoryMarshal.Cast(memoryManager.GetSpan(EntryAddress, (int)EntryCount * 4, flush)).ToArray(); } } } @@ -73,6 +74,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo private readonly AutoResetEvent _event; private bool _interrupt; + private int _flushSkips; /// /// Creates a new instance of the GPU General Purpose FIFO device. @@ -188,8 +190,16 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo // Process command buffers. while (_ibEnable && !_interrupt && _commandBufferQueue.TryDequeue(out CommandBuffer entry)) { + bool flushCommandBuffer = true; + + if (_flushSkips != 0) + { + _flushSkips--; + flushCommandBuffer = false; + } + _currentCommandBuffer = entry; - _currentCommandBuffer.Fetch(entry.Processor.MemoryManager); + _currentCommandBuffer.Fetch(entry.Processor.MemoryManager, flushCommandBuffer); // If we are changing the current channel, // we need to force all the host state to be updated. @@ -199,12 +209,24 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo entry.Processor.ForceAllDirty(); } - entry.Processor.Process(_currentCommandBuffer.Words); + entry.Processor.Process(entry.EntryAddress, _currentCommandBuffer.Words); } _interrupt = false; } + /// + /// Sets the number of flushes that should be skipped for subsequent command buffers. + /// + /// + /// This can improve performance when command buffer data only needs to be consumed by the GPU. + /// + /// The amount of flushes that should be skipped + internal void SetFlushSkips(int count) + { + _flushSkips = count; + } + /// /// Interrupts command processing. This will break out of the DispatchCalls loop. 
/// diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs index ea34d6cd..096b795c 100644 --- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs +++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs @@ -28,6 +28,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo /// public MemoryManager MemoryManager => _channel.MemoryManager; + /// + /// 3D Engine. + /// + public ThreedClass ThreedClass => _3dClass; + /// /// Internal GPFIFO state. /// @@ -70,13 +75,16 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo /// /// Processes a command buffer. /// + /// Base GPU virtual address of the command buffer /// Command buffer - public void Process(ReadOnlySpan commandBuffer) + public void Process(ulong baseGpuVa, ReadOnlySpan commandBuffer) { for (int index = 0; index < commandBuffer.Length; index++) { int command = commandBuffer[index]; + ulong gpuVa = baseGpuVa + (ulong)index * 4; + if (_state.MethodCount != 0) { if (TryFastI2mBufferUpdate(commandBuffer, ref index)) @@ -84,7 +92,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo continue; } - Send(_state.Method, command, _state.SubChannel, _state.MethodCount <= 1); + Send(gpuVa, _state.Method, command, _state.SubChannel, _state.MethodCount <= 1); if (!_state.NonIncrementing) { @@ -120,7 +128,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo _state.NonIncrementing = meth.SecOp == SecOp.NonIncMethod; break; case SecOp.ImmdDataMethod: - Send(meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true); + Send(gpuVa, meth.MethodAddress, meth.ImmdData, meth.MethodSubchannel, true); break; } } @@ -198,8 +206,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo /// /// Sends a uncompressed method for processing by the graphics pipeline. 
/// + /// GPU virtual address where the command word is located /// Method to be processed - private void Send(int offset, int argument, int subChannel, bool isLastCall) + private void Send(ulong gpuVa, int offset, int argument, int subChannel, bool isLastCall) { if (offset < 0x60) { @@ -243,7 +252,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo if ((offset & 1) != 0) { - _fifoClass.MmePushArgument(macroIndex, argument); + _fifoClass.MmePushArgument(macroIndex, gpuVa, argument); } else { diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs b/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs index b957de08..640687f0 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MME/IMacroEE.cs @@ -4,6 +4,33 @@ using System.Collections.Generic; namespace Ryujinx.Graphics.Gpu.Engine.MME { + /// + /// FIFO word. + /// + struct FifoWord + { + /// + /// GPU virtual address where the word is located in memory. + /// + public ulong GpuVa { get; } + + /// + /// Word value. + /// + public int Word { get; } + + /// + /// Creates a new FIFO word. + /// + /// GPU virtual address where the word is located in memory + /// Word value + public FifoWord(ulong gpuVa, int word) + { + GpuVa = gpuVa; + Word = word; + } + } + /// /// Macro Execution Engine interface. /// @@ -12,7 +39,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// /// Arguments FIFO. /// - Queue Fifo { get; } + Queue Fifo { get; } /// /// Should execute the GPU Macro code being passed. 
diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs b/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs index 1a79afb9..9d1dbc8f 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MME/Macro.cs @@ -1,4 +1,6 @@ using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Gpu.Engine.GPFifo; +using Ryujinx.Graphics.Gpu.Memory; using System; namespace Ryujinx.Graphics.Gpu.Engine.MME @@ -13,10 +15,10 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// public int Position { get; } + private IMacroEE _executionEngine; private bool _executionPending; private int _argument; - - private readonly IMacroEE _executionEngine; + private MacroHLEFunctionName _hleFunction; /// /// Creates a new instance of the GPU cached macro program. @@ -26,28 +28,47 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME { Position = position; + _executionEngine = null; _executionPending = false; _argument = 0; - - if (GraphicsConfig.EnableMacroJit) - { - _executionEngine = new MacroJit(); - } - else - { - _executionEngine = new MacroInterpreter(); - } + _hleFunction = MacroHLEFunctionName.None; } /// /// Sets the first argument for the macro call. 
/// + /// GPU context where the macro code is being executed + /// GPU GP FIFO command processor + /// Code to be executed /// First argument - public void StartExecution(int argument) + public void StartExecution(GpuContext context, GPFifoProcessor processor, ReadOnlySpan code, int argument) { _argument = argument; _executionPending = true; + + if (_executionEngine == null) + { + if (GraphicsConfig.EnableMacroHLE && MacroHLETable.TryGetMacroHLEFunction(code.Slice(Position), context.Capabilities, out _hleFunction)) + { + _executionEngine = new MacroHLE(processor, _hleFunction); + } + else if (GraphicsConfig.EnableMacroJit) + { + _executionEngine = new MacroJit(); + } + else + { + _executionEngine = new MacroInterpreter(); + } + } + + if (_hleFunction == MacroHLEFunctionName.MultiDrawElementsIndirectCount) + { + // We don't consume the parameter buffer value, so we don't need to flush it. + // Doing so improves performance if the value was written by a GPU shader. + context.GPFifo.SetFlushSkips(2); + } } /// @@ -60,7 +81,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME if (_executionPending) { _executionPending = false; - _executionEngine?.Execute(code.Slice(Position), state, _argument); } } @@ -68,10 +88,11 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// /// Pushes an argument to the macro call argument FIFO. 
/// + /// GPU virtual address where the command word is located /// Argument to be pushed - public void PushArgument(int argument) + public void PushArgument(ulong gpuVa, int argument) { - _executionEngine?.Fifo.Enqueue(argument); + _executionEngine?.Fifo.Enqueue(new FifoWord(gpuVa, argument)); } } } diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs new file mode 100644 index 00000000..77b44e81 --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs @@ -0,0 +1,142 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.GPFifo; +using Ryujinx.Graphics.Gpu.Engine.Threed; +using Ryujinx.Graphics.Gpu.Memory; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// Macro High-level emulation. + /// + class MacroHLE : IMacroEE + { + private readonly GPFifoProcessor _processor; + private readonly MacroHLEFunctionName _functionName; + + /// + /// Arguments FIFO. + /// + public Queue Fifo { get; } + + /// + /// Creates a new instance of the HLE macro handler. + /// + /// GPU context the macro is being executed on + /// GPU memory manager + /// 3D engine where this macro is being called + /// Name of the HLE macro function to be called + public MacroHLE(GPFifoProcessor processor, MacroHLEFunctionName functionName) + { + _processor = processor; + _functionName = functionName; + + Fifo = new Queue(); + } + + /// + /// Executes a macro program until it exits. 
+ /// + /// Code of the program to execute + /// GPU state at the time of the call + /// Optional argument passed to the program, 0 if not used + public void Execute(ReadOnlySpan code, IDeviceState state, int arg0) + { + switch (_functionName) + { + case MacroHLEFunctionName.MultiDrawElementsIndirectCount: + MultiDrawElementsIndirectCount(state, arg0); + break; + default: + throw new NotImplementedException(_functionName.ToString()); + } + } + + /// + /// Performs a indirect multi-draw, with parameters from a GPU buffer. + /// + /// GPU state at the time of the call + /// First argument of the call + private void MultiDrawElementsIndirectCount(IDeviceState state, int arg0) + { + int arg1 = FetchParam().Word; + int arg2 = FetchParam().Word; + int arg3 = FetchParam().Word; + + int startOffset = arg0; + int endOffset = arg1; + var topology = (PrimitiveTopology)arg2; + int paddingWords = arg3; + int maxDrawCount = endOffset - startOffset; + int stride = paddingWords * 4 + 0x14; + int indirectBufferSize = maxDrawCount * stride; + + ulong parameterBufferGpuVa = FetchParam().GpuVa; + ulong indirectBufferGpuVa = 0; + + int indexCount = 0; + + for (int i = 0; i < maxDrawCount; i++) + { + var count = FetchParam(); + var instanceCount = FetchParam(); + var firstIndex = FetchParam(); + var baseVertex = FetchParam(); + var baseInstance = FetchParam(); + + if (i == 0) + { + indirectBufferGpuVa = count.GpuVa; + } + + indexCount = Math.Max(indexCount, count.Word + firstIndex.Word); + + if (i != maxDrawCount - 1) + { + for (int j = 0; j < paddingWords; j++) + { + FetchParam(); + } + } + } + + // It should be empty at this point, but clear it just to be safe. 
+ Fifo.Clear(); + + var parameterBuffer = _processor.MemoryManager.Physical.BufferCache.GetGpuBufferRange(_processor.MemoryManager, parameterBufferGpuVa, 4); + var indirectBuffer = _processor.MemoryManager.Physical.BufferCache.GetGpuBufferRange(_processor.MemoryManager, indirectBufferGpuVa, (ulong)indirectBufferSize); + + _processor.ThreedClass.MultiDrawIndirectCount(indexCount, topology, indirectBuffer, parameterBuffer, maxDrawCount, stride); + } + + /// + /// Fetches a arguments from the arguments FIFO. + /// + /// The call argument, or a 0 value with null address if the FIFO is empty + private FifoWord FetchParam() + { + if (!Fifo.TryDequeue(out var value)) + { + Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument."); + + return new FifoWord(0UL, 0); + } + + return value; + } + + /// + /// Performs a GPU method call. + /// + /// Current GPU state + /// Address, in words, of the method + /// Call argument + private static void Send(IDeviceState state, int methAddr, int value) + { + state.Write(methAddr * 4, value); + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs new file mode 100644 index 00000000..60354a9b --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLEFunctionName.cs @@ -0,0 +1,11 @@ +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// Name of the High-level implementation of a Macro function. 
+ /// + enum MacroHLEFunctionName + { + None, + MultiDrawElementsIndirectCount + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs new file mode 100644 index 00000000..77d041ad --- /dev/null +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLETable.cs @@ -0,0 +1,89 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using System; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Engine.MME +{ + /// + /// Table with information about High-level implementations of GPU Macro code. + /// + static class MacroHLETable + { + /// + /// Macro High-level implementation table entry. + /// + struct TableEntry + { + /// + /// Name of the Macro function. + /// + public MacroHLEFunctionName Name { get; } + + /// + /// Hash of the original binary Macro function code. + /// + public Hash128 Hash { get; } + + /// + /// Size (in bytes) of the original binary Macro function code. + /// + public int Length { get; } + + /// + /// Creates a new table entry. + /// + /// Name of the Macro function + /// Hash of the original binary Macro function code + /// Size (in bytes) of the original binary Macro function code + public TableEntry(MacroHLEFunctionName name, Hash128 hash, int length) + { + Name = name; + Hash = hash; + Length = length; + } + } + + private static readonly TableEntry[] Table = new TableEntry[] + { + new TableEntry(MacroHLEFunctionName.MultiDrawElementsIndirectCount, new Hash128(0x890AF57ED3FB1C37, 0x35D0C95C61F5386F), 0x19C) + }; + + private static bool IsMacroHLESupported(Capabilities caps, MacroHLEFunctionName name) + { + if (name == MacroHLEFunctionName.MultiDrawElementsIndirectCount) + { + return caps.SupportsIndirectParameters; + } + + return false; + } + + /// + /// Checks if there's a fast, High-level implementation of the specified Macro code available. 
+ /// + /// Macro code to be checked + /// Renderer capabilities to check for this macro HLE support + /// Name of the function if an implementation is available and supported, otherwise MacroHLEFunctionName.None + /// True if there is an implementation available and supported, false otherwise + public static bool TryGetMacroHLEFunction(ReadOnlySpan<int> code, Capabilities caps, out MacroHLEFunctionName name) + { + var mc = MemoryMarshal.Cast<int, byte>(code); + + for (int i = 0; i < Table.Length; i++) + { + ref var entry = ref Table[i]; + + var hash = XXHash128.ComputeHash(mc.Slice(0, entry.Length)); + if (hash == entry.Hash) + { + name = IsMacroHLESupported(caps, entry.Name) ? entry.Name : MacroHLEFunctionName.None; // Never report an unsupported macro: callers key follow-up work (e.g. flush skips) off this value. + return name != MacroHLEFunctionName.None; + } + } + + name = MacroHLEFunctionName.None; + return false; + } + } +} diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs index 0173a7fb..df6ee040 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroInterpreter.cs @@ -13,7 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// /// Arguments FIFO. 
/// - public Queue Fifo { get; } + public Queue Fifo { get; } private int[] _gprs; @@ -34,7 +34,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// public MacroInterpreter() { - Fifo = new Queue(); + Fifo = new Queue(); _gprs = new int[8]; } @@ -364,14 +364,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// The call argument, or 0 if the FIFO is empty private int FetchParam() { - if (!Fifo.TryDequeue(out int value)) + if (!Fifo.TryDequeue(out var value)) { Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument."); return 0; } - return value; + return value.Word; } /// diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs index f0393dd1..4077f74e 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJit.cs @@ -14,7 +14,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// /// Arguments FIFO. /// - public Queue Fifo => _context.Fifo; + public Queue Fifo => _context.Fifo; private MacroJitCompiler.MacroExecute _execute; diff --git a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs index aa31c9ee..52c2a11b 100644 --- a/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs +++ b/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitContext.cs @@ -12,22 +12,22 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME /// /// Arguments FIFO. /// - public Queue Fifo { get; } = new Queue(); + public Queue Fifo { get; } = new Queue(); /// /// Fetches a arguments from the arguments FIFO. 
/// - /// + /// The call argument, or 0 if the FIFO is empty public int FetchParam() { - if (!Fifo.TryDequeue(out int value)) + if (!Fifo.TryDequeue(out var value)) { Logger.Warning?.Print(LogClass.Gpu, "Macro attempted to fetch an inexistent argument."); return 0; } - return value; + return value.Word; } /// diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs index d58f175d..2443917c 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs @@ -26,6 +26,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed private int _instanceIndex; + private const int IndexBufferCountMethodOffset = 0x5f8; + /// /// Creates a new instance of the draw manager. /// @@ -304,6 +306,63 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed _drawState.DrawIndexed = oldDrawIndexed; } + /// + /// Performs a indirect multi-draw, with parameters from a GPU buffer. + /// + /// 3D engine where this method is being called + /// Primitive topology + /// GPU buffer with the draw parameters, such as count, first index, etc + /// GPU buffer with the draw count + /// Maximum number of draws that can be made + /// Distance in bytes between each element on the array + public void MultiDrawIndirectCount( + ThreedClass engine, + int indexCount, + PrimitiveTopology topology, + BufferRange indirectBuffer, + BufferRange parameterBuffer, + int maxDrawCount, + int stride) + { + engine.Write(IndexBufferCountMethodOffset * 4, indexCount); + + _context.Renderer.Pipeline.SetPrimitiveTopology(topology); + _drawState.Topology = topology; + + ConditionalRenderEnabled renderEnable = ConditionalRendering.GetRenderEnable( + _context, + _channel.MemoryManager, + _state.State.RenderEnableAddress, + _state.State.RenderEnableCondition); + + if (renderEnable == ConditionalRenderEnabled.False) + { + _drawState.DrawIndexed = false; + return; + } + + _drawState.FirstIndex = _state.State.IndexBufferState.First; + 
_drawState.IndexCount = indexCount; + + engine.UpdateState(); + + if (_drawState.DrawIndexed) + { + _context.Renderer.Pipeline.MultiDrawIndexedIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride); + } + else + { + _context.Renderer.Pipeline.MultiDrawIndirectCount(indirectBuffer, parameterBuffer, maxDrawCount, stride); + } + + _drawState.DrawIndexed = false; + + if (renderEnable == ConditionalRenderEnabled.Host) + { + _context.Renderer.Pipeline.EndHostConditionalRendering(); + } + } + /// /// Perform any deferred draws. /// This is used for instanced draws. diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs index 37c8fec2..3d02af96 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs @@ -1,4 +1,5 @@ using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.InlineToMemory; using System; using System.Collections.Generic; @@ -433,5 +434,25 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { return 0; } + + /// + /// Performs a indirect multi-draw, with parameters from a GPU buffer. 
+ /// + /// Index Buffer Count + /// Primitive topology + /// GPU buffer with the draw parameters, such as count, first index, etc + /// GPU buffer with the draw count + /// Maximum number of draws that can be made + /// Distance in bytes between each element on the array + public void MultiDrawIndirectCount( + int indexCount, + PrimitiveTopology topology, + BufferRange indirectBuffer, + BufferRange parameterBuffer, + int maxDrawCount, + int stride) + { + _drawManager.MultiDrawIndirectCount(this, indexCount, topology, indirectBuffer, parameterBuffer, maxDrawCount, stride); + } } } diff --git a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs index 7ef102e2..d58b8da7 100644 --- a/Ryujinx.Graphics.Gpu/GraphicsConfig.cs +++ b/Ryujinx.Graphics.Gpu/GraphicsConfig.cs @@ -33,6 +33,11 @@ namespace Ryujinx.Graphics.Gpu /// public static bool EnableMacroJit = true; + /// + /// Enables or disables high-level emulation of common GPU Macro code. + /// + public static bool EnableMacroHLE = true; + /// /// Title id of the current running game. /// Used by the shader cache. diff --git a/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs b/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs index 58dd838e..63d22150 100644 --- a/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs +++ b/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs @@ -297,6 +297,18 @@ namespace Ryujinx.Graphics.Gpu.Memory buffer.SignalModified(address, size); } + /// + /// Gets a buffer sub-range for a given GPU memory range. + /// + /// GPU memory manager where the buffer is mapped + /// Start GPU virtual address of the buffer + /// Size in bytes of the buffer + /// The buffer sub-range for the given range + public BufferRange GetGpuBufferRange(MemoryManager memoryManager, ulong gpuVa, ulong size) + { + return GetBufferRange(TranslateAndCreateBuffer(memoryManager, gpuVa, size), size); + } + /// /// Gets a buffer sub-range starting at a given memory address. 
/// diff --git a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index 44365ca7..dd917b7b 100644 --- a/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -13,6 +13,7 @@ namespace Ryujinx.Graphics.OpenGL private static readonly Lazy _supportsSeamlessCubemapPerTexture = new Lazy(() => HasExtension("GL_ARB_seamless_cubemap_per_texture")); private static readonly Lazy _supportsTextureShadowLod = new Lazy(() => HasExtension("GL_EXT_texture_shadow_lod")); private static readonly Lazy _supportsViewportSwizzle = new Lazy(() => HasExtension("GL_NV_viewport_swizzle")); + private static readonly Lazy _supportsIndirectParameters = new Lazy(() => HasExtension("GL_ARB_indirect_parameters")); private static readonly Lazy _maximumComputeSharedMemorySize = new Lazy(() => GetLimit(All.MaxComputeSharedMemorySize)); private static readonly Lazy _storageBufferOffsetAlignment = new Lazy(() => GetLimit(All.ShaderStorageBufferOffsetAlignment)); @@ -46,6 +47,7 @@ namespace Ryujinx.Graphics.OpenGL public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value; public static bool SupportsTextureShadowLod => _supportsTextureShadowLod.Value; public static bool SupportsViewportSwizzle => _supportsViewportSwizzle.Value; + public static bool SupportsIndirectParameters => _supportsIndirectParameters.Value; public static bool SupportsMismatchingViewFormat => _gpuVendor.Value != GpuVendor.AmdWindows && _gpuVendor.Value != GpuVendor.IntelWindows; public static bool SupportsNonConstantTextureOffset => _gpuVendor.Value == GpuVendor.Nvidia; diff --git a/Ryujinx.Graphics.OpenGL/Pipeline.cs b/Ryujinx.Graphics.OpenGL/Pipeline.cs index be526fa9..24dd97f9 100644 --- a/Ryujinx.Graphics.OpenGL/Pipeline.cs +++ b/Ryujinx.Graphics.OpenGL/Pipeline.cs @@ -166,6 +166,11 @@ namespace Ryujinx.Graphics.OpenGL } } + public void CommandBufferBarrier() + { + 
GL.MemoryBarrier(MemoryBarrierFlags.CommandBarrierBit); + } + public void CopyBuffer(BufferHandle source, BufferHandle destination, int srcOffset, int dstOffset, int size) { Buffer.Copy(source, destination, srcOffset, dstOffset, size); @@ -543,6 +548,57 @@ namespace Ryujinx.Graphics.OpenGL _tfEnabled = false; } + public void MultiDrawIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride) + { + if (!_program.IsLinked) + { + Logger.Debug?.Print(LogClass.Gpu, "Draw error, shader not linked."); + return; + } + + PreDraw(); + + GL.BindBuffer((BufferTarget)All.DrawIndirectBuffer, indirectBuffer.Handle.ToInt32()); + GL.BindBuffer((BufferTarget)All.ParameterBuffer, parameterBuffer.Handle.ToInt32()); + + GL.MultiDrawArraysIndirectCount( + _primitiveType, + (IntPtr)indirectBuffer.Offset, + (IntPtr)parameterBuffer.Offset, + maxDrawCount, + stride); + + PostDraw(); + } + + public void MultiDrawIndexedIndirectCount(BufferRange indirectBuffer, BufferRange parameterBuffer, int maxDrawCount, int stride) + { + if (!_program.IsLinked) + { + Logger.Debug?.Print(LogClass.Gpu, "Draw error, shader not linked."); + return; + } + + PreDraw(); + + _vertexArray.SetRangeOfIndexBuffer(); + + GL.BindBuffer((BufferTarget)All.DrawIndirectBuffer, indirectBuffer.Handle.ToInt32()); + GL.BindBuffer((BufferTarget)All.ParameterBuffer, parameterBuffer.Handle.ToInt32()); + + GL.MultiDrawElementsIndirectCount( + _primitiveType, + (Version46)_elementsType, + (IntPtr)indirectBuffer.Offset, + (IntPtr)parameterBuffer.Offset, + maxDrawCount, + stride); + + _vertexArray.RestoreIndexBuffer(); + + PostDraw(); + } + public void SetAlphaTest(bool enable, float reference, CompareOp op) { if (!enable) @@ -741,7 +797,7 @@ namespace Ryujinx.Graphics.OpenGL EnsureVertexArray(); - _vertexArray.SetIndexBuffer(buffer.Handle); + _vertexArray.SetIndexBuffer(buffer); } public void SetLogicOpState(bool enable, LogicalOp op) diff --git a/Ryujinx.Graphics.OpenGL/Renderer.cs 
b/Ryujinx.Graphics.OpenGL/Renderer.cs index 01072176..6b620bb8 100644 --- a/Ryujinx.Graphics.OpenGL/Renderer.cs +++ b/Ryujinx.Graphics.OpenGL/Renderer.cs @@ -107,6 +107,7 @@ namespace Ryujinx.Graphics.OpenGL HwCapabilities.SupportsNonConstantTextureOffset, HwCapabilities.SupportsTextureShadowLod, HwCapabilities.SupportsViewportSwizzle, + HwCapabilities.SupportsIndirectParameters, HwCapabilities.MaximumComputeSharedMemorySize, HwCapabilities.MaximumSupportedAnisotropy, HwCapabilities.StorageBufferOffsetAlignment); diff --git a/Ryujinx.Graphics.OpenGL/VertexArray.cs b/Ryujinx.Graphics.OpenGL/VertexArray.cs index f2fcba1f..bdf14481 100644 --- a/Ryujinx.Graphics.OpenGL/VertexArray.cs +++ b/Ryujinx.Graphics.OpenGL/VertexArray.cs @@ -20,12 +20,17 @@ namespace Ryujinx.Graphics.OpenGL private uint _vertexAttribsInUse; private uint _vertexBuffersInUse; + private BufferRange _indexBuffer; + private BufferHandle _tempIndexBuffer; + public VertexArray() { Handle = GL.GenVertexArray(); _vertexAttribs = new VertexAttribDescriptor[Constants.MaxVertexAttribs]; _vertexBuffers = new VertexBufferDescriptor[Constants.MaxVertexBuffers]; + + _tempIndexBuffer = Buffer.Create(); } public void Bind() @@ -120,9 +125,22 @@ namespace Ryujinx.Graphics.OpenGL } } - public void SetIndexBuffer(BufferHandle buffer) + public void SetIndexBuffer(BufferRange range) { - GL.BindBuffer(BufferTarget.ElementArrayBuffer, buffer.ToInt32()); + _indexBuffer = range; + GL.BindBuffer(BufferTarget.ElementArrayBuffer, range.Handle.ToInt32()); + } + + public void SetRangeOfIndexBuffer() + { + Buffer.Resize(_tempIndexBuffer, _indexBuffer.Size); + Buffer.Copy(_indexBuffer.Handle, _tempIndexBuffer, _indexBuffer.Offset, 0, _indexBuffer.Size); + GL.BindBuffer(BufferTarget.ElementArrayBuffer, _tempIndexBuffer.ToInt32()); + } + + public void RestoreIndexBuffer() + { + GL.BindBuffer(BufferTarget.ElementArrayBuffer, _indexBuffer.Handle.ToInt32()); } public void Validate()