From 3a3b51893ee272af49d762387da5b27743786d56 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 14 May 2024 11:47:16 -0300 Subject: [PATCH] Add support for bindless textures from storage buffer on Vulkan (#6721) * Halve primitive ID when converting quads to triangles * Shader cache version bump * Add support for bindless textures from storage buffer on Vulkan --- src/Ryujinx.Graphics.GAL/Capabilities.cs | 3 +++ .../Shader/DiskCache/DiskCacheGpuAccessor.cs | 14 +++++++++++-- .../Shader/DiskCache/DiskCacheHostStorage.cs | 2 +- .../DiskCache/ParallelDiskCacheLoader.cs | 8 +++++--- .../Shader/GpuAccessor.cs | 20 ++++++++++++++++--- .../Shader/GpuChannelGraphicsState.cs | 11 ++++++++-- .../Shader/ShaderCache.cs | 2 +- src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 3 ++- .../GpuGraphicsState.cs | 10 +++++++++- src/Ryujinx.Graphics.Shader/IGpuAccessor.cs | 1 + .../Instructions/InstEmitAttribute.cs | 10 ++++++++++ .../Optimizations/BindlessElimination.cs | 4 ++-- .../Translation/ShaderDefinitions.cs | 2 ++ src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 3 ++- 14 files changed, 76 insertions(+), 17 deletions(-) diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs index 70736fbd..779ce5b5 100644 --- a/src/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs @@ -36,6 +36,7 @@ namespace Ryujinx.Graphics.GAL public readonly bool SupportsMismatchingViewFormat; public readonly bool SupportsCubemapView; public readonly bool SupportsNonConstantTextureOffset; + public readonly bool SupportsQuads; public readonly bool SupportsSeparateSampler; public readonly bool SupportsShaderBallot; public readonly bool SupportsShaderBarrierDivergence; @@ -93,6 +94,7 @@ namespace Ryujinx.Graphics.GAL bool supportsMismatchingViewFormat, bool supportsCubemapView, bool supportsNonConstantTextureOffset, + bool supportsQuads, bool supportsSeparateSampler, bool supportsShaderBallot, bool supportsShaderBarrierDivergence, @@ -146,6 +148,7 @@ namespace Ryujinx.Graphics.GAL SupportsMismatchingViewFormat = supportsMismatchingViewFormat; SupportsCubemapView = supportsCubemapView; SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset; + SupportsQuads = supportsQuads; SupportsSeparateSampler = supportsSeparateSampler; SupportsShaderBallot = supportsShaderBallot; SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs index 45f32e2d..3c7664b7 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs @@ -18,6 +18,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private readonly ShaderSpecializationState _newSpecState; private readonly int _stageIndex; private readonly bool _isVulkan; + private readonly bool _hasGeometryShader; + private readonly bool _supportsQuads; /// /// Creates a new instance of the cached GPU state accessor for shader translation. @@ -29,6 +31,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// Shader specialization state of the recompiled shader /// Resource counts shared across all shader stages /// Shader stage index + /// Indicates if a geometry shader is present public DiskCacheGpuAccessor( GpuContext context, ReadOnlyMemory data, @@ -36,7 +39,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache ShaderSpecializationState oldSpecState, ShaderSpecializationState newSpecState, ResourceCounts counts, - int stageIndex) : base(context, counts, stageIndex) + int stageIndex, + bool hasGeometryShader) : base(context, counts, stageIndex) { _data = data; _cb1Data = cb1Data; @@ -44,6 +48,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache _newSpecState = newSpecState; _stageIndex = stageIndex; _isVulkan = context.Capabilities.Api == TargetApi.Vulkan; + _hasGeometryShader = hasGeometryShader; + _supportsQuads = context.Capabilities.SupportsQuads; if (stageIndex == (int)ShaderStage.Geometry - 1) { @@ -100,7 +106,11 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// public GpuGraphicsState QueryGraphicsState() { - return _oldSpecState.GraphicsState.CreateShaderGraphicsState(!_isVulkan, _isVulkan || _oldSpecState.GraphicsState.YNegateEnabled); + return _oldSpecState.GraphicsState.CreateShaderGraphicsState( + !_isVulkan, + _supportsQuads, + _hasGeometryShader, + _isVulkan || _oldSpecState.GraphicsState.YNegateEnabled); } /// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 2c19cc4b..ea54049c 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 6577; + private const uint CodeGenVersion = 5936; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs index 153fc442..20f96462 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs @@ -601,6 +601,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache TargetApi api = _context.Capabilities.Api; + bool hasCachedGs = guestShaders[4].HasValue; + for (int stageIndex = Constants.ShaderStages - 1; stageIndex >= 0; stageIndex--) { if (guestShaders[stageIndex + 1].HasValue) @@ -610,7 +612,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache byte[] guestCode = shader.Code; byte[] cb1Data = shader.Cb1Data; - DiskCacheGpuAccessor gpuAccessor = new(_context, guestCode, cb1Data, specState, newSpecState, counts, stageIndex); + DiskCacheGpuAccessor gpuAccessor = new(_context, guestCode, cb1Data, specState, newSpecState, counts, stageIndex, hasCachedGs); TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, 0); if (nextStage != null) @@ -623,7 +625,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache byte[] guestCodeA = guestShaders[0].Value.Code; byte[] cb1DataA = guestShaders[0].Value.Cb1Data; - DiskCacheGpuAccessor gpuAccessorA = new(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0); + DiskCacheGpuAccessor gpuAccessorA = new(_context, guestCodeA, cb1DataA, specState, newSpecState, counts, 0, hasCachedGs); translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0); } @@ -711,7 +713,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache GuestCodeAndCbData shader = guestShaders[0].Value; ResourceCounts counts = new(); ShaderSpecializationState newSpecState = new(ref specState.ComputeState); - DiskCacheGpuAccessor gpuAccessor = new(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0); + DiskCacheGpuAccessor gpuAccessor = new(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0, false); gpuAccessor.InitializeReservedCounts(tfEnabled: false, vertexAsCompute: false); TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, 0); diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs index 04949690..1be75f24 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs @@ -17,6 +17,8 @@ namespace Ryujinx.Graphics.Gpu.Shader private readonly int _stageIndex; private readonly bool _compute; private readonly bool _isVulkan; + private readonly bool _hasGeometryShader; + private readonly bool _supportsQuads; /// /// Creates a new instance of the GPU state accessor for graphics shader translation. @@ -25,12 +27,20 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU channel /// Current GPU state /// Graphics shader stage index (0 = Vertex, 4 = Fragment) - public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state, int stageIndex) : base(context, state.ResourceCounts, stageIndex) + /// Indicates if a geometry shader is present + public GpuAccessor( + GpuContext context, + GpuChannel channel, + GpuAccessorState state, + int stageIndex, + bool hasGeometryShader) : base(context, state.ResourceCounts, stageIndex) { - _isVulkan = context.Capabilities.Api == TargetApi.Vulkan; _channel = channel; _state = state; _stageIndex = stageIndex; + _isVulkan = context.Capabilities.Api == TargetApi.Vulkan; + _hasGeometryShader = hasGeometryShader; + _supportsQuads = context.Capabilities.SupportsQuads; if (stageIndex == (int)ShaderStage.Geometry - 1) { @@ -105,7 +115,11 @@ namespace Ryujinx.Graphics.Gpu.Shader /// public GpuGraphicsState QueryGraphicsState() { - return _state.GraphicsState.CreateShaderGraphicsState(!_isVulkan, _isVulkan || _state.GraphicsState.YNegateEnabled); + return _state.GraphicsState.CreateShaderGraphicsState( + !_isVulkan, + _supportsQuads, + _hasGeometryShader, + _isVulkan || _state.GraphicsState.YNegateEnabled); } /// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs index b5bc4df3..765bef7d 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuChannelGraphicsState.cs @@ -106,8 +106,11 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Creates a new graphics state from this state that can be used for shader generation. /// /// Indicates if the host API supports alpha test operations + /// Indicates if the host API supports quad primitives + /// Indicates if a geometry shader is used + /// If true, indicates that the fragment origin is the upper left corner of the viewport, otherwise it is the lower left corner /// GPU graphics state that can be used for shader translation - public readonly GpuGraphicsState CreateShaderGraphicsState(bool hostSupportsAlphaTest, bool originUpperLeft) + public readonly GpuGraphicsState CreateShaderGraphicsState(bool hostSupportsAlphaTest, bool hostSupportsQuads, bool hasGeometryShader, bool originUpperLeft) { AlphaTestOp alphaTestOp; @@ -130,6 +133,9 @@ namespace Ryujinx.Graphics.Gpu.Shader }; } + bool isQuad = Topology == PrimitiveTopology.Quads || Topology == PrimitiveTopology.QuadStrip; + bool halvePrimitiveId = !hostSupportsQuads && !hasGeometryShader && isQuad; + return new GpuGraphicsState( EarlyZForce, ConvertToInputTopology(Topology, TessellationMode), @@ -149,7 +155,8 @@ namespace Ryujinx.Graphics.Gpu.Shader in FragmentOutputTypes, DualSourceBlendEnable, YNegateEnabled, - originUpperLeft); + originUpperLeft, + halvePrimitiveId); } /// diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 31cc94a2..4fc66c4c 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -339,7 +339,7 @@ namespace Ryujinx.Graphics.Gpu.Shader if (gpuVa != 0) { - GpuAccessor gpuAccessor = new(_context, channel, gpuAccessorState, stageIndex); + GpuAccessor gpuAccessor = new(_context, channel, gpuAccessorState, stageIndex, addresses.Geometry != 0); TranslatorContext currentStage = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags, gpuVa); if (nextStage != null) diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index d56c40af..2a39ae44 100644 --- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -161,6 +161,7 @@ namespace Ryujinx.Graphics.OpenGL supportsBgraFormat: false, supportsR4G4Format: false, supportsR4G4B4A4Format: true, + supportsScaledVertexFormats: true, supportsSnormBufferTextureFormat: false, supports5BitComponentFormat: true, supportsSparseBuffer: false, @@ -175,7 +176,7 @@ namespace Ryujinx.Graphics.OpenGL supportsMismatchingViewFormat: HwCapabilities.SupportsMismatchingViewFormat, supportsCubemapView: true, supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset, - supportsScaledVertexFormats: true, + supportsQuads: HwCapabilities.SupportsQuads, supportsSeparateSampler: false, supportsShaderBallot: HwCapabilities.SupportsShaderBallot, supportsShaderBarrierDivergence: !(intelWindows || intelUnix), diff --git a/src/Ryujinx.Graphics.Shader/GpuGraphicsState.cs b/src/Ryujinx.Graphics.Shader/GpuGraphicsState.cs index f16c71d5..38684002 100644 --- a/src/Ryujinx.Graphics.Shader/GpuGraphicsState.cs +++ b/src/Ryujinx.Graphics.Shader/GpuGraphicsState.cs @@ -102,6 +102,11 @@ namespace Ryujinx.Graphics.Shader /// public readonly bool OriginUpperLeft; + /// + /// Indicates that the primitive ID values on the shader should be halved due to quad to triangles conversion. + /// + public readonly bool HalvePrimitiveId; + /// /// Creates a new GPU graphics state. /// @@ -124,6 +129,7 @@ namespace Ryujinx.Graphics.Shader /// Indicates whether dual source blend is enabled /// Indicates if negation of the viewport Y axis is enabled /// If true, indicates that the fragment origin is the upper left corner of the viewport, otherwise it is the lower left corner + /// Indicates that the primitive ID values on the shader should be halved due to quad to triangles conversion public GpuGraphicsState( bool earlyZForce, InputTopology topology, @@ -143,7 +149,8 @@ namespace Ryujinx.Graphics.Shader in Array8 fragmentOutputTypes, bool dualSourceBlendEnable, bool yNegateEnabled, - bool originUpperLeft) + bool originUpperLeft, + bool halvePrimitiveId) { EarlyZForce = earlyZForce; Topology = topology; @@ -164,6 +171,7 @@ namespace Ryujinx.Graphics.Shader DualSourceBlendEnable = dualSourceBlendEnable; YNegateEnabled = yNegateEnabled; OriginUpperLeft = originUpperLeft; + HalvePrimitiveId = halvePrimitiveId; } } } diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs index b1a9f9f8..3dc4ad90 100644 --- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -135,6 +135,7 @@ namespace Ryujinx.Graphics.Shader default, false, false, + false, false); } diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs index 63ce38e2..c704156b 100644 --- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs @@ -84,6 +84,10 @@ namespace Ryujinx.Graphics.Shader.Instructions value = context.IConvertU32ToFP32(value); } } + else if (offset == AttributeConsts.PrimitiveId && context.TranslatorContext.Definitions.HalvePrimitiveId) + { + value = context.ShiftRightS32(value, Const(1)); + } context.Copy(Register(rd), value); } @@ -187,6 +191,12 @@ namespace Ryujinx.Graphics.Shader.Instructions } } } + else if (op.Imm10 == AttributeConsts.PrimitiveId && context.TranslatorContext.Definitions.HalvePrimitiveId) + { + // If quads are used, but the host does not support them, they need to be converted to triangles. + // Since each quad becomes 2 triangles, we need to compensate here and divide primitive ID by 2. + res = context.ShiftRightS32(res, Const(1)); + } else if (op.Imm10 == AttributeConsts.FrontFacing && context.TranslatorContext.GpuAccessor.QueryHostHasFrontFacingBug()) { // gl_FrontFacing sometimes has incorrect (flipped) values depending how it is accessed on Intel GPUs. diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs index 22321543..4128af24 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs @@ -66,9 +66,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (nvHandle.AsgOp is not Operation handleOp || handleOp.Inst != Instruction.Load || - handleOp.StorageKind != StorageKind.Input) + (handleOp.StorageKind != StorageKind.Input && handleOp.StorageKind != StorageKind.StorageBuffer)) { - // Right now, we only allow bindless access when the handle comes from a shader input. + // Right now, we only allow bindless access when the handle comes from a shader input or storage buffer. // This is an artificial limitation to prevent it from being used in cases where it // would have a large performance impact of loading all textures in the pool. // It might be removed in the future, if we can mitigate the performance impact. diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs index 3246e259..f831ec94 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs @@ -45,6 +45,8 @@ namespace Ryujinx.Graphics.Shader.Translation public bool YNegateEnabled => _graphicsState.YNegateEnabled; public bool OriginUpperLeft => _graphicsState.OriginUpperLeft; + public bool HalvePrimitiveId => _graphicsState.HalvePrimitiveId; + public ImapPixelType[] ImapTypes { get; } public bool IaIndexing { get; private set; } public bool OaIndexing { get; private set; } diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index b46ba9c4..8ef05de3 100644 --- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -691,6 +691,7 @@ namespace Ryujinx.Graphics.Vulkan supportsBgraFormat: true, supportsR4G4Format: false, supportsR4G4B4A4Format: supportsR4G4B4A4Format, + supportsScaledVertexFormats: FormatCapabilities.SupportsScaledVertexFormats(), supportsSnormBufferTextureFormat: true, supports5BitComponentFormat: supports5BitComponentFormat, supportsSparseBuffer: features2.Features.SparseBinding && mainQueueProperties.QueueFlags.HasFlag(QueueFlags.SparseBindingBit), @@ -705,7 +706,7 @@ namespace Ryujinx.Graphics.Vulkan supportsMismatchingViewFormat: true, supportsCubemapView: !IsAmdGcn, supportsNonConstantTextureOffset: false, - supportsScaledVertexFormats: FormatCapabilities.SupportsScaledVertexFormats(), + supportsQuads: false, supportsSeparateSampler: true, supportsShaderBallot: false, supportsShaderBarrierDivergence: Vendor != Vendor.Intel,