From f09bba82b9366e5912b639a610ae89cbb1cf352c Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 29 Aug 2023 21:10:34 -0300 Subject: [PATCH] Geometry shader emulation for macOS (#5551) * Implement vertex and geometry shader conversion to compute * Call InitializeReservedCounts for compute too * PR feedback * Set clip distance mask for geometry and tessellation shaders too * Transform feedback emulation only for vertex --- src/Ryujinx.Graphics.GAL/Capabilities.cs | 6 + .../Engine/GPFifo/GPFifoProcessor.cs | 16 +- .../ComputeDraw/VertexInfoBufferUpdater.cs | 141 ++++ .../Engine/Threed/ComputeDraw/VtgAsCompute.cs | 96 +++ .../Threed/ComputeDraw/VtgAsComputeContext.cs | 648 ++++++++++++++++++ .../Threed/ComputeDraw/VtgAsComputeState.cs | 535 +++++++++++++++ .../Engine/Threed/DrawManager.cs | 142 +++- .../Engine/Threed/DrawState.cs | 16 + .../Threed/SpecializationStateUpdater.cs | 16 +- .../Engine/Threed/StateUpdater.cs | 31 +- .../Engine/Threed/ThreedClass.cs | 29 +- src/Ryujinx.Graphics.Gpu/GpuChannel.cs | 1 + src/Ryujinx.Graphics.Gpu/Image/FormatTable.cs | 102 +++ .../Memory/BufferManager.cs | 49 +- .../Memory/BufferUpdater.cs | 123 ++++ .../Memory/SupportBufferUpdater.cs | 85 +-- .../Shader/CachedShaderProgram.cs | 23 + .../Shader/DiskCache/DiskCacheGpuAccessor.cs | 2 +- .../Shader/DiskCache/DiskCacheHostStorage.cs | 25 +- .../DiskCache/ParallelDiskCacheLoader.cs | 16 +- .../Shader/GpuAccessor.cs | 8 +- .../Shader/GpuAccessorBase.cs | 41 +- .../Shader/ShaderAsCompute.cs | 20 + .../Shader/ShaderCache.cs | 162 +++-- .../Shader/ShaderInfoBuilder.cs | 63 +- .../Shader/ShaderSpecializationList.cs | 9 +- .../Shader/ShaderSpecializationState.cs | 35 +- src/Ryujinx.Graphics.OpenGL/HwCapabilities.cs | 2 + src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 2 + src/Ryujinx.Graphics.Shader/AttributeType.cs | 8 +- .../CodeGen/Glsl/Declarations.cs | 20 +- .../CodeGen/Glsl/Instructions/IoMap.cs | 1 + .../CodeGen/Spirv/CodeGenContext.cs | 15 - .../CodeGen/Spirv/Declarations.cs | 4 +- 
.../CodeGen/Spirv/IoMap.cs | 1 + .../CodeGen/Spirv/SpirvGenerator.cs | 12 +- src/Ryujinx.Graphics.Shader/Constants.cs | 6 - .../Decoders/DecodedProgram.cs | 5 + .../Decoders/Decoder.cs | 35 +- src/Ryujinx.Graphics.Shader/InputTopology.cs | 13 + .../Instructions/InstEmitAttribute.cs | 2 +- .../IntermediateRepresentation/IoVariable.cs | 1 + .../ResourceReservationCounts.cs | 22 + .../ShaderIdentification.cs | 8 - .../ShaderProgramInfo.cs | 15 +- src/Ryujinx.Graphics.Shader/SupportBuffer.cs | 25 +- .../Translation/AttributeConsts.cs | 2 + .../Translation/EmitterContext.cs | 197 +++++- .../Translation/EmitterContextInsts.cs | 5 + .../Translation/FeatureFlags.cs | 4 + .../Translation/IoUsage.cs | 28 + .../Translation/ResourceManager.cs | 46 +- .../Translation/ResourceReservations.cs | 186 +++++ .../Translation/ShaderDefinitions.cs | 59 +- .../Translation/ShaderIdentifier.cs | 187 ----- .../Translation/TransformContext.cs | 3 + .../Transforms/GeometryToCompute.cs | 378 ++++++++++ .../Translation/Transforms/TexturePass.cs | 8 +- .../Translation/Transforms/TransformPasses.cs | 2 + .../Translation/Transforms/VertexToCompute.cs | 364 ++++++++++ .../Translation/Translator.cs | 41 +- .../Translation/TranslatorContext.cs | 279 ++++++-- .../VertexInfoBuffer.cs | 59 ++ src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 2 + src/Ryujinx.ShaderTools/Program.cs | 18 +- 65 files changed, 3912 insertions(+), 593 deletions(-) create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VertexInfoBufferUpdater.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsCompute.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Memory/BufferUpdater.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Shader/ShaderAsCompute.cs create mode 100644 
src/Ryujinx.Graphics.Shader/ResourceReservationCounts.cs delete mode 100644 src/Ryujinx.Graphics.Shader/ShaderIdentification.cs create mode 100644 src/Ryujinx.Graphics.Shader/Translation/IoUsage.cs create mode 100644 src/Ryujinx.Graphics.Shader/Translation/ResourceReservations.cs delete mode 100644 src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs create mode 100644 src/Ryujinx.Graphics.Shader/Translation/Transforms/GeometryToCompute.cs create mode 100644 src/Ryujinx.Graphics.Shader/Translation/Transforms/VertexToCompute.cs create mode 100644 src/Ryujinx.Graphics.Shader/VertexInfoBuffer.cs diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs index d41f8e59..75642204 100644 --- a/src/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs @@ -39,6 +39,7 @@ namespace Ryujinx.Graphics.GAL public readonly bool SupportsShaderBarrierDivergence; public readonly bool SupportsShaderFloat64; public readonly bool SupportsTextureShadowLod; + public readonly bool SupportsVertexStoreAndAtomics; public readonly bool SupportsViewportIndexVertexTessellation; public readonly bool SupportsViewportMask; public readonly bool SupportsViewportSwizzle; @@ -54,6 +55,7 @@ namespace Ryujinx.Graphics.GAL public readonly float MaximumSupportedAnisotropy; public readonly int ShaderSubgroupSize; public readonly int StorageBufferOffsetAlignment; + public readonly int TextureBufferOffsetAlignment; public readonly int GatherBiasPrecision; @@ -91,6 +93,7 @@ namespace Ryujinx.Graphics.GAL bool supportsShaderBarrierDivergence, bool supportsShaderFloat64, bool supportsTextureShadowLod, + bool supportsVertexStoreAndAtomics, bool supportsViewportIndexVertexTessellation, bool supportsViewportMask, bool supportsViewportSwizzle, @@ -104,6 +107,7 @@ namespace Ryujinx.Graphics.GAL float maximumSupportedAnisotropy, int shaderSubgroupSize, int storageBufferOffsetAlignment, + int textureBufferOffsetAlignment, int 
gatherBiasPrecision) { Api = api; @@ -139,6 +143,7 @@ namespace Ryujinx.Graphics.GAL SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence; SupportsShaderFloat64 = supportsShaderFloat64; SupportsTextureShadowLod = supportsTextureShadowLod; + SupportsVertexStoreAndAtomics = supportsVertexStoreAndAtomics; SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation; SupportsViewportMask = supportsViewportMask; SupportsViewportSwizzle = supportsViewportSwizzle; @@ -152,6 +157,7 @@ namespace Ryujinx.Graphics.GAL MaximumSupportedAnisotropy = maximumSupportedAnisotropy; ShaderSubgroupSize = shaderSubgroupSize; StorageBufferOffsetAlignment = storageBufferOffsetAlignment; + TextureBufferOffsetAlignment = textureBufferOffsetAlignment; GatherBiasPrecision = gatherBiasPrecision; } } diff --git a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs index 6ba1bc22..180e2a6b 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoProcessor.cs @@ -13,7 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo /// /// Represents a GPU General Purpose FIFO command processor. 
/// - class GPFifoProcessor + class GPFifoProcessor : IDisposable { private const int MacrosCount = 0x80; private const int MacroIndexMask = MacrosCount - 1; @@ -327,5 +327,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo { _3dClass.PerformDeferredDraws(); } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + _3dClass.Dispose(); + } + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VertexInfoBufferUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VertexInfoBufferUpdater.cs new file mode 100644 index 00000000..65f556fc --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VertexInfoBufferUpdater.cs @@ -0,0 +1,141 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Shader; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw +{ + /// + /// Vertex info buffer data updater. + /// + class VertexInfoBufferUpdater : BufferUpdater + { + private VertexInfoBuffer _data; + + /// + /// Creates a new instance of the vertex info buffer updater. + /// + /// Renderer that the vertex info buffer will be used with + public VertexInfoBufferUpdater(IRenderer renderer) : base(renderer) + { + } + + /// + /// Sets vertex data related counts. 
+ /// + /// Number of vertices used on the draw + /// Number of draw instances + /// Index of the first vertex on the vertex buffer + /// Index of the first instanced vertex on the vertex buffer + public void SetVertexCounts(int vertexCount, int instanceCount, int firstVertex, int firstInstance) + { + if (_data.VertexCounts.X != vertexCount) + { + _data.VertexCounts.X = vertexCount; + MarkDirty(VertexInfoBuffer.VertexCountsOffset, sizeof(int)); + } + + if (_data.VertexCounts.Y != instanceCount) + { + _data.VertexCounts.Y = instanceCount; + MarkDirty(VertexInfoBuffer.VertexCountsOffset + sizeof(int), sizeof(int)); + } + + if (_data.VertexCounts.Z != firstVertex) + { + _data.VertexCounts.Z = firstVertex; + MarkDirty(VertexInfoBuffer.VertexCountsOffset + sizeof(int) * 2, sizeof(int)); + } + + if (_data.VertexCounts.W != firstInstance) + { + _data.VertexCounts.W = firstInstance; + MarkDirty(VertexInfoBuffer.VertexCountsOffset + sizeof(int) * 3, sizeof(int)); + } + } + + /// + /// Sets geometry data related counts. + /// + /// Number of primitives consumed by the geometry shader + public void SetGeometryCounts(int primitivesCount) + { + if (_data.GeometryCounts.X != primitivesCount) + { + _data.GeometryCounts.X = primitivesCount; + MarkDirty(VertexInfoBuffer.GeometryCountsOffset, sizeof(int)); + } + } + + /// + /// Sets a vertex stride and related data. + /// + /// Index of the vertex stride to be updated + /// Stride divided by the component or format size + /// Number of components that the format has + public void SetVertexStride(int index, int stride, int componentCount) + { + if (_data.VertexStrides[index].X != stride) + { + _data.VertexStrides[index].X = stride; + MarkDirty(VertexInfoBuffer.VertexStridesOffset + index * Unsafe.SizeOf>(), sizeof(int)); + } + + for (int c = 1; c < 4; c++) + { + int value = c < componentCount ? 
1 : 0; + + ref int currentValue = ref GetElementRef(ref _data.VertexStrides[index], c); + + if (currentValue != value) + { + currentValue = value; + MarkDirty(VertexInfoBuffer.VertexStridesOffset + index * Unsafe.SizeOf>() + c * sizeof(int), sizeof(int)); + } + } + } + + /// + /// Sets a vertex offset and related data. + /// + /// Index of the vertex offset to be updated + /// Offset divided by the component or format size + /// If the draw is instanced, should have the vertex divisor value, otherwise should be zero + public void SetVertexOffset(int index, int offset, int divisor) + { + if (_data.VertexOffsets[index].X != offset) + { + _data.VertexOffsets[index].X = offset; + MarkDirty(VertexInfoBuffer.VertexOffsetsOffset + index * Unsafe.SizeOf>(), sizeof(int)); + } + + if (_data.VertexOffsets[index].Y != divisor) + { + _data.VertexOffsets[index].Y = divisor; + MarkDirty(VertexInfoBuffer.VertexOffsetsOffset + index * Unsafe.SizeOf>() + sizeof(int), sizeof(int)); + } + } + + /// + /// Sets the offset of the index buffer. + /// + /// Offset divided by the component size + public void SetIndexBufferOffset(int offset) + { + if (_data.GeometryCounts.W != offset) + { + _data.GeometryCounts.W = offset; + MarkDirty(VertexInfoBuffer.GeometryCountsOffset + sizeof(int) * 3, sizeof(int)); + } + } + + /// + /// Submits all pending buffer updates to the GPU. 
+ /// + public void Commit() + { + Commit(MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref _data, 1))); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsCompute.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsCompute.cs new file mode 100644 index 00000000..cbbfd251 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsCompute.cs @@ -0,0 +1,96 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Shader; +using System; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw +{ + /// + /// Vertex, tessellation and geometry as compute shader draw manager. + /// + class VtgAsCompute : IDisposable + { + private readonly GpuContext _context; + private readonly GpuChannel _channel; + private readonly DeviceStateWithShadow _state; + private readonly VtgAsComputeContext _vacContext; + + /// + /// Creates a new instance of the vertex, tessellation and geometry as compute shader draw manager. + /// + /// GPU context + /// GPU channel + /// 3D engine state + public VtgAsCompute(GpuContext context, GpuChannel channel, DeviceStateWithShadow state) + { + _context = context; + _channel = channel; + _state = state; + _vacContext = new(context); + } + + /// + /// Emulates the pre-rasterization stages of a draw operation using a compute shader. 
+ /// + /// 3D engine + /// Vertex shader converted to compute + /// Optional geometry shader converted to compute + /// Fragment shader with a vertex passthrough shader to feed the compute output into the fragment stage + /// Primitive topology of the draw + /// Index or vertex count of the draw + /// Instance count + /// First index on the index buffer, for indexed draws + /// First vertex on the vertex buffer + /// First instance + /// Whether the draw is indexed + public void DrawAsCompute( + ThreedClass engine, + ShaderAsCompute vertexAsCompute, + ShaderAsCompute geometryAsCompute, + IProgram vertexPassthroughProgram, + PrimitiveTopology topology, + int count, + int instanceCount, + int firstIndex, + int firstVertex, + int firstInstance, + bool indexed) + { + VtgAsComputeState state = new( + _context, + _channel, + _state, + _vacContext, + engine, + vertexAsCompute, + geometryAsCompute, + vertexPassthroughProgram, + topology, + count, + instanceCount, + firstIndex, + firstVertex, + firstInstance, + indexed); + + state.RunVertex(); + state.RunGeometry(); + state.RunFragment(); + + _vacContext.FreeBuffers(); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + _vacContext.Dispose(); + } + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs new file mode 100644 index 00000000..e9b754ff --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs @@ -0,0 +1,648 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw +{ + /// + /// Vertex, tessellation and geometry as compute shader context. 
+ /// + class VtgAsComputeContext : IDisposable + { + private const int DummyBufferSize = 16; + + private readonly GpuContext _context; + + /// + /// Cache of buffer textures used for vertex and index buffers. + /// + private class BufferTextureCache : IDisposable + { + private readonly Dictionary _cache; + + /// + /// Creates a new instance of the buffer texture cache. + /// + public BufferTextureCache() + { + _cache = new(); + } + + /// + /// Gets a cached or creates and caches a buffer texture with the specified format. + /// + /// Renderer where the texture will be used + /// Format of the buffer texture + /// Buffer texture + public ITexture Get(IRenderer renderer, Format format) + { + if (!_cache.TryGetValue(format, out ITexture bufferTexture)) + { + bufferTexture = renderer.CreateTexture(new TextureCreateInfo( + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + format, + DepthStencilMode.Depth, + Target.TextureBuffer, + SwizzleComponent.Red, + SwizzleComponent.Green, + SwizzleComponent.Blue, + SwizzleComponent.Alpha)); + + _cache.Add(format, bufferTexture); + } + + return bufferTexture; + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + foreach (var texture in _cache.Values) + { + texture.Release(); + } + + _cache.Clear(); + } + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + } + + /// + /// Buffer state. + /// + private struct Buffer + { + /// + /// Buffer handle. + /// + public BufferHandle Handle; + + /// + /// Current free buffer offset. + /// + public int Offset; + + /// + /// Total buffer size in bytes. + /// + public int Size; + } + + /// + /// Index buffer state. + /// + private readonly struct IndexBuffer + { + /// + /// Buffer handle. + /// + public BufferHandle Handle { get; } + + /// + /// Index count. + /// + public int Count { get; } + + /// + /// Size in bytes. + /// + public int Size { get; } + + /// + /// Creates a new index buffer state. 
+ /// + /// Buffer handle + /// Index count + /// Size in bytes + public IndexBuffer(BufferHandle handle, int count, int size) + { + Handle = handle; + Count = count; + Size = size; + } + + /// + /// Creates a full range starting from the beginning of the buffer. + /// + /// Range + public readonly BufferRange ToRange() + { + return new BufferRange(Handle, 0, Size); + } + + /// + /// Creates a range starting from the beginning of the buffer, with the specified size. + /// + /// Size in bytes of the range + /// Range + public readonly BufferRange ToRange(int size) + { + return new BufferRange(Handle, 0, size); + } + } + + private readonly BufferTextureCache[] _bufferTextures; + private BufferHandle _dummyBuffer; + private Buffer _vertexDataBuffer; + private Buffer _geometryVertexDataBuffer; + private Buffer _geometryIndexDataBuffer; + private BufferHandle _sequentialIndexBuffer; + private int _sequentialIndexBufferCount; + + private readonly Dictionary<PrimitiveTopology, IndexBuffer> _topologyRemapBuffers; + + /// + /// Vertex information buffer updater. + /// + public VertexInfoBufferUpdater VertexInfoBufferUpdater { get; } + + /// + /// Creates a new instance of the vertex, tessellation and geometry as compute shader context. + /// + /// GPU context + public VtgAsComputeContext(GpuContext context) + { + _context = context; + _bufferTextures = new BufferTextureCache[Constants.TotalVertexBuffers + 2]; + _topologyRemapBuffers = new(); + VertexInfoBufferUpdater = new(context.Renderer); + } + + /// + /// Gets the number of complete primitives that can be formed with a given vertex count, for a given topology. + /// + /// Topology + /// Vertex count + /// Total of complete primitives + public static int GetPrimitivesCount(PrimitiveTopology primitiveType, int count) + { + return primitiveType switch + { + PrimitiveTopology.Lines => count / 2, + PrimitiveTopology.LinesAdjacency => count / 4, + PrimitiveTopology.LineLoop => count > 1 ? 
count : 0, + PrimitiveTopology.LineStrip => Math.Max(count - 1, 0), + PrimitiveTopology.LineStripAdjacency => Math.Max(count - 3, 0), + PrimitiveTopology.Triangles => count / 3, + PrimitiveTopology.TrianglesAdjacency => count / 6, + PrimitiveTopology.TriangleStrip or + PrimitiveTopology.TriangleFan or + PrimitiveTopology.Polygon => Math.Max(count - 2, 0), + PrimitiveTopology.TriangleStripAdjacency => Math.Max(count - 4, 0) / 2, // N triangles need 2 * (N + 2) vertices, the first one takes 6 and each of the next takes 2 more. + PrimitiveTopology.Quads => (count / 4) * 2, // In triangles. + PrimitiveTopology.QuadStrip => Math.Max((count - 2) / 2, 0) * 2, // In triangles. + _ => count, + }; + } + + /// + /// Gets the total of vertices that a single primitive has, for the specified topology. + /// + /// Topology + /// Vertex count + private static int GetVerticesPerPrimitive(PrimitiveTopology primitiveType) + { + return primitiveType switch + { + PrimitiveTopology.Lines or + PrimitiveTopology.LineLoop or + PrimitiveTopology.LineStrip => 2, + PrimitiveTopology.LinesAdjacency or + PrimitiveTopology.LineStripAdjacency => 4, + PrimitiveTopology.Triangles or + PrimitiveTopology.TriangleStrip or + PrimitiveTopology.TriangleFan or + PrimitiveTopology.Polygon => 3, + PrimitiveTopology.TrianglesAdjacency or + PrimitiveTopology.TriangleStripAdjacency => 6, + PrimitiveTopology.Quads or + PrimitiveTopology.QuadStrip => 3, // Quads are counted as 2 triangles each, so the primitive is a triangle. + _ => 1, + }; + } + + /// + /// Gets a cached or creates a new buffer that can be used to map linear indices to ones + /// of a specified topology, and build complete primitives. 
+ /// + /// Topology + /// Number of input vertices that needs to be mapped using that buffer + /// Remap buffer range + public BufferRange GetOrCreateTopologyRemapBuffer(PrimitiveTopology topology, int count) + { + // A line loop remap ends with a segment going back to vertex 0, so its contents are only valid + // for the exact vertex count it was built for. All other topologies can reuse a prefix of a larger buffer. + bool needsExactCount = topology == PrimitiveTopology.LineLoop; + + if (!_topologyRemapBuffers.TryGetValue(topology, out IndexBuffer buffer) || buffer.Count < count || (needsExactCount && buffer.Count != count)) + { + if (buffer.Handle != BufferHandle.Null) + { + _context.Renderer.DeleteBuffer(buffer.Handle); + } + + buffer = CreateTopologyRemapBuffer(topology, count); + _topologyRemapBuffers[topology] = buffer; + + return buffer.ToRange(); + } + + return buffer.ToRange(Math.Max(GetPrimitivesCount(topology, count) * GetVerticesPerPrimitive(topology), 1) * sizeof(uint)); + } + + /// + /// Creates a new topology remap buffer. + /// + /// Topology + /// Maximum of vertices that will be accessed + /// Remap buffer range + private IndexBuffer CreateTopologyRemapBuffer(PrimitiveTopology topology, int count) + { + // Size can't be zero as creating zero sized buffers is invalid. + Span<int> data = new int[Math.Max(GetPrimitivesCount(topology, count) * GetVerticesPerPrimitive(topology), 1)]; + + switch (topology) + { + case PrimitiveTopology.Points: + case PrimitiveTopology.Lines: + case PrimitiveTopology.LinesAdjacency: + case PrimitiveTopology.Triangles: + case PrimitiveTopology.TrianglesAdjacency: + case PrimitiveTopology.Patches: + for (int index = 0; index < data.Length; index++) + { + data[index] = index; + } + break; + case PrimitiveTopology.LineLoop: + // Segment N connects vertex N to vertex N + 1. The bounds check only skips the 1 element + // placeholder that is used when there are no primitives, every complete segment is written. + for (int index = 0; index + 1 < data.Length; index += 2) + { + data[index] = index >> 1; + data[index + 1] = (index >> 1) + 1; + } + + // The last segment closes the loop by going back to the first vertex. + data[^1] = 0; + break; + case PrimitiveTopology.LineStrip: + // Segment N connects vertex N to vertex N + 1, including the last segment. + for (int index = 0; index + 1 < data.Length; index += 2) + { + data[index] = index >> 1; + data[index + 1] = (index >> 1) + 1; + } + break; + case PrimitiveTopology.TriangleStrip: + int tsTrianglesCount = data.Length / 3; + int tsOutIndex = 3; + + if (tsTrianglesCount > 0) + { + data[0] = 0; + data[1] = 1; + 
data[2] = 2; + } + + for (int tri = 1; tri < tsTrianglesCount; tri++) + { + int baseIndex = tri * 3; + + if ((tri & 1) != 0) + { + data[baseIndex] = tsOutIndex - 1; + data[baseIndex + 1] = tsOutIndex - 2; + data[baseIndex + 2] = tsOutIndex++; + } + else + { + data[baseIndex] = tsOutIndex - 2; + data[baseIndex + 1] = tsOutIndex - 1; + data[baseIndex + 2] = tsOutIndex++; + } + } + break; + case PrimitiveTopology.TriangleFan: + case PrimitiveTopology.Polygon: + int tfTrianglesCount = data.Length / 3; + int tfOutIndex = 1; + + for (int index = 0; index < tfTrianglesCount * 3; index += 3) + { + data[index] = 0; + data[index + 1] = tfOutIndex; + data[index + 2] = ++tfOutIndex; + } + break; + case PrimitiveTopology.Quads: + int qQuadsCount = data.Length / 6; + + for (int quad = 0; quad < qQuadsCount; quad++) + { + int index = quad * 6; + int qIndex = quad * 4; + + data[index] = qIndex; + data[index + 1] = qIndex + 1; + data[index + 2] = qIndex + 2; + data[index + 3] = qIndex; + data[index + 4] = qIndex + 2; + data[index + 5] = qIndex + 3; + } + break; + case PrimitiveTopology.QuadStrip: + int qsQuadsCount = data.Length / 6; + + if (qsQuadsCount > 0) + { + data[0] = 0; + data[1] = 1; + data[2] = 2; + data[3] = 0; + data[4] = 2; + data[5] = 3; + } + + for (int quad = 1; quad < qsQuadsCount; quad++) + { + int index = quad * 6; + int qIndex = quad * 2; + + data[index] = qIndex + 1; + data[index + 1] = qIndex; + data[index + 2] = qIndex + 2; + data[index + 3] = qIndex + 1; + data[index + 4] = qIndex + 2; + data[index + 5] = qIndex + 3; + } + break; + case PrimitiveTopology.LineStripAdjacency: + // Primitive N uses vertices N to N + 3, including the last primitive. + for (int index = 0; index + 3 < data.Length; index += 4) + { + int lIndex = index >> 2; + + data[index] = lIndex; + data[index + 1] = lIndex + 1; + data[index + 2] = lIndex + 2; + data[index + 3] = lIndex + 3; + } + break; + case PrimitiveTopology.TriangleStripAdjacency: + int tsaTrianglesCount = data.Length / 6; + int tsaOutIndex = 6; + + if (tsaTrianglesCount > 0) + { + 
data[0] = 0; + data[1] = 1; + data[2] = 2; + data[3] = 3; + data[4] = 4; + data[5] = 5; + } + + for (int tri = 1; tri < tsaTrianglesCount; tri++) + { + int baseIndex = tri * 6; + + if ((tri & 1) != 0) + { + data[baseIndex] = tsaOutIndex - 2; + data[baseIndex + 1] = tsaOutIndex - 1; + data[baseIndex + 2] = tsaOutIndex - 4; + data[baseIndex + 3] = tsaOutIndex - 3; + data[baseIndex + 4] = tsaOutIndex++; + data[baseIndex + 5] = tsaOutIndex++; + } + else + { + data[baseIndex] = tsaOutIndex - 4; + data[baseIndex + 1] = tsaOutIndex - 3; + data[baseIndex + 2] = tsaOutIndex - 2; + data[baseIndex + 3] = tsaOutIndex - 1; + data[baseIndex + 4] = tsaOutIndex++; + data[baseIndex + 5] = tsaOutIndex++; + } + } + break; + } + + ReadOnlySpan<byte> dataBytes = MemoryMarshal.Cast<int, byte>(data); + + BufferHandle buffer = _context.Renderer.CreateBuffer(dataBytes.Length); + _context.Renderer.SetBufferData(buffer, 0, dataBytes); + + return new IndexBuffer(buffer, count, dataBytes.Length); + } + + /// + /// Gets a buffer texture with a given format, for the given index. + /// + /// Index of the buffer texture + /// Format of the buffer texture + /// Buffer texture + public ITexture EnsureBufferTexture(int index, Format format) + { + return (_bufferTextures[index] ??= new()).Get(_context.Renderer, format); + } + + /// + /// Gets the offset and size of usable storage on the output vertex buffer. + /// + /// Size in bytes that will be used + /// Usable offset and size on the buffer + public (int, int) GetVertexDataBuffer(int size) + { + return EnsureBuffer(ref _vertexDataBuffer, size); + } + + /// + /// Gets the offset and size of usable storage on the output geometry shader vertex buffer. + /// + /// Size in bytes that will be used + /// Usable offset and size on the buffer + public (int, int) GetGeometryVertexDataBuffer(int size) + { + return EnsureBuffer(ref _geometryVertexDataBuffer, size); + } + + /// + /// Gets the offset and size of usable storage on the output geometry shader index buffer. 
+ /// + /// Size in bytes that will be used + /// Usable offset and size on the buffer + public (int, int) GetGeometryIndexDataBuffer(int size) + { + return EnsureBuffer(ref _geometryIndexDataBuffer, size); + } + + /// + /// Gets a range of the output vertex buffer for binding. + /// + /// Offset of the range + /// Size of the range in bytes + /// Range + public BufferRange GetVertexDataBufferRange(int offset, int size) + { + return new BufferRange(_vertexDataBuffer.Handle, offset, size); + } + + /// + /// Gets a range of the output geometry shader vertex buffer for binding. + /// + /// Offset of the range + /// Size of the range in bytes + /// Range + public BufferRange GetGeometryVertexDataBufferRange(int offset, int size) + { + return new BufferRange(_geometryVertexDataBuffer.Handle, offset, size); + } + + /// + /// Gets a range of the output geometry shader index buffer for binding. + /// + /// Offset of the range + /// Size of the range in bytes + /// Range + public BufferRange GetGeometryIndexDataBufferRange(int offset, int size) + { + return new BufferRange(_geometryIndexDataBuffer.Handle, offset, size); + } + + /// + /// Gets the range for a dummy 16 bytes buffer, filled with zeros. + /// + /// Dummy buffer range + public BufferRange GetDummyBufferRange() + { + if (_dummyBuffer == BufferHandle.Null) + { + _dummyBuffer = _context.Renderer.CreateBuffer(DummyBufferSize); + _context.Renderer.Pipeline.ClearBuffer(_dummyBuffer, 0, DummyBufferSize, 0); + } + + return new BufferRange(_dummyBuffer, 0, DummyBufferSize); + } + + /// + /// Gets a sequential index buffer, with ever incrementing index values, that is recreated when more indices are needed. 
+ /// + /// Minimum number of indices that the buffer should have + /// Buffer handle + public BufferHandle GetSequentialIndexBuffer(int count) + { + if (_sequentialIndexBufferCount < count) + { + if (_sequentialIndexBuffer != BufferHandle.Null) + { + _context.Renderer.DeleteBuffer(_sequentialIndexBuffer); + } + + _sequentialIndexBuffer = _context.Renderer.CreateBuffer(count * sizeof(uint)); + _sequentialIndexBufferCount = count; + + Span data = new int[count]; + + for (int index = 0; index < count; index++) + { + data[index] = index; + } + + _context.Renderer.SetBufferData(_sequentialIndexBuffer, 0, MemoryMarshal.Cast(data)); + } + + return _sequentialIndexBuffer; + } + + /// + /// Ensures that a buffer exists, is large enough, and allocates a sub-region of the specified size inside the buffer. + /// + /// Buffer state + /// Required size in bytes + /// Allocated offset and size + private (int, int) EnsureBuffer(ref Buffer buffer, int size) + { + int newSize = buffer.Offset + size; + + if (buffer.Size < newSize) + { + // Growing replaces the buffer: the old handle is deleted and nothing is copied to the new one. + if (buffer.Handle != BufferHandle.Null) + { + _context.Renderer.DeleteBuffer(buffer.Handle); + } + + buffer.Handle = _context.Renderer.CreateBuffer(newSize); + buffer.Size = newSize; + } + + int offset = buffer.Offset; + + // The next sub-region starts at the storage buffer offset alignment reported by the host. + buffer.Offset = BitUtils.AlignUp(newSize, _context.Capabilities.StorageBufferOffsetAlignment); + + return (offset, size); + } + + /// + /// Frees all buffer sub-regions that were previously allocated. 
+ /// + public void FreeBuffers() + { + _vertexDataBuffer.Offset = 0; + _geometryVertexDataBuffer.Offset = 0; + _geometryIndexDataBuffer.Offset = 0; + } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + for (int index = 0; index < _bufferTextures.Length; index++) + { + _bufferTextures[index]?.Dispose(); + _bufferTextures[index] = null; + } + + DestroyIfNotNull(ref _dummyBuffer); + DestroyIfNotNull(ref _vertexDataBuffer.Handle); + DestroyIfNotNull(ref _geometryVertexDataBuffer.Handle); + DestroyIfNotNull(ref _geometryIndexDataBuffer.Handle); + DestroyIfNotNull(ref _sequentialIndexBuffer); + + foreach (var indexBuffer in _topologyRemapBuffers.Values) + { + _context.Renderer.DeleteBuffer(indexBuffer.Handle); + } + + _topologyRemapBuffers.Clear(); + } + } + + /// + /// Deletes a buffer if the handle is valid (not null), then sets the handle to null. + /// + /// Buffer handle + private void DestroyIfNotNull(ref BufferHandle handle) + { + if (handle != BufferHandle.Null) + { + _context.Renderer.DeleteBuffer(handle); + handle = BufferHandle.Null; + } + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs new file mode 100644 index 00000000..59a92508 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs @@ -0,0 +1,535 @@ +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Gpu.Shader; +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; +using System; + +namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw +{ + /// + /// Vertex, tessellation and geometry as compute shader state. 
///
    struct VtgAsComputeState
    {
        // Divisor used to convert vertex/primitive/instance counts into compute dispatch group counts.
        private const int ComputeLocalSize = 32;

        private readonly GpuContext _context;
        private readonly GpuChannel _channel;

        // NOTE(review): the generic type argument of DeviceStateWithShadow appears to have been lost
        // when this text was extracted (angle-bracket content is missing throughout) — confirm against the repository.
        private readonly DeviceStateWithShadow _state;
        private readonly VtgAsComputeContext _vacContext;
        private readonly ThreedClass _engine;
        private readonly ShaderAsCompute _vertexAsCompute;
        private readonly ShaderAsCompute _geometryAsCompute;
        private readonly IProgram _vertexPassthroughProgram;
        private readonly PrimitiveTopology _topology;
        private readonly int _count;
        private readonly int _instanceCount;
        private readonly int _firstIndex;
        private readonly int _firstVertex;
        private readonly int _firstInstance;
        private readonly bool _indexed;

        // Offset/size pairs returned by the context for the buffers written by the compute passes.
        private readonly int _vertexDataOffset;
        private readonly int _vertexDataSize;
        private readonly int _geometryVertexDataOffset;
        private readonly int _geometryVertexDataSize;
        private readonly int _geometryIndexDataOffset;
        private readonly int _geometryIndexDataSize;
        private readonly int _geometryIndexDataCount;

        /// <summary>
        /// Creates a new vertex, tessellation and geometry as compute shader state.
        /// </summary>
        /// <remarks>
        /// The constructor also requests the output buffer ranges from <paramref name="vacContext"/>:
        /// always the vertex data range, and the geometry vertex/index data ranges only when
        /// <paramref name="geometryAsCompute"/> is not null.
        /// </remarks>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="state">3D engine state</param>
        /// <param name="vacContext">Vertex as compute context</param>
        /// <param name="engine">3D engine</param>
        /// <param name="vertexAsCompute">Vertex shader converted to compute</param>
        /// <param name="geometryAsCompute">Optional geometry shader converted to compute</param>
        /// <param name="vertexPassthroughProgram">Fragment shader with a vertex passthrough shader to feed the compute output into the fragment stage</param>
        /// <param name="topology">Primitive topology of the draw</param>
        /// <param name="count">Index or vertex count of the draw</param>
        /// <param name="instanceCount">Instance count</param>
        /// <param name="firstIndex">First index on the index buffer, for indexed draws</param>
        /// <param name="firstVertex">First vertex on the vertex buffer</param>
        /// <param name="firstInstance">First instance</param>
        /// <param name="indexed">Whether the draw is indexed</param>
        public VtgAsComputeState(
            GpuContext context,
            GpuChannel channel,
            DeviceStateWithShadow state,
            VtgAsComputeContext vacContext,
            ThreedClass engine,
            ShaderAsCompute vertexAsCompute,
            ShaderAsCompute geometryAsCompute,
            IProgram vertexPassthroughProgram,
            PrimitiveTopology topology,
            int count,
            int instanceCount,
            int firstIndex,
            int firstVertex,
            int firstInstance,
            bool indexed)
        {
            _context = context;
            _channel = channel;
            _state = state;
            _vacContext = vacContext;
            _engine = engine;
            _vertexAsCompute = vertexAsCompute;
            _geometryAsCompute = geometryAsCompute;
            _vertexPassthroughProgram = vertexPassthroughProgram;
            _topology = topology;
            _count = count;
            _instanceCount = instanceCount;
            _firstIndex = firstIndex;
            _firstVertex = firstVertex;
            _firstInstance = firstInstance;
            _indexed = indexed;

            // One output slot per vertex per instance.
            int vertexDataSize = vertexAsCompute.Reservations.OutputSizeInBytesPerInvocation * count * instanceCount;

            (_vertexDataOffset, _vertexDataSize) = _vacContext.GetVertexDataBuffer(vertexDataSize);

            if (geometryAsCompute != null)
            {
                int totalPrimitivesCount = VtgAsComputeContext.GetPrimitivesCount(topology, count * instanceCount);
                int maxCompleteStrips = GetMaxCompleteStrips(geometryAsCompute.Info.GeometryVerticesPerPrimitive, geometryAsCompute.Info.GeometryMaxOutputVertices);
                int totalVerticesCount = totalPrimitivesCount * geometryAsCompute.Info.GeometryMaxOutputVertices * geometryAsCompute.Info.ThreadsPerInputPrimitive;
                int geometryVbDataSize = totalVerticesCount * geometryAsCompute.Reservations.OutputSizeInBytesPerInvocation;

                // Besides one index per output vertex, room is reserved for one extra index per complete strip
                // of every primitive (RunFragment draws this buffer with primitive restart enabled).
                int geometryIbDataCount = totalVerticesCount + totalPrimitivesCount * maxCompleteStrips;
                int geometryIbDataSize = geometryIbDataCount * sizeof(uint);

                (_geometryVertexDataOffset, _geometryVertexDataSize) = vacContext.GetGeometryVertexDataBuffer(geometryVbDataSize);
                (_geometryIndexDataOffset, _geometryIndexDataSize) = vacContext.GetGeometryIndexDataBuffer(geometryIbDataSize);

                _geometryIndexDataCount = geometryIbDataCount;
            }
        }

        /// <summary>
        /// Emulates the vertex stage using compute.
        /// </summary>
        /// <remarks>
        /// Binds every vertex attribute (or a dummy buffer when the attribute has no usable backing buffer)
        /// and the index buffer as buffer textures, binds the vertex info and vertex output buffers,
        /// then dispatches the vertex-as-compute program.
        /// </remarks>
        public readonly void RunVertex()
        {
            _context.Renderer.Pipeline.SetProgram(_vertexAsCompute.HostProgram);

            int primitivesCount = VtgAsComputeContext.GetPrimitivesCount(_topology, _count);

            _vacContext.VertexInfoBufferUpdater.SetVertexCounts(_count, _instanceCount, _firstVertex, _firstInstance);
            _vacContext.VertexInfoBufferUpdater.SetGeometryCounts(primitivesCount);

            for (int index = 0; index < Constants.TotalVertexAttribs; index++)
            {
                var vertexAttrib = _state.State.VertexAttribState[index];

                if (!FormatTable.TryGetSingleComponentAttribFormat(vertexAttrib.UnpackFormat(), out Format format, out int componentsCount))
                {
                    Logger.Debug?.Print(LogClass.Gpu, $"Invalid attribute format 0x{vertexAttrib.UnpackFormat():X}.");

                    // Unknown format: fall back to a 4 component 32-bit format matching the attribute type.
                    format = vertexAttrib.UnpackType() switch
                    {
                        VertexAttribType.Sint => Format.R32Sint,
                        VertexAttribType.Uint => Format.R32Uint,
                        _ => Format.R32Float
                    };

                    componentsCount = 4;
                }

                if (vertexAttrib.UnpackIsConstant())
                {
                    BindDummyVertexAttribute(index, componentsCount, format);
                    continue;
                }

                int bufferIndex = vertexAttrib.UnpackBufferIndex();

                GpuVa endAddress = _state.State.VertexBufferEndAddress[bufferIndex];
                var vertexBuffer = _state.State.VertexBufferState[bufferIndex];
                bool instanced = _state.State.VertexBufferInstanced[bufferIndex];

                ulong address = vertexBuffer.Address.Pack();

                if (!vertexBuffer.UnpackEnable() || !_channel.MemoryManager.IsMapped(address))
                {
                    BindDummyVertexAttribute(index, componentsCount, format);
                    continue;
                }

                int vbStride = vertexBuffer.UnpackStride();
                ulong vbSize = GetVertexBufferSize(address, endAddress.Pack(), vbStride, _indexed, instanced, _firstVertex, _count);

                ulong attributeOffset = (ulong)vertexAttrib.UnpackOffset();
                int componentSize = format.GetScalarSize();

                address += attributeOffset;

                // The buffer texture must start at an aligned address; the bytes skipped to reach the
                // attribute are passed to the shader as an offset instead.
                ulong misalign = address & ((ulong)_context.Capabilities.TextureBufferOffsetAlignment - 1);

                // NOTE(review): this is an unsigned subtraction, it wraps around if attributeOffset is
                // greater than vbSize — confirm that guest state can not produce that combination.
                vbSize = Align(vbSize - attributeOffset + misalign, componentSize);

                SetBufferTexture(_vertexAsCompute.Reservations, index, format, address - misalign, vbSize);

                // Stride and offset are passed in units of components, not bytes.
                _vacContext.VertexInfoBufferUpdater.SetVertexStride(index, vbStride / componentSize, componentsCount);
                _vacContext.VertexInfoBufferUpdater.SetVertexOffset(index, (int)misalign / componentSize, instanced ? vertexBuffer.Divisor : 0);
            }

            if (_indexed)
            {
                SetIndexBufferTexture(_vertexAsCompute.Reservations, _firstIndex, _count, out int ibOffset);
                _vacContext.VertexInfoBufferUpdater.SetIndexBufferOffset(ibOffset);
            }
            else
            {
                SetSequentialIndexBufferTexture(_vertexAsCompute.Reservations, _count);
                _vacContext.VertexInfoBufferUpdater.SetIndexBufferOffset(0);
            }

            int vertexInfoBinding = _vertexAsCompute.Reservations.VertexInfoConstantBufferBinding;
            BufferRange vertexInfoRange = new(_vacContext.VertexInfoBufferUpdater.Handle, 0, VertexInfoBuffer.RequiredSize);
            _context.Renderer.Pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(vertexInfoBinding, vertexInfoRange) });

            int vertexDataBinding = _vertexAsCompute.Reservations.VertexOutputStorageBufferBinding;
            BufferRange vertexDataRange = _vacContext.GetVertexDataBufferRange(_vertexDataOffset, _vertexDataSize);
            _context.Renderer.Pipeline.SetStorageBuffers(stackalloc[] { new BufferAssignment(vertexDataBinding, vertexDataRange) });

            _vacContext.VertexInfoBufferUpdater.Commit();

            _context.Renderer.Pipeline.DispatchCompute(
                BitUtils.DivRoundUp(_count, ComputeLocalSize),
                BitUtils.DivRoundUp(_instanceCount, ComputeLocalSize),
                1);
        }

        /// <summary>
        /// Sets up a vertex attribute that has no usable vertex buffer (constant attribute,
        /// disabled buffer or unmapped address): zero stride and offset, backed by the dummy buffer.
        /// </summary>
        /// <param name="index">Vertex attribute index</param>
        /// <param name="componentsCount">Number of components of the attribute</param>
        /// <param name="format">Buffer texture format</param>
        private readonly void BindDummyVertexAttribute(int index, int componentsCount, Format format)
        {
            _vacContext.VertexInfoBufferUpdater.SetVertexStride(index, 0, componentsCount);
            _vacContext.VertexInfoBufferUpdater.SetVertexOffset(index, 0, 0);
            SetDummyBufferTexture(_vertexAsCompute.Reservations, index, format);
        }

        /// <summary>
        /// Emulates the geometry stage using compute, if it exists, otherwise does nothing.
        /// </summary>
        public readonly void RunGeometry()
        {
            if (_geometryAsCompute == null)
            {
                return;
            }

            int primitivesCount = VtgAsComputeContext.GetPrimitivesCount(_topology, _count);

            _vacContext.VertexInfoBufferUpdater.SetVertexCounts(_count, _instanceCount, _firstVertex, _firstInstance);
            _vacContext.VertexInfoBufferUpdater.SetGeometryCounts(primitivesCount);
            _vacContext.VertexInfoBufferUpdater.Commit();

            int vertexInfoBinding = _vertexAsCompute.Reservations.VertexInfoConstantBufferBinding;
            BufferRange vertexInfoRange = new(_vacContext.VertexInfoBufferUpdater.Handle, 0, VertexInfoBuffer.RequiredSize);
            _context.Renderer.Pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(vertexInfoBinding, vertexInfoRange) });

            int vertexDataBinding = _vertexAsCompute.Reservations.VertexOutputStorageBufferBinding;

            // Wait until compute is done.
            // TODO: Batch compute and draw operations to avoid pipeline stalls.
            _context.Renderer.Pipeline.Barrier();
            _context.Renderer.Pipeline.SetProgram(_geometryAsCompute.HostProgram);

            SetTopologyRemapBufferTexture(_geometryAsCompute.Reservations, _topology, _count);

            int geometryVbBinding = _geometryAsCompute.Reservations.GeometryVertexOutputStorageBufferBinding;
            int geometryIbBinding = _geometryAsCompute.Reservations.GeometryIndexOutputStorageBufferBinding;

            // The vertex stage output is bound together with the geometry vertex/index output buffers.
            BufferRange vertexDataRange = _vacContext.GetVertexDataBufferRange(_vertexDataOffset, _vertexDataSize);
            BufferRange vertexBuffer = _vacContext.GetGeometryVertexDataBufferRange(_geometryVertexDataOffset, _geometryVertexDataSize);
            BufferRange indexBuffer = _vacContext.GetGeometryIndexDataBufferRange(_geometryIndexDataOffset, _geometryIndexDataSize);

            _context.Renderer.Pipeline.SetStorageBuffers(stackalloc[]
            {
                new BufferAssignment(vertexDataBinding, vertexDataRange),
                new BufferAssignment(geometryVbBinding, vertexBuffer),
                new BufferAssignment(geometryIbBinding, indexBuffer),
            });

            _context.Renderer.Pipeline.DispatchCompute(
                BitUtils.DivRoundUp(primitivesCount, ComputeLocalSize),
                BitUtils.DivRoundUp(_instanceCount, ComputeLocalSize),
                _geometryAsCompute.Info.ThreadsPerInputPrimitive);
        }

        /// <summary>
        /// Performs a draw using the data produced on the vertex, tessellation and geometry stages,
        /// if rasterizer discard is disabled.
        /// </summary>
        public readonly void RunFragment()
        {
            bool tfEnabled = _state.State.TfEnable;

            if (!_state.State.RasterizeEnable && (!tfEnabled || !_context.Capabilities.SupportsTransformFeedback))
            {
                // No need to run fragment if rasterizer discard is enabled,
                // and we are emulating transform feedback or transform feedback is disabled.

                // Note: We might skip geometry shader here, but right now, this is fine,
                // because the only cases that triggers VTG to compute are geometry shader
                // being not supported, or the vertex pipeline doing store operations.
                // If the geometry shader does not do any store and rasterizer discard is enabled, the geometry shader can be skipped.
                // If the geometry shader does have stores, it would have been converted to compute too if stores are not supported.

                return;
            }

            int vertexDataBinding = _vertexAsCompute.Reservations.VertexOutputStorageBufferBinding;

            _context.Renderer.Pipeline.Barrier();

            _vacContext.VertexInfoBufferUpdater.SetVertexCounts(_count, _instanceCount, _firstVertex, _firstInstance);
            _vacContext.VertexInfoBufferUpdater.Commit();

            if (_geometryAsCompute != null)
            {
                BufferRange vertexBuffer = _vacContext.GetGeometryVertexDataBufferRange(_geometryVertexDataOffset, _geometryVertexDataSize);
                BufferRange indexBuffer = _vacContext.GetGeometryIndexDataBufferRange(_geometryIndexDataOffset, _geometryIndexDataSize);

                _context.Renderer.Pipeline.SetProgram(_vertexPassthroughProgram);
                _context.Renderer.Pipeline.SetIndexBuffer(indexBuffer, IndexType.UInt);
                _context.Renderer.Pipeline.SetStorageBuffers(stackalloc[] { new BufferAssignment(vertexDataBinding, vertexBuffer) });

                _context.Renderer.Pipeline.SetPrimitiveRestart(true, -1);
                _context.Renderer.Pipeline.SetPrimitiveTopology(GetGeometryOutputTopology(_geometryAsCompute.Info.GeometryVerticesPerPrimitive));

                _context.Renderer.Pipeline.DrawIndexed(_geometryIndexDataCount, 1, 0, 0, 0);

                // The index buffer and primitive restart host state were overwritten above,
                // so the guest state for both is marked dirty.
                _engine.ForceStateDirtyByIndex(StateUpdater.IndexBufferStateIndex);
                _engine.ForceStateDirtyByIndex(StateUpdater.PrimitiveRestartStateIndex);
            }
            else
            {
                BufferRange vertexDataRange = _vacContext.GetVertexDataBufferRange(_vertexDataOffset, _vertexDataSize);

                _context.Renderer.Pipeline.SetProgram(_vertexPassthroughProgram);
                _context.Renderer.Pipeline.SetStorageBuffers(stackalloc[] { new BufferAssignment(vertexDataBinding, vertexDataRange) });
                _context.Renderer.Pipeline.Draw(_count, _instanceCount, 0, 0);
            }
        }

        /// <summary>
        /// Gets a strip primitive topology from the vertices per primitive count.
        /// </summary>
        /// <param name="verticesPerPrimitive">Vertices per primitive count</param>
        /// <returns>Primitive topology</returns>
        private static PrimitiveTopology GetGeometryOutputTopology(int verticesPerPrimitive)
        {
            return verticesPerPrimitive switch
            {
                3 => PrimitiveTopology.TriangleStrip,
                2 => PrimitiveTopology.LineStrip,
                _ => PrimitiveTopology.Points,
            };
        }

        /// <summary>
        /// Gets the maximum number of complete primitive strips for a vertex count.
        /// </summary>
        /// <param name="verticesPerPrimitive">Vertices per primitive count</param>
        /// <param name="maxOutputVertices">Maximum geometry shader output vertices count</param>
        /// <returns>Maximum number of complete primitive strips</returns>
        private static int GetMaxCompleteStrips(int verticesPerPrimitive, int maxOutputVertices)
        {
            return maxOutputVertices / verticesPerPrimitive;
        }

        /// <summary>
        /// Binds a dummy buffer as vertex buffer into a buffer texture.
        /// </summary>
        /// <param name="reservations">Shader resource binding reservations</param>
        /// <param name="index">Buffer texture index</param>
        /// <param name="format">Buffer texture format</param>
        private readonly void SetDummyBufferTexture(ResourceReservations reservations, int index, Format format)
        {
            // Buffer texture slots 0 and 1 are used for the index and topology remap buffers,
            // vertex attribute textures start at slot 2.
            ITexture bufferTexture = _vacContext.EnsureBufferTexture(index + 2, format);
            bufferTexture.SetStorage(_vacContext.GetDummyBufferRange());

            _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, reservations.GetVertexBufferTextureBinding(index), bufferTexture, null);
        }

        /// <summary>
        /// Binds a vertex buffer into a buffer texture.
        /// </summary>
        /// <param name="reservations">Shader resource binding reservations</param>
        /// <param name="index">Buffer texture index</param>
        /// <param name="format">Buffer texture format</param>
        /// <param name="address">Address of the vertex buffer</param>
        /// <param name="size">Size of the buffer in bytes</param>
        private readonly void SetBufferTexture(ResourceReservations reservations, int index, Format format, ulong address, ulong size)
        {
            var memoryManager = _channel.MemoryManager;

            address = memoryManager.Translate(address);
            BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(address, size);

            // Vertex attribute textures start at slot 2 (see SetDummyBufferTexture).
            ITexture bufferTexture = _vacContext.EnsureBufferTexture(index + 2, format);
            bufferTexture.SetStorage(range);

            _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, reservations.GetVertexBufferTextureBinding(index), bufferTexture, null);
        }

        /// <summary>
        /// Binds the index buffer into a buffer texture.
        /// </summary>
        /// <param name="reservations">Shader resource binding reservations</param>
        /// <param name="firstIndex">First index of the index buffer</param>
        /// <param name="count">Index count</param>
        /// <param name="misalignedOffset">Offset that should be added when accessing the buffer texture on the shader</param>
        private readonly void SetIndexBufferTexture(ResourceReservations reservations, int firstIndex, int count, out int misalignedOffset)
        {
            ulong address = _state.State.IndexBufferState.Address.Pack();
            ulong indexOffset = (ulong)firstIndex;
            ulong size = (ulong)count;

            // Defaults are used for every index type not handled by the switch below.
            int shift = 0;
            Format format = Format.R8Uint;

            switch (_state.State.IndexBufferState.Type)
            {
                case IndexType.UShort:
                    shift = 1;
                    format = Format.R16Uint;
                    break;
                case IndexType.UInt:
                    shift = 2;
                    format = Format.R32Uint;
                    break;
            }

            // Convert from index units to bytes.
            indexOffset <<= shift;
            size <<= shift;

            var memoryManager = _channel.MemoryManager;

            address = memoryManager.Translate(address + indexOffset);
            ulong misalign = address & ((ulong)_context.Capabilities.TextureBufferOffsetAlignment - 1);
            BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(address - misalign, size + misalign);

            // The returned offset is in index units, not bytes.
            misalignedOffset = (int)misalign >> shift;

            SetIndexBufferTexture(reservations, range, format);
        }

        /// <summary>
        /// Sets the host buffer texture for the index buffer.
        /// </summary>
        /// <param name="reservations">Shader resource binding reservations</param>
        /// <param name="range">Index buffer range</param>
        /// <param name="format">Index buffer format</param>
        private readonly void SetIndexBufferTexture(ResourceReservations reservations, BufferRange range, Format format)
        {
            ITexture bufferTexture = _vacContext.EnsureBufferTexture(0, format);
            bufferTexture.SetStorage(range);

            _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, reservations.IndexBufferTextureBinding, bufferTexture, null);
        }

        /// <summary>
        /// Sets the host buffer texture for the topology remap buffer.
        /// </summary>
        /// <param name="reservations">Shader resource binding reservations</param>
        /// <param name="topology">Input topology</param>
        /// <param name="count">Input vertex count</param>
        private readonly void SetTopologyRemapBufferTexture(ResourceReservations reservations, PrimitiveTopology topology, int count)
        {
            ITexture bufferTexture = _vacContext.EnsureBufferTexture(1, Format.R32Uint);
            bufferTexture.SetStorage(_vacContext.GetOrCreateTopologyRemapBuffer(topology, count));

            _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, reservations.TopologyRemapBufferTextureBinding, bufferTexture, null);
        }

        /// <summary>
        /// Sets the host buffer texture to a generated sequential index buffer.
        /// </summary>
        /// <param name="reservations">Shader resource binding reservations</param>
        /// <param name="count">Vertex count</param>
        private readonly void SetSequentialIndexBufferTexture(ResourceReservations reservations, int count)
        {
            BufferHandle sequentialIndexBuffer = _vacContext.GetSequentialIndexBuffer(count);

            ITexture bufferTexture = _vacContext.EnsureBufferTexture(0, Format.R32Uint);
            bufferTexture.SetStorage(new BufferRange(sequentialIndexBuffer, 0, count * sizeof(uint)));

            _context.Renderer.Pipeline.SetTextureAndSampler(ShaderStage.Compute, reservations.IndexBufferTextureBinding, bufferTexture, null);
        }

        /// <summary>
        /// Gets the size of a vertex buffer based on the current 3D engine state.
        /// </summary>
        /// <param name="vbAddress">Vertex buffer address</param>
        /// <param name="vbEndAddress">Vertex buffer end address (exclusive)</param>
        /// <param name="vbStride">Vertex buffer stride</param>
        /// <param name="indexed">Whether the draw is indexed</param>
        /// <param name="instanced">Whether the draw is instanced</param>
        /// <param name="firstVertex">First vertex index</param>
        /// <param name="vertexCount">Vertex count</param>
        /// <returns>Size of the vertex buffer, in bytes</returns>
        private readonly ulong GetVertexBufferSize(ulong vbAddress, ulong vbEndAddress, int vbStride, bool indexed, bool instanced, int firstVertex, int vertexCount)
        {
            IndexType indexType = _state.State.IndexBufferState.Type;
            bool indexTypeSmall = indexType == IndexType.UByte || indexType == IndexType.UShort;
            ulong vbSize = vbEndAddress - vbAddress + 1;
            ulong size;

            if (indexed || vbStride == 0 || instanced)
            {
                // This size may be (much) larger than the real vertex buffer size.
                // Avoid calculating it this way, unless we don't have any other option.

                size = vbSize;

                if (vbStride > 0 && indexTypeSmall && indexed && !instanced)
                {
                    // If the index type is a small integer type, then we might be still able
                    // to reduce the vertex buffer size based on the maximum possible index value.

                    ulong maxVertexBufferSize = indexType == IndexType.UByte ? 0x100UL : 0x10000UL;

                    maxVertexBufferSize += _state.State.FirstVertex;
                    maxVertexBufferSize *= (uint)vbStride;

                    size = Math.Min(size, maxVertexBufferSize);
                }
            }
            else
            {
                // For non-indexed draws, we can guess the size from the vertex count
                // and stride.

                int firstInstance = (int)_state.State.FirstInstance;

                // Done with 64-bit arithmetic: the sum and the product could silently overflow
                // a 32-bit integer for large draws before being widened. A negative total still
                // converts to a huge unsigned value, which makes Math.Min select vbSize.
                ulong drawSize = (ulong)(((long)firstInstance + firstVertex + vertexCount) * vbStride);

                size = Math.Min(vbSize, drawSize);
            }

            return size;
        }

        ///
        /// Aligns a size to a given alignment value.
+ /// + /// Size + /// Alignment + /// Aligned size + private static ulong Align(ulong size, int alignment) + { + ulong align = (ulong)alignment; + + size += align - 1; + + size /= align; + size *= align; + + return size; + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs index d7ee24b1..18e7ac00 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs @@ -1,4 +1,5 @@ using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw; using Ryujinx.Graphics.Gpu.Engine.Types; using Ryujinx.Graphics.Gpu.Memory; using System; @@ -8,7 +9,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// /// Draw manager. /// - class DrawManager + class DrawManager : IDisposable { // Since we don't know the index buffer size for indirect draws, // we must assume a minimum and maximum size and use that for buffer data update purposes. @@ -20,6 +21,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed private readonly DeviceStateWithShadow _state; private readonly DrawState _drawState; private readonly SpecializationStateUpdater _currentSpecState; + private readonly VtgAsCompute _vtgAsCompute; private bool _topologySet; private bool _instancedDrawPending; @@ -53,6 +55,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed _state = state; _drawState = drawState; _currentSpecState = spec; + _vtgAsCompute = new(context, channel, state); } /// @@ -127,7 +130,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { if (renderEnable == ConditionalRenderEnabled.False) { - PerformDeferredDraws(); + PerformDeferredDraws(engine); } _drawState.DrawIndexed = false; @@ -190,13 +193,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed _channel.BufferManager.SetIndexBuffer(br, IndexType.UInt); - _context.Renderer.Pipeline.DrawIndexed(inlineIndexCount, 1, firstIndex, firstVertex, firstInstance); + DrawImpl(engine, inlineIndexCount, 1, firstIndex, 
firstVertex, firstInstance, indexed: true); } else if (_drawState.DrawIndexed) { int firstVertex = (int)_state.State.FirstVertex; - _context.Renderer.Pipeline.DrawIndexed(indexCount, 1, firstIndex, firstVertex, firstInstance); + DrawImpl(engine, indexCount, 1, firstIndex, firstVertex, firstInstance, indexed: true); } else { @@ -204,7 +207,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed var drawState = _state.State.VertexBufferDrawState; #pragma warning restore IDE0059 - _context.Renderer.Pipeline.Draw(drawVertexCount, 1, drawFirstVertex, firstInstance); + DrawImpl(engine, drawVertexCount, 1, 0, drawFirstVertex, firstInstance, indexed: false); } _drawState.DrawIndexed = false; @@ -219,24 +222,26 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// Starts draw. /// This sets primitive type and instanced draw parameters. /// + /// 3D engine where this method is being called /// Method call argument - public void DrawBegin(int argument) + public void DrawBegin(ThreedClass engine, int argument) { bool incrementInstance = (argument & (1 << 26)) != 0; bool resetInstance = (argument & (1 << 27)) == 0; PrimitiveType type = (PrimitiveType)(argument & 0xffff); - DrawBegin(incrementInstance, resetInstance, type); + DrawBegin(engine, incrementInstance, resetInstance, type); } /// /// Starts draw. /// This sets primitive type and instanced draw parameters. 
/// + /// 3D engine where this method is being called /// Indicates if the current instance should be incremented /// Indicates if the current instance should be set to zero /// Primitive type - private void DrawBegin(bool incrementInstance, bool resetInstance, PrimitiveType primitiveType) + private void DrawBegin(ThreedClass engine, bool incrementInstance, bool resetInstance, PrimitiveType primitiveType) { if (incrementInstance) { @@ -244,7 +249,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed } else if (resetInstance) { - PerformDeferredDraws(); + PerformDeferredDraws(engine); _instanceIndex = 0; } @@ -364,7 +369,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// True to increment the current instance value, false otherwise private void DrawIndexBufferBeginEndInstance(ThreedClass engine, int argument, bool instanced) { - DrawBegin(instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf)); + DrawBegin(engine, instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf)); int firstIndex = argument & 0xffff; int indexCount = (argument >> 16) & 0xfff; @@ -409,7 +414,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// True to increment the current instance value, false otherwise private void DrawVertexArrayBeginEndInstance(ThreedClass engine, int argument, bool instanced) { - DrawBegin(instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf)); + DrawBegin(engine, instanced, !instanced, (PrimitiveType)((argument >> 28) & 0xf)); int firstVertex = argument & 0xffff; int vertexCount = (argument >> 16) & 0xfff; @@ -541,23 +546,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed engine.UpdateState(); - if (instanceCount > 1) - { - // Must be called after UpdateState as it assumes the shader state - // has already been set, and that bindings have been updated already. 
- - _channel.BufferManager.SetInstancedDrawVertexCount(count); - } + DrawImpl(engine, count, instanceCount, firstIndex, firstVertex, firstInstance, indexed); if (indexed) { - _context.Renderer.Pipeline.DrawIndexed(count, instanceCount, firstIndex, firstVertex, firstInstance); _state.State.FirstVertex = 0; } - else - { - _context.Renderer.Pipeline.Draw(count, instanceCount, firstVertex, firstInstance); - } _state.State.FirstInstance = 0; @@ -569,6 +563,67 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed } } + /// + /// Performs a indexed or non-indexed draw. + /// + /// 3D engine where this method is being called + /// Index count for indexed draws, vertex count for non-indexed draws + /// Instance count + /// First index on the index buffer for indexed draws, ignored for non-indexed draws + /// First vertex on the vertex buffer + /// First instance + /// True if the draw is indexed, false otherwise + private void DrawImpl( + ThreedClass engine, + int count, + int instanceCount, + int firstIndex, + int firstVertex, + int firstInstance, + bool indexed) + { + if (instanceCount > 1) + { + _channel.BufferManager.SetInstancedDrawVertexCount(count); + } + + if (_drawState.VertexAsCompute != null) + { + _vtgAsCompute.DrawAsCompute( + engine, + _drawState.VertexAsCompute, + _drawState.GeometryAsCompute, + _drawState.VertexPassthrough, + _drawState.Topology, + count, + instanceCount, + firstIndex, + firstVertex, + firstInstance, + indexed); + + if (_drawState.GeometryAsCompute != null) + { + // Geometry draws need to change the topology, so we need to set it here again + // if we are going to do a regular draw. + // Would have been better to do that on the callee, but doing it here + // avoids having to pass the draw manager instance. 
+ ForceStateDirty(); + } + } + else + { + if (indexed) + { + _context.Renderer.Pipeline.DrawIndexed(count, instanceCount, firstIndex, firstVertex, firstInstance); + } + else + { + _context.Renderer.Pipeline.Draw(count, instanceCount, firstVertex, firstInstance); + } + } + } + /// /// Performs a indirect draw, with parameters from a GPU buffer. /// @@ -667,43 +722,42 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// Once we detect the last instanced draw, then we perform the host instanced draw, /// with the accumulated instance count. /// - public void PerformDeferredDraws() + /// 3D engine where this method is being called + public void PerformDeferredDraws(ThreedClass engine) { // Perform any pending instanced draw. if (_instancedDrawPending) { _instancedDrawPending = false; + int instanceCount = _instanceIndex + 1; + int firstInstance = _instancedFirstInstance; bool indexedInline = _instancedIndexedInline; if (_instancedIndexed || indexedInline) { + int indexCount = _instancedIndexCount; + if (indexedInline) { int inlineIndexCount = _drawState.IbStreamer.GetAndResetInlineIndexCount(_context.Renderer); BufferRange br = new(_drawState.IbStreamer.GetInlineIndexBuffer(), 0, inlineIndexCount * 4); _channel.BufferManager.SetIndexBuffer(br, IndexType.UInt); + indexCount = inlineIndexCount; } - _channel.BufferManager.SetInstancedDrawVertexCount(_instancedIndexCount); + int firstIndex = _instancedFirstIndex; + int firstVertex = _instancedFirstVertex; - _context.Renderer.Pipeline.DrawIndexed( - _instancedIndexCount, - _instanceIndex + 1, - _instancedFirstIndex, - _instancedFirstVertex, - _instancedFirstInstance); + DrawImpl(engine, indexCount, instanceCount, firstIndex, firstVertex, firstInstance, indexed: true); } else { - _channel.BufferManager.SetInstancedDrawVertexCount(_instancedDrawStateCount); + int vertexCount = _instancedDrawStateCount; + int firstVertex = _instancedDrawStateFirst; - _context.Renderer.Pipeline.Draw( - _instancedDrawStateCount, - _instanceIndex 
+ 1, - _instancedDrawStateFirst, - _instancedFirstInstance); + DrawImpl(engine, vertexCount, instanceCount, 0, firstVertex, firstInstance, indexed: false); } } } @@ -866,5 +920,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed _context.Renderer.Pipeline.EndHostConditionalRendering(); } } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + _vtgAsCompute.Dispose(); + } + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs index 12099aef..cb43b002 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawState.cs @@ -1,4 +1,5 @@ using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Gpu.Shader; namespace Ryujinx.Graphics.Gpu.Engine.Threed { @@ -61,5 +62,20 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// Index buffer data streamer for inline index buffer updates, such as those used in legacy OpenGL. /// public IbStreamer IbStreamer = new(); + + /// + /// If the vertex shader is emulated on compute, this should be set to the compute program, otherwise it should be null. + /// + public ShaderAsCompute VertexAsCompute; + + /// + /// If a geometry shader exists and is emulated on compute, this should be set to the compute program, otherwise it should be null. + /// + public ShaderAsCompute GeometryAsCompute; + + /// + /// If the vertex shader is emulated on compute, this should be set to the passthrough vertex program, otherwise it should be null. 
+ /// + public IProgram VertexPassthrough; } } diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs index e0607fbf..4fbbee3b 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/SpecializationStateUpdater.cs @@ -218,11 +218,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { bool changed = false; ref Array32 attributeTypes = ref _graphics.AttributeTypes; - bool supportsScaledFormats = _context.Capabilities.SupportsScaledVertexFormats; + bool mayConvertVtgToCompute = ShaderCache.MayConvertVtgToCompute(ref _context.Capabilities); + bool supportsScaledFormats = _context.Capabilities.SupportsScaledVertexFormats && !mayConvertVtgToCompute; for (int location = 0; location < state.Length; location++) { VertexAttribType type = state[location].UnpackType(); + VertexAttribSize size = state[location].UnpackSize(); AttributeType value; @@ -247,6 +249,18 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed }; } + if (mayConvertVtgToCompute && (size == VertexAttribSize.Rgb10A2 || size == VertexAttribSize.Rg11B10)) + { + value |= AttributeType.Packed; + + if (type == VertexAttribType.Snorm || + type == VertexAttribType.Sint || + type == VertexAttribType.Sscaled) + { + value |= AttributeType.PackedRgb10A2Signed; + } + } + if (attributeTypes[location] != value) { attributeTypes[location] = value; diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs index 1f919d9b..48a497b5 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/StateUpdater.cs @@ -20,6 +20,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed public const int RasterizerStateIndex = 15; public const int ScissorStateIndex = 16; public const int VertexBufferStateIndex = 0; + public const int IndexBufferStateIndex = 23; public 
const int PrimitiveRestartStateIndex = 12; public const int RenderTargetStateIndex = 27; @@ -290,7 +291,13 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed // of the shader for the new state. if (_shaderSpecState != null && _currentSpecState.HasChanged()) { - if (!_shaderSpecState.MatchesGraphics(_channel, ref _currentSpecState.GetPoolState(), ref _currentSpecState.GetGraphicsState(), _vsUsesDrawParameters, false)) + if (!_shaderSpecState.MatchesGraphics( + _channel, + ref _currentSpecState.GetPoolState(), + ref _currentSpecState.GetGraphicsState(), + _drawState.VertexAsCompute != null, + _vsUsesDrawParameters, + checkTextures: false)) { // Shader must be reloaded. _vtgWritesRtLayer should not change. UpdateShaderState(); @@ -1453,6 +1460,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed _fsReadsFragCoord = false; } + if (gs.VertexAsCompute != null) + { + _drawState.VertexAsCompute = gs.VertexAsCompute; + _drawState.GeometryAsCompute = gs.GeometryAsCompute; + _drawState.VertexPassthrough = gs.HostProgram; + } + else + { + _drawState.VertexAsCompute = null; + _drawState.GeometryAsCompute = null; + _drawState.VertexPassthrough = null; + } + _context.Renderer.Pipeline.SetProgram(gs.HostProgram); } @@ -1540,5 +1560,14 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { _updateTracker.ForceDirty(ShaderStateIndex); } + + /// + /// Forces a register group as dirty, by index. + /// + /// Index of the group to be dirtied + public void ForceDirty(int groupIndex) + { + _updateTracker.ForceDirty(groupIndex); + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs index 1f662890..7bc2970f 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs @@ -13,7 +13,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// /// Represents a 3D engine class. 
/// - class ThreedClass : IDeviceState + class ThreedClass : IDeviceState, IDisposable { private readonly GpuContext _context; private readonly GPFifoClass _fifoClass; @@ -178,6 +178,15 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed _stateUpdater.SetDirty(offset); } + /// + /// Marks the specified register range for a group index as dirty, forcing the associated state to update on the next draw. + /// + /// Index of the group to dirty + public void ForceStateDirtyByIndex(int groupIndex) + { + _stateUpdater.ForceDirty(groupIndex); + } + /// /// Forces the shaders to be rebound on the next draw. /// @@ -207,7 +216,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// public void PerformDeferredDraws() { - _drawManager.PerformDeferredDraws(); + _drawManager.PerformDeferredDraws(this); } /// @@ -402,7 +411,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed /// Method call argument private void DrawBegin(int argument) { - _drawManager.DrawBegin(argument); + _drawManager.DrawBegin(this, argument); } /// @@ -617,5 +626,19 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { _drawManager.Clear(this, argument, layerCount); } + + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + _drawManager.Dispose(); + } + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } } } diff --git a/src/Ryujinx.Graphics.Gpu/GpuChannel.cs b/src/Ryujinx.Graphics.Gpu/GpuChannel.cs index 8fe64381..d70c9645 100644 --- a/src/Ryujinx.Graphics.Gpu/GpuChannel.cs +++ b/src/Ryujinx.Graphics.Gpu/GpuChannel.cs @@ -135,6 +135,7 @@ namespace Ryujinx.Graphics.Gpu /// private void Destroy() { + _processor.Dispose(); TextureManager.Dispose(); var oldMemoryManager = Interlocked.Exchange(ref _memoryManager, null); diff --git a/src/Ryujinx.Graphics.Gpu/Image/FormatTable.cs b/src/Ryujinx.Graphics.Gpu/Image/FormatTable.cs index fcc6b8cf..1b517e63 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/FormatTable.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/FormatTable.cs @@ -557,6 
+557,91 @@ namespace Ryujinx.Graphics.Gpu.Image }; #pragma warning restore IDE0055 + // Note: Some of those formats have been changed and requires conversion on the shader, + // as GPUs don't support them when used as buffer texture format. + private static readonly Dictionary _singleComponentAttribFormats = new() + { + { VertexAttributeFormat.R8Unorm, (Format.R8Unorm, 1) }, + { VertexAttributeFormat.R8Snorm, (Format.R8Snorm, 1) }, + { VertexAttributeFormat.R8Uint, (Format.R8Uint, 1) }, + { VertexAttributeFormat.R8Sint, (Format.R8Sint, 1) }, + { VertexAttributeFormat.R16Float, (Format.R16Float, 1) }, + { VertexAttributeFormat.R16Unorm, (Format.R16Unorm, 1) }, + { VertexAttributeFormat.R16Snorm, (Format.R16Snorm, 1) }, + { VertexAttributeFormat.R16Uint, (Format.R16Uint, 1) }, + { VertexAttributeFormat.R16Sint, (Format.R16Sint, 1) }, + { VertexAttributeFormat.R32Float, (Format.R32Float, 1) }, + { VertexAttributeFormat.R32Uint, (Format.R32Uint, 1) }, + { VertexAttributeFormat.R32Sint, (Format.R32Sint, 1) }, + { VertexAttributeFormat.R8G8Unorm, (Format.R8Unorm, 2) }, + { VertexAttributeFormat.R8G8Snorm, (Format.R8Snorm, 2) }, + { VertexAttributeFormat.R8G8Uint, (Format.R8Uint, 2) }, + { VertexAttributeFormat.R8G8Sint, (Format.R8Sint, 2) }, + { VertexAttributeFormat.R16G16Float, (Format.R16Float, 2) }, + { VertexAttributeFormat.R16G16Unorm, (Format.R16Unorm, 2) }, + { VertexAttributeFormat.R16G16Snorm, (Format.R16Snorm, 2) }, + { VertexAttributeFormat.R16G16Uint, (Format.R16Uint, 2) }, + { VertexAttributeFormat.R16G16Sint, (Format.R16Sint, 2) }, + { VertexAttributeFormat.R32G32Float, (Format.R32Float, 2) }, + { VertexAttributeFormat.R32G32Uint, (Format.R32Uint, 2) }, + { VertexAttributeFormat.R32G32Sint, (Format.R32Sint, 2) }, + { VertexAttributeFormat.R8G8B8Unorm, (Format.R8Unorm, 3) }, + { VertexAttributeFormat.R8G8B8Snorm, (Format.R8Snorm, 3) }, + { VertexAttributeFormat.R8G8B8Uint, (Format.R8Uint, 3) }, + { VertexAttributeFormat.R8G8B8Sint, (Format.R8Sint, 3) }, + { 
VertexAttributeFormat.R16G16B16Float, (Format.R16Float, 3) }, + { VertexAttributeFormat.R16G16B16Unorm, (Format.R16Unorm, 3) }, + { VertexAttributeFormat.R16G16B16Snorm, (Format.R16Snorm, 3) }, + { VertexAttributeFormat.R16G16B16Uint, (Format.R16Uint, 3) }, + { VertexAttributeFormat.R16G16B16Sint, (Format.R16Sint, 3) }, + { VertexAttributeFormat.R32G32B32Float, (Format.R32Float, 3) }, + { VertexAttributeFormat.R32G32B32Uint, (Format.R32Uint, 3) }, + { VertexAttributeFormat.R32G32B32Sint, (Format.R32Sint, 3) }, + { VertexAttributeFormat.R8G8B8A8Unorm, (Format.R8Unorm, 4) }, + { VertexAttributeFormat.R8G8B8A8Snorm, (Format.R8Snorm, 4) }, + { VertexAttributeFormat.R8G8B8A8Uint, (Format.R8Uint, 4) }, + { VertexAttributeFormat.R8G8B8A8Sint, (Format.R8Sint, 4) }, + { VertexAttributeFormat.R16G16B16A16Float, (Format.R16Float, 4) }, + { VertexAttributeFormat.R16G16B16A16Unorm, (Format.R16Unorm, 4) }, + { VertexAttributeFormat.R16G16B16A16Snorm, (Format.R16Snorm, 4) }, + { VertexAttributeFormat.R16G16B16A16Uint, (Format.R16Uint, 4) }, + { VertexAttributeFormat.R16G16B16A16Sint, (Format.R16Sint, 4) }, + { VertexAttributeFormat.R32G32B32A32Float, (Format.R32Float, 4) }, + { VertexAttributeFormat.R32G32B32A32Uint, (Format.R32Uint, 4) }, + { VertexAttributeFormat.R32G32B32A32Sint, (Format.R32Sint, 4) }, + { VertexAttributeFormat.A2B10G10R10Unorm, (Format.R10G10B10A2Unorm, 4) }, + { VertexAttributeFormat.A2B10G10R10Uint, (Format.R10G10B10A2Uint, 4) }, + { VertexAttributeFormat.B10G11R11Float, (Format.R11G11B10Float, 3) }, + { VertexAttributeFormat.R8Uscaled, (Format.R8Uint, 1) }, // Uscaled -> Uint + { VertexAttributeFormat.R8Sscaled, (Format.R8Sint, 1) }, // Sscaled -> Sint + { VertexAttributeFormat.R16Uscaled, (Format.R16Uint, 1) }, // Uscaled -> Uint + { VertexAttributeFormat.R16Sscaled, (Format.R16Sint, 1) }, // Sscaled -> Sint + { VertexAttributeFormat.R32Uscaled, (Format.R32Uint, 1) }, // Uscaled -> Uint + { VertexAttributeFormat.R32Sscaled, (Format.R32Sint, 1) }, // 
Sscaled -> Sint + { VertexAttributeFormat.R8G8Uscaled, (Format.R8Uint, 2) }, // Uscaled -> Uint + { VertexAttributeFormat.R8G8Sscaled, (Format.R8Sint, 2) }, // Sscaled -> Sint + { VertexAttributeFormat.R16G16Uscaled, (Format.R16Uint, 2) }, // Uscaled -> Uint + { VertexAttributeFormat.R16G16Sscaled, (Format.R16Sint, 2) }, // Sscaled -> Sint + { VertexAttributeFormat.R32G32Uscaled, (Format.R32Uint, 2) }, // Uscaled -> Uint + { VertexAttributeFormat.R32G32Sscaled, (Format.R32Sint, 2) }, // Sscaled -> Sint + { VertexAttributeFormat.R8G8B8Uscaled, (Format.R8Uint, 3) }, // Uscaled -> Uint + { VertexAttributeFormat.R8G8B8Sscaled, (Format.R8Sint, 3) }, // Sscaled -> Sint + { VertexAttributeFormat.R16G16B16Uscaled, (Format.R16Uint, 3) }, // Uscaled -> Uint + { VertexAttributeFormat.R16G16B16Sscaled, (Format.R16Sint, 3) }, // Sscaled -> Sint + { VertexAttributeFormat.R32G32B32Uscaled, (Format.R32Uint, 3) }, // Uscaled -> Uint + { VertexAttributeFormat.R32G32B32Sscaled, (Format.R32Sint , 3) }, // Sscaled -> Sint + { VertexAttributeFormat.R8G8B8A8Uscaled, (Format.R8Uint, 4) }, // Uscaled -> Uint + { VertexAttributeFormat.R8G8B8A8Sscaled, (Format.R8Sint, 4) }, // Sscaled -> Sint + { VertexAttributeFormat.R16G16B16A16Uscaled, (Format.R16Uint, 4) }, // Uscaled -> Uint + { VertexAttributeFormat.R16G16B16A16Sscaled, (Format.R16Sint, 4) }, // Sscaled -> Sint + { VertexAttributeFormat.R32G32B32A32Uscaled, (Format.R32Uint, 4) }, // Uscaled -> Uint + { VertexAttributeFormat.R32G32B32A32Sscaled, (Format.R32Sint, 4) }, // Sscaled -> Sint + { VertexAttributeFormat.A2B10G10R10Snorm, (Format.R10G10B10A2Uint, 4) }, // Snorm -> Uint + { VertexAttributeFormat.A2B10G10R10Sint, (Format.R10G10B10A2Uint, 4) }, // Sint -> Uint + { VertexAttributeFormat.A2B10G10R10Uscaled, (Format.R10G10B10A2Uint, 4) }, // Uscaled -> Uint + { VertexAttributeFormat.A2B10G10R10Sscaled, (Format.R10G10B10A2Sint, 4) } // Sscaled -> Sint + }; + /// /// Try getting the texture format from an encoded format integer from the 
Maxwell texture descriptor. /// @@ -581,5 +666,22 @@ namespace Ryujinx.Graphics.Gpu.Image { return _attribFormats.TryGetValue((VertexAttributeFormat)encoded, out format); } + + /// + /// Try getting a single component vertex attribute format from an encoded format integer from Maxwell attribute registers. + /// + /// The encoded format integer from the attribute registers + /// The output single component vertex attribute format + /// Number of components that the format has + /// True if the format is valid, false otherwise + public static bool TryGetSingleComponentAttribFormat(uint encoded, out Format format, out int componentsCount) + { + bool result = _singleComponentAttribFormats.TryGetValue((VertexAttributeFormat)encoded, out var tuple); + + format = tuple.Item1; + componentsCount = tuple.Item2; + + return result; + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index bf4cb5d0..8e9b4b85 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -6,7 +6,6 @@ using Ryujinx.Graphics.Shader; using System; using System.Collections.Generic; using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; namespace Ryujinx.Graphics.Gpu.Memory { @@ -15,9 +14,6 @@ namespace Ryujinx.Graphics.Gpu.Memory /// class BufferManager { - private const int TfInfoVertexCountOffset = Constants.TotalTransformFeedbackBuffers * sizeof(int); - private const int TfInfoBufferSize = TfInfoVertexCountOffset + sizeof(int); - private readonly GpuContext _context; private readonly GpuChannel _channel; @@ -104,9 +100,6 @@ namespace Ryujinx.Graphics.Gpu.Memory private readonly BuffersPerStage[] _gpStorageBuffers; private readonly BuffersPerStage[] _gpUniformBuffers; - private BufferHandle _tfInfoBuffer; - private readonly int[] _tfInfoData; - private bool _gpStorageBuffersDirty; private bool _gpUniformBuffersDirty; @@ -146,11 +139,6 @@ namespace 
Ryujinx.Graphics.Gpu.Memory _bufferTextures = new List(); _ranges = new BufferAssignment[Constants.TotalGpUniformBuffers * Constants.ShaderStages]; - - if (!context.Capabilities.SupportsTransformFeedback) - { - _tfInfoData = new int[Constants.TotalTransformFeedbackBuffers]; - } } @@ -339,13 +327,10 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Vertex count per instance public void SetInstancedDrawVertexCount(int vertexCount) { - if (!_context.Capabilities.SupportsTransformFeedback && - HasTransformFeedbackOutputs && - _tfInfoBuffer != BufferHandle.Null) + if (!_context.Capabilities.SupportsTransformFeedback && HasTransformFeedbackOutputs) { - Span data = stackalloc byte[sizeof(int)]; - MemoryMarshal.Cast(data)[0] = vertexCount; - _context.Renderer.SetBufferData(_tfInfoBuffer, TfInfoVertexCountOffset, data); + _context.SupportBufferUpdater.SetTfeVertexCount(vertexCount); + _context.SupportBufferUpdater.Commit(); } } @@ -607,17 +592,7 @@ namespace Ryujinx.Graphics.Gpu.Memory } else if (HasTransformFeedbackOutputs) { - Span info = _tfInfoData.AsSpan(); - Span buffers = stackalloc BufferAssignment[Constants.TotalTransformFeedbackBuffers + 1]; - - bool needsDataUpdate = false; - - if (_tfInfoBuffer == BufferHandle.Null) - { - _tfInfoBuffer = _context.Renderer.CreateBuffer(TfInfoBufferSize, BufferAccess.Stream); - } - - buffers[0] = new BufferAssignment(0, new BufferRange(_tfInfoBuffer, 0, TfInfoBufferSize)); + Span buffers = stackalloc BufferAssignment[Constants.TotalTransformFeedbackBuffers]; int alignment = _context.Capabilities.StorageBufferOffsetAlignment; @@ -627,7 +602,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (tfb.Address == 0) { - buffers[1 + index] = new BufferAssignment(1 + index, BufferRange.Empty); + buffers[index] = new BufferAssignment(index, BufferRange.Empty); } else { @@ -637,22 +612,12 @@ namespace Ryujinx.Graphics.Gpu.Memory int tfeOffset = ((int)tfb.Address & (alignment - 1)) / 4; - if (info[index] != tfeOffset) - { - info[index] = tfeOffset; - 
needsDataUpdate = true; - } + _context.SupportBufferUpdater.SetTfeOffset(index, tfeOffset); - buffers[1 + index] = new BufferAssignment(1 + index, bufferCache.GetBufferRange(address, size, write: true)); + buffers[index] = new BufferAssignment(index, bufferCache.GetBufferRange(address, size, write: true)); } } - if (needsDataUpdate) - { - Span infoData = MemoryMarshal.Cast(info); - _context.Renderer.SetBufferData(_tfInfoBuffer, 0, infoData); - } - _context.Renderer.Pipeline.SetStorageBuffers(buffers); } } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferUpdater.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferUpdater.cs new file mode 100644 index 00000000..02090c04 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferUpdater.cs @@ -0,0 +1,123 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; +using System; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + /// + /// Buffer data updater. + /// + class BufferUpdater : IDisposable + { + private BufferHandle _handle; + + /// + /// Handle of the buffer. + /// + public BufferHandle Handle => _handle; + + private readonly IRenderer _renderer; + private int _startOffset = -1; + private int _endOffset = -1; + + /// + /// Creates a new instance of the buffer updater. + /// + /// Renderer that the buffer will be used with + public BufferUpdater(IRenderer renderer) + { + _renderer = renderer; + } + + /// + /// Mark a region of the buffer as modified and needing to be sent to the GPU. + /// + /// Start offset of the region in bytes + /// Size of the region in bytes + protected void MarkDirty(int startOffset, int byteSize) + { + int endOffset = startOffset + byteSize; + + if (_startOffset == -1) + { + _startOffset = startOffset; + _endOffset = endOffset; + } + else + { + if (startOffset < _startOffset) + { + _startOffset = startOffset; + } + + if (endOffset > _endOffset) + { + _endOffset = endOffset; + } + } + } + + /// + /// Submits all pending buffer updates to the GPU. 
+ /// + /// All data that should be sent to the GPU. Only the modified regions will be updated + /// Optional binding to bind the buffer if a new buffer was created + protected void Commit(ReadOnlySpan data, int binding = -1) + { + if (_startOffset != -1) + { + if (_handle == BufferHandle.Null) + { + _handle = _renderer.CreateBuffer(data.Length, BufferAccess.Stream); + _renderer.Pipeline.ClearBuffer(_handle, 0, data.Length, 0); + + if (binding >= 0) + { + var range = new BufferRange(_handle, 0, data.Length); + _renderer.Pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, range) }); + } + }; + + _renderer.SetBufferData(_handle, _startOffset, data[_startOffset.._endOffset]); + + _startOffset = -1; + _endOffset = -1; + } + } + + /// + /// Gets a reference to a given element of a vector. + /// + /// Vector to get the element reference from + /// Element index + /// Reference to the specified element + protected static ref T GetElementRef(ref Vector4 vector, int elementIndex) + { + switch (elementIndex) + { + case 0: + return ref vector.X; + case 1: + return ref vector.Y; + case 2: + return ref vector.Z; + case 3: + return ref vector.W; + default: + throw new ArgumentOutOfRangeException(nameof(elementIndex)); + } + } + + /// + /// Destroys the buffer. + /// + public void Dispose() + { + if (_handle != BufferHandle.Null) + { + _renderer.DeleteBuffer(_handle); + _handle = BufferHandle.Null; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Memory/SupportBufferUpdater.cs b/src/Ryujinx.Graphics.Gpu/Memory/SupportBufferUpdater.cs index c1e91c54..fb141db4 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/SupportBufferUpdater.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/SupportBufferUpdater.cs @@ -9,56 +9,21 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Support buffer data updater. 
/// - class SupportBufferUpdater : IDisposable + class SupportBufferUpdater : BufferUpdater { private SupportBuffer _data; - private BufferHandle _handle; - - private readonly IRenderer _renderer; - private int _startOffset = -1; - private int _endOffset = -1; /// /// Creates a new instance of the support buffer updater. /// /// Renderer that the support buffer will be used with - public SupportBufferUpdater(IRenderer renderer) + public SupportBufferUpdater(IRenderer renderer) : base(renderer) { - _renderer = renderer; - var defaultScale = new Vector4 { X = 1f, Y = 0f, Z = 0f, W = 0f }; _data.RenderScale.AsSpan().Fill(defaultScale); DirtyRenderScale(0, SupportBuffer.RenderScaleMaxCount); } - /// - /// Mark a region of the support buffer as modified and needing to be sent to the GPU. - /// - /// Start offset of the region in bytes - /// Size of the region in bytes - private void MarkDirty(int startOffset, int byteSize) - { - int endOffset = startOffset + byteSize; - - if (_startOffset == -1) - { - _startOffset = startOffset; - _endOffset = endOffset; - } - else - { - if (startOffset < _startOffset) - { - _startOffset = startOffset; - } - - if (endOffset > _endOffset) - { - _endOffset = endOffset; - } - } - } - /// /// Marks the fragment render scale count as being modified. /// @@ -220,40 +185,40 @@ namespace Ryujinx.Graphics.Gpu.Memory } /// - /// Submits all pending buffer updates to the GPU. + /// Sets offset for the misaligned portion of a transform feedback buffer, and the buffer size, for transform feedback emulation. 
/// - public void Commit() + /// Index of the transform feedback buffer + /// Misaligned offset of the buffer + public void SetTfeOffset(int bufferIndex, int offset) { - if (_startOffset != -1) + ref int currentOffset = ref GetElementRef(ref _data.TfeOffset, bufferIndex); + + if (currentOffset != offset) { - if (_handle == BufferHandle.Null) - { - _handle = _renderer.CreateBuffer(SupportBuffer.RequiredSize, BufferAccess.Stream); - _renderer.Pipeline.ClearBuffer(_handle, 0, SupportBuffer.RequiredSize, 0); - - var range = new BufferRange(_handle, 0, SupportBuffer.RequiredSize); - _renderer.Pipeline.SetUniformBuffers(stackalloc[] { new BufferAssignment(0, range) }); - } - - ReadOnlySpan data = MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref _data, 1)); - - _renderer.SetBufferData(_handle, _startOffset, data[_startOffset.._endOffset]); - - _startOffset = -1; - _endOffset = -1; + currentOffset = offset; + MarkDirty(SupportBuffer.TfeOffsetOffset + bufferIndex * sizeof(int), sizeof(int)); } } /// - /// Destroys the support buffer. + /// Sets the vertex count used for transform feedback emulation with instanced draws. /// - public void Dispose() + /// Vertex count of the instanced draw + public void SetTfeVertexCount(int vertexCount) { - if (_handle != BufferHandle.Null) + if (_data.TfeVertexCount.X != vertexCount) { - _renderer.DeleteBuffer(_handle); - _handle = BufferHandle.Null; + _data.TfeVertexCount.X = vertexCount; + MarkDirty(SupportBuffer.TfeVertexCountOffset, sizeof(int)); } } + + /// + /// Submits all pending buffer updates to the GPU. 
+ /// + public void Commit() + { + Commit(MemoryMarshal.Cast(MemoryMarshal.CreateSpan(ref _data, 1)), SupportBuffer.Binding); + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs index ff9c39a1..600c8a98 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/CachedShaderProgram.cs @@ -14,6 +14,16 @@ namespace Ryujinx.Graphics.Gpu.Shader /// public IProgram HostProgram { get; } + /// + /// Optional vertex shader converted to compute. + /// + public ShaderAsCompute VertexAsCompute { get; } + + /// + /// Optional geometry shader converted to compute. + /// + public ShaderAsCompute GeometryAsCompute { get; } + /// /// GPU state used to create this version of the shader. /// @@ -45,12 +55,25 @@ namespace Ryujinx.Graphics.Gpu.Shader Bindings = new CachedShaderBindings(shaders.Length == 1, shaders); } + public CachedShaderProgram( + IProgram hostProgram, + ShaderAsCompute vertexAsCompute, + ShaderAsCompute geometryAsCompute, + ShaderSpecializationState specializationState, + CachedShaderStage[] shaders) : this(hostProgram, specializationState, shaders) + { + VertexAsCompute = vertexAsCompute; + GeometryAsCompute = geometryAsCompute; + } + /// /// Dispose of the host shader resources. 
/// public void Dispose() { HostProgram.Dispose(); + VertexAsCompute?.HostProgram.Dispose(); + GeometryAsCompute?.HostProgram.Dispose(); } } } diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs index 93d293f6..de6432bc 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheGpuAccessor.cs @@ -35,7 +35,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache ShaderSpecializationState oldSpecState, ShaderSpecializationState newSpecState, ResourceCounts counts, - int stageIndex) : base(context, counts, stageIndex, oldSpecState.TransformFeedbackDescriptors != null) + int stageIndex) : base(context, counts, stageIndex) { _data = data; _cb1Data = cb1Data; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 08096bd5..99ef89ed 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 5609; + private const uint CodeGenVersion = 5551; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; @@ -140,6 +140,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache /// public ShaderStage Stage; + /// + /// Number of vertices that each output primitive has on a geometry shader. + /// + public byte GeometryVerticesPerPrimitive; + + /// + /// Maximum number of vertices that a geometry shader may generate. 
+ /// + public ushort GeometryMaxOutputVertices; + + /// + /// Number of invocations per primitive on tessellation or geometry shaders. + /// + public ushort ThreadsPerInputPrimitive; + /// /// Indicates if the fragment shader accesses the fragment coordinate built-in variable. /// @@ -783,9 +798,10 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache sBuffers, textures, images, - ShaderIdentification.None, - 0, dataInfo.Stage, + dataInfo.GeometryVerticesPerPrimitive, + dataInfo.GeometryMaxOutputVertices, + dataInfo.ThreadsPerInputPrimitive, dataInfo.UsesFragCoord, dataInfo.UsesInstanceId, dataInfo.UsesDrawParameters, @@ -813,6 +829,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache TexturesCount = (ushort)info.Textures.Count, ImagesCount = (ushort)info.Images.Count, Stage = info.Stage, + GeometryVerticesPerPrimitive = (byte)info.GeometryVerticesPerPrimitive, + GeometryMaxOutputVertices = (ushort)info.GeometryMaxOutputVertices, + ThreadsPerInputPrimitive = (ushort)info.ThreadsPerInputPrimitive, UsesFragCoord = info.UsesFragCoord, UsesInstanceId = info.UsesInstanceId, UsesDrawParameters = info.UsesDrawParameters, diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs index 8c2108bf..153fc442 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/ParallelDiskCacheLoader.cs @@ -595,6 +595,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache ResourceCounts counts = new(); + DiskCacheGpuAccessor[] gpuAccessors = new DiskCacheGpuAccessor[Constants.ShaderStages]; TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; TranslatorContext nextStage = null; @@ -626,14 +627,22 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache translatorContexts[0] = DecodeGraphicsShader(gpuAccessorA, api, DefaultFlags | TranslationFlags.VertexA, 0); } + gpuAccessors[stageIndex] = 
gpuAccessor; translatorContexts[stageIndex + 1] = currentStage; nextStage = currentStage; } } - if (!_context.Capabilities.SupportsGeometryShader) + bool hasGeometryShader = translatorContexts[4] != null; + bool vertexHasStore = translatorContexts[1] != null && translatorContexts[1].HasStore; + bool geometryHasStore = hasGeometryShader && translatorContexts[4].HasStore; + bool vertexToCompute = ShouldConvertVertexToCompute(_context, vertexHasStore, geometryHasStore, hasGeometryShader); + + // We don't support caching shader stages that have been converted to compute currently, + // so just eliminate them if they exist in the cache. + if (vertexToCompute) { - ShaderCache.TryRemoveGeometryStage(translatorContexts); + return; } CachedShaderStage[] shaders = new CachedShaderStage[guestShaders.Length]; @@ -647,6 +656,8 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache if (currentStage != null) { + gpuAccessors[stageIndex].InitializeReservedCounts(specState.TransformFeedbackDescriptors != null, vertexToCompute); + ShaderProgram program; byte[] guestCode = guestShaders[stageIndex + 1].Value.Code; @@ -701,6 +712,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache ResourceCounts counts = new(); ShaderSpecializationState newSpecState = new(ref specState.ComputeState); DiskCacheGpuAccessor gpuAccessor = new(_context, shader.Code, shader.Cb1Data, specState, newSpecState, counts, 0); + gpuAccessor.InitializeReservedCounts(tfEnabled: false, vertexAsCompute: false); TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, 0); diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs index b4f4a439..1d84d0e4 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs @@ -25,11 +25,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU channel /// Current GPU state /// Graphics shader stage index (0 = Vertex, 4 = Fragment) - public 
GpuAccessor( - GpuContext context, - GpuChannel channel, - GpuAccessorState state, - int stageIndex) : base(context, state.ResourceCounts, stageIndex, state.TransformFeedbackDescriptors != null) + public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state, int stageIndex) : base(context, state.ResourceCounts, stageIndex) { _isVulkan = context.Capabilities.Api == TargetApi.Vulkan; _channel = channel; @@ -49,7 +45,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU context /// GPU channel /// Current GPU state - public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0, false) + public GpuAccessor(GpuContext context, GpuChannel channel, GpuAccessorState state) : base(context, state.ResourceCounts, 0) { _channel = channel; _state = state; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs index 52193940..9d030cd6 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/GpuAccessorBase.cs @@ -15,8 +15,10 @@ namespace Ryujinx.Graphics.Gpu.Shader private readonly ResourceCounts _resourceCounts; private readonly int _stageIndex; - private readonly int _reservedConstantBuffers; - private readonly int _reservedStorageBuffers; + private int _reservedConstantBuffers; + private int _reservedStorageBuffers; + private int _reservedTextures; + private int _reservedImages; /// /// Creates a new GPU accessor. 
@@ -24,15 +26,26 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU context /// Counter of GPU resources used by the shader /// Index of the shader stage, 0 for compute - /// Indicates if the current graphics shader is used with transform feedback enabled - public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex, bool tfEnabled) + public GpuAccessorBase(GpuContext context, ResourceCounts resourceCounts, int stageIndex) { _context = context; _resourceCounts = resourceCounts; _stageIndex = stageIndex; + } - _reservedConstantBuffers = 1; // For the support buffer. - _reservedStorageBuffers = !context.Capabilities.SupportsTransformFeedback && tfEnabled ? 5 : 0; + /// + /// Initializes counts for bindings that will be reserved for emulator use. + /// + /// Indicates if the current graphics shader is used with transform feedback enabled + /// Indicates that the vertex shader will be emulated on a compute shader + public void InitializeReservedCounts(bool tfEnabled, bool vertexAsCompute) + { + ResourceReservationCounts rrc = new(!_context.Capabilities.SupportsTransformFeedback && tfEnabled, vertexAsCompute); + + _reservedConstantBuffers = rrc.ReservedConstantBuffers; + _reservedStorageBuffers = rrc.ReservedStorageBuffers; + _reservedTextures = rrc.ReservedTextures; + _reservedImages = rrc.ReservedImages; } public int QueryBindingConstantBuffer(int index) @@ -69,6 +82,8 @@ namespace Ryujinx.Graphics.Gpu.Shader public int QueryBindingTexture(int index, bool isBuffer) { + int binding; + if (_context.Capabilities.Api == TargetApi.Vulkan) { if (isBuffer) @@ -76,16 +91,20 @@ namespace Ryujinx.Graphics.Gpu.Shader index += (int)_context.Capabilities.MaximumTexturesPerStage; } - return GetBindingFromIndex(index, _context.Capabilities.MaximumTexturesPerStage * 2, "Texture"); + binding = GetBindingFromIndex(index, _context.Capabilities.MaximumTexturesPerStage * 2, "Texture"); } else { - return _resourceCounts.TexturesCount++; + binding = 
_resourceCounts.TexturesCount++; } + + return binding + _reservedTextures; } public int QueryBindingImage(int index, bool isBuffer) { + int binding; + if (_context.Capabilities.Api == TargetApi.Vulkan) { if (isBuffer) @@ -93,12 +112,14 @@ namespace Ryujinx.Graphics.Gpu.Shader index += (int)_context.Capabilities.MaximumImagesPerStage; } - return GetBindingFromIndex(index, _context.Capabilities.MaximumImagesPerStage * 2, "Image"); + binding = GetBindingFromIndex(index, _context.Capabilities.MaximumImagesPerStage * 2, "Image"); } else { - return _resourceCounts.ImagesCount++; + binding = _resourceCounts.ImagesCount++; } + + return binding + _reservedImages; } private int GetBindingFromIndex(int index, uint maxPerStage, string resourceName) diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderAsCompute.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderAsCompute.cs new file mode 100644 index 00000000..71540a13 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderAsCompute.cs @@ -0,0 +1,20 @@ +using Ryujinx.Graphics.GAL; +using Ryujinx.Graphics.Shader; +using Ryujinx.Graphics.Shader.Translation; + +namespace Ryujinx.Graphics.Gpu.Shader +{ + class ShaderAsCompute + { + public IProgram HostProgram { get; } + public ShaderProgramInfo Info { get; } + public ResourceReservations Reservations { get; } + + public ShaderAsCompute(IProgram hostProgram, ShaderProgramInfo info, ResourceReservations reservations) + { + HostProgram = hostProgram; + Info = info; + Reservations = reservations; + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs index 97d7a720..38be262a 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs @@ -215,9 +215,10 @@ namespace Ryujinx.Graphics.Gpu.Shader ShaderSpecializationState specState = new(ref computeState); GpuAccessorState gpuAccessorState = new(poolState, computeState, default, specState); GpuAccessor gpuAccessor = new(_context, 
channel, gpuAccessorState); + gpuAccessor.InitializeReservedCounts(tfEnabled: false, vertexAsCompute: false); TranslatorContext translatorContext = DecodeComputeShader(gpuAccessor, _context.Capabilities.Api, gpuVa); - TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode); + TranslatedShader translatedShader = TranslateShader(_dumper, channel, translatorContext, cachedGuestCode, asCompute: false); ShaderSource[] shaderSourcesArray = new ShaderSource[] { CreateShaderSource(translatedShader.Program) }; ShaderInfo info = ShaderInfoBuilder.BuildForCompute(_context, translatedShader.Program.Info); @@ -321,6 +322,7 @@ namespace Ryujinx.Graphics.Gpu.Shader ReadOnlySpan addressesSpan = addresses.AsSpan(); + GpuAccessor[] gpuAccessors = new GpuAccessor[Constants.ShaderStages]; TranslatorContext[] translatorContexts = new TranslatorContext[Constants.ShaderStages + 1]; TranslatorContext nextStage = null; @@ -345,22 +347,31 @@ namespace Ryujinx.Graphics.Gpu.Shader translatorContexts[0] = DecodeGraphicsShader(gpuAccessor, api, DefaultFlags | TranslationFlags.VertexA, addresses.VertexA); } + gpuAccessors[stageIndex] = gpuAccessor; translatorContexts[stageIndex + 1] = currentStage; nextStage = currentStage; } } - if (!_context.Capabilities.SupportsGeometryShader) - { - TryRemoveGeometryStage(translatorContexts); - } + bool hasGeometryShader = translatorContexts[4] != null; + bool vertexHasStore = translatorContexts[1] != null && translatorContexts[1].HasStore; + bool geometryHasStore = hasGeometryShader && translatorContexts[4].HasStore; + bool vertexToCompute = ShouldConvertVertexToCompute(_context, vertexHasStore, geometryHasStore, hasGeometryShader); + bool geometryToCompute = ShouldConvertGeometryToCompute(_context, geometryHasStore); CachedShaderStage[] shaders = new CachedShaderStage[Constants.ShaderStages + 1]; List shaderSources = new(); TranslatorContext previousStage = null; + ShaderInfoBuilder infoBuilder = 
new(_context, transformFeedbackDescriptors != null, vertexToCompute); - ShaderInfoBuilder infoBuilder = new(_context, transformFeedbackDescriptors != null); + if (geometryToCompute && translatorContexts[4] != null) + { + translatorContexts[4].SetVertexOutputMapForGeometryAsCompute(translatorContexts[1]); + } + + ShaderAsCompute vertexAsCompute = null; + ShaderAsCompute geometryAsCompute = null; for (int stageIndex = 0; stageIndex < Constants.ShaderStages; stageIndex++) { @@ -368,8 +379,12 @@ namespace Ryujinx.Graphics.Gpu.Shader if (currentStage != null) { + gpuAccessors[stageIndex].InitializeReservedCounts(transformFeedbackDescriptors != null, vertexToCompute); + ShaderProgram program; + bool asCompute = (stageIndex == 0 && vertexToCompute) || (stageIndex == 3 && geometryToCompute); + if (stageIndex == 0 && translatorContexts[0] != null) { TranslatedShaderVertexPair translatedShader = TranslateShader( @@ -378,7 +393,8 @@ namespace Ryujinx.Graphics.Gpu.Shader currentStage, translatorContexts[0], cachedGuestCode.VertexACode, - cachedGuestCode.VertexBCode); + cachedGuestCode.VertexBCode, + asCompute); shaders[0] = translatedShader.VertexA; shaders[1] = translatedShader.VertexB; @@ -388,12 +404,31 @@ namespace Ryujinx.Graphics.Gpu.Shader { byte[] code = cachedGuestCode.GetByIndex(stageIndex); - TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code); + TranslatedShader translatedShader = TranslateShader(_dumper, channel, currentStage, code, asCompute); shaders[stageIndex + 1] = translatedShader.Shader; program = translatedShader.Program; } + if (asCompute) + { + bool tfEnabled = transformFeedbackDescriptors != null; + + if (stageIndex == 0) + { + vertexAsCompute = CreateHostVertexAsComputeProgram(program, currentStage, tfEnabled); + + TranslatorContext lastInVertexPipeline = geometryToCompute ? translatorContexts[4] ?? 
currentStage : currentStage; + + program = lastInVertexPipeline.GenerateVertexPassthroughForCompute(); + } + else + { + geometryAsCompute = CreateHostVertexAsComputeProgram(program, currentStage, tfEnabled); + program = null; + } + } + if (program != null) { shaderSources.Add(CreateShaderSource(program)); @@ -418,46 +453,81 @@ namespace Ryujinx.Graphics.Gpu.Shader IProgram hostProgram = _context.Renderer.CreateProgram(shaderSourcesArray, info); - gpShaders = new CachedShaderProgram(hostProgram, specState, shaders); + gpShaders = new(hostProgram, vertexAsCompute, geometryAsCompute, specState, shaders); _graphicsShaderCache.Add(gpShaders); - EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray); + + // We don't currently support caching shaders that have been converted to compute. + if (vertexAsCompute == null) + { + EnqueueProgramToSave(gpShaders, hostProgram, shaderSourcesArray); + } + _gpPrograms[addresses] = gpShaders; return gpShaders; } /// - /// Tries to eliminate the geometry stage from the array of translator contexts. + /// Checks if a vertex shader should be converted to a compute shader due to it making use of + /// features that are not supported on the host. /// - /// Array of translator contexts - public static void TryRemoveGeometryStage(TranslatorContext[] translatorContexts) + /// GPU context of the shader + /// Whether the vertex shader has image or storage buffer store operations + /// Whether the geometry shader has image or storage buffer store operations, if one exists + /// Whether a geometry shader exists + /// True if the vertex shader should be converted to compute, false otherwise + public static bool ShouldConvertVertexToCompute(GpuContext context, bool vertexHasStore, bool geometryHasStore, bool hasGeometryShader) { - if (translatorContexts[4] != null) + // If the host does not support store operations on vertex, + // we need to emulate it on a compute shader. 
+ if (!context.Capabilities.SupportsVertexStoreAndAtomics && vertexHasStore) { - // We have a geometry shader, but geometry shaders are not supported. - // Try to eliminate the geometry shader. - - ShaderProgramInfo info = translatorContexts[4].Translate().Info; - - if (info.Identification == ShaderIdentification.GeometryLayerPassthrough) - { - // We managed to identify that this geometry shader is only used to set the output Layer value, - // we can set the Layer on the previous stage instead (usually the vertex stage) and eliminate it. - - for (int i = 3; i >= 1; i--) - { - if (translatorContexts[i] != null) - { - translatorContexts[i].SetGeometryShaderLayerInputAttribute(info.GpLayerInputAttribute); - translatorContexts[i].SetLastInVertexPipeline(); - break; - } - } - - translatorContexts[4] = null; - } + return true; } + + // If any stage after the vertex stage is converted to compute, + // we need to convert vertex to compute too. + return hasGeometryShader && ShouldConvertGeometryToCompute(context, geometryHasStore); + } + + /// + /// Checks if a geometry shader should be converted to a compute shader due to it making use of + /// features that are not supported on the host. + /// + /// GPU context of the shader + /// Whether the geometry shader has image or storage buffer store operations, if one exists + /// True if the geometry shader should be converted to compute, false otherwise + public static bool ShouldConvertGeometryToCompute(GpuContext context, bool geometryHasStore) + { + return (!context.Capabilities.SupportsVertexStoreAndAtomics && geometryHasStore) || + !context.Capabilities.SupportsGeometryShader; + } + + /// + /// Checks if it might be necessary for any vertex, tessellation or geometry shader to be converted to compute, + /// based on the supported host features. 
+ /// + /// Host capabilities + /// True if the possibility of a shader being converted to compute exists, false otherwise + public static bool MayConvertVtgToCompute(ref Capabilities capabilities) + { + return !capabilities.SupportsVertexStoreAndAtomics || !capabilities.SupportsGeometryShader; + } + + /// + /// Creates a compute shader from a vertex, tessellation or geometry shader that has been converted to compute. + /// + /// Shader program + /// Translation context of the shader + /// Whether transform feedback is enabled + /// Compute shader + private ShaderAsCompute CreateHostVertexAsComputeProgram(ShaderProgram program, TranslatorContext context, bool tfEnabled) + { + ShaderSource source = new(program.Code, program.BinaryCode, ShaderStage.Compute, program.Language); + ShaderInfo info = ShaderInfoBuilder.BuildForVertexAsCompute(_context, program.Info, tfEnabled); + + return new(_context.Renderer.CreateProgram(new[] { source }, info), program.Info, context.GetResourceReservations()); } /// @@ -573,9 +643,16 @@ namespace Ryujinx.Graphics.Gpu.Shader } } + bool vertexAsCompute = gpShaders.VertexAsCompute != null; bool usesDrawParameters = gpShaders.Shaders[1]?.Info.UsesDrawParameters ?? 
false; - return gpShaders.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true); + return gpShaders.SpecializationState.MatchesGraphics( + channel, + ref poolState, + ref graphicsState, + vertexAsCompute, + usesDrawParameters, + checkTextures: true); } /// @@ -636,6 +713,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Optional translator context of the shader that should be combined /// Optional Maxwell binary code of the Vertex A shader, if present /// Optional Maxwell binary code of the Vertex B or current stage shader, if present on cache + /// Indicates that the vertex shader should be converted to a compute shader /// Compiled graphics shader code private static TranslatedShaderVertexPair TranslateShader( ShaderDumper dumper, @@ -643,7 +721,8 @@ namespace Ryujinx.Graphics.Gpu.Shader TranslatorContext currentStage, TranslatorContext vertexA, byte[] codeA, - byte[] codeB) + byte[] codeB, + bool asCompute) { ulong cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(0, 1); @@ -663,7 +742,7 @@ namespace Ryujinx.Graphics.Gpu.Shader pathsB = dumper.Dump(codeB, compute: false); } - ShaderProgram program = currentStage.Translate(vertexA); + ShaderProgram program = currentStage.Translate(vertexA, asCompute); pathsB.Prepend(program); pathsA.Prepend(program); @@ -681,8 +760,9 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU channel using the shader /// Translator context of the stage to be translated /// Optional Maxwell binary code of the current stage shader, if present on cache + /// Indicates that the vertex shader should be converted to a compute shader /// Compiled graphics shader code - private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code) + private static TranslatedShader TranslateShader(ShaderDumper dumper, GpuChannel channel, TranslatorContext context, byte[] code, bool asCompute) { var memoryManager = 
channel.MemoryManager; @@ -694,7 +774,7 @@ namespace Ryujinx.Graphics.Gpu.Shader code ??= memoryManager.GetSpan(context.Address, context.Size).ToArray(); ShaderDumpPaths paths = dumper?.Dump(code, context.Stage == ShaderStage.Compute) ?? default; - ShaderProgram program = context.Translate(); + ShaderProgram program = context.Translate(asCompute); paths.Prepend(program); diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs index af1e1ee3..bea916a6 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs @@ -33,6 +33,8 @@ namespace Ryujinx.Graphics.Gpu.Shader private readonly int _reservedConstantBuffers; private readonly int _reservedStorageBuffers; + private readonly int _reservedTextures; + private readonly int _reservedImages; private readonly List[] _resourceDescriptors; private readonly List[] _resourceUsages; @@ -42,7 +44,8 @@ namespace Ryujinx.Graphics.Gpu.Shader /// /// GPU context that owns the shaders that will be added to the builder /// Indicates if the graphics shader is used with transform feedback enabled - public ShaderInfoBuilder(GpuContext context, bool tfEnabled) + /// Indicates that the vertex shader will be emulated on a compute shader + public ShaderInfoBuilder(GpuContext context, bool tfEnabled, bool vertexAsCompute = false) { _context = context; @@ -60,27 +63,34 @@ namespace Ryujinx.Graphics.Gpu.Shader AddDescriptor(SupportBufferStages, ResourceType.UniformBuffer, UniformSetIndex, 0, 1); AddUsage(SupportBufferStages, ResourceType.UniformBuffer, ResourceAccess.Read, UniformSetIndex, 0, 1); - _reservedConstantBuffers = 1; // For the support buffer. 
+ ResourceReservationCounts rrc = new(!context.Capabilities.SupportsTransformFeedback && tfEnabled, vertexAsCompute); - if (!context.Capabilities.SupportsTransformFeedback && tfEnabled) - { - _reservedStorageBuffers = 5; + _reservedConstantBuffers = rrc.ReservedConstantBuffers; + _reservedStorageBuffers = rrc.ReservedStorageBuffers; + _reservedTextures = rrc.ReservedTextures; + _reservedImages = rrc.ReservedImages; - AddDescriptor(VtgStages, ResourceType.StorageBuffer, StorageSetIndex, 0, 5); - AddUsage(VtgStages, ResourceType.StorageBuffer, ResourceAccess.Read, StorageSetIndex, 0, 1); - AddUsage(VtgStages, ResourceType.StorageBuffer, ResourceAccess.Write, StorageSetIndex, 1, 4); - } - else - { - _reservedStorageBuffers = 0; - } + // TODO: Handle that better? Maybe we should only set the binding that are really needed on each shader. + ResourceStages stages = vertexAsCompute ? ResourceStages.Compute | ResourceStages.Vertex : VtgStages; + + PopulateDescriptorAndUsages(stages, ResourceType.UniformBuffer, ResourceAccess.Read, UniformSetIndex, 1, rrc.ReservedConstantBuffers - 1); + PopulateDescriptorAndUsages(stages, ResourceType.StorageBuffer, ResourceAccess.ReadWrite, StorageSetIndex, 0, rrc.ReservedStorageBuffers); + PopulateDescriptorAndUsages(stages, ResourceType.BufferTexture, ResourceAccess.Read, TextureSetIndex, 0, rrc.ReservedTextures); + PopulateDescriptorAndUsages(stages, ResourceType.BufferImage, ResourceAccess.ReadWrite, ImageSetIndex, 0, rrc.ReservedImages); + } + + private void PopulateDescriptorAndUsages(ResourceStages stages, ResourceType type, ResourceAccess access, int setIndex, int start, int count) + { + AddDescriptor(stages, type, setIndex, start, count); + AddUsage(stages, type, access, setIndex, start, count); } /// /// Adds information from a given shader stage. 
/// /// Shader stage information - public void AddStageInfo(ShaderProgramInfo info) + /// True if the shader stage has been converted into a compute shader + public void AddStageInfo(ShaderProgramInfo info, bool vertexAsCompute = false) { if (info.Stage == ShaderStage.Fragment) { @@ -96,7 +106,7 @@ namespace Ryujinx.Graphics.Gpu.Shader _ => 0, }); - ResourceStages stages = info.Stage switch + ResourceStages stages = vertexAsCompute ? ResourceStages.Compute : info.Stage switch { ShaderStage.Compute => ResourceStages.Compute, ShaderStage.Vertex => ResourceStages.Vertex, @@ -114,8 +124,8 @@ namespace Ryujinx.Graphics.Gpu.Shader int uniformBinding = _reservedConstantBuffers + stageIndex * uniformsPerStage; int storageBinding = _reservedStorageBuffers + stageIndex * storagesPerStage; - int textureBinding = stageIndex * texturesPerStage * 2; - int imageBinding = stageIndex * imagesPerStage * 2; + int textureBinding = _reservedTextures + stageIndex * texturesPerStage * 2; + int imageBinding = _reservedImages + stageIndex * imagesPerStage * 2; AddDescriptor(stages, ResourceType.UniformBuffer, UniformSetIndex, uniformBinding, uniformsPerStage); AddDescriptor(stages, ResourceType.StorageBuffer, StorageSetIndex, storageBinding, storagesPerStage); @@ -285,11 +295,28 @@ namespace Ryujinx.Graphics.Gpu.Shader /// Shader information public static ShaderInfo BuildForCompute(GpuContext context, ShaderProgramInfo info, bool fromCache = false) { - ShaderInfoBuilder builder = new(context, tfEnabled: false); + ShaderInfoBuilder builder = new(context, tfEnabled: false, vertexAsCompute: false); builder.AddStageInfo(info); return builder.Build(null, fromCache); } + + /// + /// Builds shader information for a vertex or geometry shader thas was converted to compute shader. 
+ /// + /// GPU context that owns the shader + /// Compute shader information + /// Indicates if the graphics shader is used with transform feedback enabled + /// True if the compute shader comes from a disk cache, false otherwise + /// Shader information + public static ShaderInfo BuildForVertexAsCompute(GpuContext context, ShaderProgramInfo info, bool tfEnabled, bool fromCache = false) + { + ShaderInfoBuilder builder = new(context, tfEnabled, vertexAsCompute: true); + + builder.AddStageInfo(info, vertexAsCompute: true); + + return builder.Build(null, fromCache); + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs index e57e1df1..3c2f0b9b 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationList.cs @@ -35,9 +35,16 @@ namespace Ryujinx.Graphics.Gpu.Shader { foreach (var entry in _entries) { + bool vertexAsCompute = entry.VertexAsCompute != null; bool usesDrawParameters = entry.Shaders[1]?.Info.UsesDrawParameters ?? 
false; - if (entry.SpecializationState.MatchesGraphics(channel, ref poolState, ref graphicsState, usesDrawParameters, true)) + if (entry.SpecializationState.MatchesGraphics( + channel, + ref poolState, + ref graphicsState, + vertexAsCompute, + usesDrawParameters, + checkTextures: true)) { program = entry; return true; diff --git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs index fcd95375..a41f761b 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderSpecializationState.cs @@ -457,6 +457,7 @@ namespace Ryujinx.Graphics.Gpu.Shader /// GPU channel /// Texture pool state /// Graphics state + /// Indicates that the vertex shader has been converted into a compute shader /// Indicates whether the vertex shader accesses draw parameters /// Indicates whether texture descriptors should be checked /// True if the state matches, false otherwise @@ -464,6 +465,7 @@ namespace Ryujinx.Graphics.Gpu.Shader GpuChannel channel, ref GpuChannelPoolState poolState, ref GpuChannelGraphicsState graphicsState, + bool vertexAsCompute, bool usesDrawParameters, bool checkTextures) { @@ -497,9 +499,25 @@ namespace Ryujinx.Graphics.Gpu.Shader return false; } - if (!graphicsState.AttributeTypes.AsSpan().SequenceEqual(GraphicsState.AttributeTypes.AsSpan())) + if (ShaderCache.MayConvertVtgToCompute(ref channel.Capabilities) && !vertexAsCompute) { - return false; + for (int index = 0; index < graphicsState.AttributeTypes.Length; index++) + { + AttributeType lType = FilterAttributeType(channel, graphicsState.AttributeTypes[index]); + AttributeType rType = FilterAttributeType(channel, GraphicsState.AttributeTypes[index]); + + if (lType != rType) + { + return false; + } + } + } + else + { + if (!graphicsState.AttributeTypes.AsSpan().SequenceEqual(GraphicsState.AttributeTypes.AsSpan())) + { + return false; + } } if (usesDrawParameters && 
graphicsState.HasConstantBufferDrawParameters != GraphicsState.HasConstantBufferDrawParameters) @@ -530,6 +548,19 @@ namespace Ryujinx.Graphics.Gpu.Shader return Matches(channel, ref poolState, checkTextures, isCompute: false); } + private static AttributeType FilterAttributeType(GpuChannel channel, AttributeType type) + { + type &= ~(AttributeType.Packed | AttributeType.PackedRgb10A2Signed); + + if (channel.Capabilities.SupportsScaledVertexFormats && + (type == AttributeType.Sscaled || type == AttributeType.Uscaled)) + { + type = AttributeType.Float; + } + + return type; + } + /// /// Checks if the recorded state matches the current GPU compute engine state. /// diff --git a/src/Ryujinx.Graphics.OpenGL/HwCapabilities.cs b/src/Ryujinx.Graphics.OpenGL/HwCapabilities.cs index 617b129a..cf0b0645 100644 --- a/src/Ryujinx.Graphics.OpenGL/HwCapabilities.cs +++ b/src/Ryujinx.Graphics.OpenGL/HwCapabilities.cs @@ -29,6 +29,7 @@ namespace Ryujinx.Graphics.OpenGL private static readonly Lazy _maximumComputeSharedMemorySize = new(() => GetLimit(All.MaxComputeSharedMemorySize)); private static readonly Lazy _storageBufferOffsetAlignment = new(() => GetLimit(All.ShaderStorageBufferOffsetAlignment)); + private static readonly Lazy _textureBufferOffsetAlignment = new(() => GetLimit(All.TextureBufferOffsetAlignment)); public enum GpuVendor { @@ -78,6 +79,7 @@ namespace Ryujinx.Graphics.OpenGL public static int MaximumComputeSharedMemorySize => _maximumComputeSharedMemorySize.Value; public static int StorageBufferOffsetAlignment => _storageBufferOffsetAlignment.Value; + public static int TextureBufferOffsetAlignment => _textureBufferOffsetAlignment.Value; public static float MaximumSupportedAnisotropy => _maxSupportedAnisotropy.Value; diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index 35d1569f..3eba15e3 100644 --- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -164,6 
+164,7 @@ namespace Ryujinx.Graphics.OpenGL supportsShaderBarrierDivergence: !(intelWindows || intelUnix), supportsShaderFloat64: true, supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod, + supportsVertexStoreAndAtomics: true, supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray, supportsViewportMask: HwCapabilities.SupportsViewportArray2, supportsViewportSwizzle: HwCapabilities.SupportsViewportSwizzle, @@ -177,6 +178,7 @@ namespace Ryujinx.Graphics.OpenGL maximumSupportedAnisotropy: HwCapabilities.MaximumSupportedAnisotropy, shaderSubgroupSize: Constants.MaxSubgroupSize, storageBufferOffsetAlignment: HwCapabilities.StorageBufferOffsetAlignment, + textureBufferOffsetAlignment: HwCapabilities.TextureBufferOffsetAlignment, gatherBiasPrecision: intelWindows || amdWindows ? 8 : 0); // Precision is 8 for these vendors on Vulkan. } diff --git a/src/Ryujinx.Graphics.Shader/AttributeType.cs b/src/Ryujinx.Graphics.Shader/AttributeType.cs index 1d950773..d2d146ec 100644 --- a/src/Ryujinx.Graphics.Shader/AttributeType.cs +++ b/src/Ryujinx.Graphics.Shader/AttributeType.cs @@ -11,13 +11,17 @@ namespace Ryujinx.Graphics.Shader Uint, Sscaled, Uscaled, + + Packed = 1 << 6, + PackedRgb10A2Signed = 1 << 7, + AnyPacked = Packed | PackedRgb10A2Signed, } static class AttributeTypeExtensions { public static AggregateType ToAggregateType(this AttributeType type) { - return type switch + return (type & ~AttributeType.AnyPacked) switch { AttributeType.Float => AggregateType.FP32, AttributeType.Sint => AggregateType.S32, @@ -28,7 +32,7 @@ namespace Ryujinx.Graphics.Shader public static AggregateType ToAggregateType(this AttributeType type, bool supportsScaledFormats) { - return type switch + return (type & ~AttributeType.AnyPacked) switch { AttributeType.Float => AggregateType.FP32, AttributeType.Sint => AggregateType.S32, diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs 
b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index 607ff431..500de71f 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -100,10 +100,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl else { string outPrimitive = context.Definitions.OutputTopology.ToGlslString(); - - int maxOutputVertices = context.Definitions.GpPassthrough - ? context.Definitions.InputTopology.ToInputVertices() - : context.Definitions.MaxOutputVertices; + int maxOutputVertices = context.Definitions.MaxOutputVertices; context.AppendLine($"layout ({outPrimitive}, max_vertices = {maxOutputVertices}) out;"); } @@ -320,15 +317,22 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { string typeName = GetVarTypeName(context, memory.Type & ~AggregateType.Array); - if (memory.ArrayLength > 0) + if (memory.Type.HasFlag(AggregateType.Array)) { - string arraySize = memory.ArrayLength.ToString(CultureInfo.InvariantCulture); + if (memory.ArrayLength > 0) + { + string arraySize = memory.ArrayLength.ToString(CultureInfo.InvariantCulture); - context.AppendLine($"{prefix}{typeName} {memory.Name}[{arraySize}];"); + context.AppendLine($"{prefix}{typeName} {memory.Name}[{arraySize}];"); + } + else + { + context.AppendLine($"{prefix}{typeName} {memory.Name}[];"); + } } else { - context.AppendLine($"{prefix}{typeName} {memory.Name}[];"); + context.AppendLine($"{prefix}{typeName} {memory.Name};"); } } } diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs index b5f453ae..caa6ef64 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs @@ -31,6 +31,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions IoVariable.FrontColorDiffuse => ("gl_FrontColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated. 
IoVariable.FrontColorSpecular => ("gl_FrontSecondaryColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated. IoVariable.FrontFacing => ("gl_FrontFacing", AggregateType.Bool), + IoVariable.GlobalId => ("gl_GlobalInvocationID", AggregateType.Vector3 | AggregateType.U32), IoVariable.InstanceId => ("gl_InstanceID", AggregateType.S32), IoVariable.InstanceIndex => ("gl_InstanceIndex", AggregateType.S32), IoVariable.InvocationId => ("gl_InvocationID", AggregateType.S32), diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs index d385782a..9f9411a9 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs @@ -27,8 +27,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv public ILogger Logger { get; } public TargetApi TargetApi { get; } - public int InputVertices { get; } - public Dictionary ConstantBuffers { get; } = new(); public Dictionary StorageBuffers { get; } = new(); @@ -101,19 +99,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv Logger = parameters.Logger; TargetApi = parameters.TargetApi; - if (parameters.Definitions.Stage == ShaderStage.Geometry) - { - InputVertices = parameters.Definitions.InputTopology switch - { - InputTopology.Points => 1, - InputTopology.Lines => 2, - InputTopology.LinesAdjacency => 2, - InputTopology.Triangles => 3, - InputTopology.TrianglesAdjacency => 3, - _ => throw new InvalidOperationException($"Invalid input topology \"{parameters.Definitions.InputTopology}\"."), - }; - } - AddCapability(Capability.Shader); AddCapability(Capability.Float64); diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs index b0659ba4..54767c2f 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs @@ -369,7 +369,7 @@ namespace 
Ryujinx.Graphics.Shader.CodeGen.Spirv if (context.Definitions.Stage != ShaderStage.Vertex) { var perVertexInputStructType = CreatePerVertexStructType(context); - int arraySize = context.Definitions.Stage == ShaderStage.Geometry ? context.InputVertices : 32; + int arraySize = context.Definitions.Stage == ShaderStage.Geometry ? context.Definitions.InputTopology.ToInputVertices() : 32; var perVertexInputArrayType = context.TypeArray(perVertexInputStructType, context.Constant(context.TypeU32(), arraySize)); var perVertexInputPointerType = context.TypePointer(StorageClass.Input, perVertexInputArrayType); var perVertexInputVariable = context.Variable(perVertexInputPointerType, StorageClass.Input); @@ -506,7 +506,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv if (!isPerPatch && IoMap.IsPerVertex(ioVariable, context.Definitions.Stage, isOutput)) { - int arraySize = context.Definitions.Stage == ShaderStage.Geometry ? context.InputVertices : 32; + int arraySize = context.Definitions.Stage == ShaderStage.Geometry ? 
context.Definitions.InputTopology.ToInputVertices() : 32; spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), arraySize)); if (context.Definitions.GpPassthrough && context.HostCapabilities.SupportsGeometryShaderPassthrough) diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs index 08d403e2..7b4e14ff 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs @@ -22,6 +22,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv IoVariable.FragmentCoord => (BuiltIn.FragCoord, AggregateType.Vector4 | AggregateType.FP32), IoVariable.FragmentOutputDepth => (BuiltIn.FragDepth, AggregateType.FP32), IoVariable.FrontFacing => (BuiltIn.FrontFacing, AggregateType.Bool), + IoVariable.GlobalId => (BuiltIn.GlobalInvocationId, AggregateType.Vector3 | AggregateType.U32), IoVariable.InstanceId => (BuiltIn.InstanceId, AggregateType.S32), IoVariable.InstanceIndex => (BuiltIn.InstanceIndex, AggregateType.S32), IoVariable.InvocationId => (BuiltIn.InvocationId, AggregateType.S32), diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs index 70f1dd3c..44d3e985 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs @@ -239,9 +239,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv _ => throw new InvalidOperationException($"Invalid output topology \"{context.Definitions.OutputTopology}\"."), }); - int maxOutputVertices = context.Definitions.GpPassthrough ? 
context.InputVertices : context.Definitions.MaxOutputVertices; - - context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)maxOutputVertices); + context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)context.Definitions.MaxOutputVertices); } else if (context.Definitions.Stage == ShaderStage.Fragment) { @@ -279,6 +277,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv localSizeZ); } + if (context.Definitions.Stage != ShaderStage.Fragment && + context.Definitions.Stage != ShaderStage.Geometry && + context.Definitions.Stage != ShaderStage.Compute && + context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Output, IoVariable.Layer))) + { + context.AddCapability(Capability.ShaderLayer); + } + if (context.Definitions.TransformFeedbackEnabled && context.Definitions.LastInVertexPipeline) { context.AddExecutionMode(spvFunc, ExecutionMode.Xfb); diff --git a/src/Ryujinx.Graphics.Shader/Constants.cs b/src/Ryujinx.Graphics.Shader/Constants.cs index cff2c37a..6317369f 100644 --- a/src/Ryujinx.Graphics.Shader/Constants.cs +++ b/src/Ryujinx.Graphics.Shader/Constants.cs @@ -10,11 +10,5 @@ namespace Ryujinx.Graphics.Shader public const int NvnBaseVertexByteOffset = 0x640; public const int NvnBaseInstanceByteOffset = 0x644; public const int NvnDrawIndexByteOffset = 0x648; - - // Transform Feedback emulation. 
- - public const int TfeInfoBinding = 0; - public const int TfeBufferBaseBinding = 1; - public const int TfeBuffersCount = 4; } } diff --git a/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs b/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs index 67304d02..fdf3eacc 100644 --- a/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs +++ b/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs @@ -60,6 +60,11 @@ namespace Ryujinx.Graphics.Shader.Decoders _functionsWithId.Add(function); } + public IoUsage GetIoUsage() + { + return new IoUsage(UsedFeatures, ClipDistancesWritten, AttributeUsage.UsedOutputAttributes); + } + public IEnumerator GetEnumerator() { return _functions.Values.GetEnumerator(); diff --git a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index 4266dedc..1211e561 100644 --- a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -297,6 +297,9 @@ namespace Ryujinx.Graphics.Shader.Decoders case InstName.Ssy: block.AddPushOp(op); break; + case InstName.Shfl: + context.SetUsedFeature(FeatureFlags.Shuffle); + break; case InstName.Ldl: case InstName.Stl: context.SetUsedFeature(FeatureFlags.LocalMemory); @@ -307,8 +310,22 @@ namespace Ryujinx.Graphics.Shader.Decoders case InstName.Sts: context.SetUsedFeature(FeatureFlags.SharedMemory); break; - case InstName.Shfl: - context.SetUsedFeature(FeatureFlags.Shuffle); + case InstName.Atom: + case InstName.AtomCas: + case InstName.Red: + case InstName.Stg: + case InstName.Suatom: + case InstName.SuatomB: + case InstName.SuatomB2: + case InstName.SuatomCas: + case InstName.SuatomCasB: + case InstName.Sured: + case InstName.SuredB: + case InstName.Sust: + case InstName.SustB: + case InstName.SustD: + case InstName.SustDB: + context.SetUsedFeature(FeatureFlags.Store); break; } @@ -424,6 +441,12 @@ namespace Ryujinx.Graphics.Shader.Decoders context.SetUsedFeature(FeatureFlags.RtLayer); } break; + case 
AttributeConsts.ViewportIndex: + if (definitions.Stage != ShaderStage.Fragment) + { + context.SetUsedFeature(FeatureFlags.ViewportIndex); + } + break; case AttributeConsts.ClipDistance0: case AttributeConsts.ClipDistance1: case AttributeConsts.ClipDistance2: @@ -432,11 +455,17 @@ namespace Ryujinx.Graphics.Shader.Decoders case AttributeConsts.ClipDistance5: case AttributeConsts.ClipDistance6: case AttributeConsts.ClipDistance7: - if (definitions.Stage == ShaderStage.Vertex) + if (definitions.Stage.IsVtg()) { context.SetClipDistanceWritten((attr - AttributeConsts.ClipDistance0) / 4); } break; + case AttributeConsts.ViewportMask: + if (definitions.Stage != ShaderStage.Fragment) + { + context.SetUsedFeature(FeatureFlags.ViewportMask); + } + break; } } else diff --git a/src/Ryujinx.Graphics.Shader/InputTopology.cs b/src/Ryujinx.Graphics.Shader/InputTopology.cs index ebd2930e..9438263d 100644 --- a/src/Ryujinx.Graphics.Shader/InputTopology.cs +++ b/src/Ryujinx.Graphics.Shader/InputTopology.cs @@ -25,6 +25,19 @@ namespace Ryujinx.Graphics.Shader } public static int ToInputVertices(this InputTopology topology) + { + return topology switch + { + InputTopology.Points => 1, + InputTopology.Lines => 2, + InputTopology.LinesAdjacency => 4, + InputTopology.Triangles => 3, + InputTopology.TrianglesAdjacency => 6, + _ => 1, + }; + } + + public static int ToInputVerticesNoAdjacency(this InputTopology topology) { return topology switch { diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs index 53d774d6..63ce38e2 100644 --- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs @@ -63,7 +63,7 @@ namespace Ryujinx.Graphics.Shader.Instructions { value = AttributeMap.GenerateAttributeLoad(context, primVertex, offset, isOutput, op.P); - if (!context.TranslatorContext.Definitions.SupportsScaledVertexFormats && + if 
((!context.TranslatorContext.Definitions.SupportsScaledVertexFormats || context.VertexAsCompute) && context.TranslatorContext.Stage == ShaderStage.Vertex && !op.O && offset >= 0x80 && diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs index fdee8345..21e20863 100644 --- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs +++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs @@ -18,6 +18,7 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation FrontColorDiffuse, FrontColorSpecular, FrontFacing, + GlobalId, InstanceId, InstanceIndex, InvocationId, diff --git a/src/Ryujinx.Graphics.Shader/ResourceReservationCounts.cs b/src/Ryujinx.Graphics.Shader/ResourceReservationCounts.cs new file mode 100644 index 00000000..c0bae8ea --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/ResourceReservationCounts.cs @@ -0,0 +1,22 @@ +using Ryujinx.Graphics.Shader.Translation; + +namespace Ryujinx.Graphics.Shader +{ + public readonly struct ResourceReservationCounts + { + public readonly int ReservedConstantBuffers { get; } + public readonly int ReservedStorageBuffers { get; } + public readonly int ReservedTextures { get; } + public readonly int ReservedImages { get; } + + public ResourceReservationCounts(bool isTransformFeedbackEmulated, bool vertexAsCompute) + { + ResourceReservations reservations = new(isTransformFeedbackEmulated, vertexAsCompute); + + ReservedConstantBuffers = reservations.ReservedConstantBuffers; + ReservedStorageBuffers = reservations.ReservedStorageBuffers; + ReservedTextures = reservations.ReservedTextures; + ReservedImages = reservations.ReservedImages; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/ShaderIdentification.cs b/src/Ryujinx.Graphics.Shader/ShaderIdentification.cs deleted file mode 100644 index 551e318c..00000000 --- a/src/Ryujinx.Graphics.Shader/ShaderIdentification.cs +++ /dev/null @@ -1,8 +0,0 @@ 
-namespace Ryujinx.Graphics.Shader -{ - public enum ShaderIdentification - { - None, - GeometryLayerPassthrough, - } -} diff --git a/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs b/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs index f9776afc..22823ac3 100644 --- a/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs +++ b/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs @@ -10,9 +10,10 @@ namespace Ryujinx.Graphics.Shader public ReadOnlyCollection Textures { get; } public ReadOnlyCollection Images { get; } - public ShaderIdentification Identification { get; } - public int GpLayerInputAttribute { get; } public ShaderStage Stage { get; } + public int GeometryVerticesPerPrimitive { get; } + public int GeometryMaxOutputVertices { get; } + public int ThreadsPerInputPrimitive { get; } public bool UsesFragCoord { get; } public bool UsesInstanceId { get; } public bool UsesDrawParameters { get; } @@ -25,9 +26,10 @@ namespace Ryujinx.Graphics.Shader BufferDescriptor[] sBuffers, TextureDescriptor[] textures, TextureDescriptor[] images, - ShaderIdentification identification, - int gpLayerInputAttribute, ShaderStage stage, + int geometryVerticesPerPrimitive, + int geometryMaxOutputVertices, + int threadsPerInputPrimitive, bool usesFragCoord, bool usesInstanceId, bool usesDrawParameters, @@ -40,9 +42,10 @@ namespace Ryujinx.Graphics.Shader Textures = Array.AsReadOnly(textures); Images = Array.AsReadOnly(images); - Identification = identification; - GpLayerInputAttribute = gpLayerInputAttribute; Stage = stage; + GeometryVerticesPerPrimitive = geometryVerticesPerPrimitive; + GeometryMaxOutputVertices = geometryMaxOutputVertices; + ThreadsPerInputPrimitive = threadsPerInputPrimitive; UsesFragCoord = usesFragCoord; UsesInstanceId = usesInstanceId; UsesDrawParameters = usesDrawParameters; diff --git a/src/Ryujinx.Graphics.Shader/SupportBuffer.cs b/src/Ryujinx.Graphics.Shader/SupportBuffer.cs index 0b7a2edd..d4d3cbf8 100644 --- a/src/Ryujinx.Graphics.Shader/SupportBuffer.cs +++ 
b/src/Ryujinx.Graphics.Shader/SupportBuffer.cs @@ -22,11 +22,13 @@ namespace Ryujinx.Graphics.Shader ViewportSize, FragmentRenderScaleCount, RenderScale, + TfeOffset, + TfeVertexCount, } public struct SupportBuffer { - internal const int Binding = 0; + public const int Binding = 0; public static readonly int FieldSize; public static readonly int RequiredSize; @@ -38,6 +40,8 @@ namespace Ryujinx.Graphics.Shader public static readonly int FragmentRenderScaleCountOffset; public static readonly int GraphicsRenderScaleOffset; public static readonly int ComputeRenderScaleOffset; + public static readonly int TfeOffsetOffset; + public static readonly int TfeVertexCountOffset; public const int FragmentIsBgraCount = 8; // One for the render target, 64 for the textures, and 8 for the images. @@ -62,18 +66,22 @@ namespace Ryujinx.Graphics.Shader FragmentRenderScaleCountOffset = OffsetOf(ref instance, ref instance.FragmentRenderScaleCount); GraphicsRenderScaleOffset = OffsetOf(ref instance, ref instance.RenderScale); ComputeRenderScaleOffset = GraphicsRenderScaleOffset + FieldSize; + TfeOffsetOffset = OffsetOf(ref instance, ref instance.TfeOffset); + TfeVertexCountOffset = OffsetOf(ref instance, ref instance.TfeVertexCount); } internal static StructureType GetStructureType() { return new StructureType(new[] { - new StructureField(AggregateType.U32, "s_alpha_test"), - new StructureField(AggregateType.Array | AggregateType.U32, "s_is_bgra", FragmentIsBgraCount), - new StructureField(AggregateType.Vector4 | AggregateType.FP32, "s_viewport_inverse"), - new StructureField(AggregateType.Vector4 | AggregateType.FP32, "s_viewport_size"), - new StructureField(AggregateType.S32, "s_frag_scale_count"), - new StructureField(AggregateType.Array | AggregateType.FP32, "s_render_scale", RenderScaleMaxCount), + new StructureField(AggregateType.U32, "alpha_test"), + new StructureField(AggregateType.Array | AggregateType.U32, "is_bgra", FragmentIsBgraCount), + new 
StructureField(AggregateType.Vector4 | AggregateType.FP32, "viewport_inverse"), + new StructureField(AggregateType.Vector4 | AggregateType.FP32, "viewport_size"), + new StructureField(AggregateType.S32, "frag_scale_count"), + new StructureField(AggregateType.Array | AggregateType.FP32, "render_scale", RenderScaleMaxCount), + new StructureField(AggregateType.Vector4 | AggregateType.S32, "tfe_offset"), + new StructureField(AggregateType.S32, "tfe_vertex_count"), }); } @@ -85,5 +93,8 @@ namespace Ryujinx.Graphics.Shader // Render scale max count: 1 + 64 + 8. First scale is fragment output scale, others are textures/image inputs. public Array73> RenderScale; + + public Vector4 TfeOffset; + public Vector4 TfeVertexCount; } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs b/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs index f749cecb..c4bd2cbf 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs @@ -4,6 +4,7 @@ namespace Ryujinx.Graphics.Shader.Translation { public const int PrimitiveId = 0x060; public const int Layer = 0x064; + public const int ViewportIndex = 0x068; public const int PositionX = 0x070; public const int PositionY = 0x074; public const int FrontColorDiffuseR = 0x280; @@ -24,6 +25,7 @@ namespace Ryujinx.Graphics.Shader.Translation public const int TexCoordCount = 10; public const int TexCoordBase = 0x300; public const int TexCoordEnd = TexCoordBase + TexCoordCount * 16; + public const int ViewportMask = 0x3a0; public const int FrontFacing = 0x3fc; public const int UserAttributesCount = 32; diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs index 43263dd4..f1dffb35 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs @@ -14,6 +14,8 @@ namespace 
Ryujinx.Graphics.Shader.Translation public TranslatorContext TranslatorContext { get; } public ResourceManager ResourceManager { get; } + public bool VertexAsCompute { get; } + public bool IsNonMain { get; } public Block CurrBlock { get; set; } @@ -59,11 +61,13 @@ namespace Ryujinx.Graphics.Shader.Translation TranslatorContext translatorContext, ResourceManager resourceManager, DecodedProgram program, + bool vertexAsCompute, bool isNonMain) : this() { TranslatorContext = translatorContext; ResourceManager = resourceManager; Program = program; + VertexAsCompute = vertexAsCompute; IsNonMain = isNonMain; EmitStart(); @@ -71,13 +75,87 @@ namespace Ryujinx.Graphics.Shader.Translation private void EmitStart() { - if (TranslatorContext.Definitions.Stage == ShaderStage.Vertex && - TranslatorContext.Options.TargetApi == TargetApi.Vulkan && - (TranslatorContext.Options.Flags & TranslationFlags.VertexA) == 0) + if (TranslatorContext.Options.Flags.HasFlag(TranslationFlags.VertexA)) + { + return; + } + + if (TranslatorContext.Definitions.Stage == ShaderStage.Vertex && TranslatorContext.Options.TargetApi == TargetApi.Vulkan) { // Vulkan requires the point size to be always written on the shader if the primitive topology is points. this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(TranslatorContext.Definitions.PointSize)); } + + if (VertexAsCompute) + { + int vertexInfoCbBinding = ResourceManager.Reservations.VertexInfoConstantBufferBinding; + int countFieldIndex = TranslatorContext.Stage == ShaderStage.Vertex + ? 
(int)VertexInfoBufferField.VertexCounts + : (int)VertexInfoBufferField.GeometryCounts; + + Operand outputVertexOffset = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(0)); + Operand vertexCount = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const(countFieldIndex), Const(0)); + Operand isVertexOob = this.ICompareGreaterOrEqualUnsigned(outputVertexOffset, vertexCount); + + Operand lblVertexInBounds = Label(); + + this.BranchIfFalse(lblVertexInBounds, isVertexOob); + this.Return(); + this.MarkLabel(lblVertexInBounds); + + Operand outputInstanceOffset = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(1)); + Operand instanceCount = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(1)); + Operand firstVertex = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(2)); + Operand firstInstance = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(3)); + Operand ibBaseOffset = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.GeometryCounts), Const(3)); + Operand isInstanceOob = this.ICompareGreaterOrEqualUnsigned(outputInstanceOffset, instanceCount); + + Operand lblInstanceInBounds = Label(); + + this.BranchIfFalse(lblInstanceInBounds, isInstanceOob); + this.Return(); + this.MarkLabel(lblInstanceInBounds); + + if (TranslatorContext.Stage == ShaderStage.Vertex) + { + Operand vertexIndexVr = Local(); + + this.TextureSample( + SamplerType.TextureBuffer, + TextureFlags.IntCoords, + ResourceManager.Reservations.IndexBufferTextureBinding, + 1, + new[] { vertexIndexVr }, + new[] { this.IAdd(ibBaseOffset, outputVertexOffset) }); + + this.Store(StorageKind.LocalMemory, ResourceManager.LocalVertexIndexVertexRateMemoryId, this.IAdd(firstVertex, vertexIndexVr)); + this.Store(StorageKind.LocalMemory, 
ResourceManager.LocalVertexIndexInstanceRateMemoryId, this.IAdd(firstInstance, outputInstanceOffset)); + } + else if (TranslatorContext.Stage == ShaderStage.Geometry) + { + int inputVertices = TranslatorContext.Definitions.InputTopology.ToInputVertices(); + + Operand baseVertex = this.IMultiply(outputVertexOffset, Const(inputVertices)); + + for (int index = 0; index < inputVertices; index++) + { + Operand vertexIndex = Local(); + + this.TextureSample( + SamplerType.TextureBuffer, + TextureFlags.IntCoords, + ResourceManager.Reservations.TopologyRemapBufferTextureBinding, + 1, + new[] { vertexIndex }, + new[] { this.IAdd(baseVertex, Const(index)) }); + + this.Store(StorageKind.LocalMemory, ResourceManager.LocalTopologyRemapMemoryId, Const(index), vertexIndex); + } + + this.Store(StorageKind.LocalMemory, ResourceManager.LocalGeometryOutputVertexCountMemoryId, Const(0)); + this.Store(StorageKind.LocalMemory, ResourceManager.LocalGeometryOutputIndexCountMemoryId, Const(0)); + } + } } public T GetOp() where T : unmanaged @@ -166,16 +244,21 @@ namespace Ryujinx.Graphics.Shader.Translation public void PrepareForVertexReturn() { - if (!TranslatorContext.GpuAccessor.QueryHostSupportsTransformFeedback() && TranslatorContext.GpuAccessor.QueryTransformFeedbackEnabled()) - { - Operand vertexCount = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(1)); + // TODO: Support transform feedback emulation on stages other than vertex. + // Those stages might produce more primitives, so it needs a way to "compact" the output after it is written. 
- for (int tfbIndex = 0; tfbIndex < Constants.TfeBuffersCount; tfbIndex++) + if (!TranslatorContext.GpuAccessor.QueryHostSupportsTransformFeedback() && + TranslatorContext.GpuAccessor.QueryTransformFeedbackEnabled() && + TranslatorContext.Stage == ShaderStage.Vertex) + { + Operand vertexCount = this.Load(StorageKind.ConstantBuffer, SupportBuffer.Binding, Const((int)SupportBufferField.TfeVertexCount)); + + for (int tfbIndex = 0; tfbIndex < ResourceReservations.TfeBuffersCount; tfbIndex++) { var locations = TranslatorContext.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex); var stride = TranslatorContext.GpuAccessor.QueryTransformFeedbackStride(tfbIndex); - Operand baseOffset = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(0), Const(tfbIndex)); + Operand baseOffset = this.Load(StorageKind.ConstantBuffer, SupportBuffer.Binding, Const((int)SupportBufferField.TfeOffset), Const(tfbIndex)); Operand baseVertex = this.Load(StorageKind.Input, IoVariable.BaseVertex); Operand baseInstance = this.Load(StorageKind.Input, IoVariable.BaseInstance); Operand vertexIndex = this.Load(StorageKind.Input, IoVariable.VertexIndex); @@ -200,7 +283,9 @@ namespace Ryujinx.Graphics.Shader.Translation Operand offset = this.IAdd(baseOffset, Const(j)); Operand value = Instructions.AttributeMap.GenerateAttributeLoad(this, null, location * 4, isOutput: true, isPerPatch: false); - this.Store(StorageKind.StorageBuffer, Constants.TfeBufferBaseBinding + tfbIndex, Const(0), offset, value); + int binding = ResourceManager.Reservations.GetTfeBufferStorageBufferBinding(tfbIndex); + + this.Store(StorageKind.StorageBuffer, binding, Const(0), offset, value); } } } @@ -225,16 +310,6 @@ namespace Ryujinx.Graphics.Shader.Translation this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(z, ConstF(0.5f), halfW)); } - - if (TranslatorContext.Definitions.Stage != ShaderStage.Geometry && TranslatorContext.HasLayerInputAttribute) - { - int 
attrVecIndex = TranslatorContext.GpLayerInputAttribute >> 2; - int attrComponentIndex = TranslatorContext.GpLayerInputAttribute & 3; - - Operand layer = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(attrVecIndex), Const(attrComponentIndex)); - - this.Store(StorageKind.Output, IoVariable.Layer, null, layer); - } } public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal) @@ -308,9 +383,30 @@ namespace Ryujinx.Graphics.Shader.Translation if (TranslatorContext.Definitions.GpPassthrough && !TranslatorContext.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) { - int inputVertices = TranslatorContext.Definitions.InputTopology.ToInputVertices(); + int inputStart, inputEnd, inputStep; - for (int primIndex = 0; primIndex < inputVertices; primIndex++) + InputTopology topology = TranslatorContext.Definitions.InputTopology; + + if (topology == InputTopology.LinesAdjacency) + { + inputStart = 1; + inputEnd = 3; + inputStep = 1; + } + else if (topology == InputTopology.TrianglesAdjacency) + { + inputStart = 0; + inputEnd = 6; + inputStep = 2; + } + else + { + inputStart = 0; + inputEnd = topology.ToInputVerticesNoAdjacency(); + inputStep = 1; + } + + for (int primIndex = inputStart; primIndex < inputEnd; primIndex += inputStep) { WritePositionOutput(primIndex); @@ -428,6 +524,65 @@ namespace Ryujinx.Graphics.Shader.Translation } } + if (VertexAsCompute) + { + if (TranslatorContext.Stage == ShaderStage.Vertex) + { + int vertexInfoCbBinding = ResourceManager.Reservations.VertexInfoConstantBufferBinding; + int vertexOutputSbBinding = ResourceManager.Reservations.VertexOutputStorageBufferBinding; + int stride = ResourceManager.Reservations.OutputSizePerInvocation; + + Operand vertexCount = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(0)); + + Operand outputVertexOffset = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(0)); + Operand 
outputInstanceOffset = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(1)); + + Operand outputBaseVertex = this.IMultiply(outputInstanceOffset, vertexCount); + + Operand baseOffset = this.IMultiply(this.IAdd(outputBaseVertex, outputVertexOffset), Const(stride)); + + for (int offset = 0; offset < stride; offset++) + { + Operand vertexOffset = this.IAdd(baseOffset, Const(offset)); + Operand value = this.Load(StorageKind.LocalMemory, ResourceManager.LocalVertexDataMemoryId, Const(offset)); + + this.Store(StorageKind.StorageBuffer, vertexOutputSbBinding, Const(0), vertexOffset, value); + } + } + else if (TranslatorContext.Stage == ShaderStage.Geometry) + { + Operand lblLoopHead = Label(); + Operand lblExit = Label(); + + this.MarkLabel(lblLoopHead); + + Operand writtenIndices = this.Load(StorageKind.LocalMemory, ResourceManager.LocalGeometryOutputIndexCountMemoryId); + + int maxIndicesPerPrimitiveInvocation = TranslatorContext.Definitions.GetGeometryOutputIndexBufferStridePerInstance(); + int maxIndicesPerPrimitive = maxIndicesPerPrimitiveInvocation * TranslatorContext.Definitions.ThreadsPerInputPrimitive; + + this.BranchIfTrue(lblExit, this.ICompareGreaterOrEqualUnsigned(writtenIndices, Const(maxIndicesPerPrimitiveInvocation))); + + int vertexInfoCbBinding = ResourceManager.Reservations.VertexInfoConstantBufferBinding; + + Operand primitiveIndex = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(0)); + Operand instanceIndex = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(1)); + Operand invocationId = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(2)); + Operand vertexCount = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(0)); + Operand primitiveId = this.IAdd(this.IMultiply(instanceIndex, vertexCount), primitiveIndex); + Operand ibOffset = this.IMultiply(primitiveId, Const(maxIndicesPerPrimitive)); + ibOffset = this.IAdd(ibOffset, this.IMultiply(invocationId, 
Const(maxIndicesPerPrimitiveInvocation))); + ibOffset = this.IAdd(ibOffset, writtenIndices); + + this.Store(StorageKind.StorageBuffer, ResourceManager.Reservations.GeometryIndexOutputStorageBufferBinding, Const(0), ibOffset, Const(-1)); + this.Store(StorageKind.LocalMemory, ResourceManager.LocalGeometryOutputIndexCountMemoryId, this.IAdd(writtenIndices, Const(1))); + + this.Branch(lblLoopHead); + + this.MarkLabel(lblExit); + } + } + return true; } diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index a08c8ea9..afa830de 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -831,6 +831,11 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.Store, storageKind, null, e0, e1, value); } + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand value) + { + return context.Add(Instruction.Store, storageKind, null, Const(binding), value); + } + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand value) { return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, value); diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs index 552a3f31..88525462 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs @@ -19,8 +19,12 @@ namespace Ryujinx.Graphics.Shader.Translation DrawParameters = 1 << 4, RtLayer = 1 << 5, Shuffle = 1 << 6, + ViewportIndex = 1 << 7, + ViewportMask = 1 << 8, FixedFuncAttr = 1 << 9, LocalMemory = 1 << 10, SharedMemory = 1 << 11, + Store = 1 << 12, + VtgAsCompute = 1 << 13, } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/IoUsage.cs 
b/src/Ryujinx.Graphics.Shader/Translation/IoUsage.cs new file mode 100644 index 00000000..8ce2da4a --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/IoUsage.cs @@ -0,0 +1,28 @@ +namespace Ryujinx.Graphics.Shader.Translation +{ + readonly struct IoUsage + { + private readonly FeatureFlags _usedFeatures; + + public readonly bool UsesRtLayer => _usedFeatures.HasFlag(FeatureFlags.RtLayer); + public readonly bool UsesViewportIndex => _usedFeatures.HasFlag(FeatureFlags.ViewportIndex); + public readonly bool UsesViewportMask => _usedFeatures.HasFlag(FeatureFlags.ViewportMask); + public readonly byte ClipDistancesWritten { get; } + public readonly int UserDefinedMap { get; } + + public IoUsage(FeatureFlags usedFeatures, byte clipDistancesWritten, int userDefinedMap) + { + _usedFeatures = usedFeatures; + ClipDistancesWritten = clipDistancesWritten; + UserDefinedMap = userDefinedMap; + } + + public readonly IoUsage Combine(IoUsage other) + { + return new IoUsage( + _usedFeatures | other._usedFeatures, + (byte)(ClipDistancesWritten | other.ClipDistancesWritten), + UserDefinedMap | other.UserDefinedMap); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index d07d8dce..9c487c46 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -48,12 +48,22 @@ namespace Ryujinx.Graphics.Shader.Translation public int LocalMemoryId { get; private set; } public int SharedMemoryId { get; private set; } + public int LocalVertexDataMemoryId { get; private set; } + public int LocalTopologyRemapMemoryId { get; private set; } + public int LocalVertexIndexVertexRateMemoryId { get; private set; } + public int LocalVertexIndexInstanceRateMemoryId { get; private set; } + public int LocalGeometryOutputVertexCountMemoryId { get; private set; } + public int LocalGeometryOutputIndexCountMemoryId { get; private set; } + 
public ShaderProperties Properties { get; } - public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor) + public ResourceReservations Reservations { get; } + + public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ResourceReservations reservations = null) { _gpuAccessor = gpuAccessor; Properties = new(); + Reservations = reservations; _stage = stage; _stagePrefix = GetShaderStagePrefix(stage); @@ -114,6 +124,29 @@ namespace Ryujinx.Graphics.Shader.Translation } } + public void SetVertexAsComputeLocalMemories(ShaderStage stage, InputTopology inputTopology) + { + LocalVertexDataMemoryId = AddMemoryDefinition("local_vertex_data", AggregateType.Array | AggregateType.FP32, Reservations.OutputSizePerInvocation); + + if (stage == ShaderStage.Vertex) + { + LocalVertexIndexVertexRateMemoryId = AddMemoryDefinition("local_vertex_index_vr", AggregateType.U32); + LocalVertexIndexInstanceRateMemoryId = AddMemoryDefinition("local_vertex_index_ir", AggregateType.U32); + } + else if (stage == ShaderStage.Geometry) + { + LocalTopologyRemapMemoryId = AddMemoryDefinition("local_topology_remap", AggregateType.Array | AggregateType.U32, inputTopology.ToInputVertices()); + + LocalGeometryOutputVertexCountMemoryId = AddMemoryDefinition("local_geometry_output_vertex", AggregateType.U32); + LocalGeometryOutputIndexCountMemoryId = AddMemoryDefinition("local_geometry_output_index", AggregateType.U32); + } + } + + private int AddMemoryDefinition(string name, AggregateType type, int arrayLength = 1) + { + return Properties.AddLocalMemory(new MemoryDefinition(name, type, arrayLength)); + } + public int GetConstantBufferBinding(int slot) { int binding = _cbSlotToBindingMap[slot]; @@ -465,17 +498,22 @@ namespace Ryujinx.Graphics.Shader.Translation return descriptors; } - public (int, int) GetCbufSlotAndHandleForTexture(int binding) + public bool TryGetCbufSlotAndHandleForTexture(int binding, out int cbufSlot, out int handle) { foreach ((TextureInfo info, TextureMeta meta) 
in _usedTextures) { if (meta.Binding == binding) { - return (info.CbufSlot, info.Handle); + cbufSlot = info.CbufSlot; + handle = info.Handle; + + return true; } } - throw new ArgumentException($"Binding {binding} is invalid."); + cbufSlot = 0; + handle = 0; + return false; } private static int FindDescriptorIndex(TextureDescriptor[] array, int binding) diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceReservations.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceReservations.cs new file mode 100644 index 00000000..d559f669 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceReservations.cs @@ -0,0 +1,186 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using System.Collections.Generic; +using System.Numerics; + +namespace Ryujinx.Graphics.Shader.Translation +{ + public class ResourceReservations + { + public const int TfeBuffersCount = 4; + + public const int MaxVertexBufferTextures = 32; + + public int VertexInfoConstantBufferBinding { get; } + public int VertexOutputStorageBufferBinding { get; } + public int GeometryVertexOutputStorageBufferBinding { get; } + public int GeometryIndexOutputStorageBufferBinding { get; } + public int IndexBufferTextureBinding { get; } + public int TopologyRemapBufferTextureBinding { get; } + + public int ReservedConstantBuffers { get; } + public int ReservedStorageBuffers { get; } + public int ReservedTextures { get; } + public int ReservedImages { get; } + public int InputSizePerInvocation { get; } + public int OutputSizePerInvocation { get; } + public int OutputSizeInBytesPerInvocation => OutputSizePerInvocation * sizeof(uint); + + private readonly int _tfeBufferSbBaseBinding; + private readonly int _vertexBufferTextureBaseBinding; + + private readonly Dictionary<IoDefinition, int> _offsets; + internal IReadOnlyDictionary<IoDefinition, int> Offsets => _offsets; + + internal ResourceReservations(bool isTransformFeedbackEmulated, bool vertexAsCompute) + { + // All stages reserve 
the first constant buffer binding for the support buffer. + ReservedConstantBuffers = 1; + ReservedStorageBuffers = 0; + ReservedTextures = 0; + ReservedImages = 0; + + if (isTransformFeedbackEmulated) + { + // Transform feedback emulation currently always uses 4 storage buffers. + _tfeBufferSbBaseBinding = ReservedStorageBuffers; + ReservedStorageBuffers = TfeBuffersCount; + } + + if (vertexAsCompute) + { + // One constant buffer reserved for vertex related state. + VertexInfoConstantBufferBinding = ReservedConstantBuffers++; + + // One storage buffer for the output vertex data. + VertexOutputStorageBufferBinding = ReservedStorageBuffers++; + + // One storage buffer for the output geometry vertex data. + GeometryVertexOutputStorageBufferBinding = ReservedStorageBuffers++; + + // One storage buffer for the output geometry index data. + GeometryIndexOutputStorageBufferBinding = ReservedStorageBuffers++; + + // Enough textures reserved for all vertex attributes, plus the index buffer. + IndexBufferTextureBinding = ReservedTextures; + TopologyRemapBufferTextureBinding = ReservedTextures + 1; + _vertexBufferTextureBaseBinding = ReservedTextures + 2; + ReservedTextures += 2 + MaxVertexBufferTextures; + } + } + + internal ResourceReservations( + IGpuAccessor gpuAccessor, + bool isTransformFeedbackEmulated, + bool vertexAsCompute, + IoUsage? 
vacInput, + IoUsage vacOutput) : this(isTransformFeedbackEmulated, vertexAsCompute) + { + if (vertexAsCompute) + { + _offsets = new(); + + if (vacInput.HasValue) + { + InputSizePerInvocation = FillIoOffsetMap(gpuAccessor, StorageKind.Input, vacInput.Value); + } + + OutputSizePerInvocation = FillIoOffsetMap(gpuAccessor, StorageKind.Output, vacOutput); + } + } + + private int FillIoOffsetMap(IGpuAccessor gpuAccessor, StorageKind storageKind, IoUsage vacUsage) + { + int offset = 0; + + for (int c = 0; c < 4; c++) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.Position, 0, c), offset++); + } + + _offsets.Add(new IoDefinition(storageKind, IoVariable.PointSize), offset++); + + int clipDistancesWrittenMap = vacUsage.ClipDistancesWritten; + + while (clipDistancesWrittenMap != 0) + { + int index = BitOperations.TrailingZeroCount(clipDistancesWrittenMap); + + _offsets.Add(new IoDefinition(storageKind, IoVariable.ClipDistance, 0, index), offset++); + + clipDistancesWrittenMap &= ~(1 << index); + } + + if (vacUsage.UsesRtLayer) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.Layer), offset++); + } + + if (vacUsage.UsesViewportIndex && gpuAccessor.QueryHostSupportsViewportIndexVertexTessellation()) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.ViewportIndex), offset++); + } + + if (vacUsage.UsesViewportMask && gpuAccessor.QueryHostSupportsViewportMask()) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.ViewportMask), offset++); + } + + int usedDefinedMap = vacUsage.UserDefinedMap; + + while (usedDefinedMap != 0) + { + int location = BitOperations.TrailingZeroCount(usedDefinedMap); + + for (int c = 0; c < 4; c++) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.UserDefined, location, c), offset++); + } + + usedDefinedMap &= ~(1 << location); + } + + return offset; + } + + internal static bool IsVectorOrArrayVariable(IoVariable variable) + { + return variable switch + { + IoVariable.ClipDistance or + IoVariable.Position 
=> true, + _ => false, + }; + } + + public int GetTfeBufferStorageBufferBinding(int bufferIndex) + { + return _tfeBufferSbBaseBinding + bufferIndex; + } + + public int GetVertexBufferTextureBinding(int vaLocation) + { + return _vertexBufferTextureBaseBinding + vaLocation; + } + + internal bool TryGetOffset(StorageKind storageKind, int location, int component, out int offset) + { + return _offsets.TryGetValue(new IoDefinition(storageKind, IoVariable.UserDefined, location, component), out offset); + } + + internal bool TryGetOffset(StorageKind storageKind, IoVariable ioVariable, int location, int component, out int offset) + { + return _offsets.TryGetValue(new IoDefinition(storageKind, ioVariable, location, component), out offset); + } + + internal bool TryGetOffset(StorageKind storageKind, IoVariable ioVariable, int component, out int offset) + { + return _offsets.TryGetValue(new IoDefinition(storageKind, ioVariable, 0, component), out offset); + } + + internal bool TryGetOffset(StorageKind storageKind, IoVariable ioVariable, out int offset) + { + return _offsets.TryGetValue(new IoDefinition(storageKind, ioVariable, 0, 0), out offset); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs index 204f4278..3246e259 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs @@ -32,7 +32,7 @@ namespace Ryujinx.Graphics.Shader.Translation public bool GpPassthrough { get; } public bool LastInVertexPipeline { get; set; } - public int ThreadsPerInputPrimitive { get; } + public int ThreadsPerInputPrimitive { get; private set; } public InputTopology InputTopology => _graphicsState.Topology; public OutputTopology OutputTopology { get; } @@ -97,9 +97,14 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly Dictionary _transformFeedbackDefinitions; - public ShaderDefinitions(ShaderStage stage) 
+ public ShaderDefinitions(ShaderStage stage, ulong transformFeedbackVecMap, TransformFeedbackOutput[] transformFeedbackOutputs) { Stage = stage; + TransformFeedbackEnabled = transformFeedbackOutputs != null; + _transformFeedbackOutputs = transformFeedbackOutputs; + _transformFeedbackDefinitions = new(); + + PopulateTransformFeedbackDefinitions(transformFeedbackVecMap, transformFeedbackOutputs); } public ShaderDefinitions( @@ -142,7 +147,6 @@ namespace Ryujinx.Graphics.Shader.Translation bool omapSampleMask, bool omapDepth, bool supportsScaledVertexFormats, - bool transformFeedbackEnabled, ulong transformFeedbackVecMap, TransformFeedbackOutput[] transformFeedbackOutputs) { @@ -151,17 +155,22 @@ namespace Ryujinx.Graphics.Shader.Translation GpPassthrough = gpPassthrough; ThreadsPerInputPrimitive = threadsPerInputPrimitive; OutputTopology = outputTopology; - MaxOutputVertices = maxOutputVertices; + MaxOutputVertices = gpPassthrough ? graphicsState.Topology.ToInputVerticesNoAdjacency() : maxOutputVertices; ImapTypes = imapTypes; OmapTargets = omapTargets; OmapSampleMask = omapSampleMask; OmapDepth = omapDepth; LastInVertexPipeline = stage < ShaderStage.Fragment; SupportsScaledVertexFormats = supportsScaledVertexFormats; - TransformFeedbackEnabled = transformFeedbackEnabled; + TransformFeedbackEnabled = transformFeedbackOutputs != null; _transformFeedbackOutputs = transformFeedbackOutputs; _transformFeedbackDefinitions = new(); + PopulateTransformFeedbackDefinitions(transformFeedbackVecMap, transformFeedbackOutputs); + } + + private void PopulateTransformFeedbackDefinitions(ulong transformFeedbackVecMap, TransformFeedbackOutput[] transformFeedbackOutputs) + { while (transformFeedbackVecMap != 0) { int vecIndex = BitOperations.TrailingZeroCount(transformFeedbackVecMap); @@ -200,16 +209,6 @@ namespace Ryujinx.Graphics.Shader.Translation OaIndexing = true; } - public TransformFeedbackOutput[] GetTransformFeedbackOutputs() - { - if (!HasTransformFeedbackOutputs()) - { - 
return null; - } - - return _transformFeedbackOutputs; - } - public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput) { if (!HasTransformFeedbackOutputs()) @@ -320,5 +319,35 @@ namespace Ryujinx.Graphics.Shader.Translation { return _graphicsState.AttributeTypes[location]; } + + public bool IsAttributeSint(int location) + { + return (_graphicsState.AttributeTypes[location] & ~AttributeType.AnyPacked) == AttributeType.Sint; + } + + public bool IsAttributePacked(int location) + { + return _graphicsState.AttributeTypes[location].HasFlag(AttributeType.Packed); + } + + public bool IsAttributePackedRgb10A2Signed(int location) + { + return _graphicsState.AttributeTypes[location].HasFlag(AttributeType.PackedRgb10A2Signed); + } + + public int GetGeometryOutputIndexBufferStridePerInstance() + { + return MaxOutputVertices + OutputTopology switch + { + OutputTopology.LineStrip => MaxOutputVertices / 2, + OutputTopology.TriangleStrip => MaxOutputVertices / 3, + _ => MaxOutputVertices, + }; + } + + public int GetGeometryOutputIndexBufferStride() + { + return GetGeometryOutputIndexBufferStridePerInstance() * ThreadsPerInputPrimitive; + } } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs deleted file mode 100644 index c077e1cd..00000000 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs +++ /dev/null @@ -1,187 +0,0 @@ -using Ryujinx.Graphics.Shader.IntermediateRepresentation; -using System.Collections.Generic; - -namespace Ryujinx.Graphics.Shader.Translation -{ - static class ShaderIdentifier - { - public static ShaderIdentification Identify( - IReadOnlyList functions, - IGpuAccessor gpuAccessor, - ShaderStage stage, - InputTopology inputTopology, - out int layerInputAttr) - { - if (stage == ShaderStage.Geometry && - inputTopology == InputTopology.Triangles && - 
!gpuAccessor.QueryHostSupportsGeometryShader() && - IsLayerPassthroughGeometryShader(functions, out layerInputAttr)) - { - return ShaderIdentification.GeometryLayerPassthrough; - } - - layerInputAttr = 0; - return ShaderIdentification.None; - } - - private static bool IsLayerPassthroughGeometryShader(IReadOnlyList functions, out int layerInputAttr) - { - bool writesLayer = false; - layerInputAttr = 0; - - if (functions.Count != 1) - { - return false; - } - - int verticesCount = 0; - int totalVerticesCount = 0; - - foreach (BasicBlock block in functions[0].Blocks) - { - // We are not expecting loops or any complex control flow here, so fail in those cases. - if (block.Branch != null && block.Branch.Index <= block.Index) - { - return false; - } - - foreach (INode node in block.Operations) - { - if (node is not Operation operation) - { - continue; - } - - if (IsResourceWrite(operation.Inst, operation.StorageKind)) - { - return false; - } - - if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output) - { - Operand src = operation.GetSource(operation.SourcesCount - 1); - Operation srcAttributeAsgOp = null; - - if (src.Type == OperandType.LocalVariable && - src.AsgOp is Operation asgOp && - asgOp.Inst == Instruction.Load && - asgOp.StorageKind.IsInputOrOutput()) - { - if (asgOp.StorageKind != StorageKind.Input) - { - return false; - } - - srcAttributeAsgOp = asgOp; - } - - if (srcAttributeAsgOp != null) - { - IoVariable dstAttribute = (IoVariable)operation.GetSource(0).Value; - IoVariable srcAttribute = (IoVariable)srcAttributeAsgOp.GetSource(0).Value; - - if (dstAttribute == IoVariable.Layer && srcAttribute == IoVariable.UserDefined) - { - if (srcAttributeAsgOp.SourcesCount != 4) - { - return false; - } - - writesLayer = true; - layerInputAttr = srcAttributeAsgOp.GetSource(1).Value * 4 + srcAttributeAsgOp.GetSource(3).Value; - } - else - { - if (dstAttribute != srcAttribute) - { - return false; - } - - int inputsCount = 
operation.SourcesCount - 2; - - if (dstAttribute == IoVariable.UserDefined) - { - if (operation.GetSource(1).Value != srcAttributeAsgOp.GetSource(1).Value) - { - return false; - } - - inputsCount--; - } - - for (int i = 0; i < inputsCount; i++) - { - int dstIndex = operation.SourcesCount - 2 - i; - int srcIndex = srcAttributeAsgOp.SourcesCount - 1 - i; - - if ((dstIndex | srcIndex) < 0) - { - return false; - } - - if (operation.GetSource(dstIndex).Type != OperandType.Constant || - srcAttributeAsgOp.GetSource(srcIndex).Type != OperandType.Constant || - operation.GetSource(dstIndex).Value != srcAttributeAsgOp.GetSource(srcIndex).Value) - { - return false; - } - } - } - } - else if (src.Type == OperandType.Constant) - { - int dstComponent = operation.GetSource(operation.SourcesCount - 2).Value; - float expectedValue = dstComponent == 3 ? 1f : 0f; - - if (src.AsFloat() != expectedValue) - { - return false; - } - } - else - { - return false; - } - } - else if (operation.Inst == Instruction.EmitVertex) - { - verticesCount++; - } - else if (operation.Inst == Instruction.EndPrimitive) - { - totalVerticesCount += verticesCount; - verticesCount = 0; - } - } - } - - return totalVerticesCount + verticesCount == 3 && writesLayer; - } - - private static bool IsResourceWrite(Instruction inst, StorageKind storageKind) - { - switch (inst) - { - case Instruction.AtomicAdd: - case Instruction.AtomicAnd: - case Instruction.AtomicCompareAndSwap: - case Instruction.AtomicMaxS32: - case Instruction.AtomicMaxU32: - case Instruction.AtomicMinS32: - case Instruction.AtomicMinU32: - case Instruction.AtomicOr: - case Instruction.AtomicSwap: - case Instruction.AtomicXor: - case Instruction.ImageAtomic: - case Instruction.ImageStore: - return true; - case Instruction.Store: - return storageKind == StorageKind.StorageBuffer || - storageKind == StorageKind.SharedMemory || - storageKind == StorageKind.LocalMemory; - } - - return false; - } - } -} diff --git 
a/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs index fa687eca..87ebb8e7 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs @@ -6,6 +6,7 @@ namespace Ryujinx.Graphics.Shader.Translation { public readonly HelperFunctionManager Hfm; public readonly BasicBlock[] Blocks; + public readonly ShaderDefinitions Definitions; public readonly ResourceManager ResourceManager; public readonly IGpuAccessor GpuAccessor; public readonly TargetLanguage TargetLanguage; @@ -15,6 +16,7 @@ namespace Ryujinx.Graphics.Shader.Translation public TransformContext( HelperFunctionManager hfm, BasicBlock[] blocks, + ShaderDefinitions definitions, ResourceManager resourceManager, IGpuAccessor gpuAccessor, TargetLanguage targetLanguage, @@ -23,6 +25,7 @@ namespace Ryujinx.Graphics.Shader.Translation { Hfm = hfm; Blocks = blocks; + Definitions = definitions; ResourceManager = resourceManager; GpuAccessor = gpuAccessor; TargetLanguage = targetLanguage; diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/GeometryToCompute.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/GeometryToCompute.cs new file mode 100644 index 00000000..0013cf0e --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/GeometryToCompute.cs @@ -0,0 +1,378 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation.Optimizations; +using System.Collections.Generic; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + class GeometryToCompute : ITransformPass + { + public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures) + { + return usedFeatures.HasFlag(FeatureFlags.VtgAsCompute); + } + + public static LinkedListNode<INode> 
RunPass(TransformContext context, LinkedListNode<INode> node) + { + if (context.Definitions.Stage != ShaderStage.Geometry) + { + return node; + } + + Operation operation = (Operation)node.Value; + + LinkedListNode<INode> newNode = node; + + switch (operation.Inst) + { + case Instruction.EmitVertex: + newNode = GenerateEmitVertex(context.Definitions, context.ResourceManager, node); + break; + case Instruction.EndPrimitive: + newNode = GenerateEndPrimitive(context.Definitions, context.ResourceManager, node); + break; + case Instruction.Load: + if (operation.StorageKind == StorageKind.Input) + { + IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value; + + if (TryGetOffset(context.ResourceManager, operation, StorageKind.Input, out int inputOffset)) + { + Operand primVertex = ioVariable == IoVariable.UserDefined + ? operation.GetSource(2) + : operation.GetSource(1); + + Operand vertexElemOffset = GenerateVertexOffset(context.ResourceManager, node, inputOffset, primVertex); + + newNode = node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.StorageBuffer, + operation.Dest, + new[] { Const(context.ResourceManager.Reservations.VertexOutputStorageBufferBinding), Const(0), vertexElemOffset })); + } + else + { + switch (ioVariable) + { + case IoVariable.InvocationId: + newNode = GenerateInvocationId(node, operation.Dest); + break; + case IoVariable.PrimitiveId: + newNode = GeneratePrimitiveId(context.ResourceManager, node, operation.Dest); + break; + case IoVariable.GlobalId: + case IoVariable.SubgroupEqMask: + case IoVariable.SubgroupGeMask: + case IoVariable.SubgroupGtMask: + case IoVariable.SubgroupLaneId: + case IoVariable.SubgroupLeMask: + case IoVariable.SubgroupLtMask: + // Those are valid or expected for geometry shaders. 
+ break; + default: + context.GpuAccessor.Log($"Invalid input \"{ioVariable}\"."); + break; + } + } + } + else if (operation.StorageKind == StorageKind.Output) + { + if (TryGetOffset(context.ResourceManager, operation, StorageKind.Output, out int outputOffset)) + { + newNode = node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + operation.Dest, + new[] { Const(context.ResourceManager.LocalVertexDataMemoryId), Const(outputOffset) })); + } + else + { + context.GpuAccessor.Log($"Invalid output \"{(IoVariable)operation.GetSource(0).Value}\"."); + } + } + break; + case Instruction.Store: + if (operation.StorageKind == StorageKind.Output) + { + if (TryGetOffset(context.ResourceManager, operation, StorageKind.Output, out int outputOffset)) + { + Operand value = operation.GetSource(operation.SourcesCount - 1); + + newNode = node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.LocalMemory, + (Operand)null, + new[] { Const(context.ResourceManager.LocalVertexDataMemoryId), Const(outputOffset), value })); + } + else + { + context.GpuAccessor.Log($"Invalid output \"{(IoVariable)operation.GetSource(0).Value}\"."); + } + } + break; + } + + if (newNode != node) + { + Utils.DeleteNode(node, operation); + } + + return newNode; + } + + private static LinkedListNode<INode> GenerateEmitVertex(ShaderDefinitions definitions, ResourceManager resourceManager, LinkedListNode<INode> node) + { + int vbOutputBinding = resourceManager.Reservations.GeometryVertexOutputStorageBufferBinding; + int ibOutputBinding = resourceManager.Reservations.GeometryIndexOutputStorageBufferBinding; + int stride = resourceManager.Reservations.OutputSizePerInvocation; + + Operand outputPrimVertex = IncrementLocalMemory(node, resourceManager.LocalGeometryOutputVertexCountMemoryId); + Operand baseVertexOffset = GenerateBaseOffset( + resourceManager, + node, + definitions.MaxOutputVertices * definitions.ThreadsPerInputPrimitive, + definitions.ThreadsPerInputPrimitive); + 
Operand outputBaseVertex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, outputBaseVertex, new[] { baseVertexOffset, outputPrimVertex })); + + Operand outputPrimIndex = IncrementLocalMemory(node, resourceManager.LocalGeometryOutputIndexCountMemoryId); + Operand baseIndexOffset = GenerateBaseOffset( + resourceManager, + node, + definitions.GetGeometryOutputIndexBufferStride(), + definitions.ThreadsPerInputPrimitive); + Operand outputBaseIndex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, outputBaseIndex, new[] { baseIndexOffset, outputPrimIndex })); + + node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.StorageBuffer, + null, + new[] { Const(ibOutputBinding), Const(0), outputBaseIndex, outputBaseVertex })); + + Operand baseOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, baseOffset, new[] { outputBaseVertex, Const(stride) })); + + LinkedListNode<INode> newNode = node; + + for (int offset = 0; offset < stride; offset++) + { + Operand vertexOffset; + + if (offset > 0) + { + vertexOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, vertexOffset, new[] { baseOffset, Const(offset) })); + } + else + { + vertexOffset = baseOffset; + } + + Operand value = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + value, + new[] { Const(resourceManager.LocalVertexDataMemoryId), Const(offset) })); + + newNode = node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.StorageBuffer, + null, + new[] { Const(vbOutputBinding), Const(0), vertexOffset, value })); + } + + return newNode; + } + + private static LinkedListNode<INode> GenerateEndPrimitive(ShaderDefinitions definitions, ResourceManager resourceManager, LinkedListNode<INode> node) + { + int ibOutputBinding = resourceManager.Reservations.GeometryIndexOutputStorageBufferBinding; + + Operand outputPrimIndex = IncrementLocalMemory(node, 
resourceManager.LocalGeometryOutputIndexCountMemoryId); + Operand baseIndexOffset = GenerateBaseOffset( + resourceManager, + node, + definitions.GetGeometryOutputIndexBufferStride(), + definitions.ThreadsPerInputPrimitive); + Operand outputBaseIndex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, outputBaseIndex, new[] { baseIndexOffset, outputPrimIndex })); + + return node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.StorageBuffer, + null, + new[] { Const(ibOutputBinding), Const(0), outputBaseIndex, Const(-1) })); + } + + private static Operand GenerateBaseOffset(ResourceManager resourceManager, LinkedListNode<INode> node, int stride, int threadsPerInputPrimitive) + { + Operand primitiveId = Local(); + GeneratePrimitiveId(resourceManager, node, primitiveId); + + Operand baseOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, baseOffset, new[] { primitiveId, Const(stride) })); + + Operand invocationId = Local(); + GenerateInvocationId(node, invocationId); + + Operand invocationOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, invocationOffset, new[] { invocationId, Const(stride / threadsPerInputPrimitive) })); + + Operand combinedOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, combinedOffset, new[] { baseOffset, invocationOffset })); + + return combinedOffset; + } + + private static Operand IncrementLocalMemory(LinkedListNode<INode> node, int memoryId) + { + Operand oldValue = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + oldValue, + new[] { Const(memoryId) })); + + Operand newValue = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, newValue, new[] { oldValue, Const(1) })); + + node.List.AddBefore(node, new Operation(Instruction.Store, StorageKind.LocalMemory, null, new[] { Const(memoryId), newValue })); + + return oldValue; + } + + private static Operand 
GenerateVertexOffset( + ResourceManager resourceManager, + LinkedListNode<INode> node, + int elementOffset, + Operand primVertex) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + Operand vertexCount = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + vertexCount, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexCounts), Const(0) })); + + Operand primInputVertex = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + primInputVertex, + new[] { Const(resourceManager.LocalTopologyRemapMemoryId), primVertex })); + + Operand instanceIndex = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + instanceIndex, + new[] { Const((int)IoVariable.GlobalId), Const(1) })); + + Operand baseVertex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, baseVertex, new[] { instanceIndex, vertexCount })); + + Operand vertexIndex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, vertexIndex, new[] { baseVertex, primInputVertex })); + + Operand vertexBaseOffset = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Multiply, + vertexBaseOffset, + new[] { vertexIndex, Const(resourceManager.Reservations.InputSizePerInvocation) })); + + Operand vertexElemOffset; + + if (elementOffset != 0) + { + vertexElemOffset = Local(); + + node.List.AddBefore(node, new Operation(Instruction.Add, vertexElemOffset, new[] { vertexBaseOffset, Const(elementOffset) })); + } + else + { + vertexElemOffset = vertexBaseOffset; + } + + return vertexElemOffset; + } + + private static LinkedListNode<INode> GeneratePrimitiveId(ResourceManager resourceManager, LinkedListNode<INode> node, Operand dest) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + Operand vertexCount = Local(); + node.List.AddBefore(node, new 
Operation( 
+ Instruction.Load, + StorageKind.ConstantBuffer, + vertexCount, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexCounts), Const(0) })); + + Operand vertexIndex = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + vertexIndex, + new[] { Const((int)IoVariable.GlobalId), Const(0) })); + + Operand instanceIndex = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + instanceIndex, + new[] { Const((int)IoVariable.GlobalId), Const(1) })); + + Operand baseVertex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, baseVertex, new[] { instanceIndex, vertexCount })); + + return node.List.AddBefore(node, new Operation(Instruction.Add, dest, new[] { baseVertex, vertexIndex })); + } + + private static LinkedListNode<INode> GenerateInvocationId(LinkedListNode<INode> node, Operand dest) + { + return node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + dest, + new[] { Const((int)IoVariable.GlobalId), Const(2) })); + } + + private static bool TryGetOffset(ResourceManager resourceManager, Operation operation, StorageKind storageKind, out int outputOffset) + { + bool isStore = operation.Inst == Instruction.Store; + + IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value; + + bool isValidOutput; + + if (ioVariable == IoVariable.UserDefined) + { + int lastIndex = operation.SourcesCount - (isStore ? 2 : 1); + + int location = operation.GetSource(1).Value; + int component = operation.GetSource(lastIndex).Value; + + isValidOutput = resourceManager.Reservations.TryGetOffset(storageKind, location, component, out outputOffset); + } + else + { + if (ResourceReservations.IsVectorOrArrayVariable(ioVariable)) + { + int component = operation.GetSource(operation.SourcesCount - (isStore ? 
2 : 1)).Value; + + isValidOutput = resourceManager.Reservations.TryGetOffset(storageKind, ioVariable, component, out outputOffset); + } + else + { + isValidOutput = resourceManager.Reservations.TryGetOffset(storageKind, ioVariable, out outputOffset); + } + } + + return isValidOutput; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs index 5ceed4b7..2479d85f 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs @@ -153,15 +153,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; - if (isBindless) + if (isBindless || !resourceManager.TryGetCbufSlotAndHandleForTexture(texOp.Binding, out int cbufSlot, out int handle)) { return node; } bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; - (int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding); - bool isCoordNormalized = gpuAccessor.QueryTextureCoordNormalized(handle, cbufSlot); if (isCoordNormalized || intCoords) @@ -607,13 +605,11 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms // We can't query the format of a bindless texture, // because the handle is unknown, it can have any format. 
- if (texOp.Flags.HasFlag(TextureFlags.Bindless)) + if (texOp.Flags.HasFlag(TextureFlags.Bindless) || !resourceManager.TryGetCbufSlotAndHandleForTexture(texOp.Binding, out int cbufSlot, out int handle)) { return node; } - (int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding); - TextureFormat format = gpuAccessor.QueryTextureFormat(handle, cbufSlot); int maxPositive = format switch diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs index 29393880..7ff3b8bf 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs @@ -14,6 +14,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms RunPass(context); RunPass(context); RunPass(context); + RunPass(context); + RunPass(context); } private static void RunPass(TransformContext context) where T : ITransformPass diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/VertexToCompute.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/VertexToCompute.cs new file mode 100644 index 00000000..d71ada86 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/VertexToCompute.cs @@ -0,0 +1,364 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation.Optimizations; +using System.Collections.Generic; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + class VertexToCompute : ITransformPass + { + public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures) + { + return usedFeatures.HasFlag(FeatureFlags.VtgAsCompute); + } + + public static LinkedListNode RunPass(TransformContext context, LinkedListNode node) + { + if (context.Definitions.Stage != 
ShaderStage.Vertex) + { + return node; + } + + Operation operation = (Operation)node.Value; + + LinkedListNode newNode = node; + + if (operation.Inst == Instruction.Load && operation.StorageKind == StorageKind.Input) + { + Operand dest = operation.Dest; + + switch ((IoVariable)operation.GetSource(0).Value) + { + case IoVariable.BaseInstance: + newNode = GenerateBaseInstanceLoad(context.ResourceManager, node, dest); + break; + case IoVariable.BaseVertex: + newNode = GenerateBaseVertexLoad(context.ResourceManager, node, dest); + break; + case IoVariable.InstanceId: + newNode = GenerateInstanceIdLoad(node, dest); + break; + case IoVariable.InstanceIndex: + newNode = GenerateInstanceIndexLoad(context.ResourceManager, node, dest); + break; + case IoVariable.VertexId: + case IoVariable.VertexIndex: + newNode = GenerateVertexIndexLoad(context.ResourceManager, node, dest); + break; + case IoVariable.UserDefined: + int location = operation.GetSource(1).Value; + int component = operation.GetSource(2).Value; + + if (context.Definitions.IsAttributePacked(location)) + { + bool needsSextNorm = context.Definitions.IsAttributePackedRgb10A2Signed(location); + + Operand temp = needsSextNorm ? Local() : dest; + Operand vertexElemOffset = GenerateVertexOffset(context.ResourceManager, node, location, 0); + + newNode = node.List.AddBefore(node, new TextureOperation( + Instruction.TextureSample, + SamplerType.TextureBuffer, + TextureFormat.Unknown, + TextureFlags.IntCoords, + context.ResourceManager.Reservations.GetVertexBufferTextureBinding(location), + 1 << component, + new[] { temp }, + new[] { vertexElemOffset })); + + if (needsSextNorm) + { + bool sint = context.Definitions.IsAttributeSint(location); + CopySignExtendedNormalized(node, component == 3 ? 2 : 10, !sint, dest, temp); + } + } + else + { + Operand temp = component > 0 ? 
Local() : dest; + Operand vertexElemOffset = GenerateVertexOffset(context.ResourceManager, node, location, component); + + newNode = node.List.AddBefore(node, new TextureOperation( + Instruction.TextureSample, + SamplerType.TextureBuffer, + TextureFormat.Unknown, + TextureFlags.IntCoords, + context.ResourceManager.Reservations.GetVertexBufferTextureBinding(location), + 1, + new[] { temp }, + new[] { vertexElemOffset })); + + if (component > 0) + { + newNode = CopyMasked(context.ResourceManager, newNode, location, component, dest, temp); + } + } + break; + case IoVariable.GlobalId: + case IoVariable.SubgroupEqMask: + case IoVariable.SubgroupGeMask: + case IoVariable.SubgroupGtMask: + case IoVariable.SubgroupLaneId: + case IoVariable.SubgroupLeMask: + case IoVariable.SubgroupLtMask: + // Those are valid or expected for vertex shaders. + break; + default: + context.GpuAccessor.Log($"Invalid input \"{(IoVariable)operation.GetSource(0).Value}\"."); + break; + } + } + else if (operation.Inst == Instruction.Load && operation.StorageKind == StorageKind.Output) + { + if (TryGetOutputOffset(context.ResourceManager, operation, out int outputOffset)) + { + newNode = node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + operation.Dest, + new[] { Const(context.ResourceManager.LocalVertexDataMemoryId), Const(outputOffset) })); + } + else + { + context.GpuAccessor.Log($"Invalid output \"{(IoVariable)operation.GetSource(0).Value}\"."); + } + } + else if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output) + { + if (TryGetOutputOffset(context.ResourceManager, operation, out int outputOffset)) + { + Operand value = operation.GetSource(operation.SourcesCount - 1); + + newNode = node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.LocalMemory, + (Operand)null, + new[] { Const(context.ResourceManager.LocalVertexDataMemoryId), Const(outputOffset), value })); + } + else + { + 
context.GpuAccessor.Log($"Invalid output \"{(IoVariable)operation.GetSource(0).Value}\"."); + } + } + + if (newNode != node) + { + Utils.DeleteNode(node, operation); + } + + return newNode; + } + + private static Operand GenerateVertexOffset(ResourceManager resourceManager, LinkedListNode node, int location, int component) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + Operand vertexIdVr = Local(); + GenerateVertexIdVertexRateLoad(resourceManager, node, vertexIdVr); + + Operand vertexIdIr = Local(); + GenerateVertexIdInstanceRateLoad(resourceManager, node, vertexIdIr); + + Operand attributeOffset = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + attributeOffset, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexOffsets), Const(location), Const(0) })); + + Operand isInstanceRate = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + isInstanceRate, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexOffsets), Const(location), Const(1) })); + + Operand vertexId = Local(); + node.List.AddBefore(node, new Operation( + Instruction.ConditionalSelect, + vertexId, + new[] { isInstanceRate, vertexIdIr, vertexIdVr })); + + Operand vertexStride = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + vertexStride, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexStrides), Const(location), Const(0) })); + + Operand vertexBaseOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, vertexBaseOffset, new[] { vertexId, vertexStride })); + + Operand vertexOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, vertexOffset, new[] { attributeOffset, vertexBaseOffset })); + + Operand vertexElemOffset; + + if (component != 0) + { + vertexElemOffset = 
Local(); + + node.List.AddBefore(node, new Operation(Instruction.Add, vertexElemOffset, new[] { vertexOffset, Const(component) })); + } + else + { + vertexElemOffset = vertexOffset; + } + + return vertexElemOffset; + } + + private static LinkedListNode CopySignExtendedNormalized(LinkedListNode node, int bits, bool normalize, Operand dest, Operand src) + { + Operand leftShifted = Local(); + node = node.List.AddAfter(node, new Operation( + Instruction.ShiftLeft, + leftShifted, + new[] { src, Const(32 - bits) })); + + Operand rightShifted = normalize ? Local() : dest; + node = node.List.AddAfter(node, new Operation( + Instruction.ShiftRightS32, + rightShifted, + new[] { leftShifted, Const(32 - bits) })); + + if (normalize) + { + Operand asFloat = Local(); + node = node.List.AddAfter(node, new Operation(Instruction.ConvertS32ToFP32, asFloat, new[] { rightShifted })); + node = node.List.AddAfter(node, new Operation( + Instruction.FP32 | Instruction.Multiply, + dest, + new[] { asFloat, ConstF(1f / (1 << (bits - 1))) })); + } + + return node; + } + + private static LinkedListNode CopyMasked( + ResourceManager resourceManager, + LinkedListNode node, + int location, + int component, + Operand dest, + Operand src) + { + Operand componentExists = Local(); + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + node = node.List.AddAfter(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + componentExists, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexStrides), Const(location), Const(component) })); + + return node.List.AddAfter(node, new Operation( + Instruction.ConditionalSelect, + dest, + new[] { componentExists, src, ConstF(component == 3 ? 
1f : 0f) })); + } + + private static LinkedListNode GenerateBaseVertexLoad(ResourceManager resourceManager, LinkedListNode node, Operand dest) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + return node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + dest, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexCounts), Const(2) })); + } + + private static LinkedListNode GenerateBaseInstanceLoad(ResourceManager resourceManager, LinkedListNode node, Operand dest) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + return node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + dest, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexCounts), Const(3) })); + } + + private static LinkedListNode GenerateVertexIndexLoad(ResourceManager resourceManager, LinkedListNode node, Operand dest) + { + Operand baseVertex = Local(); + Operand vertexId = Local(); + + GenerateBaseVertexLoad(resourceManager, node, baseVertex); + GenerateVertexIdVertexRateLoad(resourceManager, node, vertexId); + + return node.List.AddBefore(node, new Operation(Instruction.Add, dest, new[] { baseVertex, vertexId })); + } + + private static LinkedListNode GenerateInstanceIndexLoad(ResourceManager resourceManager, LinkedListNode node, Operand dest) + { + Operand baseInstance = Local(); + Operand instanceId = Local(); + + GenerateBaseInstanceLoad(resourceManager, node, baseInstance); + + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + instanceId, + new[] { Const((int)IoVariable.GlobalId), Const(1) })); + + return node.List.AddBefore(node, new Operation(Instruction.Add, dest, new[] { baseInstance, instanceId })); + } + + private static LinkedListNode GenerateVertexIdVertexRateLoad(ResourceManager resourceManager, LinkedListNode node, Operand dest) + { + 
Operand[] sources = new Operand[] { Const(resourceManager.LocalVertexIndexVertexRateMemoryId) }; + + return node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.LocalMemory, dest, sources)); + } + + private static LinkedListNode GenerateVertexIdInstanceRateLoad(ResourceManager resourceManager, LinkedListNode node, Operand dest) + { + Operand[] sources = new Operand[] { Const(resourceManager.LocalVertexIndexInstanceRateMemoryId) }; + + return node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.LocalMemory, dest, sources)); + } + + private static LinkedListNode GenerateInstanceIdLoad(LinkedListNode node, Operand dest) + { + Operand[] sources = new Operand[] { Const((int)IoVariable.GlobalId), Const(1) }; + + return node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, dest, sources)); + } + + private static bool TryGetOutputOffset(ResourceManager resourceManager, Operation operation, out int outputOffset) + { + bool isStore = operation.Inst == Instruction.Store; + + IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value; + + bool isValidOutput; + + if (ioVariable == IoVariable.UserDefined) + { + int lastIndex = operation.SourcesCount - (isStore ? 2 : 1); + + int location = operation.GetSource(1).Value; + int component = operation.GetSource(lastIndex).Value; + + isValidOutput = resourceManager.Reservations.TryGetOffset(StorageKind.Output, location, component, out outputOffset); + } + else + { + if (ResourceReservations.IsVectorOrArrayVariable(ioVariable)) + { + int component = operation.GetSource(operation.SourcesCount - (isStore ? 
2 : 1)).Value; + + isValidOutput = resourceManager.Reservations.TryGetOffset(StorageKind.Output, ioVariable, component, out outputOffset); + } + else + { + isValidOutput = resourceManager.Reservations.TryGetOffset(StorageKind.Output, ioVariable, out outputOffset); + } + } + + return isValidOutput; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs index 93a70ace..6a31ea2e 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -77,12 +77,32 @@ namespace Ryujinx.Graphics.Shader.Translation } private static ShaderDefinitions CreateGraphicsDefinitions(IGpuAccessor gpuAccessor, ShaderHeader header) + { + TransformFeedbackOutput[] transformFeedbackOutputs = GetTransformFeedbackOutputs(gpuAccessor, out ulong transformFeedbackVecMap); + + return new ShaderDefinitions( + header.Stage, + gpuAccessor.QueryGraphicsState(), + header.Stage == ShaderStage.Geometry && header.GpPassthrough, + header.ThreadsPerInputPrimitive, + header.OutputTopology, + header.MaxOutputVertexCount, + header.ImapTypes, + header.OmapTargets, + header.OmapSampleMask, + header.OmapDepth, + gpuAccessor.QueryHostSupportsScaledVertexFormats(), + transformFeedbackVecMap, + transformFeedbackOutputs); + } + + internal static TransformFeedbackOutput[] GetTransformFeedbackOutputs(IGpuAccessor gpuAccessor, out ulong transformFeedbackVecMap) { bool transformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled() && gpuAccessor.QueryHostSupportsTransformFeedback(); TransformFeedbackOutput[] transformFeedbackOutputs = null; - ulong transformFeedbackVecMap = 0UL; + transformFeedbackVecMap = 0UL; if (transformFeedbackEnabled) { @@ -105,21 +125,7 @@ namespace Ryujinx.Graphics.Shader.Translation } } - return new ShaderDefinitions( - header.Stage, - gpuAccessor.QueryGraphicsState(), - header.Stage == ShaderStage.Geometry && header.GpPassthrough, - 
header.ThreadsPerInputPrimitive, - header.OutputTopology, - header.MaxOutputVertexCount, - header.ImapTypes, - header.OmapTargets, - header.OmapSampleMask, - header.OmapDepth, - gpuAccessor.QueryHostSupportsScaledVertexFormats(), - transformFeedbackEnabled, - transformFeedbackVecMap, - transformFeedbackOutputs); + return transformFeedbackOutputs; } private static int GetLocalMemorySize(ShaderHeader header) @@ -131,6 +137,7 @@ namespace Ryujinx.Graphics.Shader.Translation TranslatorContext translatorContext, ResourceManager resourceManager, DecodedProgram program, + bool vertexAsCompute, bool initializeOutputs, out int initializationOperations) { @@ -147,7 +154,7 @@ namespace Ryujinx.Graphics.Shader.Translation for (int index = 0; index < functions.Length; index++) { - EmitterContext context = new(translatorContext, resourceManager, program, index != 0); + EmitterContext context = new(translatorContext, resourceManager, program, vertexAsCompute, index != 0); if (initializeOutputs && index == 0) { diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index 39ce92c9..f1226ae6 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -8,7 +8,6 @@ using Ryujinx.Graphics.Shader.Translation.Optimizations; using Ryujinx.Graphics.Shader.Translation.Transforms; using System; using System.Collections.Generic; -using System.Linq; using System.Numerics; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; using static Ryujinx.Graphics.Shader.Translation.Translator; @@ -19,14 +18,12 @@ namespace Ryujinx.Graphics.Shader.Translation { private readonly DecodedProgram _program; private readonly int _localMemorySize; + private IoUsage _vertexOutput; public ulong Address { get; } public int Size { get; } public int Cb1DataSize => _program.Cb1DataSize; - internal bool HasLayerInputAttribute { 
get; private set; } - internal int GpLayerInputAttribute { get; private set; } - internal AttributeUsage AttributeUsage => _program.AttributeUsage; internal ShaderDefinitions Definitions { get; } @@ -37,7 +34,8 @@ namespace Ryujinx.Graphics.Shader.Translation internal TranslationOptions Options { get; } - internal FeatureFlags UsedFeatures { get; private set; } + private bool IsTransformFeedbackEmulated => !GpuAccessor.QueryHostSupportsTransformFeedback() && GpuAccessor.QueryTransformFeedbackEnabled(); + public bool HasStore => _program.UsedFeatures.HasFlag(FeatureFlags.Store) || (IsTransformFeedbackEmulated && Definitions.LastInVertexPipeline); public bool LayerOutputWritten { get; private set; } public int LayerOutputAttribute { get; private set; } @@ -55,10 +53,10 @@ namespace Ryujinx.Graphics.Shader.Translation Size = size; _program = program; _localMemorySize = localMemorySize; + _vertexOutput = new IoUsage(FeatureFlags.None, 0, -1); Definitions = definitions; GpuAccessor = gpuAccessor; Options = options; - UsedFeatures = program.UsedFeatures; } private static bool IsLoadUserDefined(Operation operation) @@ -171,13 +169,6 @@ namespace Ryujinx.Graphics.Shader.Translation LayerOutputAttribute = attr; } - public void SetGeometryShaderLayerInputAttribute(int attr) - { - UsedFeatures |= FeatureFlags.RtLayer; - HasLayerInputAttribute = true; - GpLayerInputAttribute = attr; - } - public void SetLastInVertexPipeline() { Definitions.LastInVertexPipeline = true; @@ -187,7 +178,7 @@ namespace Ryujinx.Graphics.Shader.Translation { AttributeUsage.MergeFromtNextStage( Definitions.GpPassthrough, - nextStage.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr), + nextStage._program.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr), nextStage.AttributeUsage); // We don't consider geometry shaders using the geometry shader passthrough feature @@ -200,9 +191,9 @@ namespace Ryujinx.Graphics.Shader.Translation } } - public ShaderProgram Translate() + public ShaderProgram Translate(bool 
asCompute = false) { - ResourceManager resourceManager = CreateResourceManager(); + ResourceManager resourceManager = CreateResourceManager(asCompute); bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); @@ -215,36 +206,42 @@ namespace Ryujinx.Graphics.Shader.Translation resourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory); } - FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: true, out _); + FunctionCode[] code = EmitShader(this, resourceManager, _program, asCompute, initializeOutputs: true, out _); - return Translate(code, resourceManager, UsedFeatures, _program.ClipDistancesWritten); + return Translate(code, resourceManager, _program.UsedFeatures, _program.ClipDistancesWritten, asCompute); } - public ShaderProgram Translate(TranslatorContext other) + public ShaderProgram Translate(TranslatorContext other, bool asCompute = false) { - ResourceManager resourceManager = CreateResourceManager(); + ResourceManager resourceManager = CreateResourceManager(asCompute); bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); resourceManager.SetCurrentLocalMemory(_localMemorySize, usesLocalMemory); - FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: false, out _); + FunctionCode[] code = EmitShader(this, resourceManager, _program, asCompute, initializeOutputs: false, out _); bool otherUsesLocalMemory = other._program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); resourceManager.SetCurrentLocalMemory(other._localMemorySize, otherUsesLocalMemory); - FunctionCode[] otherCode = EmitShader(other, resourceManager, other._program, initializeOutputs: true, out int aStart); + FunctionCode[] otherCode = EmitShader(other, resourceManager, other._program, asCompute, initializeOutputs: true, out int aStart); code = Combine(otherCode, code, aStart); return Translate( code, resourceManager, - UsedFeatures | 
other.UsedFeatures, - (byte)(_program.ClipDistancesWritten | other._program.ClipDistancesWritten)); + _program.UsedFeatures | other._program.UsedFeatures, + (byte)(_program.ClipDistancesWritten | other._program.ClipDistancesWritten), + asCompute); } - private ShaderProgram Translate(FunctionCode[] functions, ResourceManager resourceManager, FeatureFlags usedFeatures, byte clipDistancesWritten) + private ShaderProgram Translate(FunctionCode[] functions, ResourceManager resourceManager, FeatureFlags usedFeatures, byte clipDistancesWritten, bool asCompute) { + if (asCompute) + { + usedFeatures |= FeatureFlags.VtgAsCompute; + } + var cfgs = new ControlFlowGraph[functions.Length]; var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length]; @@ -294,6 +291,7 @@ namespace Ryujinx.Graphics.Shader.Translation TransformContext context = new( hfm, cfg.Blocks, + Definitions, resourceManager, GpuAccessor, Options.TargetLanguage, @@ -307,28 +305,24 @@ namespace Ryujinx.Graphics.Shader.Translation funcs[i] = new Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount); } - var identification = ShaderIdentifier.Identify(funcs, GpuAccessor, Definitions.Stage, Definitions.InputTopology, out int layerInputAttr); - return Generate( funcs, AttributeUsage, + GetDefinitions(asCompute), Definitions, resourceManager, usedFeatures, - clipDistancesWritten, - identification, - layerInputAttr); + clipDistancesWritten); } private ShaderProgram Generate( IReadOnlyList funcs, AttributeUsage attributeUsage, ShaderDefinitions definitions, + ShaderDefinitions originalDefinitions, ResourceManager resourceManager, FeatureFlags usedFeatures, - byte clipDistancesWritten, - ShaderIdentification identification = ShaderIdentification.None, - int layerInputAttr = 0) + byte clipDistancesWritten) { var sInfo = StructuredProgram.MakeStructuredProgram( funcs, @@ -337,20 +331,28 @@ namespace Ryujinx.Graphics.Shader.Translation resourceManager, 
Options.Flags.HasFlag(TranslationFlags.DebugMode)); + int geometryVerticesPerPrimitive = Definitions.OutputTopology switch + { + OutputTopology.LineStrip => 2, + OutputTopology.TriangleStrip => 3, + _ => 1 + }; + var info = new ShaderProgramInfo( resourceManager.GetConstantBufferDescriptors(), resourceManager.GetStorageBufferDescriptors(), resourceManager.GetTextureDescriptors(), resourceManager.GetImageDescriptors(), - identification, - layerInputAttr, - definitions.Stage, + originalDefinitions.Stage, + geometryVerticesPerPrimitive, + originalDefinitions.MaxOutputVertices, + originalDefinitions.ThreadsPerInputPrimitive, usedFeatures.HasFlag(FeatureFlags.FragCoordXY), usedFeatures.HasFlag(FeatureFlags.InstanceId), usedFeatures.HasFlag(FeatureFlags.DrawParameters), usedFeatures.HasFlag(FeatureFlags.RtLayer), clipDistancesWritten, - definitions.OmapTargets); + originalDefinitions.OmapTargets); var hostCapabilities = new HostCapabilities( GpuAccessor.QueryHostReducedPrecision(), @@ -372,37 +374,203 @@ namespace Ryujinx.Graphics.Shader.Translation }; } - private ResourceManager CreateResourceManager() + private ResourceManager CreateResourceManager(bool vertexAsCompute) { - ResourceManager resourceManager = new(Definitions.Stage, GpuAccessor); + ResourceManager resourceManager = new(Definitions.Stage, GpuAccessor, GetResourceReservations()); - if (!GpuAccessor.QueryHostSupportsTransformFeedback() && GpuAccessor.QueryTransformFeedbackEnabled()) + if (IsTransformFeedbackEmulated) { - StructureType tfeInfoStruct = new(new StructureField[] - { - new StructureField(AggregateType.Array | AggregateType.U32, "base_offset", 4), - new StructureField(AggregateType.U32, "vertex_count") - }); - - BufferDefinition tfeInfoBuffer = new(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", tfeInfoStruct); - resourceManager.Properties.AddOrUpdateStorageBuffer(tfeInfoBuffer); - StructureType tfeDataStruct = new(new StructureField[] { new StructureField(AggregateType.Array | 
AggregateType.U32, "data", 0) }); - for (int i = 0; i < Constants.TfeBuffersCount; i++) + for (int i = 0; i < ResourceReservations.TfeBuffersCount; i++) { - int binding = Constants.TfeBufferBaseBinding + i; + int binding = resourceManager.Reservations.GetTfeBufferStorageBufferBinding(i); BufferDefinition tfeDataBuffer = new(BufferLayout.Std430, 1, binding, $"tfe_data{i}", tfeDataStruct); resourceManager.Properties.AddOrUpdateStorageBuffer(tfeDataBuffer); } } + if (vertexAsCompute) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + BufferDefinition vertexInfoBuffer = new(BufferLayout.Std140, 0, vertexInfoCbBinding, "vb_info", VertexInfoBuffer.GetStructureType()); + resourceManager.Properties.AddOrUpdateConstantBuffer(vertexInfoBuffer); + + StructureType vertexOutputStruct = new(new StructureField[] + { + new StructureField(AggregateType.Array | AggregateType.FP32, "data", 0) + }); + + int vertexOutputSbBinding = resourceManager.Reservations.VertexOutputStorageBufferBinding; + BufferDefinition vertexOutputBuffer = new(BufferLayout.Std430, 1, vertexOutputSbBinding, "vertex_output", vertexOutputStruct); + resourceManager.Properties.AddOrUpdateStorageBuffer(vertexOutputBuffer); + + if (Stage == ShaderStage.Vertex) + { + int ibBinding = resourceManager.Reservations.IndexBufferTextureBinding; + TextureDefinition indexBuffer = new(2, ibBinding, "ib_data", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None); + resourceManager.Properties.AddOrUpdateTexture(indexBuffer); + + int inputMap = _program.AttributeUsage.UsedInputAttributes; + + while (inputMap != 0) + { + int location = BitOperations.TrailingZeroCount(inputMap); + int binding = resourceManager.Reservations.GetVertexBufferTextureBinding(location); + TextureDefinition vaBuffer = new(2, binding, $"vb_data{location}", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None); + resourceManager.Properties.AddOrUpdateTexture(vaBuffer); + + 
inputMap &= ~(1 << location); + } + } + else if (Stage == ShaderStage.Geometry) + { + int trbBinding = resourceManager.Reservations.TopologyRemapBufferTextureBinding; + TextureDefinition remapBuffer = new(2, trbBinding, "trb_data", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None); + resourceManager.Properties.AddOrUpdateTexture(remapBuffer); + + int geometryVbOutputSbBinding = resourceManager.Reservations.GeometryVertexOutputStorageBufferBinding; + BufferDefinition geometryVbOutputBuffer = new(BufferLayout.Std430, 1, geometryVbOutputSbBinding, "geometry_vb_output", vertexOutputStruct); + resourceManager.Properties.AddOrUpdateStorageBuffer(geometryVbOutputBuffer); + + StructureType geometryIbOutputStruct = new(new StructureField[] + { + new StructureField(AggregateType.Array | AggregateType.U32, "data", 0) + }); + + int geometryIbOutputSbBinding = resourceManager.Reservations.GeometryIndexOutputStorageBufferBinding; + BufferDefinition geometryIbOutputBuffer = new(BufferLayout.Std430, 1, geometryIbOutputSbBinding, "geometry_ib_output", geometryIbOutputStruct); + resourceManager.Properties.AddOrUpdateStorageBuffer(geometryIbOutputBuffer); + } + + resourceManager.SetVertexAsComputeLocalMemories(Definitions.Stage, Definitions.InputTopology); + } + return resourceManager; } + private ShaderDefinitions GetDefinitions(bool vertexAsCompute) + { + if (vertexAsCompute) + { + return new ShaderDefinitions(ShaderStage.Compute, 32, 32, 1); + } + else + { + return Definitions; + } + } + + public ResourceReservations GetResourceReservations() + { + IoUsage ioUsage = _program.GetIoUsage(); + + if (Definitions.GpPassthrough) + { + ioUsage = ioUsage.Combine(_vertexOutput); + } + + return new ResourceReservations(GpuAccessor, IsTransformFeedbackEmulated, vertexAsCompute: true, _vertexOutput, ioUsage); + } + + public void SetVertexOutputMapForGeometryAsCompute(TranslatorContext vertexContext) + { + _vertexOutput = vertexContext._program.GetIoUsage(); + } + + 
public ShaderProgram GenerateVertexPassthroughForCompute() + { + var attributeUsage = new AttributeUsage(GpuAccessor); + var resourceManager = new ResourceManager(ShaderStage.Vertex, GpuAccessor); + + var reservations = GetResourceReservations(); + + int vertexInfoCbBinding = reservations.VertexInfoConstantBufferBinding; + + if (Stage == ShaderStage.Vertex) + { + BufferDefinition vertexInfoBuffer = new(BufferLayout.Std140, 0, vertexInfoCbBinding, "vb_info", VertexInfoBuffer.GetStructureType()); + resourceManager.Properties.AddOrUpdateConstantBuffer(vertexInfoBuffer); + } + + StructureType vertexInputStruct = new(new StructureField[] + { + new StructureField(AggregateType.Array | AggregateType.FP32, "data", 0) + }); + + int vertexDataSbBinding = reservations.VertexOutputStorageBufferBinding; + BufferDefinition vertexOutputBuffer = new(BufferLayout.Std430, 1, vertexDataSbBinding, "vb_input", vertexInputStruct); + resourceManager.Properties.AddOrUpdateStorageBuffer(vertexOutputBuffer); + + var context = new EmitterContext(); + + Operand vertexIndex = Options.TargetApi == TargetApi.OpenGL + ? context.Load(StorageKind.Input, IoVariable.VertexId) + : context.Load(StorageKind.Input, IoVariable.VertexIndex); + + if (Stage == ShaderStage.Vertex) + { + Operand vertexCount = context.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(0)); + + // Base instance will be always zero when this shader is used, so which one we use here doesn't really matter. + Operand instanceId = Options.TargetApi == TargetApi.OpenGL + ? 
context.Load(StorageKind.Input, IoVariable.InstanceId) + : context.Load(StorageKind.Input, IoVariable.InstanceIndex); + + vertexIndex = context.IAdd(context.IMultiply(instanceId, vertexCount), vertexIndex); + } + + Operand baseOffset = context.IMultiply(vertexIndex, Const(reservations.OutputSizePerInvocation)); + + foreach ((IoDefinition ioDefinition, int inputOffset) in reservations.Offsets) + { + if (ioDefinition.StorageKind != StorageKind.Output) + { + continue; + } + + Operand vertexOffset = inputOffset != 0 ? context.IAdd(baseOffset, Const(inputOffset)) : baseOffset; + Operand value = context.Load(StorageKind.StorageBuffer, vertexDataSbBinding, Const(0), vertexOffset); + + if (ioDefinition.IoVariable == IoVariable.UserDefined) + { + context.Store(StorageKind.Output, ioDefinition.IoVariable, null, Const(ioDefinition.Location), Const(ioDefinition.Component), value); + attributeUsage.SetOutputUserAttribute(ioDefinition.Location); + } + else if (ResourceReservations.IsVectorOrArrayVariable(ioDefinition.IoVariable)) + { + context.Store(StorageKind.Output, ioDefinition.IoVariable, null, Const(ioDefinition.Component), value); + } + else + { + context.Store(StorageKind.Output, ioDefinition.IoVariable, null, value); + } + } + + var operations = context.GetOperations(); + var cfg = ControlFlowGraph.Create(operations); + var function = new Function(cfg.Blocks, "main", false, 0, 0); + + var transformFeedbackOutputs = GetTransformFeedbackOutputs(GpuAccessor, out ulong transformFeedbackVecMap); + + var definitions = new ShaderDefinitions(ShaderStage.Vertex, transformFeedbackVecMap, transformFeedbackOutputs) + { + LastInVertexPipeline = true + }; + + return Generate( + new[] { function }, + attributeUsage, + definitions, + definitions, + resourceManager, + FeatureFlags.None, + 0); + } + public ShaderProgram GenerateGeometryPassthrough() { int outputAttributesMask = AttributeUsage.UsedOutputAttributes; @@ -484,7 +652,14 @@ namespace Ryujinx.Graphics.Shader.Translation 
outputTopology, maxOutputVertices); - return Generate(new[] { function }, attributeUsage, definitions, resourceManager, FeatureFlags.RtLayer, 0); + return Generate( + new[] { function }, + attributeUsage, + definitions, + definitions, + resourceManager, + FeatureFlags.RtLayer, + 0); } } } diff --git a/src/Ryujinx.Graphics.Shader/VertexInfoBuffer.cs b/src/Ryujinx.Graphics.Shader/VertexInfoBuffer.cs new file mode 100644 index 00000000..845135f8 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/VertexInfoBuffer.cs @@ -0,0 +1,59 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Shader +{ + enum VertexInfoBufferField + { + // Must match the order of the fields on the struct. + VertexCounts, + GeometryCounts, + VertexStrides, + VertexOffsets, + } + + public struct VertexInfoBuffer + { + public static readonly int RequiredSize; + + public static readonly int VertexCountsOffset; + public static readonly int GeometryCountsOffset; + public static readonly int VertexStridesOffset; + public static readonly int VertexOffsetsOffset; + + private static int OffsetOf<T>(ref VertexInfoBuffer storage, ref T target) + { + return (int)Unsafe.ByteOffset(ref Unsafe.As<VertexInfoBuffer, T>(ref storage), ref target); + } + + static VertexInfoBuffer() + { + RequiredSize = Unsafe.SizeOf<VertexInfoBuffer>(); + + VertexInfoBuffer instance = new(); + + VertexCountsOffset = OffsetOf(ref instance, ref instance.VertexCounts); + GeometryCountsOffset = OffsetOf(ref instance, ref instance.GeometryCounts); + VertexStridesOffset = OffsetOf(ref instance, ref instance.VertexStrides); + VertexOffsetsOffset = OffsetOf(ref instance, ref instance.VertexOffsets); + } + + internal static StructureType GetStructureType() + { + return new StructureType(new[] + { + new StructureField(AggregateType.Vector4 | AggregateType.U32, "vertex_counts"), + new StructureField(AggregateType.Vector4 | AggregateType.U32, 
"geometry_counts"), + new StructureField(AggregateType.Array | AggregateType.Vector4 | AggregateType.U32, "vertex_strides", ResourceReservations.MaxVertexBufferTextures), + new StructureField(AggregateType.Array | AggregateType.Vector4 | AggregateType.U32, "vertex_offsets", ResourceReservations.MaxVertexBufferTextures), + }); + } + + public Vector4<int> VertexCounts; + public Vector4<int> GeometryCounts; + public Array32<Vector4<int>> VertexStrides; + public Array32<Vector4<int>> VertexOffsets; + } +} diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index 67551229..7b6b89a7 100644 --- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -605,6 +605,7 @@ namespace Ryujinx.Graphics.Vulkan supportsShaderBarrierDivergence: Vendor != Vendor.Intel, supportsShaderFloat64: Capabilities.SupportsShaderFloat64, supportsTextureShadowLod: false, + supportsVertexStoreAndAtomics: features2.Features.VertexPipelineStoresAndAtomics, supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex, supportsViewportMask: Capabilities.SupportsViewportArray2, supportsViewportSwizzle: false, @@ -618,6 +619,7 @@ namespace Ryujinx.Graphics.Vulkan maximumSupportedAnisotropy: (int)limits.MaxSamplerAnisotropy, shaderSubgroupSize: (int)Capabilities.SubgroupSize, storageBufferOffsetAlignment: (int)limits.MinStorageBufferOffsetAlignment, + textureBufferOffsetAlignment: (int)limits.MinTexelBufferOffsetAlignment, gatherBiasPrecision: IsIntelWindows || IsAmdWindows ? 
(int)Capabilities.SubTexelPrecisionBits : 0); } diff --git a/src/Ryujinx.ShaderTools/Program.cs b/src/Ryujinx.ShaderTools/Program.cs index 55ff12be..4211ab49 100644 --- a/src/Ryujinx.ShaderTools/Program.cs +++ b/src/Ryujinx.ShaderTools/Program.cs @@ -29,6 +29,12 @@ namespace Ryujinx.ShaderTools [Option("compute", Required = false, Default = false, HelpText = "Indicate that the shader is a compute shader.")] public bool Compute { get; set; } + [Option("vertex-as-compute", Required = false, Default = false, HelpText = "Indicate that the shader is a vertex shader and should be converted to compute.")] + public bool VertexAsCompute { get; set; } + + [Option("vertex-passthrough", Required = false, Default = false, HelpText = "Indicate that the shader is a vertex passthrough shader for compute output.")] + public bool VertexPassthrough { get; set; } + [Option("target-language", Required = false, Default = TargetLanguage.Glsl, HelpText = "Indicate the target shader language to use.")] public TargetLanguage TargetLanguage { get; set; } @@ -54,8 +60,18 @@ namespace Ryujinx.ShaderTools byte[] data = File.ReadAllBytes(options.InputPath); TranslationOptions translationOptions = new(options.TargetLanguage, options.TargetApi, flags); + TranslatorContext translatorContext = Translator.CreateContext(0, new GpuAccessor(data), translationOptions); - ShaderProgram program = Translator.CreateContext(0, new GpuAccessor(data), translationOptions).Translate(); + ShaderProgram program; + + if (options.VertexPassthrough) + { + program = translatorContext.GenerateVertexPassthroughForCompute(); + } + else + { + program = translatorContext.Translate(options.VertexAsCompute); + } if (options.OutputPath == null) {