Patch summary for the two files below (stated from the hunks themselves):
  * Engine/Compute.cs: Dispatch reads a ComputeQmd instead of a ComputeParams
    from (DispatchParamsAddress << 8), passes the local memory size and the
    shared memory size to ShaderCache.GetComputeShader, builds the uniform
    buffer enable mask from ConstantBufferValid(index), and dispatches with
    the CtaRaster width/height/depth fields.
  * Engine/ComputeParams.cs: deleted.
NOTE(review): the <ComputeParams>, <ComputeQmd> and <GpuVa> type arguments were
missing from this copy of the patch and are restored from how the results are
used. Other Get(...) calls and the bare "///" doc lines are left as found --
confirm against the repository before applying.

diff --git a/Ryujinx.Graphics.Gpu/Engine/Compute.cs b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
index d24d2d8d..9178cfb0 100644
--- a/Ryujinx.Graphics.Gpu/Engine/Compute.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/Compute.cs
@@ -17,29 +17,31 @@ namespace Ryujinx.Graphics.Gpu.Engine
         /// Method call argument
         public void Dispatch(GpuState state, int argument)
         {
-            uint dispatchParamsAddress = (uint)state.Get(MethodOffset.DispatchParamsAddress);
+            uint qmdAddress = (uint)state.Get(MethodOffset.DispatchParamsAddress);

-            var dispatchParams = _context.MemoryAccessor.Read<ComputeParams>((ulong)dispatchParamsAddress << 8);
+            var qmd = _context.MemoryAccessor.Read<ComputeQmd>((ulong)qmdAddress << 8);

             GpuVa shaderBaseAddress = state.Get<GpuVa>(MethodOffset.ShaderBaseAddress);

-            ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)dispatchParams.ShaderOffset;
+            ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)qmd.ProgramOffset;

-            // Note: A size of 0 is also invalid, the size must be at least 1.
-            int sharedMemorySize = Math.Clamp(dispatchParams.SharedMemorySize & 0xffff, 1, _context.Capabilities.MaximumComputeSharedMemorySize);
+            int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
+
+            int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);

             ComputeShader cs = ShaderCache.GetComputeShader(
                 shaderGpuVa,
-                sharedMemorySize,
-                dispatchParams.UnpackBlockSizeX(),
-                dispatchParams.UnpackBlockSizeY(),
-                dispatchParams.UnpackBlockSizeZ());
+                qmd.CtaThreadDimension0,
+                qmd.CtaThreadDimension1,
+                qmd.CtaThreadDimension2,
+                localMemorySize,
+                sharedMemorySize);

             _context.Renderer.Pipeline.SetProgram(cs.HostProgram);

             var samplerPool = state.Get(MethodOffset.SamplerPoolState);

-            TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, dispatchParams.SamplerIndex);
+            TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, qmd.SamplerIndex);

             var texturePool = state.Get(MethodOffset.TexturePoolState);

@@ -50,17 +52,19 @@ namespace Ryujinx.Graphics.Gpu.Engine
             ShaderProgramInfo info = cs.Shader.Program.Info;

             uint sbEnableMask = 0;
-            uint ubEnableMask = dispatchParams.UnpackUniformBuffersEnableMask();
+            uint ubEnableMask = 0;

-            for (int index = 0; index < dispatchParams.UniformBuffers.Length; index++)
+            for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
             {
-                if ((ubEnableMask & (1 << index)) == 0)
+                if (!qmd.ConstantBufferValid(index))
                 {
                     continue;
                 }

-                ulong gpuVa = dispatchParams.UniformBuffers[index].PackAddress();
-                ulong size = dispatchParams.UniformBuffers[index].UnpackSize();
+                ubEnableMask |= 1u << index;
+
+                ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
+                ulong size = (ulong)qmd.ConstantBufferSize(index);

                 BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
             }
@@ -131,9 +135,9 @@ namespace Ryujinx.Graphics.Gpu.Engine
             TextureManager.CommitComputeBindings();

             _context.Renderer.Pipeline.DispatchCompute(
-                dispatchParams.UnpackGridSizeX(),
-                dispatchParams.UnpackGridSizeY(),
-                dispatchParams.UnpackGridSizeZ());
+                qmd.CtaRasterWidth,
+                qmd.CtaRasterHeight,
+                qmd.CtaRasterDepth);

             UpdateShaderState(state);
         }
diff --git a/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs b/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs
deleted file mode 100644
index c19b43d8..00000000
--- a/Ryujinx.Graphics.Gpu/Engine/ComputeParams.cs
+++ /dev/null
@@ -1,173 +0,0 @@
-using Ryujinx.Graphics.Gpu.State;
-using System;
-using System.Runtime.InteropServices;
-
-namespace Ryujinx.Graphics.Gpu.Engine
-{
-    ///
-    /// Compute uniform buffer parameters.
-    ///
-    struct UniformBufferParams
-    {
-        public int AddressLow;
-        public int AddressHighAndSize;
-
-        ///
-        /// Packs the split address to a 64-bits integer.
-        ///
-        /// Uniform buffer GPU virtual address
-        public ulong PackAddress()
-        {
-            return (uint)AddressLow | ((ulong)(AddressHighAndSize & 0xff) << 32);
-        }
-
-        ///
-        /// Unpacks the uniform buffer size in bytes.
-        ///
-        /// Uniform buffer size in bytes
-        public ulong UnpackSize()
-        {
-            return (ulong)((AddressHighAndSize >> 15) & 0x1ffff);
-        }
-    }
-
-    ///
-    /// Compute dispatch parameters.
-    ///
-    struct ComputeParams
-    {
-        public int Unknown0;
-        public int Unknown1;
-        public int Unknown2;
-        public int Unknown3;
-        public int Unknown4;
-        public int Unknown5;
-        public int Unknown6;
-        public int Unknown7;
-        public int ShaderOffset;
-        public int Unknown9;
-        public int Unknown10;
-        public SamplerIndex SamplerIndex;
-        public int GridSizeX;
-        public int GridSizeYZ;
-        public int Unknown14;
-        public int Unknown15;
-        public int Unknown16;
-        public int SharedMemorySize;
-        public int BlockSizeX;
-        public int BlockSizeYZ;
-        public int UniformBuffersConfig;
-        public int Unknown21;
-        public int Unknown22;
-        public int Unknown23;
-        public int Unknown24;
-        public int Unknown25;
-        public int Unknown26;
-        public int Unknown27;
-        public int Unknown28;
-
-        private UniformBufferParams _uniformBuffer0;
-        private UniformBufferParams _uniformBuffer1;
-        private UniformBufferParams _uniformBuffer2;
-        private UniformBufferParams _uniformBuffer3;
-        private UniformBufferParams _uniformBuffer4;
-        private UniformBufferParams _uniformBuffer5;
-        private UniformBufferParams _uniformBuffer6;
-        private UniformBufferParams _uniformBuffer7;
-
-        ///
-        /// Uniform buffer parameters.
-        ///
-        public Span<UniformBufferParams> UniformBuffers
-        {
-            get
-            {
-                return MemoryMarshal.CreateSpan(ref _uniformBuffer0, 8);
-            }
-        }
-
-        public int Unknown45;
-        public int Unknown46;
-        public int Unknown47;
-        public int Unknown48;
-        public int Unknown49;
-        public int Unknown50;
-        public int Unknown51;
-        public int Unknown52;
-        public int Unknown53;
-        public int Unknown54;
-        public int Unknown55;
-        public int Unknown56;
-        public int Unknown57;
-        public int Unknown58;
-        public int Unknown59;
-        public int Unknown60;
-        public int Unknown61;
-        public int Unknown62;
-        public int Unknown63;
-
-        ///
-        /// Unpacks the work group X size.
-        ///
-        /// Work group X size
-        public int UnpackGridSizeX()
-        {
-            return GridSizeX & 0x7fffffff;
-        }
-
-        ///
-        /// Unpacks the work group Y size.
-        ///
-        /// Work group Y size
-        public int UnpackGridSizeY()
-        {
-            return GridSizeYZ & 0xffff;
-        }
-
-        ///
-        /// Unpacks the work group Z size.
-        ///
-        /// Work group Z size
-        public int UnpackGridSizeZ()
-        {
-            return (GridSizeYZ >> 16) & 0xffff;
-        }
-
-        ///
-        /// Unpacks the local group X size.
-        ///
-        /// Local group X size
-        public int UnpackBlockSizeX()
-        {
-            return (BlockSizeX >> 16) & 0xffff;
-        }
-
-        ///
-        /// Unpacks the local group Y size.
-        ///
-        /// Local group Y size
-        public int UnpackBlockSizeY()
-        {
-            return BlockSizeYZ & 0xffff;
-        }
-
-        ///
-        /// Unpacks the local group Z size.
-        ///
-        /// Local group Z size
-        public int UnpackBlockSizeZ()
-        {
-            return (BlockSizeYZ >> 16) & 0xffff;
-        }
-
-        ///
-        /// Unpacks the uniform buffers enable mask.
-        /// Each bit set on the mask indicates that the respective buffer index is enabled.
-        ///
-        /// Uniform buffers enable mask
-        public uint UnpackUniformBuffersEnableMask()
-        {
-            return (uint)UniformBuffersConfig & 0xff;
-        }
-    }
-}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs b/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
new file mode 100644
index 00000000..35418c2d
--- /dev/null
+++ b/Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
@@ -0,0 +1,275 @@
+using Ryujinx.Graphics.Gpu.State;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Gpu.Engine
+{
+    ///
+    /// Type of the dependent Queue Meta Data.
+    ///
+    enum DependentQmdType
+    {
+        Queue,
+        Grid
+    }
+
+    ///
+    /// Type of the release memory barrier.
+    ///
+    enum ReleaseMembarType
+    {
+        FeNone,
+        FeSysmembar
+    }
+
+    ///
+    /// Type of the CWD memory barrier.
+    ///
+    enum CwdMembarType
+    {
+        L1None,
+        L1Sysmembar,
+        L1Membar
+    }
+
+    ///
+    /// NaN behavior of 32-bits float operations on the shader.
+    ///
+    enum Fp32NanBehavior
+    {
+        Legacy,
+        Fp64Compatible
+    }
+
+    ///
+    /// NaN behavior of 32-bits float to integer conversion on the shader.
+    ///
+    enum Fp32F2iNanBehavior
+    {
+        PassZero,
+        PassIndefinite
+    }
+
+    ///
+    /// Limit of calls.
+    ///
+    enum ApiVisibleCallLimit
+    {
+        _32,
+        NoCheck
+    }
+
+    ///
+    /// Shared memory bank mapping mode.
+    ///
+    enum SharedMemoryBankMapping
+    {
+        FourBytesPerBank,
+        EightBytesPerBank
+    }
+
+    ///
+    /// Denormal behavior of 32-bits float narrowing instructions.
+    ///
+    enum Fp32NarrowInstruction
+    {
+        KeepDenorms,
+        FlushDenorms
+    }
+
+    ///
+    /// Configuration of the L1 cache.
+    ///
+    enum L1Configuration
+    {
+        DirectlyAddressableMemorySize16kb,
+        DirectlyAddressableMemorySize32kb,
+        DirectlyAddressableMemorySize48kb
+    }
+
+    ///
+    /// Reduction operation.
+    ///
+    enum ReductionOp
+    {
+        RedAdd,
+        RedMin,
+        RedMax,
+        RedInc,
+        RedDec,
+        RedAnd,
+        RedOr,
+        RedXor
+    }
+
+    ///
+    /// Reduction format.
+    ///
+    enum ReductionFormat
+    {
+        Unsigned32,
+        Signed32
+    }
+
+    ///
+    /// Size of a structure in words.
+    ///
+    enum StructureSize
+    {
+        FourWords,
+        OneWord
+    }
+
+    ///
+    /// Compute Queue Meta Data.
+    ///
+    unsafe struct ComputeQmd
+    {
+        private fixed int _words[64];
+
+        public int OuterPut => BitRange(30, 0);
+        public bool OuterOverflow => Bit(31);
+        public int OuterGet => BitRange(62, 32);
+        public bool OuterStickyOverflow => Bit(63);
+        public int InnerGet => BitRange(94, 64);
+        public bool InnerOverflow => Bit(95);
+        public int InnerPut => BitRange(126, 96);
+        public bool InnerStickyOverflow => Bit(127);
+        public int QmdReservedAA => BitRange(159, 128);
+        public int DependentQmdPointer => BitRange(191, 160);
+        public int QmdGroupId => BitRange(197, 192);
+        public bool SmGlobalCachingEnable => Bit(198);
+        public bool RunCtaInOneSmPartition => Bit(199);
+        public bool IsQueue => Bit(200);
+        public bool AddToHeadOfQmdGroupLinkedList => Bit(201);
+        public bool SemaphoreReleaseEnable0 => Bit(202);
+        public bool SemaphoreReleaseEnable1 => Bit(203);
+        public bool RequireSchedulingPcas => Bit(204);
+        public bool DependentQmdScheduleEnable => Bit(205);
+        public DependentQmdType DependentQmdType => (DependentQmdType)BitRange(206, 206);
+        public bool DependentQmdFieldCopy => Bit(207);
+        public int QmdReservedB => BitRange(223, 208);
+        public int CircularQueueSize => BitRange(248, 224);
+        public bool QmdReservedC => Bit(249);
+        public bool InvalidateTextureHeaderCache => Bit(250);
+        public bool InvalidateTextureSamplerCache => Bit(251);
+        public bool InvalidateTextureDataCache => Bit(252);
+        public bool InvalidateShaderDataCache => Bit(253);
+        public bool InvalidateInstructionCache => Bit(254);
+        public bool InvalidateShaderConstantCache => Bit(255);
+        public int ProgramOffset => BitRange(287, 256);
+        public int CircularQueueAddrLower => BitRange(319, 288);
+        public int CircularQueueAddrUpper => BitRange(327, 320);
+        public int QmdReservedD => BitRange(335, 328);
+        public int CircularQueueEntrySize => BitRange(351, 336);
+        public int CwdReferenceCountId => BitRange(357, 352);
+        public int CwdReferenceCountDeltaMinusOne => BitRange(365, 358);
+        public ReleaseMembarType ReleaseMembarType => (ReleaseMembarType)BitRange(366, 366);
+        public bool CwdReferenceCountIncrEnable => Bit(367);
+        public CwdMembarType CwdMembarType => (CwdMembarType)BitRange(369, 368);
+        public bool SequentiallyRunCtas => Bit(370);
+        public bool CwdReferenceCountDecrEnable => Bit(371);
+        public bool Throttled => Bit(372);
+        public Fp32NanBehavior Fp32NanBehavior => (Fp32NanBehavior)BitRange(376, 376);
+        public Fp32F2iNanBehavior Fp32F2iNanBehavior => (Fp32F2iNanBehavior)BitRange(377, 377);
+        public ApiVisibleCallLimit ApiVisibleCallLimit => (ApiVisibleCallLimit)BitRange(378, 378);
+        public SharedMemoryBankMapping SharedMemoryBankMapping => (SharedMemoryBankMapping)BitRange(379, 379);
+        public SamplerIndex SamplerIndex => (SamplerIndex)BitRange(382, 382);
+        public Fp32NarrowInstruction Fp32NarrowInstruction => (Fp32NarrowInstruction)BitRange(383, 383);
+        public int CtaRasterWidth => BitRange(415, 384);
+        public int CtaRasterHeight => BitRange(431, 416);
+        public int CtaRasterDepth => BitRange(447, 432);
+        public int CtaRasterWidthResume => BitRange(479, 448);
+        public int CtaRasterHeightResume => BitRange(495, 480);
+        public int CtaRasterDepthResume => BitRange(511, 496);
+        public int QueueEntriesPerCtaMinusOne => BitRange(518, 512);
+        public int CoalesceWaitingPeriod => BitRange(529, 522);
+        public int SharedMemorySize => BitRange(561, 544);
+        public int QmdReservedG => BitRange(575, 562);
+        public int QmdVersion => BitRange(579, 576);
+        public int QmdMajorVersion => BitRange(583, 580);
+        public int QmdReservedH => BitRange(591, 584);
+        public int CtaThreadDimension0 => BitRange(607, 592);
+        public int CtaThreadDimension1 => BitRange(623, 608);
+        public int CtaThreadDimension2 => BitRange(639, 624);
+        public bool ConstantBufferValid(int i) => Bit(640 + i * 1);
+        public int QmdReservedI => BitRange(668, 648);
+        public L1Configuration L1Configuration => (L1Configuration)BitRange(671, 669);
+        public int SmDisableMaskLower => BitRange(703, 672);
+        public int SmDisableMaskUpper => BitRange(735, 704);
+        public int Release0AddressLower => BitRange(767, 736);
+        public int Release0AddressUpper => BitRange(775, 768);
+        public int QmdReservedJ => BitRange(783, 776);
+        public ReductionOp Release0ReductionOp => (ReductionOp)BitRange(790, 788);
+        public bool QmdReservedK => Bit(791);
+        public ReductionFormat Release0ReductionFormat => (ReductionFormat)BitRange(793, 792);
+        public bool Release0ReductionEnable => Bit(794);
+        public StructureSize Release0StructureSize => (StructureSize)BitRange(799, 799);
+        public int Release0Payload => BitRange(831, 800);
+        public int Release1AddressLower => BitRange(863, 832);
+        public int Release1AddressUpper => BitRange(871, 864);
+        public int QmdReservedL => BitRange(879, 872);
+        public ReductionOp Release1ReductionOp => (ReductionOp)BitRange(886, 884);
+        public bool QmdReservedM => Bit(887);
+        public ReductionFormat Release1ReductionFormat => (ReductionFormat)BitRange(889, 888);
+        public bool Release1ReductionEnable => Bit(890);
+        public StructureSize Release1StructureSize => (StructureSize)BitRange(895, 895);
+        public int Release1Payload => BitRange(927, 896);
+        public int ConstantBufferAddrLower(int i) => BitRange(959 + i * 64, 928 + i * 64);
+        public int ConstantBufferAddrUpper(int i) => BitRange(967 + i * 64, 960 + i * 64);
+        public int ConstantBufferReservedAddr(int i) => BitRange(973 + i * 64, 968 + i * 64);
+        public bool ConstantBufferInvalidate(int i) => Bit(974 + i * 64);
+        public int ConstantBufferSize(int i) => BitRange(991 + i * 64, 975 + i * 64);
+        public int ShaderLocalMemoryLowSize => BitRange(1463, 1440);
+        public int QmdReservedN => BitRange(1466, 1464);
+        public int BarrierCount => BitRange(1471, 1467);
+        public int ShaderLocalMemoryHighSize => BitRange(1495, 1472);
+        public int RegisterCount => BitRange(1503, 1496);
+        public int ShaderLocalMemoryCrsSize => BitRange(1527, 1504);
+        public int SassVersion => BitRange(1535, 1528);
+        public int HwOnlyInnerGet => BitRange(1566, 1536);
+        public bool HwOnlyRequireSchedulingPcas => Bit(1567);
+        public int HwOnlyInnerPut => BitRange(1598, 1568);
+        public bool HwOnlyScgType => Bit(1599);
+        public int HwOnlySpanListHeadIndex => BitRange(1629, 1600);
+        public bool QmdReservedQ => Bit(1630);
+        public bool HwOnlySpanListHeadIndexValid => Bit(1631);
+        public int HwOnlySkedNextQmdPointer => BitRange(1663, 1632);
+        public int QmdSpareE => BitRange(1695, 1664);
+        public int QmdSpareF => BitRange(1727, 1696);
+        public int QmdSpareG => BitRange(1759, 1728);
+        public int QmdSpareH => BitRange(1791, 1760);
+        public int QmdSpareI => BitRange(1823, 1792);
+        public int QmdSpareJ => BitRange(1855, 1824);
+        public int QmdSpareK => BitRange(1887, 1856);
+        public int QmdSpareL => BitRange(1919, 1888);
+        public int QmdSpareM => BitRange(1951, 1920);
+        public int QmdSpareN => BitRange(1983, 1952);
+        public int DebugIdUpper => BitRange(2015, 1984);
+        public int DebugIdLower => BitRange(2047, 2016);
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private bool Bit(int bit)
+        {
+            if ((uint)bit >= 64 * 32)
+            {
+                throw new ArgumentOutOfRangeException(nameof(bit));
+            }
+
+            return (_words[bit >> 5] & (1 << (bit & 31))) != 0;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private int BitRange(int upper, int lower)
+        {
+            if ((uint)lower >= 64 * 32)
+            {
+                throw new ArgumentOutOfRangeException(nameof(lower));
+            }
+
+            int mask = (int)(uint.MaxValue >> (32 - (upper - lower + 1)));
+
+            return (_words[lower >> 5] >> (lower & 31)) & mask;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj b/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
index b9751508..a55c4d1c 100644
--- a/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
+++ b/Ryujinx.Graphics.Gpu/Ryujinx.Graphics.Gpu.csproj
@@ -13,4 +13,12 @@
     win-x64;osx-x64;linux-x64
+
+    true
+
+
+
+    true
+
+
diff --git
a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index dad1b0ac..8aa9b1c7 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -51,12 +51,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// This automatically translates, compiles and adds the code to the cache if not present.
         ///
         /// GPU virtual address of the binary shader code
-        /// Shared memory size of the compute shader
         /// Local group size X of the computer shader
         /// Local group size Y of the computer shader
         /// Local group size Z of the computer shader
+        /// Local memory size of the compute shader
+        /// Shared memory size of the compute shader
         /// Compiled compute shader code
-        public ComputeShader GetComputeShader(ulong gpuVa, int sharedMemorySize, int localSizeX, int localSizeY, int localSizeZ)
+        public ComputeShader GetComputeShader(
+            ulong gpuVa,
+            int localSizeX,
+            int localSizeY,
+            int localSizeZ,
+            int localMemorySize,
+            int sharedMemorySize)
         {
             bool isCached = _cpPrograms.TryGetValue(gpuVa, out List list);

@@ -71,7 +78,13 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 }
             }

-            CachedShader shader = TranslateComputeShader(gpuVa, sharedMemorySize, localSizeX, localSizeY, localSizeZ);
+            CachedShader shader = TranslateComputeShader(
+                gpuVa,
+                localSizeX,
+                localSizeY,
+                localSizeZ,
+                localMemorySize,
+                sharedMemorySize);

             shader.HostShader = _context.Renderer.CompileShader(shader.Program);

@@ -237,12 +250,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
         /// Translates the binary Maxwell shader code to something that the host API accepts.
         ///
         /// GPU virtual address of the binary shader code
-        /// Shared memory size of the compute shader
         /// Local group size X of the computer shader
         /// Local group size Y of the computer shader
         /// Local group size Z of the computer shader
+        /// Local memory size of the compute shader
+        /// Shared memory size of the compute shader
         /// Compiled compute shader code
-        private CachedShader TranslateComputeShader(ulong gpuVa, int sharedMemorySize, int localSizeX, int localSizeY, int localSizeZ)
+        private CachedShader TranslateComputeShader(
+            ulong gpuVa,
+            int localSizeX,
+            int localSizeY,
+            int localSizeZ,
+            int localMemorySize,
+            int sharedMemorySize)
         {
             if (gpuVa == 0)
             {
@@ -256,6 +276,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
                 QueryInfoName.ComputeLocalSizeX => localSizeX,
                 QueryInfoName.ComputeLocalSizeY => localSizeY,
                 QueryInfoName.ComputeLocalSizeZ => localSizeZ,
+                QueryInfoName.ComputeLocalMemorySize => localMemorySize,
                 QueryInfoName.ComputeSharedMemorySize => sharedMemorySize,
                 _ => QueryInfoCommon(info)
             };
diff --git a/Ryujinx.Graphics.OpenGL/Program.cs b/Ryujinx.Graphics.OpenGL/Program.cs
index a8ee7ae8..fe14e9a9 100644
--- a/Ryujinx.Graphics.OpenGL/Program.cs
+++ b/Ryujinx.Graphics.OpenGL/Program.cs
@@ -77,14 +77,7 @@ namespace Ryujinx.Graphics.OpenGL

             Bind();

-            int extraBlockindex = GL.GetUniformBlockIndex(Handle, "Extra");
-
-            if (extraBlockindex >= 0)
-            {
-                GL.UniformBlockBinding(Handle, extraBlockindex, 0);
-            }
-
-            int ubBindingPoint = 1;
+            int ubBindingPoint = 0;
             int sbBindingPoint = 0;
             int textureUnit = 0;
             int imageUnit = 0;
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
index 200569c4..2e7f9f1b 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
@@ -47,25 +47,35 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
                 context.AppendLine();
             }

-            context.AppendLine("layout (std140) uniform Extra");
-
-            context.EnterScope();
-
-            context.AppendLine("vec2 flip;");
-            context.AppendLine("int instance;");
-
-            context.LeaveScope(";");
-
-            context.AppendLine();
-
-            context.AppendLine($"uint {DefaultNames.LocalMemoryName}[0x100];");
-            context.AppendLine();
-
             if (context.Config.Stage == ShaderStage.Compute)
             {
-                string size = NumberFormatter.FormatInt(BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeSharedMemorySize), 4));
+                int localMemorySize = BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeLocalMemorySize), 4);

-                context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{size}];");
+                if (localMemorySize != 0)
+                {
+                    string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
+
+                    context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
+                    context.AppendLine();
+                }
+
+                int sharedMemorySize = BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeSharedMemorySize), 4);
+
+                if (sharedMemorySize != 0)
+                {
+                    string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize);
+
+                    context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];");
+                    context.AppendLine();
+                }
+            }
+            else if (context.Config.LocalMemorySize != 0)
+            {
+                int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4);
+
+                string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
+
+                context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
                 context.AppendLine();
             }

diff --git a/Ryujinx.Graphics.Shader/QueryInfoName.cs b/Ryujinx.Graphics.Shader/QueryInfoName.cs
index c4f2cb6c..887c0d7d 100644
--- a/Ryujinx.Graphics.Shader/QueryInfoName.cs
+++ b/Ryujinx.Graphics.Shader/QueryInfoName.cs
@@ -5,6 +5,7 @@ namespace Ryujinx.Graphics.Shader
         ComputeLocalSizeX,
         ComputeLocalSizeY,
         ComputeLocalSizeZ,
+        ComputeLocalMemorySize,
         ComputeSharedMemorySize,
         IsTextureBuffer,
         IsTextureRectangle,
diff --git a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
index 8a0f25fe..e3708b41 100644
--- a/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
+++ b/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
@@ -10,6 +10,8 @@ namespace Ryujinx.Graphics.Shader.Translation

         public int MaxOutputVertices { get; }

+        public int LocalMemorySize { get; }
+
         public OutputMapTarget[] OmapTargets { get; }
         public bool OmapSampleMask { get; }
         public bool OmapDepth { get; }
@@ -23,6 +25,7 @@ namespace Ryujinx.Graphics.Shader.Translation
             Stage = ShaderStage.Compute;
             OutputTopology = OutputTopology.PointList;
             MaxOutputVertices = 0;
+            LocalMemorySize = 0;
             OmapTargets = null;
             OmapSampleMask = false;
             OmapDepth = false;
@@ -35,6 +38,7 @@ namespace Ryujinx.Graphics.Shader.Translation
             Stage = header.Stage;
             OutputTopology = header.OutputTopology;
             MaxOutputVertices = header.MaxOutputVertexCount;
+            LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;
             OmapTargets = header.OmapTargets;
             OmapSampleMask = header.OmapSampleMask;
             OmapDepth = header.OmapDepth;
@@ -80,6 +84,8 @@ namespace Ryujinx.Graphics.Shader.Translation
                 case QueryInfoName.ComputeLocalSizeY:
                 case QueryInfoName.ComputeLocalSizeZ:
                     return 1;
+                case QueryInfoName.ComputeLocalMemorySize:
+                    return 0x1000;
                 case QueryInfoName.ComputeSharedMemorySize:
                     return 0xc000;
                 case QueryInfoName.IsTextureBuffer: