0
0
Fork 0
mirror of https://github.com/GreemDev/Ryujinx.git synced 2025-01-08 23:32:00 +00:00

GPU: Eliminate CB0 accesses when storage buffer accesses are resolved (#3847)

* Eliminate CB0 accesses

Still some work to do, decouple from hle?

* Forgot the important part somehow

* Fix and improve alignment test

* Address Feedback

* Remove some complexity when checking storage buffer alignment

* Update Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs

Co-authored-by: gdkchan <gab.dark.100@gmail.com>

Co-authored-by: gdkchan <gab.dark.100@gmail.com>
This commit is contained in:
riperiperi 2022-11-17 17:47:41 +00:00 committed by GitHub
parent 391e08dd27
commit 33a4d7d1ba
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 317 additions and 68 deletions

View file

@ -95,5 +95,10 @@ namespace Ryujinx.Graphics.Gpu
/// Byte alignment for block linear textures /// Byte alignment for block linear textures
/// </summary> /// </summary>
public const int GobAlignment = 64; public const int GobAlignment = 64;
/// <summary>
/// Expected byte alignment for storage buffers
/// </summary>
public const int StorageAlignment = 16;
} }
} }

View file

@ -138,7 +138,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
qmd.CtaThreadDimension1, qmd.CtaThreadDimension1,
qmd.CtaThreadDimension2, qmd.CtaThreadDimension2,
localMemorySize, localMemorySize,
sharedMemorySize); sharedMemorySize,
_channel.BufferManager.HasUnalignedStorageBuffers);
CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa); CachedShaderProgram cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
@ -150,6 +151,33 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
ShaderProgramInfo info = cs.Shaders[0].Info; ShaderProgramInfo info = cs.Shaders[0].Info;
bool hasUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;
for (int index = 0; index < info.SBuffers.Count; index++)
{
BufferDescriptor sb = info.SBuffers[index];
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
int sbDescOffset = 0x310 + sb.Slot * 0x10;
sbDescAddress += (ulong)sbDescOffset;
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
_channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
}
if ((_channel.BufferManager.HasUnalignedStorageBuffers) != hasUnaligned)
{
// Refetch the shader, as assumptions about storage buffer alignment have changed.
cs = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);
_context.Renderer.Pipeline.SetProgram(cs.HostProgram);
info = cs.Shaders[0].Info;
}
for (int index = 0; index < info.CBuffers.Count; index++) for (int index = 0; index < info.CBuffers.Count; index++)
{ {
BufferDescriptor cb = info.CBuffers[index]; BufferDescriptor cb = info.CBuffers[index];
@ -174,21 +202,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute
_channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size); _channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size);
} }
for (int index = 0; index < info.SBuffers.Count; index++)
{
BufferDescriptor sb = info.SBuffers[index];
ulong sbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);
int sbDescOffset = 0x310 + sb.Slot * 0x10;
sbDescAddress += (ulong)sbDescOffset;
SbDescriptor sbDescriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(sbDescAddress);
_channel.BufferManager.SetComputeStorageBuffer(sb.Slot, sbDescriptor.PackAddress(), (uint)sbDescriptor.Size, sb.Flags);
}
_channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers); _channel.BufferManager.SetComputeStorageBufferBindings(info.SBuffers);
_channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers); _channel.BufferManager.SetComputeUniformBufferBindings(info.CBuffers);

View file

@ -293,9 +293,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// </summary> /// </summary>
private void CommitBindings() private void CommitBindings()
{ {
var buffers = _channel.BufferManager;
var hasUnaligned = buffers.HasUnalignedStorageBuffers;
UpdateStorageBuffers(); UpdateStorageBuffers();
if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState)) if (!_channel.TextureManager.CommitGraphicsBindings(_shaderSpecState) || (buffers.HasUnalignedStorageBuffers != hasUnaligned))
{ {
// Shader must be reloaded. // Shader must be reloaded.
UpdateShaderState(); UpdateShaderState();
@ -1361,7 +1364,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_state.State.AlphaTestFunc, _state.State.AlphaTestFunc,
_state.State.AlphaTestRef, _state.State.AlphaTestRef,
ref attributeTypes, ref attributeTypes,
_drawState.HasConstantBufferDrawParameters); _drawState.HasConstantBufferDrawParameters,
_channel.BufferManager.HasUnalignedStorageBuffers);
} }
/// <summary> /// <summary>

View file

@ -17,6 +17,9 @@ namespace Ryujinx.Graphics.Gpu.Memory
private readonly GpuContext _context; private readonly GpuContext _context;
private readonly GpuChannel _channel; private readonly GpuChannel _channel;
private int _unalignedStorageBuffers;
public bool HasUnalignedStorageBuffers => _unalignedStorageBuffers > 0;
private IndexBuffer _indexBuffer; private IndexBuffer _indexBuffer;
private readonly VertexBuffer[] _vertexBuffers; private readonly VertexBuffer[] _vertexBuffers;
private readonly BufferBounds[] _transformFeedbackBuffers; private readonly BufferBounds[] _transformFeedbackBuffers;
@ -38,6 +41,11 @@ namespace Ryujinx.Graphics.Gpu.Memory
/// </summary> /// </summary>
public BufferBounds[] Buffers { get; } public BufferBounds[] Buffers { get; }
/// <summary>
/// Flag indicating if this binding is unaligned.
/// </summary>
public bool[] Unaligned { get; }
/// <summary> /// <summary>
/// Total amount of buffers used on the shader. /// Total amount of buffers used on the shader.
/// </summary> /// </summary>
@ -51,6 +59,7 @@ namespace Ryujinx.Graphics.Gpu.Memory
{ {
Bindings = new BufferDescriptor[count]; Bindings = new BufferDescriptor[count];
Buffers = new BufferBounds[count]; Buffers = new BufferBounds[count];
Unaligned = new bool[count];
} }
/// <summary> /// <summary>
@ -202,6 +211,31 @@ namespace Ryujinx.Graphics.Gpu.Memory
_transformFeedbackBuffersDirty = true; _transformFeedbackBuffersDirty = true;
} }
/// <summary>
/// Tracks whether the storage buffer bound at the given index starts at an unaligned address.
/// The number of unaligned bindings is kept in a counter, which backs <see cref="HasUnalignedStorageBuffers"/>.
/// Unaligned storage buffers disable some optimizations on the shader.
/// </summary>
/// <param name="buffers">The binding list to modify</param>
/// <param name="index">Index of the storage buffer</param>
/// <param name="gpuVa">Start GPU virtual address of the buffer</param>
private void RecordStorageAlignment(BuffersPerStage buffers, int index, ulong gpuVa)
{
    bool isUnaligned = (gpuVa & (Constants.StorageAlignment - 1)) != 0;

    // Fast path: this binding is aligned and no unaligned binding is currently tracked,
    // so there is no per-binding flag that could need clearing.
    if (!isUnaligned && !HasUnalignedStorageBuffers)
    {
        return;
    }

    // Only touch the counter when the alignment of this binding actually changed.
    if (buffers.Unaligned[index] == isUnaligned)
    {
        return;
    }

    buffers.Unaligned[index] = isUnaligned;

    if (isUnaligned)
    {
        _unalignedStorageBuffers++;
    }
    else
    {
        _unalignedStorageBuffers--;
    }
}
/// <summary> /// <summary>
/// Sets a storage buffer on the compute pipeline. /// Sets a storage buffer on the compute pipeline.
/// Storage buffers can be read and written to on shaders. /// Storage buffers can be read and written to on shaders.
@ -214,6 +248,8 @@ namespace Ryujinx.Graphics.Gpu.Memory
{ {
size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1); size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
RecordStorageAlignment(_cpStorageBuffers, index, gpuVa);
gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment); gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);
ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size);
@ -234,17 +270,21 @@ namespace Ryujinx.Graphics.Gpu.Memory
{ {
size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1); size += gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1);
BuffersPerStage buffers = _gpStorageBuffers[stage];
RecordStorageAlignment(buffers, index, gpuVa);
gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment); gpuVa = BitUtils.AlignDown(gpuVa, _context.Capabilities.StorageBufferOffsetAlignment);
ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); ulong address = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size);
if (_gpStorageBuffers[stage].Buffers[index].Address != address || if (buffers.Buffers[index].Address != address ||
_gpStorageBuffers[stage].Buffers[index].Size != size) buffers.Buffers[index].Size != size)
{ {
_gpStorageBuffersDirty = true; _gpStorageBuffersDirty = true;
} }
_gpStorageBuffers[stage].SetBounds(index, address, size, flags); buffers.SetBounds(index, address, size, flags);
} }
/// <summary> /// <summary>

View file

@ -36,6 +36,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary> /// </summary>
/// <param name="channel">GPU channel</param> /// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param> /// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute state</param>
/// <param name="gpuVa">GPU virtual address of the compute shader</param> /// <param name="gpuVa">GPU virtual address of the compute shader</param>
/// <param name="program">Cached host program for the given state, if found</param> /// <param name="program">Cached host program for the given state, if found</param>
/// <param name="cachedGuestCode">Cached guest code, if any found</param> /// <param name="cachedGuestCode">Cached guest code, if any found</param>
@ -43,6 +44,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
public bool TryFind( public bool TryFind(
GpuChannel channel, GpuChannel channel,
GpuChannelPoolState poolState, GpuChannelPoolState poolState,
GpuChannelComputeState computeState,
ulong gpuVa, ulong gpuVa,
out CachedShaderProgram program, out CachedShaderProgram program,
out byte[] cachedGuestCode) out byte[] cachedGuestCode)
@ -50,7 +52,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
program = null; program = null;
ShaderCodeAccessor codeAccessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa); ShaderCodeAccessor codeAccessor = new ShaderCodeAccessor(channel.MemoryManager, gpuVa);
bool hasSpecList = _cache.TryFindItem(codeAccessor, out var specList, out cachedGuestCode); bool hasSpecList = _cache.TryFindItem(codeAccessor, out var specList, out cachedGuestCode);
return hasSpecList && specList.TryFindForCompute(channel, poolState, out program); return hasSpecList && specList.TryFindForCompute(channel, poolState, computeState, out program);
} }
/// <summary> /// <summary>

View file

@ -225,6 +225,12 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
return _oldSpecState.GraphicsState.EarlyZForce; return _oldSpecState.GraphicsState.EarlyZForce;
} }
/// <inheritdoc/>
public bool QueryHasUnalignedStorageBuffer()
{
    // Either the graphics or the compute state of the old specialization state may carry the flag.
    if (_oldSpecState.GraphicsState.HasUnalignedStorageBuffer)
    {
        return true;
    }

    return _oldSpecState.ComputeState.HasUnalignedStorageBuffer;
}
/// <inheritdoc/> /// <inheritdoc/>
public bool QueryViewportTransformDisable() public bool QueryViewportTransformDisable()
{ {

View file

@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMajor = 1;
private const ushort FileFormatVersionMinor = 2; private const ushort FileFormatVersionMinor = 2;
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
private const uint CodeGenVersion = 3747; private const uint CodeGenVersion = 3848;
private const string SharedTocFileName = "shared.toc"; private const string SharedTocFileName = "shared.toc";
private const string SharedDataFileName = "shared.data"; private const string SharedDataFileName = "shared.data";

View file

@ -145,6 +145,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
return _state.GraphicsState.HasConstantBufferDrawParameters; return _state.GraphicsState.HasConstantBufferDrawParameters;
} }
/// <inheritdoc/>
public bool QueryHasUnalignedStorageBuffer()
{
    // Either the graphics or the compute state may carry the flag.
    if (_state.GraphicsState.HasUnalignedStorageBuffer)
    {
        return true;
    }

    return _state.ComputeState.HasUnalignedStorageBuffer;
}
/// <inheritdoc/> /// <inheritdoc/>
public InputTopology QueryPrimitiveTopology() public InputTopology QueryPrimitiveTopology()
{ {

View file

@ -32,6 +32,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary> /// </summary>
public readonly int SharedMemorySize; public readonly int SharedMemorySize;
/// <summary>
/// Indicates that any storage buffer use is unaligned.
/// </summary>
public readonly bool HasUnalignedStorageBuffer;
/// <summary> /// <summary>
/// Creates a new GPU compute state. /// Creates a new GPU compute state.
/// </summary> /// </summary>
@ -40,18 +45,21 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="localSizeZ">Local group size Z of the compute shader</param> /// <param name="localSizeZ">Local group size Z of the compute shader</param>
/// <param name="localMemorySize">Local memory size of the compute shader</param> /// <param name="localMemorySize">Local memory size of the compute shader</param>
/// <param name="sharedMemorySize">Shared memory size of the compute shader</param> /// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
/// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
public GpuChannelComputeState( public GpuChannelComputeState(
int localSizeX, int localSizeX,
int localSizeY, int localSizeY,
int localSizeZ, int localSizeZ,
int localMemorySize, int localMemorySize,
int sharedMemorySize) int sharedMemorySize,
bool hasUnalignedStorageBuffer)
{ {
LocalSizeX = localSizeX; LocalSizeX = localSizeX;
LocalSizeY = localSizeY; LocalSizeY = localSizeY;
LocalSizeZ = localSizeZ; LocalSizeZ = localSizeZ;
LocalMemorySize = localMemorySize; LocalMemorySize = localMemorySize;
SharedMemorySize = sharedMemorySize; SharedMemorySize = sharedMemorySize;
HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
} }
} }
} }

View file

@ -82,6 +82,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary> /// </summary>
public readonly bool HasConstantBufferDrawParameters; public readonly bool HasConstantBufferDrawParameters;
/// <summary>
/// Indicates that any storage buffer use is unaligned.
/// </summary>
public readonly bool HasUnalignedStorageBuffer;
/// <summary> /// <summary>
/// Creates a new GPU graphics state. /// Creates a new GPU graphics state.
/// </summary> /// </summary>
@ -99,6 +104,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <param name="alphaTestReference">When alpha test is enabled, indicates the value to compare with the fragment output alpha</param> /// <param name="alphaTestReference">When alpha test is enabled, indicates the value to compare with the fragment output alpha</param>
/// <param name="attributeTypes">Type of the vertex attributes consumed by the shader</param> /// <param name="attributeTypes">Type of the vertex attributes consumed by the shader</param>
/// <param name="hasConstantBufferDrawParameters">Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0</param> /// <param name="hasConstantBufferDrawParameters">Indicates that the draw is writing the base vertex, base instance and draw index to Constant Buffer 0</param>
/// <param name="hasUnalignedStorageBuffer">Indicates that any storage buffer use is unaligned</param>
public GpuChannelGraphicsState( public GpuChannelGraphicsState(
bool earlyZForce, bool earlyZForce,
PrimitiveTopology topology, PrimitiveTopology topology,
@ -113,7 +119,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
CompareOp alphaTestCompare, CompareOp alphaTestCompare,
float alphaTestReference, float alphaTestReference,
ref Array32<AttributeType> attributeTypes, ref Array32<AttributeType> attributeTypes,
bool hasConstantBufferDrawParameters) bool hasConstantBufferDrawParameters,
bool hasUnalignedStorageBuffer)
{ {
EarlyZForce = earlyZForce; EarlyZForce = earlyZForce;
Topology = topology; Topology = topology;
@ -129,6 +136,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
AlphaTestReference = alphaTestReference; AlphaTestReference = alphaTestReference;
AttributeTypes = attributeTypes; AttributeTypes = attributeTypes;
HasConstantBufferDrawParameters = hasConstantBufferDrawParameters; HasConstantBufferDrawParameters = hasConstantBufferDrawParameters;
HasUnalignedStorageBuffer = hasUnalignedStorageBuffer;
} }
} }
} }

View file

@ -203,12 +203,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
GpuChannelComputeState computeState, GpuChannelComputeState computeState,
ulong gpuVa) ulong gpuVa)
{ {
if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, cpShader, gpuVa)) if (_cpPrograms.TryGetValue(gpuVa, out var cpShader) && IsShaderEqual(channel, poolState, computeState, cpShader, gpuVa))
{ {
return cpShader; return cpShader;
} }
if (_computeShaderCache.TryFind(channel, poolState, gpuVa, out cpShader, out byte[] cachedGuestCode)) if (_computeShaderCache.TryFind(channel, poolState, computeState, gpuVa, out cpShader, out byte[] cachedGuestCode))
{ {
_cpPrograms[gpuVa] = cpShader; _cpPrograms[gpuVa] = cpShader;
return cpShader; return cpShader;
@ -473,18 +473,20 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary> /// </summary>
/// <param name="channel">GPU channel using the shader</param> /// <param name="channel">GPU channel using the shader</param>
/// <param name="poolState">GPU channel state to verify shader compatibility</param> /// <param name="poolState">GPU channel state to verify shader compatibility</param>
/// <param name="computeState">GPU channel compute state to verify shader compatibility</param>
/// <param name="cpShader">Cached compute shader</param> /// <param name="cpShader">Cached compute shader</param>
/// <param name="gpuVa">GPU virtual address of the shader code in memory</param> /// <param name="gpuVa">GPU virtual address of the shader code in memory</param>
/// <returns>True if the code is different, false otherwise</returns> /// <returns>True if the code is different, false otherwise</returns>
private static bool IsShaderEqual( private static bool IsShaderEqual(
GpuChannel channel, GpuChannel channel,
GpuChannelPoolState poolState, GpuChannelPoolState poolState,
GpuChannelComputeState computeState,
CachedShaderProgram cpShader, CachedShaderProgram cpShader,
ulong gpuVa) ulong gpuVa)
{ {
if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa)) if (IsShaderEqual(channel.MemoryManager, cpShader.Shaders[0], gpuVa))
{ {
return cpShader.SpecializationState.MatchesCompute(channel, poolState, true); return cpShader.SpecializationState.MatchesCompute(channel, poolState, computeState, true);
} }
return false; return false;

View file

@ -53,13 +53,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary> /// </summary>
/// <param name="channel">GPU channel</param> /// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param> /// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute state</param>
/// <param name="program">Cached program, if found</param> /// <param name="program">Cached program, if found</param>
/// <returns>True if a compatible program is found, false otherwise</returns> /// <returns>True if a compatible program is found, false otherwise</returns>
public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, out CachedShaderProgram program) public bool TryFindForCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, out CachedShaderProgram program)
{ {
foreach (var entry in _entries) foreach (var entry in _entries)
{ {
if (entry.SpecializationState.MatchesCompute(channel, poolState, true)) if (entry.SpecializationState.MatchesCompute(channel, poolState, computeState, true))
{ {
program = entry; program = entry;
return true; return true;

View file

@ -531,6 +531,11 @@ namespace Ryujinx.Graphics.Gpu.Shader
return false; return false;
} }
if (graphicsState.HasUnalignedStorageBuffer != GraphicsState.HasUnalignedStorageBuffer)
{
return false;
}
return Matches(channel, poolState, checkTextures, isCompute: false); return Matches(channel, poolState, checkTextures, isCompute: false);
} }
@ -539,10 +544,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// </summary> /// </summary>
/// <param name="channel">GPU channel</param> /// <param name="channel">GPU channel</param>
/// <param name="poolState">Texture pool state</param> /// <param name="poolState">Texture pool state</param>
/// <param name="computeState">Compute state</param>
/// <param name="checkTextures">Indicates whether texture descriptors should be checked</param> /// <param name="checkTextures">Indicates whether texture descriptors should be checked</param>
/// <returns>True if the state matches, false otherwise</returns> /// <returns>True if the state matches, false otherwise</returns>
public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState, bool checkTextures) public bool MatchesCompute(GpuChannel channel, GpuChannelPoolState poolState, GpuChannelComputeState computeState, bool checkTextures)
{ {
if (computeState.HasUnalignedStorageBuffer != ComputeState.HasUnalignedStorageBuffer)
{
return false;
}
return Matches(channel, poolState, checkTextures, isCompute: true); return Matches(channel, poolState, checkTextures, isCompute: true);
} }

View file

@ -10,5 +10,7 @@ namespace Ryujinx.Graphics.Shader
public const int NvnBaseVertexByteOffset = 0x640; public const int NvnBaseVertexByteOffset = 0x640;
public const int NvnBaseInstanceByteOffset = 0x644; public const int NvnBaseInstanceByteOffset = 0x644;
public const int NvnDrawIndexByteOffset = 0x648; public const int NvnDrawIndexByteOffset = 0x648;
public const int StorageAlignment = 16;
} }
} }

View file

@ -177,6 +177,15 @@ namespace Ryujinx.Graphics.Shader
return false; return false;
} }
/// <summary>
/// Queries whether the current draw uses unaligned storage buffer addresses.
/// The default implementation reports that no unaligned storage buffer is in use.
/// </summary>
/// <returns>True if any storage buffer address is not aligned to 16 bytes, false otherwise</returns>
bool QueryHasUnalignedStorageBuffer()
{
return false;
}
/// <summary> /// <summary>
/// Queries host about the presence of the FrontFacing built-in variable bug. /// Queries host about the presence of the FrontFacing built-in variable bug.
/// </summary> /// </summary>

View file

@ -34,7 +34,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
// we can guess which storage buffer it is accessing. // we can guess which storage buffer it is accessing.
// We can then replace the global memory access with a storage // We can then replace the global memory access with a storage
// buffer access. // buffer access.
node = ReplaceGlobalWithStorage(node, config, storageIndex); node = ReplaceGlobalWithStorage(block, node, config, storageIndex);
} }
else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal) else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
{ {
@ -54,7 +54,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
} }
} }
private static LinkedListNode<INode> ReplaceGlobalWithStorage(LinkedListNode<INode> node, ShaderConfig config, int storageIndex) private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
{ {
Operation operation = (Operation)node.Value; Operation operation = (Operation)node.Value;
@ -64,42 +64,10 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
config.SetUsedStorageBuffer(storageIndex, isWrite); config.SetUsedStorageBuffer(storageIndex, isWrite);
Operand GetStorageOffset()
{
Operand addrLow = operation.GetSource(0);
Operand baseAddrLow = Cbuf(0, GetStorageCbOffset(config.Stage, storageIndex));
Operand baseAddrTrunc = Local();
Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
node.List.AddBefore(node, andOp);
Operand byteOffset = Local();
Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
node.List.AddBefore(node, subOp);
if (isStg16Or8)
{
return byteOffset;
}
Operand wordOffset = Local();
Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
node.List.AddBefore(node, shrOp);
return wordOffset;
}
Operand[] sources = new Operand[operation.SourcesCount]; Operand[] sources = new Operand[operation.SourcesCount];
sources[0] = Const(storageIndex); sources[0] = Const(storageIndex);
sources[1] = GetStorageOffset(); sources[1] = GetStorageOffset(block, node, config, storageIndex, operation.GetSource(0), isStg16Or8);
for (int index = 2; index < operation.SourcesCount; index++) for (int index = 2; index < operation.SourcesCount; index++)
{ {
@ -144,6 +112,170 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return node; return node;
} }
/// <summary>
/// Builds the operand holding the offset into the storage buffer for a global memory access
/// that is being replaced with a storage buffer access.
/// When storage buffers are known to be aligned, the offset is taken directly from the address
/// calculation (see the three-parameter overload), which avoids reading the base address from
/// constant buffer 0. Otherwise, the offset is computed by subtracting the truncated base address
/// read from constant buffer 0.
/// </summary>
/// <param name="block">Block containing the memory access</param>
/// <param name="node">List node of the memory access; new operations are inserted before it</param>
/// <param name="config">Shader configuration</param>
/// <param name="storageIndex">Index of the storage buffer being accessed</param>
/// <param name="addrLow">Low 32 bits of the accessed address</param>
/// <param name="isStg16Or8">True to return the byte offset, false to return the word offset (byte offset shifted right by 2)</param>
/// <returns>Byte or word offset operand, depending on <paramref name="isStg16Or8"/></returns>
private static Operand GetStorageOffset(
    BasicBlock block,
    LinkedListNode<INode> node,
    ShaderConfig config,
    int storageIndex,
    Operand addrLow,
    bool isStg16Or8)
{
    int baseAddressCbOffset = GetStorageCbOffset(config.Stage, storageIndex);

    bool storageAligned = !(config.GpuAccessor.QueryHasUnalignedStorageBuffer() || config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() > Constants.StorageAlignment);

    (Operand byteOffset, int constantOffset) = storageAligned ?
        GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), baseAddressCbOffset) :
        (null, 0);

    if (byteOffset != null)
    {
        // The offset was resolved from the address calculation itself.
        // Patch the alignment tests first, while byteOffset does not yet include the constant:
        // ReplaceAddressAlignment adds constantOffset on its own, so calling it after the
        // constant has been folded in would account for it twice.
        ReplaceAddressAlignment(node.List, addrLow, byteOffset, constantOffset);

        if (constantOffset != 0)
        {
            Operand offset = Local();
            Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));

            node.List.AddBefore(node, addOp);

            byteOffset = offset;
        }
    }
    else
    {
        // Fallback: subtract the base address (truncated to the host alignment) read from
        // constant buffer 0. Alignment tests on the address are left untouched on this path.
        Operand baseAddrLow = Cbuf(0, baseAddressCbOffset);

        Operand baseAddrTrunc = Local();

        Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

        Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);

        node.List.AddBefore(node, andOp);

        Operand offset = Local();
        Operation subOp = new Operation(Instruction.Subtract, offset, addrLow, baseAddrTrunc);

        node.List.AddBefore(node, subOp);

        byteOffset = offset;
    }

    if (isStg16Or8)
    {
        return byteOffset;
    }

    Operand wordOffset = Local();
    Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));

    node.List.AddBefore(node, shrOp);

    return wordOffset;
}
/// <summary>
/// Checks whether an operand refers to constant buffer slot 0 at the given offset.
/// </summary>
/// <param name="operand">Operand to check</param>
/// <param name="offset">Expected constant buffer offset</param>
/// <returns>True if the operand is a constant buffer operand on slot 0 with the given offset, false otherwise</returns>
private static bool IsCb0Offset(Operand operand, int offset)
{
    if (operand.Type != OperandType.ConstantBuffer)
    {
        return false;
    }

    return operand.GetCbufSlot() == 0 && operand.GetCbufOffset() == offset;
}
/// <summary>
/// Rewrites "address &amp; 3" alignment tests so that they operate on the storage buffer byte offset
/// (plus the constant offset) rather than on the full address.
/// </summary>
/// <param name="list">Operation list where the replacement offset calculation is inserted</param>
/// <param name="address">Address operand whose uses are inspected</param>
/// <param name="byteOffset">Byte offset into the storage buffer, without the constant offset</param>
/// <param name="constantOffset">Constant offset that is added to <paramref name="byteOffset"/></param>
private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
{
    // When we emit 16/8-bit LDG, we add extra code to determine the address alignment.
    // Eliminate the storage buffer base address from this too, leaving only the byte offset.

    foreach (INode useNode in address.UseOps)
    {
        if (!(useNode is Operation op) || op.Inst != Instruction.BitwiseAnd)
        {
            continue;
        }

        Operand lhs = op.GetSource(0);
        Operand rhs = op.GetSource(1);

        // Find which source holds the address; the other one must be the constant 3.
        int addressIndex;

        if (lhs == address && rhs.Type == OperandType.Constant && rhs.Value == 3)
        {
            addressIndex = 0;
        }
        else if (rhs == address && lhs.Type == OperandType.Constant && lhs.Value == 3)
        {
            addressIndex = 1;
        }
        else
        {
            continue;
        }

        LinkedListNode<INode> useListNode = list.Find(op);

        // Add offset calculation before the use. Needs to be on the same block,
        // so uses that are not found on this list are left alone.
        if (useListNode == null)
        {
            continue;
        }

        Operand offset = Local();

        list.AddBefore(useListNode, new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset)));

        op.SetSource(addressIndex, offset);
    }
}
/// <summary>
/// Tries to split an address into "storage buffer base address from constant buffer 0" plus an offset,
/// so the offset can be used without reading the base address.
/// </summary>
/// <param name="block">Block containing the address calculation</param>
/// <param name="address">Address operand to analyze</param>
/// <param name="baseAddressCbOffset">Offset on constant buffer 0 of the storage buffer base address</param>
/// <returns>
/// The variable part of the offset and the constant part of the offset.
/// The variable part is null when the address does not match any of the recognized forms
/// </returns>
private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int baseAddressCbOffset)
{
    if (IsCb0Offset(address, baseAddressCbOffset))
    {
        // The address is the base address itself, so the offset is zero.
        return (Const(0), 0);
    }

    // Peel off an optional "+ constant" first, then look at what remains.
    (Operand baseAddress, int constantOffset) = GetStorageConstantOffset(block, address);

    baseAddress = Utils.FindLastOperation(baseAddress, block);

    if (IsCb0Offset(baseAddress, baseAddressCbOffset))
    {
        // Base address plus a constant only.
        return (Const(0), constantOffset);
    }

    if (baseAddress.AsgOp is Operation offsetAdd && offsetAdd.Inst == Instruction.Add)
    {
        Operand lhs = offsetAdd.GetSource(0);
        Operand rhs = Utils.FindLastOperation(offsetAdd.GetSource(1), block);

        // Whichever side is the base address, the other side is the offset.
        if (IsCb0Offset(rhs, baseAddressCbOffset))
        {
            return (lhs, constantOffset);
        }

        if (IsCb0Offset(lhs, baseAddressCbOffset))
        {
            return (rhs, constantOffset);
        }
    }

    return (null, 0);
}
/// <summary>
/// Splits an address of the form "value + constant" into its two parts.
/// </summary>
/// <param name="block">Block containing the address calculation</param>
/// <param name="address">Address operand to analyze</param>
/// <returns>
/// The first source of the addition and the constant value when the address is assigned by an
/// addition whose second source is a constant, otherwise the address itself and zero
/// </returns>
private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address)
{
    if (address.AsgOp is Operation offsetAdd && offsetAdd.Inst == Instruction.Add)
    {
        Operand baseOperand = offsetAdd.GetSource(0);
        Operand constant = offsetAdd.GetSource(1);

        if (constant.Type == OperandType.Constant)
        {
            return (baseOperand, constant.Value);
        }
    }

    return (address, 0);
}
private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex) private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
{ {
Operation operation = (Operation)node.Value; Operation operation = (Operation)node.Value;
@ -165,7 +297,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
Operand byteOffset = Local(); Operand byteOffset = Local();
Operand wordOffset = Local(); Operand wordOffset = Local();
Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc); Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)); Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
node.List.AddBefore(node, subOp); node.List.AddBefore(node, subOp);
@ -260,7 +392,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{ {
if (operand.Type == OperandType.ConstantBuffer) if (operand.Type == OperandType.ConstantBuffer)
{ {
int slot = operand.GetCbufSlot(); int slot = operand.GetCbufSlot();
int offset = operand.GetCbufOffset(); int offset = operand.GetCbufOffset();
if (slot == 0 && offset >= sbStart && offset < sbEnd) if (slot == 0 && offset >= sbStart && offset < sbEnd)