Improve shader global memory to storage pass (#2200)
* Improve shader global memory to storage pass * Formatting and more comments * Shader cache version bump
This commit is contained in:
parent
7719909397
commit
40e276c9b5
5 changed files with 158 additions and 129 deletions
|
@ -35,7 +35,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
|||
/// <summary>
|
||||
/// Version of the codegen (to be changed when codegen or guest format change).
|
||||
/// </summary>
|
||||
private const ulong ShaderCodeGenVersion = 2163;
|
||||
private const ulong ShaderCodeGenVersion = 2200;
|
||||
|
||||
// Progress reporting helpers
|
||||
private volatile int _shaderCount;
|
||||
|
|
|
@ -5,66 +5,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
class BindlessElimination
|
||||
{
|
||||
/// <summary>
/// Searches the predecessors of a block for the conditional branch that leads into it.
/// </summary>
/// <param name="block">Block whose incoming conditional branch is wanted</param>
/// <returns>The last operation of the first matching predecessor, or null when no predecessor matches</returns>
private static Operation FindBranchSource(BasicBlock block)
{
    foreach (BasicBlock predecessor in block.Predecessors)
    {
        if (predecessor.Operations.Count == 0)
        {
            continue;
        }

        // Only the final node of a predecessor is considered, and it must be an Operation.
        if (!(predecessor.Operations.Last.Value is Operation branchOp))
        {
            continue;
        }

        // The block is the predecessor's Next block and the predecessor ends in BranchIfFalse.
        if (predecessor.Next == block && branchOp.Inst == Instruction.BranchIfFalse)
        {
            return branchOp;
        }

        // The block is the predecessor's Branch block and the predecessor ends in BranchIfTrue.
        if (predecessor.Branch == block && branchOp.Inst == Instruction.BranchIfTrue)
        {
            return branchOp;
        }
    }

    return null;
}
|
||||
|
||||
/// <summary>
/// Checks whether the condition guarding the query block is also the one guarding the current block.
/// Only the top-most conditional of each block is compared for now.
/// </summary>
/// <param name="currentBlock">Block where the value is being used</param>
/// <param name="queryBlock">Block whose guarding condition must also hold for the current block</param>
/// <returns>True if both blocks are entered through the same kind of branch on the same condition operand</returns>
private static bool BlockConditionsMatch(BasicBlock currentBlock, BasicBlock queryBlock)
{
    Operation branchIntoCurrent = FindBranchSource(currentBlock);
    Operation branchIntoQuery = FindBranchSource(queryBlock);

    // Without a conditional branch on both sides there is nothing to compare.
    if (branchIntoCurrent == null || branchIntoQuery == null)
    {
        return false;
    }

    Operand conditionOfCurrent = branchIntoCurrent.GetSource(0);
    Operand conditionOfQuery = branchIntoQuery.GetSource(0);

    if (branchIntoCurrent.Inst != branchIntoQuery.Inst)
    {
        return false;
    }

    // The condition should be the same operand instance.
    return conditionOfCurrent == conditionOfQuery;
}
|
||||
|
||||
/// <summary>
/// Resolves an operand assigned by a phi node to one of the phi's incoming sources, when possible.
/// </summary>
/// <param name="source">Operand to resolve</param>
/// <param name="block">Block where the operand is being used</param>
/// <returns>
/// The phi source whose block has conditions matching <paramref name="block"/>, or
/// <paramref name="source"/> itself when it is not assigned by a phi node or nothing matches
/// </returns>
private static Operand FindLastOperation(Operand source, BasicBlock block)
{
    if (!(source.AsgOp is PhiNode phi))
    {
        return source;
    }

    // This source can have a different value depending on a previous branch.
    // Ensure that conditions met for that branch are also met for the current one.
    // Walk the phi sources backwards so that the latest ones are preferred.
    int index = phi.SourcesCount;

    while (--index >= 0)
    {
        if (BlockConditionsMatch(block, phi.GetBlock(index)))
        {
            return phi.GetSource(index);
        }
    }

    return source;
}
|
||||
|
||||
public static void RunPass(BasicBlock block, ShaderConfig config)
|
||||
{
|
||||
// We can turn a bindless into regular access by recognizing the pattern
|
||||
|
@ -89,7 +29,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
texOp.Inst == Instruction.TextureSample ||
|
||||
texOp.Inst == Instruction.TextureSize)
|
||||
{
|
||||
Operand bindlessHandle = FindLastOperation(texOp.GetSource(0), block);
|
||||
Operand bindlessHandle = Utils.FindLastOperation(texOp.GetSource(0), block);
|
||||
|
||||
if (bindlessHandle.Type == OperandType.ConstantBuffer)
|
||||
{
|
||||
|
@ -107,8 +47,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
continue;
|
||||
}
|
||||
|
||||
Operand src0 = FindLastOperation(handleCombineOp.GetSource(0), block);
|
||||
Operand src1 = FindLastOperation(handleCombineOp.GetSource(1), block);
|
||||
Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block);
|
||||
Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block);
|
||||
|
||||
if (src0.Type != OperandType.ConstantBuffer ||
|
||||
src1.Type != OperandType.ConstantBuffer || src0.GetCbufSlot() != src1.GetCbufSlot())
|
||||
|
@ -120,7 +60,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
else if (texOp.Inst == Instruction.ImageLoad || texOp.Inst == Instruction.ImageStore)
|
||||
{
|
||||
Operand src0 = FindLastOperation(texOp.GetSource(0), block);
|
||||
Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);
|
||||
|
||||
if (src0.Type == OperandType.ConstantBuffer)
|
||||
{
|
||||
|
|
|
@ -25,32 +25,29 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
Operand source = operation.GetSource(0);
|
||||
|
||||
if (source.AsgOp is Operation asgOperation)
|
||||
int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd);
|
||||
|
||||
if (storageIndex >= 0)
|
||||
{
|
||||
int storageIndex = SearchForStorageBase(asgOperation, sbStart, sbEnd);
|
||||
// Storage buffers are implemented using global memory access.
|
||||
// If we know from where the base address of the access is loaded,
|
||||
// we can guess which storage buffer it is accessing.
|
||||
// We can then replace the global memory access with a storage
|
||||
// buffer access.
|
||||
node = ReplaceGlobalWithStorage(node, config, storageIndex);
|
||||
}
|
||||
else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
|
||||
{
|
||||
// Here we effectively try to replace a LDG instruction with LDC.
|
||||
// The hardware only supports a limited amount of constant buffers
|
||||
// so NVN "emulates" more constant buffers using global memory access.
|
||||
// Here we try to replace the global access back to a constant buffer
|
||||
// load.
|
||||
storageIndex = SearchForStorageBase(block, source, UbeBaseOffset, UbeBaseOffset + UbeDescsSize);
|
||||
|
||||
if (storageIndex >= 0)
|
||||
{
|
||||
// Storage buffers are implemented using global memory access.
|
||||
// If we know from where the base address of the access is loaded,
|
||||
// we can guess which storage buffer it is accessing.
|
||||
// We can then replace the global memory access with a storage
|
||||
// buffer access.
|
||||
node = ReplaceGlobalWithStorage(node, config, storageIndex);
|
||||
}
|
||||
else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
|
||||
{
|
||||
// Here we effectively try to replace a LDG instruction with LDC.
|
||||
// The hardware only supports a limited amount of constant buffers
|
||||
// so NVN "emulates" more constant buffers using global memory access.
|
||||
// Here we try to replace the global access back to a constant buffer
|
||||
// load.
|
||||
storageIndex = SearchForStorageBase(asgOperation, UbeBaseOffset, UbeBaseOffset + UbeDescsSize);
|
||||
|
||||
if (storageIndex >= 0)
|
||||
{
|
||||
node = ReplaceLdgWithLdc(node, config, storageIndex);
|
||||
}
|
||||
node = ReplaceLdgWithLdc(node, config, storageIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -184,35 +181,70 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
return node;
|
||||
}
|
||||
|
||||
private static int SearchForStorageBase(Operation operation, int sbStart, int sbEnd)
|
||||
private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd)
|
||||
{
|
||||
Queue<Operation> assignments = new Queue<Operation>();
|
||||
globalAddress = Utils.FindLastOperation(globalAddress, block);
|
||||
|
||||
assignments.Enqueue(operation);
|
||||
|
||||
while (assignments.TryDequeue(out operation))
|
||||
if (globalAddress.Type == OperandType.ConstantBuffer)
|
||||
{
|
||||
for (int index = 0; index < operation.SourcesCount; index++)
|
||||
return GetStorageIndex(globalAddress, sbStart, sbEnd);
|
||||
}
|
||||
|
||||
Operation operation = globalAddress.AsgOp as Operation;
|
||||
|
||||
if (operation == null || operation.Inst != Instruction.Add)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
Operand src1 = operation.GetSource(0);
|
||||
Operand src2 = operation.GetSource(1);
|
||||
|
||||
if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) ||
|
||||
(src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant))
|
||||
{
|
||||
if (src1.Type == OperandType.LocalVariable)
|
||||
{
|
||||
Operand source = operation.GetSource(index);
|
||||
operation = Utils.FindLastOperation(src1, block).AsgOp as Operation;
|
||||
}
|
||||
else
|
||||
{
|
||||
operation = Utils.FindLastOperation(src2, block).AsgOp as Operation;
|
||||
}
|
||||
|
||||
if (source.Type == OperandType.ConstantBuffer)
|
||||
{
|
||||
int slot = source.GetCbufSlot();
|
||||
int offset = source.GetCbufOffset();
|
||||
if (operation == null || operation.Inst != Instruction.Add)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (slot == 0 && offset >= sbStart && offset < sbEnd)
|
||||
{
|
||||
int storageIndex = (offset - sbStart) / StorageDescSize;
|
||||
for (int index = 0; index < operation.SourcesCount; index++)
|
||||
{
|
||||
Operand source = operation.GetSource(index);
|
||||
|
||||
return storageIndex;
|
||||
}
|
||||
}
|
||||
int storageIndex = GetStorageIndex(source, sbStart, sbEnd);
|
||||
|
||||
if (source.AsgOp is Operation asgOperation)
|
||||
{
|
||||
assignments.Enqueue(asgOperation);
|
||||
}
|
||||
if (storageIndex != -1)
|
||||
{
|
||||
return storageIndex;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
|
||||
{
|
||||
if (operand.Type == OperandType.ConstantBuffer)
|
||||
{
|
||||
int slot = operand.GetCbufSlot();
|
||||
int offset = operand.GetCbufOffset();
|
||||
|
||||
if (slot == 0 && offset >= sbStart && offset < sbEnd)
|
||||
{
|
||||
int storageIndex = (offset - sbStart) / StorageDescSize;
|
||||
|
||||
return storageIndex;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -10,11 +10,22 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
{
|
||||
/// <summary>
/// Runs the optimization passes on all blocks, then the one-shot pattern passes on each block,
/// then the optimization passes once more.
/// </summary>
/// <param name="blocks">Blocks to be optimized</param>
/// <param name="config">Shader configuration handed to the pattern passes that take one</param>
public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
{
    RunOptimizationPasses(blocks);

    // These passes look for specific patterns and only need to run once.
    foreach (BasicBlock block in blocks)
    {
        GlobalToStorage.RunPass(block, config);
        BindlessToIndexed.RunPass(block);
        BindlessElimination.RunPass(block, config);
    }

    // Run the optimizations one last time to remove any code that became optimizable after the passes above.
    RunOptimizationPasses(blocks);
}
|
||||
|
||||
private static void RunOptimizationPasses(BasicBlock[] blocks)
|
||||
{
|
||||
bool modified;
|
||||
|
||||
do
|
||||
|
@ -85,27 +96,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|||
}
|
||||
}
|
||||
while (modified);
|
||||
|
||||
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
|
||||
{
|
||||
BindlessToIndexed.RunPass(blocks[blkIndex]);
|
||||
BindlessElimination.RunPass(blocks[blkIndex], config);
|
||||
|
||||
// Try to eliminate any operations that are now unused.
|
||||
LinkedListNode<INode> node = blocks[blkIndex].Operations.First;
|
||||
|
||||
while (node != null)
|
||||
{
|
||||
LinkedListNode<INode> nextNode = node.Next;
|
||||
|
||||
if (IsUnused(node.Value))
|
||||
{
|
||||
RemoveNode(blocks[blkIndex], node);
|
||||
}
|
||||
|
||||
node = nextNode;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void PropagateCopy(Operation copyOp)
|
||||
|
|
67
Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
Normal file
67
Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
Normal file
|
@ -0,0 +1,67 @@
|
|||
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||
|
||||
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
||||
{
|
||||
/// <summary>
/// Helpers shared by the optimization passes for resolving operands that are assigned by phi nodes.
/// </summary>
static class Utils
{
    /// <summary>
    /// Searches the predecessors of a block for the conditional branch that leads into it.
    /// </summary>
    /// <param name="block">Block whose incoming conditional branch is wanted</param>
    /// <returns>The last operation of the first matching predecessor, or null when no predecessor matches</returns>
    private static Operation FindBranchSource(BasicBlock block)
    {
        foreach (BasicBlock candidate in block.Predecessors)
        {
            if (candidate.Operations.Count <= 0)
            {
                continue;
            }

            // Only the final node of a predecessor is considered, and it must be an Operation.
            if (candidate.Operations.Last.Value is Operation tailOp)
            {
                // Block is the predecessor's Next block and the predecessor ends in BranchIfFalse.
                if (candidate.Next == block && tailOp.Inst == Instruction.BranchIfFalse)
                {
                    return tailOp;
                }

                // Block is the predecessor's Branch block and the predecessor ends in BranchIfTrue.
                if (candidate.Branch == block && tailOp.Inst == Instruction.BranchIfTrue)
                {
                    return tailOp;
                }
            }
        }

        return null;
    }

    /// <summary>
    /// Checks whether the condition guarding the query block is also the one guarding the current block.
    /// Only the top-most conditional of each block is compared for now.
    /// </summary>
    /// <param name="currentBlock">Block where the value is being used</param>
    /// <param name="queryBlock">Block whose guarding condition must also hold for the current block</param>
    /// <returns>True if both blocks are entered through the same kind of branch on the same condition operand</returns>
    private static bool BlockConditionsMatch(BasicBlock currentBlock, BasicBlock queryBlock)
    {
        Operation currentEntry = FindBranchSource(currentBlock);
        Operation queryEntry = FindBranchSource(queryBlock);

        // Both blocks must be entered through a conditional branch for a comparison to make sense.
        if (currentEntry == null || queryEntry == null)
        {
            return false;
        }

        Operand currentGuard = currentEntry.GetSource(0);
        Operand queryGuard = queryEntry.GetSource(0);

        // Same branch instruction, and the condition should be the same operand instance.
        return currentEntry.Inst == queryEntry.Inst && currentGuard == queryGuard;
    }

    /// <summary>
    /// Resolves an operand assigned by a phi node to one of the phi's incoming sources, when possible.
    /// </summary>
    /// <param name="source">Operand to resolve</param>
    /// <param name="block">Block where the operand is being used</param>
    /// <returns>
    /// The phi source whose block has conditions matching <paramref name="block"/>, or
    /// <paramref name="source"/> itself when it is not assigned by a phi node or nothing matches
    /// </returns>
    public static Operand FindLastOperation(Operand source, BasicBlock block)
    {
        if (!(source.AsgOp is PhiNode phi))
        {
            return source;
        }

        // This source can have a different value depending on a previous branch.
        // Ensure that conditions met for that branch are also met for the current one.
        // Iterate from the end so that the latest sources of the phi node are preferred.
        for (int srcIndex = phi.SourcesCount - 1; srcIndex >= 0; srcIndex--)
        {
            BasicBlock incomingBlock = phi.GetBlock(srcIndex);

            if (BlockConditionsMatch(block, incomingBlock))
            {
                return phi.GetSource(srcIndex);
            }
        }

        return source;
    }
}
|
||||
}
|
Reference in a new issue