Remove barrier on Intel if control flow is potentially divergent (#5044)
* Remove barrier on Intel if control flow is potentially divergent * Shader cache version bump
This commit is contained in:
parent
fe30c03cac
commit
2cdcfe46d8
10 changed files with 64 additions and 8 deletions
|
@ -34,6 +34,7 @@ namespace Ryujinx.Graphics.GAL
|
|||
public readonly bool SupportsCubemapView;
|
||||
public readonly bool SupportsNonConstantTextureOffset;
|
||||
public readonly bool SupportsShaderBallot;
|
||||
public readonly bool SupportsShaderBarrierDivergence;
|
||||
public readonly bool SupportsShaderFloat64;
|
||||
public readonly bool SupportsTextureShadowLod;
|
||||
public readonly bool SupportsViewportIndexVertexTessellation;
|
||||
|
@ -82,6 +83,7 @@ namespace Ryujinx.Graphics.GAL
|
|||
bool supportsCubemapView,
|
||||
bool supportsNonConstantTextureOffset,
|
||||
bool supportsShaderBallot,
|
||||
bool supportsShaderBarrierDivergence,
|
||||
bool supportsShaderFloat64,
|
||||
bool supportsTextureShadowLod,
|
||||
bool supportsViewportIndexVertexTessellation,
|
||||
|
@ -126,6 +128,7 @@ namespace Ryujinx.Graphics.GAL
|
|||
SupportsCubemapView = supportsCubemapView;
|
||||
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
|
||||
SupportsShaderBallot = supportsShaderBallot;
|
||||
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
|
||||
SupportsShaderFloat64 = supportsShaderFloat64;
|
||||
SupportsTextureShadowLod = supportsTextureShadowLod;
|
||||
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;
|
||||
|
|
|
@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
|
|||
private const ushort FileFormatVersionMajor = 1;
|
||||
private const ushort FileFormatVersionMinor = 2;
|
||||
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
|
||||
private const uint CodeGenVersion = 5159;
|
||||
private const uint CodeGenVersion = 5044;
|
||||
|
||||
private const string SharedTocFileName = "shared.toc";
|
||||
private const string SharedDataFileName = "shared.data";
|
||||
|
|
|
@ -141,6 +141,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
|||
|
||||
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
|
||||
|
||||
public bool QueryHostSupportsShaderBarrierDivergence() => _context.Capabilities.SupportsShaderBarrierDivergence;
|
||||
|
||||
public bool QueryHostSupportsShaderFloat64() => _context.Capabilities.SupportsShaderFloat64;
|
||||
|
||||
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
|
||||
|
|
|
@ -127,6 +127,7 @@ namespace Ryujinx.Graphics.OpenGL
|
|||
public Capabilities GetCapabilities()
|
||||
{
|
||||
bool intelWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelWindows;
|
||||
bool intelUnix = HwCapabilities.Vendor == HwCapabilities.GpuVendor.IntelUnix;
|
||||
bool amdWindows = HwCapabilities.Vendor == HwCapabilities.GpuVendor.AmdWindows;
|
||||
|
||||
return new Capabilities(
|
||||
|
@ -158,6 +159,7 @@ namespace Ryujinx.Graphics.OpenGL
|
|||
supportsCubemapView: true,
|
||||
supportsNonConstantTextureOffset: HwCapabilities.SupportsNonConstantTextureOffset,
|
||||
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
|
||||
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
|
||||
supportsShaderFloat64: true,
|
||||
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
|
||||
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
|
||||
|
|
|
@ -28,18 +28,18 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
|||
|
||||
for (int i = 1; i < info.Functions.Count; i++)
|
||||
{
|
||||
PrintFunction(context, info, info.Functions[i]);
|
||||
PrintFunction(context, info.Functions[i]);
|
||||
|
||||
context.AppendLine();
|
||||
}
|
||||
}
|
||||
|
||||
PrintFunction(context, info, info.Functions[0], MainFunctionName);
|
||||
PrintFunction(context, info.Functions[0], MainFunctionName);
|
||||
|
||||
return context.GetCode();
|
||||
}
|
||||
|
||||
private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null)
|
||||
private static void PrintFunction(CodeGenContext context, StructuredFunction function, string funcName = null)
|
||||
{
|
||||
context.CurrentFunction = function;
|
||||
|
||||
|
@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
|||
|
||||
Declarations.DeclareLocals(context, function);
|
||||
|
||||
PrintBlock(context, function.MainBlock);
|
||||
PrintBlock(context, function.MainBlock, funcName == MainFunctionName);
|
||||
|
||||
context.LeaveScope();
|
||||
}
|
||||
|
@ -72,7 +72,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
|||
return $"{Declarations.GetVarTypeName(context, function.ReturnType)} {funcName ?? function.Name}({string.Join(", ", args)})";
|
||||
}
|
||||
|
||||
private static void PrintBlock(CodeGenContext context, AstBlock block)
|
||||
private static void PrintBlock(CodeGenContext context, AstBlock block, bool isMainFunction)
|
||||
{
|
||||
AstBlockVisitor visitor = new AstBlockVisitor(block);
|
||||
|
||||
|
@ -112,10 +112,32 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
|||
}
|
||||
};
|
||||
|
||||
bool supportsBarrierDivergence = context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence();
|
||||
bool mayHaveReturned = false;
|
||||
|
||||
foreach (IAstNode node in visitor.Visit())
|
||||
{
|
||||
if (node is AstOperation operation)
|
||||
{
|
||||
if (!supportsBarrierDivergence)
|
||||
{
|
||||
if (operation.Inst == IntermediateRepresentation.Instruction.Barrier)
|
||||
{
|
||||
// Barrier on divergent control flow paths may cause the GPU to hang,
|
||||
// so skip emitting the barrier for those cases.
|
||||
if (visitor.Block.Type != AstBlockType.Main || mayHaveReturned || !isMainFunction)
|
||||
{
|
||||
context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (operation.Inst == IntermediateRepresentation.Instruction.Return)
|
||||
{
|
||||
mayHaveReturned = true;
|
||||
}
|
||||
}
|
||||
|
||||
string expr = InstGen.GetExpression(context, operation);
|
||||
|
||||
if (expr != null)
|
||||
|
|
|
@ -76,6 +76,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||
|
||||
public SpirvDelegates Delegates { get; }
|
||||
|
||||
public bool IsMainFunction { get; private set; }
|
||||
public bool MayHaveReturned { get; set; }
|
||||
|
||||
public CodeGenContext(
|
||||
StructuredProgramInfo info,
|
||||
ShaderConfig config,
|
||||
|
@ -108,8 +111,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||
Delegates = new SpirvDelegates(this);
|
||||
}
|
||||
|
||||
public void StartFunction()
|
||||
public void StartFunction(bool isMainFunction)
|
||||
{
|
||||
IsMainFunction = isMainFunction;
|
||||
MayHaveReturned = false;
|
||||
_locals.Clear();
|
||||
_localForArgs.Clear();
|
||||
_funcArgs.Clear();
|
||||
|
|
|
@ -242,6 +242,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||
|
||||
private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
|
||||
{
|
||||
// Barrier on divergent control flow paths may cause the GPU to hang,
|
||||
// so skip emitting the barrier for those cases.
|
||||
if (!context.Config.GpuAccessor.QueryHostSupportsShaderBarrierDivergence() &&
|
||||
(context.CurrentBlock.Type != AstBlockType.Main || context.MayHaveReturned || !context.IsMainFunction))
|
||||
{
|
||||
context.Config.GpuAccessor.Log($"Shader has barrier on potentially divergent block, the barrier will be removed.");
|
||||
|
||||
return OperationResult.Invalid;
|
||||
}
|
||||
|
||||
context.ControlBarrier(
|
||||
context.Constant(context.TypeU32(), Scope.Workgroup),
|
||||
context.Constant(context.TypeU32(), Scope.Workgroup),
|
||||
|
@ -1092,6 +1102,8 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||
|
||||
private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation)
|
||||
{
|
||||
context.MayHaveReturned = true;
|
||||
|
||||
if (operation.SourcesCount != 0)
|
||||
{
|
||||
context.ReturnValue(context.Get(context.CurrentFunction.ReturnType, operation.GetSource(0)));
|
||||
|
|
|
@ -148,7 +148,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
|
|||
|
||||
context.CurrentFunction = function;
|
||||
context.AddFunction(spvFunc);
|
||||
context.StartFunction();
|
||||
context.StartFunction(isMainFunction: funcIndex == 0);
|
||||
|
||||
Declarations.DeclareParameters(context, function);
|
||||
|
||||
|
|
|
@ -331,6 +331,15 @@ namespace Ryujinx.Graphics.Shader
|
|||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Queries host GPU shader support for barrier instructions on divergent control flow paths.
|
||||
/// </summary>
|
||||
/// <returns>True if the GPU supports barriers on divergent control flow paths, false otherwise</returns>
|
||||
bool QueryHostSupportsShaderBarrierDivergence()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Queries host GPU support for 64-bit floating point (double precision) operations on the shader.
|
||||
/// </summary>
|
||||
|
|
|
@ -595,6 +595,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
supportsCubemapView: !IsAmdGcn,
|
||||
supportsNonConstantTextureOffset: false,
|
||||
supportsShaderBallot: false,
|
||||
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
|
||||
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
|
||||
supportsTextureShadowLod: false,
|
||||
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,
|
||||
|
|
Reference in a new issue