0
0
Fork 0
mirror of https://github.com/GreemDev/Ryujinx.git synced 2024-12-23 18:35:47 +00:00
Ryujinx/src/Ryujinx.Graphics.Shader/Translation/Translator.cs
gdkchan f09bba82b9
Geometry shader emulation for macOS (#5551)
* Implement vertex and geometry shader conversion to compute

* Call InitializeReservedCounts for compute too

* PR feedback

* Set clip distance mask for geometry and tessellation shaders too

* Transform feedback emulation only for vertex
2023-08-29 21:10:34 -03:00

373 lines
14 KiB
C#

using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Linq;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
namespace Ryujinx.Graphics.Shader.Translation
{
public static class Translator
{
private const int ThreadsPerWarp = 32;
private const int HeaderSize = 0x50;
internal readonly struct FunctionCode
{
public Operation[] Code { get; }
public FunctionCode(Operation[] code)
{
Code = code;
}
}
public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
{
return DecodeShader(address, gpuAccessor, options);
}
private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
{
int localMemorySize;
ShaderDefinitions definitions;
DecodedProgram program;
if (options.Flags.HasFlag(TranslationFlags.Compute))
{
definitions = CreateComputeDefinitions(gpuAccessor);
localMemorySize = gpuAccessor.QueryComputeLocalMemorySize();
program = Decoder.Decode(definitions, gpuAccessor, address);
}
else
{
ShaderHeader header = new(gpuAccessor, address);
definitions = CreateGraphicsDefinitions(gpuAccessor, header);
localMemorySize = GetLocalMemorySize(header);
program = Decoder.Decode(definitions, gpuAccessor, address + HeaderSize);
}
ulong maxEndAddress = 0;
foreach (DecodedFunction function in program)
{
foreach (Block block in function.Blocks)
{
if (maxEndAddress < block.EndAddress)
{
maxEndAddress = block.EndAddress;
}
}
}
int size = (int)maxEndAddress + (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize);
return new TranslatorContext(address, size, localMemorySize, definitions, gpuAccessor, options, program);
}
private static ShaderDefinitions CreateComputeDefinitions(IGpuAccessor gpuAccessor)
{
return new ShaderDefinitions(
ShaderStage.Compute,
gpuAccessor.QueryComputeLocalSizeX(),
gpuAccessor.QueryComputeLocalSizeY(),
gpuAccessor.QueryComputeLocalSizeZ());
}
private static ShaderDefinitions CreateGraphicsDefinitions(IGpuAccessor gpuAccessor, ShaderHeader header)
{
TransformFeedbackOutput[] transformFeedbackOutputs = GetTransformFeedbackOutputs(gpuAccessor, out ulong transformFeedbackVecMap);
return new ShaderDefinitions(
header.Stage,
gpuAccessor.QueryGraphicsState(),
header.Stage == ShaderStage.Geometry && header.GpPassthrough,
header.ThreadsPerInputPrimitive,
header.OutputTopology,
header.MaxOutputVertexCount,
header.ImapTypes,
header.OmapTargets,
header.OmapSampleMask,
header.OmapDepth,
gpuAccessor.QueryHostSupportsScaledVertexFormats(),
transformFeedbackVecMap,
transformFeedbackOutputs);
}
internal static TransformFeedbackOutput[] GetTransformFeedbackOutputs(IGpuAccessor gpuAccessor, out ulong transformFeedbackVecMap)
{
bool transformFeedbackEnabled =
gpuAccessor.QueryTransformFeedbackEnabled() &&
gpuAccessor.QueryHostSupportsTransformFeedback();
TransformFeedbackOutput[] transformFeedbackOutputs = null;
transformFeedbackVecMap = 0UL;
if (transformFeedbackEnabled)
{
transformFeedbackOutputs = new TransformFeedbackOutput[0xc0];
for (int tfbIndex = 0; tfbIndex < 4; tfbIndex++)
{
var locations = gpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
var stride = gpuAccessor.QueryTransformFeedbackStride(tfbIndex);
for (int i = 0; i < locations.Length; i++)
{
byte wordOffset = locations[i];
if (wordOffset < 0xc0)
{
transformFeedbackOutputs[wordOffset] = new TransformFeedbackOutput(tfbIndex, i * 4, stride);
transformFeedbackVecMap |= 1UL << (wordOffset / 4);
}
}
}
}
return transformFeedbackOutputs;
}
private static int GetLocalMemorySize(ShaderHeader header)
{
return header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp);
}
internal static FunctionCode[] EmitShader(
TranslatorContext translatorContext,
ResourceManager resourceManager,
DecodedProgram program,
bool vertexAsCompute,
bool initializeOutputs,
out int initializationOperations)
{
initializationOperations = 0;
FunctionMatch.RunPass(program);
foreach (DecodedFunction function in program.Where(x => !x.IsCompilerGenerated).OrderBy(x => x.Address))
{
program.AddFunctionAndSetId(function);
}
FunctionCode[] functions = new FunctionCode[program.FunctionsWithIdCount];
for (int index = 0; index < functions.Length; index++)
{
EmitterContext context = new(translatorContext, resourceManager, program, vertexAsCompute, index != 0);
if (initializeOutputs && index == 0)
{
EmitOutputsInitialization(context, translatorContext.AttributeUsage, translatorContext.GpuAccessor, translatorContext.Stage);
initializationOperations = context.OperationsCount;
}
DecodedFunction function = program.GetFunctionById(index);
foreach (Block block in function.Blocks)
{
context.CurrBlock = block;
context.EnterBlock(block.Address);
EmitOps(context, block);
}
functions[index] = new(context.GetOperations());
}
return functions;
}
private static void EmitOutputsInitialization(EmitterContext context, AttributeUsage attributeUsage, IGpuAccessor gpuAccessor, ShaderStage stage)
{
// Compute has no output attributes, and fragment is the last stage, so we
// don't need to initialize outputs on those stages.
if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment)
{
return;
}
if (stage == ShaderStage.Vertex)
{
InitializePositionOutput(context);
}
UInt128 usedAttributes = context.TranslatorContext.AttributeUsage.NextInputAttributesComponents;
while (usedAttributes != UInt128.Zero)
{
int index = (int)UInt128.TrailingZeroCount(usedAttributes);
int vecIndex = index / 4;
usedAttributes &= ~(UInt128.One << index);
// We don't need to initialize passthrough attributes.
if ((context.TranslatorContext.AttributeUsage.PassthroughAttributes & (1 << vecIndex)) != 0)
{
continue;
}
InitializeOutputComponent(context, vecIndex, index & 3, perPatch: false);
}
if (context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch != null)
{
foreach (int vecIndex in context.TranslatorContext.AttributeUsage.NextUsedInputAttributesPerPatch.Order())
{
InitializeOutput(context, vecIndex, perPatch: true);
}
}
if (attributeUsage.NextUsesFixedFuncAttributes)
{
bool supportsLayerFromVertexOrTess = gpuAccessor.QueryHostSupportsLayerVertexTessellation();
int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1;
for (int i = fixedStartAttr; i < fixedStartAttr + 5 + AttributeConsts.TexCoordCount; i++)
{
int index = attributeUsage.GetFreeUserAttribute(isOutput: true, i);
if (index < 0)
{
break;
}
InitializeOutput(context, index, perPatch: false);
}
}
}
private static void InitializePositionOutput(EmitterContext context)
{
for (int c = 0; c < 4; c++)
{
context.Store(StorageKind.Output, IoVariable.Position, null, Const(c), ConstF(c == 3 ? 1f : 0f));
}
}
private static void InitializeOutput(EmitterContext context, int location, bool perPatch)
{
for (int c = 0; c < 4; c++)
{
InitializeOutputComponent(context, location, c, perPatch);
}
}
private static void InitializeOutputComponent(EmitterContext context, int location, int c, bool perPatch)
{
StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
if (context.TranslatorContext.Definitions.OaIndexing)
{
Operand invocationId = null;
if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch)
{
invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
}
int index = location * 4 + c;
context.Store(storageKind, IoVariable.UserDefined, invocationId, Const(index), ConstF(c == 3 ? 1f : 0f));
}
else
{
if (context.TranslatorContext.Definitions.Stage == ShaderStage.TessellationControl && !perPatch)
{
Operand invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(c == 3 ? 1f : 0f));
}
else
{
context.Store(storageKind, IoVariable.UserDefined, null, Const(location), Const(c), ConstF(c == 3 ? 1f : 0f));
}
}
}
private static void EmitOps(EmitterContext context, Block block)
{
for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
{
InstOp op = block.OpCodes[opIndex];
if (context.TranslatorContext.Options.Flags.HasFlag(TranslationFlags.DebugMode))
{
string instName;
if (op.Emitter != null)
{
instName = op.Name.ToString();
}
else
{
instName = "???";
context.TranslatorContext.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
}
string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";
context.Add(new CommentNode(dbgComment));
}
InstConditional opConditional = new(op.RawOpCode);
bool noPred = op.Props.HasFlag(InstProps.NoPred);
if (!noPred && opConditional.Pred == RegisterConsts.PredicateTrueIndex && opConditional.PredInv)
{
continue;
}
Operand predSkipLbl = null;
if (Decoder.IsPopBranch(op.Name))
{
// If the instruction is a SYNC or BRK instruction with only one
// possible target address, then the instruction is basically
// just a simple branch, we can generate code similar to branch
// instructions, with the condition check on the branch itself.
noPred = block.SyncTargets.Count <= 1;
}
else if (op.Name == InstName.Bra)
{
noPred = true;
}
if (!(opConditional.Pred == RegisterConsts.PredicateTrueIndex || noPred))
{
Operand label;
if (opIndex == block.OpCodes.Count - 1 && block.HasNext())
{
label = context.GetLabel(block.Successors[0].Address);
}
else
{
label = Label();
predSkipLbl = label;
}
Operand pred = Register(opConditional.Pred, RegisterType.Predicate);
if (opConditional.PredInv)
{
context.BranchIfTrue(label, pred);
}
else
{
context.BranchIfFalse(label, pred);
}
}
context.CurrOp = op;
op.Emitter?.Invoke(context);
if (predSkipLbl != null)
{
context.MarkLabel(predSkipLbl);
}
}
}
}
}