Implement VMAD shader instruction and improve InvocationInfo and ISBERD handling (#3251)
* Implement VMAD shader instruction and improve InvocationInfo and ISBERD handling
* Shader cache version bump
* Fix typo
This commit is contained in:
parent
3139a85a2b
commit
e44a43c7e1
9 changed files with 171 additions and 61 deletions
|
@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Version of the codegen (to be changed when codegen or guest format change).
|
/// Version of the codegen (to be changed when codegen or guest format change).
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private const ulong ShaderCodeGenVersion = 3184;
|
private const ulong ShaderCodeGenVersion = 3251;
|
||||||
|
|
||||||
// Progress reporting helpers
|
// Progress reporting helpers
|
||||||
private volatile int _shaderCount;
|
private volatile int _shaderCount;
|
||||||
|
|
|
@ -250,9 +250,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
||||||
: "gl_SubgroupInvocationID";
|
: "gl_SubgroupInvocationID";
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: There must be a better way to handle this...
|
|
||||||
if (config.Stage == ShaderStage.Fragment)
|
if (config.Stage == ShaderStage.Fragment)
|
||||||
{
|
{
|
||||||
|
// TODO: There must be a better way to handle this...
|
||||||
switch (value)
|
switch (value)
|
||||||
{
|
{
|
||||||
case AttributeConsts.PositionX: return $"(gl_FragCoord.x / {DefaultNames.SupportBlockRenderScaleName}[0])";
|
case AttributeConsts.PositionX: return $"(gl_FragCoord.x / {DefaultNames.SupportBlockRenderScaleName}[0])";
|
||||||
|
|
|
@ -5144,6 +5144,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
public int SrcC => (int)((_opcode >> 39) & 0xFF);
|
public int SrcC => (int)((_opcode >> 39) & 0xFF);
|
||||||
public int Pred => (int)((_opcode >> 16) & 0x7);
|
public int Pred => (int)((_opcode >> 16) & 0x7);
|
||||||
public bool PredInv => (_opcode & 0x80000) != 0;
|
public bool PredInv => (_opcode & 0x80000) != 0;
|
||||||
|
public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
|
||||||
public bool WriteCC => (_opcode & 0x800000000000) != 0;
|
public bool WriteCC => (_opcode & 0x800000000000) != 0;
|
||||||
public AvgMode AvgMode => (AvgMode)((_opcode >> 56) & 0x3);
|
public AvgMode AvgMode => (AvgMode)((_opcode >> 56) & 0x3);
|
||||||
public bool DFormat => (_opcode & 0x40000000000000) != 0;
|
public bool DFormat => (_opcode & 0x40000000000000) != 0;
|
||||||
|
@ -5164,6 +5165,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
|
||||||
public int SrcC => (int)((_opcode >> 39) & 0xFF);
|
public int SrcC => (int)((_opcode >> 39) & 0xFF);
|
||||||
public int Pred => (int)((_opcode >> 16) & 0x7);
|
public int Pred => (int)((_opcode >> 16) & 0x7);
|
||||||
public bool PredInv => (_opcode & 0x80000) != 0;
|
public bool PredInv => (_opcode & 0x80000) != 0;
|
||||||
|
public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
|
||||||
public bool WriteCC => (_opcode & 0x800000000000) != 0;
|
public bool WriteCC => (_opcode & 0x800000000000) != 0;
|
||||||
public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
|
public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
|
||||||
public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7));
|
public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7));
|
||||||
|
|
|
@ -13,16 +13,28 @@ namespace Ryujinx.Graphics.Shader
|
||||||
{
|
{
|
||||||
public static string ToGlslString(this InputTopology topology)
|
public static string ToGlslString(this InputTopology topology)
|
||||||
{
|
{
|
||||||
switch (topology)
|
return topology switch
|
||||||
{
|
{
|
||||||
case InputTopology.Points: return "points";
|
InputTopology.Points => "points",
|
||||||
case InputTopology.Lines: return "lines";
|
InputTopology.Lines => "lines",
|
||||||
case InputTopology.LinesAdjacency: return "lines_adjacency";
|
InputTopology.LinesAdjacency => "lines_adjacency",
|
||||||
case InputTopology.Triangles: return "triangles";
|
InputTopology.Triangles => "triangles",
|
||||||
case InputTopology.TrianglesAdjacency: return "triangles_adjacency";
|
InputTopology.TrianglesAdjacency => "triangles_adjacency",
|
||||||
}
|
_ => "points"
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
return "points";
|
public static int ToInputVertices(this InputTopology topology)
|
||||||
|
{
|
||||||
|
return topology switch
|
||||||
|
{
|
||||||
|
InputTopology.Points => 1,
|
||||||
|
InputTopology.Lines or
|
||||||
|
InputTopology.LinesAdjacency => 2,
|
||||||
|
InputTopology.Triangles or
|
||||||
|
InputTopology.TrianglesAdjacency => 3,
|
||||||
|
_ => 1
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -73,6 +73,26 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
|
||||||
|
{
|
||||||
|
return type switch
|
||||||
|
{
|
||||||
|
VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
|
||||||
|
VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
|
||||||
|
VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
|
||||||
|
VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
|
||||||
|
VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
|
||||||
|
VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
|
||||||
|
VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
|
||||||
|
VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
|
||||||
|
VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
|
||||||
|
VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
|
||||||
|
VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
|
||||||
|
VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
|
||||||
|
_ => src
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)
|
public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)
|
||||||
{
|
{
|
||||||
if (!setCC)
|
if (!setCC)
|
||||||
|
@ -118,6 +138,15 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high)
|
||||||
|
{
|
||||||
|
low = context.BitwiseNot(low);
|
||||||
|
high = context.BitwiseNot(high);
|
||||||
|
low = AddWithCarry(context, low, Const(1), out Operand carryOut);
|
||||||
|
high = context.IAdd(high, carryOut);
|
||||||
|
return (low, high);
|
||||||
|
}
|
||||||
|
|
||||||
public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut)
|
public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut)
|
||||||
{
|
{
|
||||||
Operand result = context.IAdd(lhs, rhs);
|
Operand result = context.IAdd(lhs, rhs);
|
||||||
|
|
|
@ -168,10 +168,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
{
|
{
|
||||||
InstIsberd op = context.GetOp<InstIsberd>();
|
InstIsberd op = context.GetOp<InstIsberd>();
|
||||||
|
|
||||||
// This instruction performs a load from ISBE memory,
|
// This instruction performs a load from ISBE (Internal Stage Buffer Entry) memory.
|
||||||
// however it seems to be only used to get some vertex
|
// Here, we just propagate the offset, as the result from this instruction is usually
|
||||||
// input data, so we instead propagate the offset so that
|
// used with ALD to perform vertex load on geometry or tessellation shaders.
|
||||||
// it can be used on the attribute load.
|
// The offset is calculated as (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex.
|
||||||
|
// Since we hardcode PrimitiveIndex to zero, the offset is just VertexIndex.
|
||||||
context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA));
|
context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -94,31 +94,19 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
case SReg.InvocationInfo:
|
case SReg.InvocationInfo:
|
||||||
if (context.Config.Stage != ShaderStage.Compute && context.Config.Stage != ShaderStage.Fragment)
|
if (context.Config.Stage != ShaderStage.Compute && context.Config.Stage != ShaderStage.Fragment)
|
||||||
{
|
{
|
||||||
Operand primitiveId = Attribute(AttributeConsts.PrimitiveId);
|
// Note: The lowest 8 bits seem to contain some primitive index,
|
||||||
Operand patchVerticesIn;
|
// but it seems to be NVIDIA implementation specific as it's only used
|
||||||
|
// to calculate ISBE offsets, so we can just keep it as zero.
|
||||||
|
|
||||||
if (context.Config.Stage == ShaderStage.TessellationEvaluation)
|
if (context.Config.Stage == ShaderStage.TessellationControl ||
|
||||||
|
context.Config.Stage == ShaderStage.TessellationEvaluation)
|
||||||
{
|
{
|
||||||
patchVerticesIn = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16));
|
src = context.ShiftLeft(Attribute(AttributeConsts.PatchVerticesIn), Const(16));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology();
|
src = Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16);
|
||||||
|
|
||||||
int inputVertices = inputTopology switch
|
|
||||||
{
|
|
||||||
InputTopology.Points => 1,
|
|
||||||
InputTopology.Lines or
|
|
||||||
InputTopology.LinesAdjacency => 2,
|
|
||||||
InputTopology.Triangles or
|
|
||||||
InputTopology.TrianglesAdjacency => 3,
|
|
||||||
_ => 1
|
|
||||||
};
|
|
||||||
|
|
||||||
patchVerticesIn = Const(inputVertices << 16);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
src = context.BitwiseOr(primitiveId, patchVerticesIn);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
using Ryujinx.Graphics.Shader.Decoders;
|
using Ryujinx.Graphics.Shader.Decoders;
|
||||||
|
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
||||||
using Ryujinx.Graphics.Shader.Translation;
|
using Ryujinx.Graphics.Shader.Translation;
|
||||||
|
|
||||||
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
|
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
|
||||||
|
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
|
||||||
|
|
||||||
namespace Ryujinx.Graphics.Shader.Instructions
|
namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
{
|
{
|
||||||
|
@ -11,8 +13,106 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
{
|
{
|
||||||
InstVmad op = context.GetOp<InstVmad>();
|
InstVmad op = context.GetOp<InstVmad>();
|
||||||
|
|
||||||
// TODO: Implement properly.
|
bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0;
|
||||||
context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcC));
|
bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0;
|
||||||
|
|
||||||
|
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
|
||||||
|
Operand srcC = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB);
|
||||||
|
Operand srcB;
|
||||||
|
|
||||||
|
if (op.BVideo)
|
||||||
|
{
|
||||||
|
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int imm = op.Imm16;
|
||||||
|
|
||||||
|
if (bSigned)
|
||||||
|
{
|
||||||
|
imm = (imm << 16) >> 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
srcB = Const(imm);
|
||||||
|
}
|
||||||
|
|
||||||
|
Operand productLow = context.IMultiply(srcA, srcB);
|
||||||
|
Operand productHigh;
|
||||||
|
|
||||||
|
if (aSigned == bSigned)
|
||||||
|
{
|
||||||
|
productHigh = aSigned
|
||||||
|
? context.MultiplyHighS32(srcA, srcB)
|
||||||
|
: context.MultiplyHighU32(srcA, srcB);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Operand temp = aSigned
|
||||||
|
? context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31)))
|
||||||
|
: context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31)));
|
||||||
|
|
||||||
|
productHigh = context.IAdd(temp, context.MultiplyHighU32(srcA, srcB));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (op.AvgMode == AvgMode.NegA)
|
||||||
|
{
|
||||||
|
(productLow, productHigh) = InstEmitAluHelper.NegateLong(context, productLow, productHigh);
|
||||||
|
}
|
||||||
|
|
||||||
|
Operand resLow = InstEmitAluHelper.AddWithCarry(context, productLow, srcC, out Operand sumCarry);
|
||||||
|
Operand resHigh = context.IAdd(productHigh, sumCarry);
|
||||||
|
|
||||||
|
if (op.AvgMode == AvgMode.PlusOne)
|
||||||
|
{
|
||||||
|
resLow = InstEmitAluHelper.AddWithCarry(context, resLow, Const(1), out Operand poCarry);
|
||||||
|
resHigh = context.IAdd(resHigh, poCarry);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool resSigned = op.ASelect == VectorSelect.S32 ||
|
||||||
|
op.BSelect == VectorSelect.S32 ||
|
||||||
|
op.AvgMode == AvgMode.NegB ||
|
||||||
|
op.AvgMode == AvgMode.NegA;
|
||||||
|
|
||||||
|
int shift = op.VideoScale switch
|
||||||
|
{
|
||||||
|
VideoScale.Shr7 => 7,
|
||||||
|
VideoScale.Shr15 => 15,
|
||||||
|
_ => 0
|
||||||
|
};
|
||||||
|
|
||||||
|
if (shift != 0)
|
||||||
|
{
|
||||||
|
// Low = (Low >> Shift) | (High << (32 - Shift))
|
||||||
|
// High >>= Shift
|
||||||
|
resLow = context.ShiftRightU32(resLow, Const(shift));
|
||||||
|
resLow = context.BitwiseOr(resLow, context.ShiftLeft(resHigh, Const(32 - shift)));
|
||||||
|
resHigh = resSigned
|
||||||
|
? context.ShiftRightS32(resHigh, Const(shift))
|
||||||
|
: context.ShiftRightU32(resHigh, Const(shift));
|
||||||
|
}
|
||||||
|
|
||||||
|
Operand res = resLow;
|
||||||
|
|
||||||
|
if (op.Sat)
|
||||||
|
{
|
||||||
|
Operand sign = context.ShiftRightS32(resHigh, Const(31));
|
||||||
|
|
||||||
|
if (resSigned)
|
||||||
|
{
|
||||||
|
Operand overflow = context.ICompareNotEqual(resHigh, context.ShiftRightS32(resLow, Const(31)));
|
||||||
|
Operand clampValue = context.ConditionalSelect(sign, Const(int.MinValue), Const(int.MaxValue));
|
||||||
|
res = context.ConditionalSelect(overflow, clampValue, resLow);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Operand overflow = context.ICompareNotEqual(resHigh, Const(0));
|
||||||
|
res = context.ConditionalSelect(overflow, context.BitwiseNot(sign), resLow);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
context.Copy(GetDest(op.Dest), res);
|
||||||
|
|
||||||
|
// TODO: CC.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -13,14 +13,13 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
{
|
{
|
||||||
InstVmnmx op = context.GetOp<InstVmnmx>();
|
InstVmnmx op = context.GetOp<InstVmnmx>();
|
||||||
|
|
||||||
Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
|
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
|
||||||
Operand srcC = GetSrcReg(context, op.SrcC);
|
Operand srcC = GetSrcReg(context, op.SrcC);
|
||||||
|
|
||||||
Operand srcB;
|
Operand srcB;
|
||||||
|
|
||||||
if (op.BVideo)
|
if (op.BVideo)
|
||||||
{
|
{
|
||||||
srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
|
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -124,13 +123,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
{
|
{
|
||||||
InstVsetp op = context.GetOp<InstVsetp>();
|
InstVsetp op = context.GetOp<InstVsetp>();
|
||||||
|
|
||||||
Operand srcA = Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
|
Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
|
||||||
|
|
||||||
Operand srcB;
|
Operand srcB;
|
||||||
|
|
||||||
if (op.BVideo)
|
if (op.BVideo)
|
||||||
{
|
{
|
||||||
srcB = Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
|
srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -181,25 +179,5 @@ namespace Ryujinx.Graphics.Shader.Instructions
|
||||||
context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res);
|
context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res);
|
||||||
context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res);
|
context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
|
|
||||||
{
|
|
||||||
return type switch
|
|
||||||
{
|
|
||||||
VectorSelect.U8B0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
|
|
||||||
VectorSelect.U8B1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
|
|
||||||
VectorSelect.U8B2 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
|
|
||||||
VectorSelect.U8B3 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
|
|
||||||
VectorSelect.U16H0 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
|
|
||||||
VectorSelect.U16H1 => ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
|
|
||||||
VectorSelect.S8B0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8),
|
|
||||||
VectorSelect.S8B1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8),
|
|
||||||
VectorSelect.S8B2 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8),
|
|
||||||
VectorSelect.S8B3 => SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8),
|
|
||||||
VectorSelect.S16H0 => SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16),
|
|
||||||
VectorSelect.S16H1 => SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16),
|
|
||||||
_ => src
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
Reference in a new issue