0
0
Fork 0

Implement remaining shader double-precision instructions (#2845)

* Implement remaining shader double-precision instructions

* Shader cache version bump
This commit is contained in:
gdkchan 2021-12-08 17:54:12 -03:00 committed by GitHub
parent a0aa87366c
commit 650cc41c02
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 282 additions and 121 deletions

View file

@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <summary>
/// Version of the codegen (to be changed when the codegen or guest format changes).
/// </summary>
private const ulong ShaderCodeGenVersion = 2876;
private const ulong ShaderCodeGenVersion = 2845;
// Progress reporting helpers
private volatile int _shaderCount;

View file

@ -35,8 +35,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
VariableType type = GetSrcVarType(operation.Inst, 0);
string srcExpr = GetSoureExpr(context, src, type);
string zero;
NumberFormatter.TryFormat(0, type, out string zero);
if (type == VariableType.F64)
{
zero = "0.0";
}
else
{
NumberFormatter.TryFormat(0, type, out zero);
}
// Starting in the 496.13 NVIDIA driver, there's an issue with assigning variables to negated expressions.
// (-expr) does not work, but (0.0 - expr) does. This should be removed once the issue is resolved.

View file

@ -10,7 +10,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public static bool TryFormat(int value, VariableType dstType, out string formatted)
{
if (dstType == VariableType.F32 || dstType == VariableType.F64)
if (dstType == VariableType.F32)
{
return TryFormatFloat(BitConverter.Int32BitsToSingle(value), out formatted);
}

View file

@ -75,69 +75,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Config.GpuAccessor.Log("Shader instruction Cs2r is not implemented.");
}
/// <summary>
/// Placeholder emitter for the DmnmxR shader instruction.
/// Fetches the current operation as <c>InstDmnmxR</c> and logs that the instruction is not implemented.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation and the GPU accessor used for logging</param>
public static void DmnmxR(EmitterContext context)
{
    // The stub has no use for the fetched operation, so the result is discarded.
    _ = context.GetOp<InstDmnmxR>();

    context.Config.GpuAccessor.Log("Shader instruction DmnmxR is not implemented.");
}
/// <summary>
/// Placeholder emitter for the DmnmxI shader instruction.
/// Fetches the current operation as <c>InstDmnmxI</c> and logs that the instruction is not implemented.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation and the GPU accessor used for logging</param>
public static void DmnmxI(EmitterContext context)
{
    // The stub has no use for the fetched operation, so the result is discarded.
    _ = context.GetOp<InstDmnmxI>();

    context.Config.GpuAccessor.Log("Shader instruction DmnmxI is not implemented.");
}
/// <summary>
/// Placeholder emitter for the DmnmxC shader instruction.
/// Fetches the current operation as <c>InstDmnmxC</c> and logs that the instruction is not implemented.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation and the GPU accessor used for logging</param>
public static void DmnmxC(EmitterContext context)
{
    // The stub has no use for the fetched operation, so the result is discarded.
    _ = context.GetOp<InstDmnmxC>();

    context.Config.GpuAccessor.Log("Shader instruction DmnmxC is not implemented.");
}
/// <summary>
/// Placeholder emitter for the DsetR shader instruction.
/// Fetches the current operation as <c>InstDsetR</c> and logs that the instruction is not implemented.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation and the GPU accessor used for logging</param>
public static void DsetR(EmitterContext context)
{
    // The stub has no use for the fetched operation, so the result is discarded.
    _ = context.GetOp<InstDsetR>();

    context.Config.GpuAccessor.Log("Shader instruction DsetR is not implemented.");
}
/// <summary>
/// Placeholder emitter for the DsetI shader instruction.
/// Fetches the current operation as <c>InstDsetI</c> and logs that the instruction is not implemented.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation and the GPU accessor used for logging</param>
public static void DsetI(EmitterContext context)
{
    // The stub has no use for the fetched operation, so the result is discarded.
    _ = context.GetOp<InstDsetI>();

    context.Config.GpuAccessor.Log("Shader instruction DsetI is not implemented.");
}
/// <summary>
/// Placeholder emitter for the DsetC shader instruction.
/// Fetches the current operation as <c>InstDsetC</c> and logs that the instruction is not implemented.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation and the GPU accessor used for logging</param>
public static void DsetC(EmitterContext context)
{
    // The stub has no use for the fetched operation, so the result is discarded.
    _ = context.GetOp<InstDsetC>();

    context.Config.GpuAccessor.Log("Shader instruction DsetC is not implemented.");
}
/// <summary>
/// Placeholder emitter for the DsetpR shader instruction.
/// Fetches the current operation as <c>InstDsetpR</c> and logs that the instruction is not implemented.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation and the GPU accessor used for logging</param>
public static void DsetpR(EmitterContext context)
{
    // The stub has no use for the fetched operation, so the result is discarded.
    _ = context.GetOp<InstDsetpR>();

    context.Config.GpuAccessor.Log("Shader instruction DsetpR is not implemented.");
}
/// <summary>
/// Placeholder emitter for the DsetpI shader instruction.
/// Fetches the current operation as <c>InstDsetpI</c> and logs that the instruction is not implemented.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation and the GPU accessor used for logging</param>
public static void DsetpI(EmitterContext context)
{
    // The stub has no use for the fetched operation, so the result is discarded.
    _ = context.GetOp<InstDsetpI>();

    context.Config.GpuAccessor.Log("Shader instruction DsetpI is not implemented.");
}
/// <summary>
/// Placeholder emitter for the DsetpC shader instruction.
/// Fetches the current operation as <c>InstDsetpC</c> and logs that the instruction is not implemented.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation and the GPU accessor used for logging</param>
public static void DsetpC(EmitterContext context)
{
    // The stub has no use for the fetched operation, so the result is discarded.
    _ = context.GetOp<InstDsetpC>();

    context.Config.GpuAccessor.Log("Shader instruction DsetpC is not implemented.");
}
public static void FchkR(EmitterContext context)
{
InstFchkR op = context.GetOp<InstFchkR>();

View file

@ -98,7 +98,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
var src = GetSrcReg(context, op.SrcB);
EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat);
EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
}
public static void I2iI(EmitterContext context)
@ -107,7 +107,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
var src = GetSrcImm(context, Imm20ToSInt(op.Imm20));
EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat);
EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
}
public static void I2iC(EmitterContext context)
@ -116,7 +116,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
var src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat);
EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
}
private static void EmitF2F(
@ -176,7 +176,6 @@ namespace Ryujinx.Graphics.Shader.Instructions
if (dstType == IDstFmt.U64)
{
context.Config.GpuAccessor.Log("Unimplemented 64-bits F2I.");
return;
}
Instruction fpType = srcType.ToInstFPType();
@ -198,7 +197,9 @@ namespace Ryujinx.Graphics.Shader.Instructions
if (!isSignedInt)
{
// Negative float to uint cast is undefined, so we clamp the value before conversion.
srcB = context.FPMaximum(srcB, ConstF(0), fpType);
Operand c0 = srcType == DstFmt.F64 ? context.PackDouble2x32(0.0) : ConstF(0);
srcB = context.FPMaximum(srcB, c0, fpType);
}
if (srcType == DstFmt.F64)
@ -292,7 +293,8 @@ namespace Ryujinx.Graphics.Shader.Instructions
int rd,
bool absolute,
bool negate,
bool saturate)
bool saturate,
bool writeCC)
{
if ((srcType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32 || (dstType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32)
{
@ -337,7 +339,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(GetDest(rd), src);
// TODO: CC.
SetZnFlags(context, src, writeCC);
}
private static Operand UnpackReg(EmitterContext context, DstFmt floatType, bool h, int reg)

View file

@ -528,18 +528,5 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(GetDest(rd), GetHalfPacked(context, swizzle, res, rd));
}
/// <summary>
/// Copies a result value into the destination register(s) identified by <paramref name="rd"/>.
/// </summary>
/// <param name="context">Emitter context used to emit the copy operations</param>
/// <param name="value">Value to store</param>
/// <param name="rd">Destination register index, resolved through GetDest (and GetDest2 for FP64)</param>
/// <param name="isFP64">True if <paramref name="value"/> is a packed double that must be split in two halves</param>
private static void SetDest(EmitterContext context, Operand value, int rd, bool isFP64)
{
    if (!isFP64)
    {
        // Single-register result: one plain copy is enough.
        context.Copy(GetDest(rd), value);

        return;
    }

    // A double is split in two 32-bit halves: the low half goes to GetDest(rd), the high half to GetDest2(rd).
    // NOTE(review): GetDest2 presumably resolves to the second register of the pair - confirm against its definition.
    context.Copy(GetDest(rd), context.UnpackDouble2x32Low(value));
    context.Copy(GetDest2(rd), context.UnpackDouble2x32High(value));
}
}
}

View file

@ -11,6 +11,156 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits code for the DsetR instruction.
/// Both operands are read from registers as FP64 values, and the work is delegated to EmitFset with the FP64 flag set.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation</param>
public static void DsetR(EmitterContext context)
{
    InstDsetR inst = context.GetOp<InstDsetR>();

    var lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    var rhs = GetSrcReg(context, inst.SrcB, isFP64: true);

    EmitFset(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
        inst.BVal, inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Emits code for the DsetI instruction.
/// The first operand is read from a register as FP64; the second is built from the 20-bit immediate
/// (converted with Imm20ToFloat and then fetched as an FP64 immediate). The work is delegated to EmitFset.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation</param>
public static void DsetI(EmitterContext context)
{
    InstDsetI inst = context.GetOp<InstDsetI>();

    var lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    var rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);

    EmitFset(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
        inst.BVal, inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Emits code for the DsetC instruction.
/// The first operand is read from a register as FP64; the second is read from the constant buffer
/// slot/offset encoded on the operation, also as FP64. The work is delegated to EmitFset.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation</param>
public static void DsetC(EmitterContext context)
{
    InstDsetC inst = context.GetOp<InstDsetC>();

    var lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    var rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

    EmitFset(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.Dest,
        inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
        inst.BVal, inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Emits code for the DsetpR instruction.
/// Both operands are read from registers as FP64 values, and the work is delegated to EmitFsetp with the FP64 flag set.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation</param>
public static void DsetpR(EmitterContext context)
{
    InstDsetpR inst = context.GetOp<InstDsetpR>();

    var lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    var rhs = GetSrcReg(context, inst.SrcB, isFP64: true);

    // writeCC is always passed as false here; no WriteCC field is read from this operation.
    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
        writeCC: false,
        isFP64: true);
}
/// <summary>
/// Emits code for the DsetpI instruction.
/// The first operand is read from a register as FP64; the second is built from the 20-bit immediate
/// (converted with Imm20ToFloat and then fetched as an FP64 immediate). The work is delegated to EmitFsetp.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation</param>
public static void DsetpI(EmitterContext context)
{
    InstDsetpI inst = context.GetOp<InstDsetpI>();

    var lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    var rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);

    // writeCC is always passed as false here; no WriteCC field is read from this operation.
    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
        writeCC: false,
        isFP64: true);
}
/// <summary>
/// Emits code for the DsetpC instruction.
/// The first operand is read from a register as FP64; the second is read from the constant buffer
/// slot/offset encoded on the operation, also as FP64. The work is delegated to EmitFsetp.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation</param>
public static void DsetpC(EmitterContext context)
{
    InstDsetpC inst = context.GetOp<InstDsetpC>();

    var lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    var rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

    // writeCC is always passed as false here; no WriteCC field is read from this operation.
    EmitFsetp(
        context, inst.FComp, inst.Bop,
        lhs, rhs,
        inst.SrcPred, inst.SrcPredInv,
        inst.DestPred, inst.DestPredInv,
        inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
        writeCC: false,
        isFP64: true);
}
public static void FcmpR(EmitterContext context)
{
InstFcmpR op = context.GetOp<InstFcmpR>();
@ -240,12 +390,15 @@ namespace Ryujinx.Graphics.Shader.Instructions
bool negateA,
bool negateB,
bool boolFloat,
bool writeCC)
bool writeCC,
bool isFP64 = false)
{
srcA = context.FPAbsNeg(srcA, absoluteA, negateA);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB);
Instruction fpType = isFP64 ? Instruction.FP64 : Instruction.FP32;
Operand res = GetFPComparison(context, cmpOp, srcA, srcB);
srcA = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
Operand res = GetFPComparison(context, cmpOp, srcA, srcB, fpType);
Operand pred = GetPredicate(context, srcPred, srcPredInv);
res = GetPredLogicalOp(context, logicOp, res, pred);
@ -282,12 +435,15 @@ namespace Ryujinx.Graphics.Shader.Instructions
bool absoluteB,
bool negateA,
bool negateB,
bool writeCC)
bool writeCC,
bool isFP64 = false)
{
srcA = context.FPAbsNeg(srcA, absoluteA, negateA);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB);
Instruction fpType = isFP64 ? Instruction.FP64 : Instruction.FP32;
Operand p0Res = GetFPComparison(context, cmpOp, srcA, srcB);
srcA = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
Operand p0Res = GetFPComparison(context, cmpOp, srcA, srcB, fpType);
Operand p1Res = context.BitwiseNot(p0Res);
Operand pred = GetPredicate(context, srcPred, srcPredInv);
@ -367,7 +523,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(Register(destPredInv, RegisterType.Predicate), p1Res);
}
private static Operand GetFPComparison(EmitterContext context, FComp cond, Operand srcA, Operand srcB)
private static Operand GetFPComparison(EmitterContext context, FComp cond, Operand srcA, Operand srcB, Instruction fpType = Instruction.FP32)
{
Operand res;
@ -381,7 +537,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
}
else if (cond == FComp.Nan || cond == FComp.Num)
{
res = context.BitwiseOr(context.IsNan(srcA), context.IsNan(srcB));
res = context.BitwiseOr(context.IsNan(srcA, fpType), context.IsNan(srcB, fpType));
if (cond == FComp.Num)
{
@ -404,12 +560,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
default: throw new ArgumentException($"Unexpected condition \"{cond}\".");
}
res = context.Add(inst | Instruction.FP32, Local(), srcA, srcB);
res = context.Add(inst | fpType, Local(), srcA, srcB);
if ((cond & FComp.Nan) != 0)
{
res = context.BitwiseOr(res, context.IsNan(srcA));
res = context.BitwiseOr(res, context.IsNan(srcB));
res = context.BitwiseOr(res, context.IsNan(srcA, fpType));
res = context.BitwiseOr(res, context.IsNan(srcB, fpType));
}
}

View file

@ -9,6 +9,39 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
static partial class InstEmit
{
/// <summary>
/// Emits code for the DmnmxR instruction.
/// Both operands are read from registers as FP64 values; the source predicate is resolved and
/// everything is forwarded to EmitFmnmx with the FP64 flag set.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation</param>
public static void DmnmxR(EmitterContext context)
{
    InstDmnmxR inst = context.GetOp<InstDmnmxR>();

    var lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    var rhs = GetSrcReg(context, inst.SrcB, isFP64: true);

    // Predicate passed on to EmitFmnmx together with both operands.
    var selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

    EmitFmnmx(
        context,
        lhs, rhs, selector,
        inst.Dest,
        inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
        inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Emits code for the DmnmxI instruction.
/// The first operand is read from a register as FP64; the second is built from the 20-bit immediate
/// (converted with Imm20ToFloat and then fetched as an FP64 immediate). The source predicate is resolved
/// and everything is forwarded to EmitFmnmx with the FP64 flag set.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation</param>
public static void DmnmxI(EmitterContext context)
{
    InstDmnmxI inst = context.GetOp<InstDmnmxI>();

    var lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    var rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);

    // Predicate passed on to EmitFmnmx together with both operands.
    var selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

    EmitFmnmx(
        context,
        lhs, rhs, selector,
        inst.Dest,
        inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
        inst.WriteCC,
        isFP64: true);
}
/// <summary>
/// Emits code for the DmnmxC instruction.
/// The first operand is read from a register as FP64; the second is read from the constant buffer
/// slot/offset encoded on the operation, also as FP64. The source predicate is resolved and
/// everything is forwarded to EmitFmnmx with the FP64 flag set.
/// </summary>
/// <param name="context">Emitter context that supplies the current operation</param>
public static void DmnmxC(EmitterContext context)
{
    InstDmnmxC inst = context.GetOp<InstDmnmxC>();

    var lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
    var rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

    // Predicate passed on to EmitFmnmx together with both operands.
    var selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

    EmitFmnmx(
        context,
        lhs, rhs, selector,
        inst.Dest,
        inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
        inst.WriteCC,
        isFP64: true);
}
public static void FmnmxR(EmitterContext context)
{
InstFmnmxR op = context.GetOp<InstFmnmxR>();
@ -52,19 +85,22 @@ namespace Ryujinx.Graphics.Shader.Instructions
bool absoluteB,
bool negateA,
bool negateB,
bool writeCC)
bool writeCC,
bool isFP64 = false)
{
srcA = context.FPAbsNeg(srcA, absoluteA, negateA);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB);
Instruction fpType = isFP64 ? Instruction.FP64 : Instruction.FP32;
Operand resMin = context.FPMinimum(srcA, srcB);
Operand resMax = context.FPMaximum(srcA, srcB);
srcA = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
srcB = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
Operand dest = GetDest(rd);
Operand resMin = context.FPMinimum(srcA, srcB, fpType);
Operand resMax = context.FPMaximum(srcA, srcB, fpType);
context.Copy(dest, context.ConditionalSelect(srcPred, resMin, resMax));
Operand res = context.ConditionalSelect(srcPred, resMin, resMax);
SetFPZnFlags(context, dest, writeCC);
SetDest(context, res, rd, isFP64);
SetFPZnFlags(context, res, writeCC, fpType);
}
}
}

View file

@ -58,7 +58,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
if (isFP64)
{
return context.FP32ConvertToFP64(Const(imm));
return context.PackDouble2x32(Const(0), Const(imm));
}
else
{
@ -218,6 +218,19 @@ namespace Ryujinx.Graphics.Shader.Instructions
return local;
}
/// <summary>
/// Copies a result value into the destination register(s) identified by <paramref name="rd"/>.
/// </summary>
/// <param name="context">Emitter context used to emit the copy operations</param>
/// <param name="value">Value to store</param>
/// <param name="rd">Destination register index, resolved through GetDest (and GetDest2 for FP64)</param>
/// <param name="isFP64">True if <paramref name="value"/> is a packed double that must be split in two halves</param>
public static void SetDest(EmitterContext context, Operand value, int rd, bool isFP64)
{
    if (!isFP64)
    {
        // Single-register result: one plain copy is enough.
        context.Copy(GetDest(rd), value);

        return;
    }

    // A double is split in two 32-bit halves: the low half goes to GetDest(rd), the high half to GetDest2(rd).
    // NOTE(review): GetDest2 presumably resolves to the second register of the pair - confirm against its definition.
    context.Copy(GetDest(rd), context.UnpackDouble2x32Low(value));
    context.Copy(GetDest2(rd), context.UnpackDouble2x32High(value));
}
public static int Imm16ToSInt(int imm16)
{
return (short)imm16;

View file

@ -61,11 +61,23 @@ namespace Ryujinx.Graphics.Shader.Instructions
res = context.FPReciprocalSquareRoot(res);
break;
case MufuOp.Rcp64h:
res = context.PackDouble2x32(OperandHelper.Const(0), res);
res = context.UnpackDouble2x32High(context.FPReciprocal(res, Instruction.FP64));
break;
case MufuOp.Rsq64h:
res = context.PackDouble2x32(OperandHelper.Const(0), res);
res = context.UnpackDouble2x32High(context.FPReciprocalSquareRoot(res, Instruction.FP64));
break;
case MufuOp.Sqrt:
res = context.FPSquareRoot(res);
break;
default: /* TODO */ break;
default:
context.Config.GpuAccessor.Log($"Invalid MUFU operation \"{op.MufuOp}\".");
break;
}
context.Copy(GetDest(op.Dest), context.FPSaturate(res, op.Sat));

View file

@ -87,7 +87,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.ImageLoad, VariableType.F32);
Add(Instruction.ImageStore, VariableType.None);
Add(Instruction.ImageAtomic, VariableType.S32);
Add(Instruction.IsNan, VariableType.Bool, VariableType.F32);
Add(Instruction.IsNan, VariableType.Bool, VariableType.Scalar);
Add(Instruction.LoadAttribute, VariableType.F32, VariableType.S32, VariableType.S32, VariableType.S32);
Add(Instruction.LoadConstant, VariableType.F32, VariableType.S32, VariableType.S32);
Add(Instruction.LoadGlobal, VariableType.U32, VariableType.S32, VariableType.S32);

View file

@ -1,4 +1,5 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
@ -271,9 +272,9 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.FP32 | Instruction.Cosine, Local(), a);
}
public static Operand FPDivide(this EmitterContext context, Operand a, Operand b)
public static Operand FPDivide(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
{
return context.Add(Instruction.FP32 | Instruction.Divide, Local(), a, b);
return context.Add(fpType | Instruction.Divide, Local(), a, b);
}
public static Operand FPExponentB2(this EmitterContext context, Operand a)
@ -301,9 +302,9 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(fpType | Instruction.Maximum, Local(), a, b);
}
public static Operand FPMinimum(this EmitterContext context, Operand a, Operand b)
public static Operand FPMinimum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
{
return context.Add(Instruction.FP32 | Instruction.Minimum, Local(), a, b);
return context.Add(fpType | Instruction.Minimum, Local(), a, b);
}
public static Operand FPMultiply(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
@ -326,14 +327,14 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(fpType | Instruction.Negate, Local(), a);
}
public static Operand FPReciprocal(this EmitterContext context, Operand a)
public static Operand FPReciprocal(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
{
return context.FPDivide(ConstF(1), a);
return context.FPDivide(fpType == Instruction.FP64 ? context.PackDouble2x32(1.0) : ConstF(1), a, fpType);
}
public static Operand FPReciprocalSquareRoot(this EmitterContext context, Operand a)
public static Operand FPReciprocalSquareRoot(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
{
return context.Add(Instruction.FP32 | Instruction.ReciprocalSquareRoot, Local(), a);
return context.Add(fpType | Instruction.ReciprocalSquareRoot, Local(), a);
}
public static Operand FPRound(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
@ -353,7 +354,9 @@ namespace Ryujinx.Graphics.Shader.Translation
public static Operand FPSaturate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
{
return context.Add(fpType | Instruction.Clamp, Local(), a, ConstF(0), ConstF(1));
return fpType == Instruction.FP64
? context.Add(fpType | Instruction.Clamp, Local(), a, context.PackDouble2x32(0.0), context.PackDouble2x32(1.0))
: context.Add(fpType | Instruction.Clamp, Local(), a, ConstF(0), ConstF(1));
}
public static Operand FPSine(this EmitterContext context, Operand a)
@ -541,9 +544,9 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.Subtract, Local(), a, b);
}
public static Operand IsNan(this EmitterContext context, Operand a)
public static Operand IsNan(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
{
return context.Add(Instruction.IsNan, Local(), a);
return context.Add(fpType | Instruction.IsNan, Local(), a);
}
public static Operand LoadAttribute(this EmitterContext context, Operand a, Operand b, Operand c)
@ -595,6 +598,13 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.MultiplyHighU32, Local(), a, b);
}
/// <summary>
/// Emits a PackDouble2x32 operation whose two inputs are the constant 32-bit halves of a double literal.
/// </summary>
/// <param name="context">Emitter context the operation is added to</param>
/// <param name="value">Double-precision constant to materialize</param>
/// <returns>The operand returned by adding the pack operation to the context</returns>
public static Operand PackDouble2x32(this EmitterContext context, double value)
{
    long bits = BitConverter.DoubleToInt64Bits(value);

    // Split the 64-bit pattern in two 32-bit words; the low word is the first pack operand.
    int lowWord = (int)bits;
    int highWord = (int)(bits >> 32);

    return context.Add(Instruction.PackDouble2x32, Local(), Const(lowWord), Const(highWord));
}
public static Operand PackDouble2x32(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.PackDouble2x32, Local(), a, b);