Implement CPU FCVT Half <-> Double conversion variants (#3439)
* Half <-> Double conversion support
* Add tests, fast path and deduplicate SoftFloat code
* PPTC version
This commit is contained in:
parent
b46b63e06a
commit
f7ef6364b7
5 changed files with 729 additions and 406 deletions
|
@ -105,11 +105,48 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
|
else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
|
||||||
{
|
{
|
||||||
throw new NotImplementedException("Double-precision to half-precision.");
|
if (Optimizations.UseF16c)
|
||||||
|
{
|
||||||
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
||||||
|
|
||||||
|
Operand n = GetVec(op.Rn);
|
||||||
|
|
||||||
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
|
||||||
|
res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
|
||||||
|
|
||||||
|
context.Copy(GetVec(op.Rd), res);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
|
||||||
|
|
||||||
|
Operand res = context.Call(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)), ne);
|
||||||
|
|
||||||
|
res = context.ZeroExtend16(OperandType.I64, res);
|
||||||
|
|
||||||
|
context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (op.Size == 3 && op.Opc == 1) // Double -> Half.
|
else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
|
||||||
{
|
{
|
||||||
throw new NotImplementedException("Half-precision to double-precision.");
|
if (Optimizations.UseF16c)
|
||||||
|
{
|
||||||
|
Operand n = GetVec(op.Rn);
|
||||||
|
|
||||||
|
Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn));
|
||||||
|
res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
|
||||||
|
res = context.VectorZeroUpper64(res);
|
||||||
|
|
||||||
|
context.Copy(GetVec(op.Rd), res);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
|
||||||
|
|
||||||
|
Operand res = context.Call(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)), ne);
|
||||||
|
|
||||||
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else // Invalid encoding.
|
else // Invalid encoding.
|
||||||
{
|
{
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -206,6 +206,7 @@ namespace ARMeilleure.Translation
|
||||||
SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedSrcUnsignedDstSatQ)));
|
SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedSrcUnsignedDstSatQ)));
|
||||||
|
|
||||||
SetDelegateInfo(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)));
|
SetDelegateInfo(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)));
|
||||||
|
SetDelegateInfo(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)));
|
||||||
|
|
||||||
SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAdd)));
|
SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAdd)));
|
||||||
SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAddFpscr))); // A32 only.
|
SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAddFpscr))); // A32 only.
|
||||||
|
@ -294,6 +295,8 @@ namespace ARMeilleure.Translation
|
||||||
SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtStepFused)));
|
SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtStepFused)));
|
||||||
SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSqrt)));
|
SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSqrt)));
|
||||||
SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSub)));
|
SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSub)));
|
||||||
|
|
||||||
|
SetDelegateInfo(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||||
|
|
||||||
private const uint InternalVersion = 3362; //! To be incremented manually for each change to the ARMeilleure project.
|
private const uint InternalVersion = 3439; //! To be incremented manually for each change to the ARMeilleure project.
|
||||||
|
|
||||||
private const string ActualDir = "0";
|
private const string ActualDir = "0";
|
||||||
private const string BackupDir = "1";
|
private const string BackupDir = "1";
|
||||||
|
|
|
@ -825,6 +825,14 @@ namespace Ryujinx.Tests.Cpu
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Opcode source for the F_Cvt_S_DH test (referenced by name through its [ValueSource] attribute).
// Returns a single-entry array holding the FCVT encoding that converts double-precision D1 to half-precision H0.
private static uint[] _F_Cvt_S_DH_()
|
||||||

{
|
||||||

return new uint[]
|
||||||

{
|
||||||

0x1E63C020u // FCVT H0, D1
|
||||||

};
|
||||||

}
|
||||||
|
|
||||||
private static uint[] _F_Cvt_S_HS_()
|
private static uint[] _F_Cvt_S_HS_()
|
||||||
{
|
{
|
||||||
return new uint[]
|
return new uint[]
|
||||||
|
@ -833,6 +841,14 @@ namespace Ryujinx.Tests.Cpu
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Opcode source for the F_Cvt_S_HD test (referenced by name through its [ValueSource] attribute).
// Returns a single-entry array holding the FCVT encoding that converts half-precision H1 to double-precision D0.
private static uint[] _F_Cvt_S_HD_()
|
||||||

{
|
||||||

return new uint[]
|
||||||

{
|
||||||

0x1EE2C020u // FCVT D0, H1
|
||||||

};
|
||||||

}
|
||||||
|
|
||||||
private static uint[] _F_Cvt_ANZ_SU_S_S_()
|
private static uint[] _F_Cvt_ANZ_SU_S_S_()
|
||||||
{
|
{
|
||||||
return new uint[]
|
return new uint[]
|
||||||
|
@ -1998,6 +2014,22 @@ namespace Ryujinx.Tests.Cpu
|
||||||
CompareAgainstUnicorn();
|
CompareAgainstUnicorn();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scalar FCVT double -> half test.
// opcodes: instruction encodings supplied by _F_Cvt_S_DH_ (FCVT H0, D1).
// a:       64-bit input patterns supplied by the _1D_F_ value source (defined elsewhere in the test class).
// rMode:   rounding mode written into FPCR; only RMode.Rn is supplied here.
// Marked [Explicit], so NUnit runs it only when it is explicitly selected.
[Test, Pairwise] [Explicit]
|
||||||

public void F_Cvt_S_DH([ValueSource("_F_Cvt_S_DH_")] uint opcodes,
|
||||||

[ValueSource("_1D_F_")] ulong a,
|
||||||

[Values(RMode.Rn)] RMode rMode)
|
||||||

{
|
||||||

ulong z = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
|
||||||

V128 v0 = MakeVectorE0E1(z, z); // V0 (destination per the opcode) starts with z in both 64-bit elements; presumably so stale bits show up in the comparison - TODO confirm.
|
||||||

V128 v1 = MakeVectorE0(a); // V1 (source per the opcode) carries the input in element 0.
|
||||||


||||||

int fpcr = (int)rMode << (int)Fpcr.RMode; // Place the rounding mode at the Fpcr.RMode bit position.
|
||||||


||||||

SingleOpcode(opcodes, v0: v0, v1: v1, fpcr: fpcr); // Helper defined elsewhere; runs the single instruction with this initial state.
|
||||||


||||||

CompareAgainstUnicorn(); // Helper defined elsewhere; by its name, checks the result against the Unicorn reference - TODO confirm.
|
||||||

}
|
||||||
|
|
||||||
[Test, Pairwise] [Explicit]
|
[Test, Pairwise] [Explicit]
|
||||||
public void F_Cvt_S_HS([ValueSource("_F_Cvt_S_HS_")] uint opcodes,
|
public void F_Cvt_S_HS([ValueSource("_F_Cvt_S_HS_")] uint opcodes,
|
||||||
[ValueSource("_1H_F_")] ulong a)
|
[ValueSource("_1H_F_")] ulong a)
|
||||||
|
@ -2011,6 +2043,19 @@ namespace Ryujinx.Tests.Cpu
|
||||||
CompareAgainstUnicorn();
|
CompareAgainstUnicorn();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scalar FCVT half -> double test.
// opcodes: instruction encodings supplied by _F_Cvt_S_HD_ (FCVT D0, H1).
// a:       input patterns supplied by the _1H_F_ value source (defined elsewhere in the test class).
// No FPCR value is passed, so SingleOpcode uses whatever default it defines for that argument.
// Marked [Explicit], so NUnit runs it only when it is explicitly selected.
[Test, Pairwise] [Explicit]
|
||||||

public void F_Cvt_S_HD([ValueSource("_F_Cvt_S_HD_")] uint opcodes,
|
||||||

[ValueSource("_1H_F_")] ulong a)
|
||||||

{
|
||||||

ulong z = TestContext.CurrentContext.Random.NextULong(); // Random filler for the destination register.
|
||||||

V128 v0 = MakeVectorE0E1(z, z); // V0 (destination per the opcode) starts with z in both 64-bit elements; presumably so stale bits show up in the comparison - TODO confirm.
|
||||||

V128 v1 = MakeVectorE0(a); // V1 (source per the opcode) carries the input in element 0.
|
||||||


||||||

SingleOpcode(opcodes, v0: v0, v1: v1); // Helper defined elsewhere; runs the single instruction with this initial state.
|
||||||


||||||

CompareAgainstUnicorn(); // Helper defined elsewhere; by its name, checks the result against the Unicorn reference - TODO confirm.
|
||||||

}
|
||||||
|
|
||||||
[Test, Pairwise] [Explicit]
|
[Test, Pairwise] [Explicit]
|
||||||
public void F_Cvt_ANZ_SU_S_S([ValueSource("_F_Cvt_ANZ_SU_S_S_")] uint opcodes,
|
public void F_Cvt_ANZ_SU_S_S([ValueSource("_F_Cvt_ANZ_SU_S_S_")] uint opcodes,
|
||||||
[ValueSource("_1S_F_W_")] ulong a)
|
[ValueSource("_1S_F_W_")] ulong a)
|
||||||
|
|
Reference in a new issue