Lower precision of estimate instruction results to match Arm behavior (#1943)
* Lower precision of estimate instruction results to match Arm behavior * PTC version update * Nits
This commit is contained in:
parent
98d0240ce6
commit
dcce407071
2 changed files with 66 additions and 19 deletions
|
@ -1475,9 +1475,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
|
||||
if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
|
||||
{
|
||||
EmitScalarUnaryOpF(context, Intrinsic.X86Rcpss, 0);
|
||||
Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true);
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1494,9 +1496,16 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
|
||||
if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
|
||||
{
|
||||
EmitVectorUnaryOpF(context, Intrinsic.X86Rcpps, 0);
|
||||
Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1652,7 +1661,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
|
||||
EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1667,7 +1676,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitVectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
|
||||
EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1682,7 +1691,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitScalarRoundOpF(context, FPRoundingMode.ToNearest);
|
||||
EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1697,7 +1706,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitVectorRoundOpF(context, FPRoundingMode.ToNearest);
|
||||
EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1712,7 +1721,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
|
||||
EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1727,7 +1736,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitVectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
|
||||
EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1778,7 +1787,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitScalarRoundOpF(context, FPRoundingMode.TowardsZero);
|
||||
EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1793,7 +1802,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
if (Optimizations.UseSse41)
|
||||
{
|
||||
EmitVectorRoundOpF(context, FPRoundingMode.TowardsZero);
|
||||
EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1810,9 +1819,11 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
|
||||
if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
|
||||
{
|
||||
EmitScalarUnaryOpF(context, Intrinsic.X86Rsqrtss, 0);
|
||||
Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true);
|
||||
|
||||
context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1829,9 +1840,16 @@ namespace ARMeilleure.Instructions
|
|||
|
||||
int sizeF = op.Size & 1;
|
||||
|
||||
if (Optimizations.FastFP && Optimizations.UseSse && sizeF == 0)
|
||||
if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0)
|
||||
{
|
||||
EmitVectorUnaryOpF(context, Intrinsic.X86Rsqrtps, 0);
|
||||
Operand res = EmitSse41FP32RoundExp8(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false);
|
||||
|
||||
if (op.RegisterSize == RegisterSize.Simd64)
|
||||
{
|
||||
res = context.VectorZeroUpper64(res);
|
||||
}
|
||||
|
||||
context.Copy(GetVec(op.Rd), res);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3498,7 +3516,7 @@ namespace ARMeilleure.Instructions
|
|||
return context.ConditionalSelect(cmp, op1, op2);
|
||||
}
|
||||
|
||||
private static void EmitScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
|
||||
private static void EmitSse41ScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
|
||||
{
|
||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||
|
||||
|
@ -3520,7 +3538,7 @@ namespace ARMeilleure.Instructions
|
|||
context.Copy(GetVec(op.Rd), res);
|
||||
}
|
||||
|
||||
private static void EmitVectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
|
||||
private static void EmitSse41VectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode)
|
||||
{
|
||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||
|
||||
|
@ -3538,6 +3556,35 @@ namespace ARMeilleure.Instructions
|
|||
context.Copy(GetVec(op.Rd), res);
|
||||
}
|
||||
|
||||
/// <summary>
/// Rounds each 32-bit lane of <paramref name="value"/> so that its low 15 bits are cleared:
/// 0x4000 is added to the lane and the result is masked with 0xFFFF8000.
/// Lanes whose exponent field (mask 0x7F800000) is all ones are returned unchanged.
/// </summary>
/// <param name="context">Emitter context the intrinsics are appended to.</param>
/// <param name="value">Vector operand holding the 32-bit lanes to round.</param>
/// <param name="scalar">
/// Selects how the constants are materialized: <c>X86GetScalar</c> when true,
/// <c>X86GetAllElements</c> when false (presumably lane 0 only versus a broadcast
/// to every lane; confirm against those helpers).
/// </param>
/// <returns>A new operand holding the rounded lanes.</returns>
private static Operand EmitSse41FP32RoundExp8(ArmEmitterContext context, Operand value, bool scalar)
{
    const int RoundBit = 0x4000;                         // Half of the lowest retained bit (bit 15).
    const int TruncBits = unchecked((int)0xFFFF8000);    // Keeps bits 31..15 of each lane.
    const int ExponentBits = 0x7F800000;                 // Exponent field of a 32-bit float.

    Operand roundMask;
    Operand truncMask;
    Operand expMask;

    if (scalar)
    {
        roundMask = X86GetScalar(context, RoundBit);
        truncMask = X86GetScalar(context, TruncBits);
        expMask = X86GetScalar(context, ExponentBits);
    }
    else
    {
        roundMask = X86GetAllElements(context, RoundBit);
        truncMask = X86GetAllElements(context, TruncBits);
        expMask = X86GetAllElements(context, ExponentBits);
    }

    Operand oValue = value;

    // A lane is NaN/Inf when every exponent bit is set. Compare full 32-bit lanes so the
    // resulting mask is all ones or all zeros per lane, matching the width of the data.
    Operand masked = context.AddIntrinsic(Intrinsic.X86Pand, value, expMask);
    Operand isNaNInf = context.AddIntrinsic(Intrinsic.X86Pcmpeqd, masked, expMask);

    // The add must be a 32-bit lane add: when bits 15 and 14 are both set, the carry has to
    // propagate from bit 15 into bit 16 and above. A 16-bit add (Paddw) drops that carry and
    // yields a value that is rounded down by a full step instead of up.
    value = context.AddIntrinsic(Intrinsic.X86Paddd, value, roundMask);
    value = context.AddIntrinsic(Intrinsic.X86Pand, value, truncMask);

    // Keep the untouched input for NaN/Inf lanes, the rounded value everywhere else.
    return context.AddIntrinsic(Intrinsic.X86Blendvps, value, oValue, isNaNInf);
}
|
||||
|
||||
public static void EmitSse2VectorIsNaNOpF(
|
||||
ArmEmitterContext context,
|
||||
Operand opF,
|
||||
|
|
|
@ -22,7 +22,7 @@ namespace ARMeilleure.Translation.PTC
|
|||
{
|
||||
private const string HeaderMagic = "PTChd";
|
||||
|
||||
private const int InternalVersion = 1956; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
private const int InternalVersion = 1943; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
|
||||
private const string ActualDir = "0";
|
||||
private const string BackupDir = "1";
|
||||
|
|
Reference in a new issue