mirror of
https://github.com/GreemDev/Ryujinx.git
synced 2025-01-09 08:52:00 +00:00
Fix Fcmge_S/V & Fcmgt_S/V Inst.s (#815)
* Fix Fcmge_S/V & Fcmgt_S/V. Follow-up Fcm**_S/V & Fc*mp*_S. Improve CmpCondition enum. Nits.
* Optimize Fccmp*_S & Fcmp*_S.
* Fix cvtsd2si opcode.
* Address PR feedback.
This commit is contained in:
parent
2ea8d5bd5f
commit
eefe2b20fc
11 changed files with 92 additions and 62 deletions
|
@ -94,7 +94,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex));
|
Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex));
|
||||||
Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
||||||
Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
||||||
Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
|
||||||
Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
||||||
|
|
|
@ -265,7 +265,21 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
|
Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
|
||||||
|
|
||||||
context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
|
if (intrinOp.Intrinsic == Intrinsic.X86Cvtsi2si)
|
||||||
|
{
|
||||||
|
if (dest.Type == OperandType.I32)
|
||||||
|
{
|
||||||
|
context.Assembler.Movd(dest, source); // int _mm_cvtsi128_si32
|
||||||
|
}
|
||||||
|
else /* if (dest.Type == OperandType.I64) */
|
||||||
|
{
|
||||||
|
context.Assembler.Movq(dest, source); // __int64 _mm_cvtsi128_si64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,7 +26,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
public static bool ForceLegacySse { get; set; }
|
public static bool ForceLegacySse { get; set; }
|
||||||
|
|
||||||
public static bool SupportsVexEncoding => !ForceLegacySse && SupportsAvx;
|
public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
|
||||||
|
|
||||||
static HardwareCapabilities()
|
static HardwareCapabilities()
|
||||||
{
|
{
|
||||||
|
|
|
@ -37,6 +37,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary));
|
Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary));
|
||||||
Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr));
|
Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr));
|
||||||
Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary));
|
Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary));
|
||||||
|
Add(Intrinsic.X86Cvtsi2si, new IntrinsicInfo(X86Instruction.Movd, IntrinsicType.UnaryToGpr));
|
||||||
Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary));
|
Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary));
|
||||||
Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary));
|
Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary));
|
||||||
Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary));
|
Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary));
|
||||||
|
|
|
@ -322,7 +322,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
public static void Fcmge_S(ArmEmitterContext context)
|
public static void Fcmge_S(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseAvx)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true);
|
||||||
}
|
}
|
||||||
|
@ -334,7 +334,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
public static void Fcmge_V(ArmEmitterContext context)
|
public static void Fcmge_V(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseAvx)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false);
|
||||||
}
|
}
|
||||||
|
@ -346,7 +346,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
public static void Fcmgt_S(ArmEmitterContext context)
|
public static void Fcmgt_S(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseAvx)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true);
|
||||||
}
|
}
|
||||||
|
@ -358,7 +358,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
public static void Fcmgt_V(ArmEmitterContext context)
|
public static void Fcmgt_V(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseAvx)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false);
|
||||||
}
|
}
|
||||||
|
@ -372,7 +372,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, isLeOrLt: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -384,7 +384,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, isLeOrLt: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThanOrEqual, scalar: false);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -396,7 +396,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: true, isLeOrLt: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -408,7 +408,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && Optimizations.UseSse2)
|
||||||
{
|
{
|
||||||
EmitCmpSseOrSse2OpF(context, CmpCondition.GreaterThan, scalar: false, isLeOrLt: true);
|
EmitCmpSseOrSse2OpF(context, CmpCondition.LessThan, scalar: false);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -426,7 +426,7 @@ namespace ARMeilleure.Instructions
|
||||||
EmitFcmpOrFcmpe(context, signalNaNs: true);
|
EmitFcmpOrFcmpe(context, signalNaNs: true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
|
private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs)
|
||||||
{
|
{
|
||||||
OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
|
OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp;
|
||||||
|
|
||||||
|
@ -435,7 +435,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
|
context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond));
|
||||||
|
|
||||||
EmitSetNzcv(context, Const(op.Nzcv));
|
EmitSetNzcv(context, op.Nzcv);
|
||||||
|
|
||||||
context.Branch(lblEnd);
|
context.Branch(lblEnd);
|
||||||
|
|
||||||
|
@ -446,27 +446,47 @@ namespace ARMeilleure.Instructions
|
||||||
context.MarkLabel(lblEnd);
|
context.MarkLabel(lblEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes the four condition flags from an immediate NZCV value.
// Bit 0 goes to V, bit 1 to C, bit 2 to Z and bit 3 to N; each bit is
// isolated from the immediate and wrapped with Const before being
// handed to SetFlag.
private static void EmitSetNzcv(ArmEmitterContext context, int nzcv)
{
    // Shifting by zero is a no-op, so bit 0 needs no special casing.
    Operand BitAsConst(int position) => Const((nzcv >> position) & 1);

    SetFlag(context, PState.VFlag, BitAsConst(0));
    SetFlag(context, PState.CFlag, BitAsConst(1));
    SetFlag(context, PState.ZFlag, BitAsConst(2));
    SetFlag(context, PState.NFlag, BitAsConst(3));
}
|
||||||
|
|
||||||
private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
|
private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs)
|
||||||
{
|
{
|
||||||
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
||||||
|
|
||||||
const int cmpOrdered = 7;
|
|
||||||
|
|
||||||
bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;
|
bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false;
|
||||||
|
|
||||||
if (Optimizations.FastFP && Optimizations.UseSse2)
|
if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
|
||||||
{
|
{
|
||||||
Operand n = GetVec(op.Rn);
|
Operand n = GetVec(op.Rn);
|
||||||
Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
|
Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm);
|
||||||
|
|
||||||
|
CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
|
||||||
|
|
||||||
Operand lblNaN = Label();
|
Operand lblNaN = Label();
|
||||||
Operand lblEnd = Label();
|
Operand lblEnd = Label();
|
||||||
|
|
||||||
if (op.Size == 0)
|
if (op.Size == 0)
|
||||||
{
|
{
|
||||||
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const(cmpOrdered));
|
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
|
||||||
|
|
||||||
Operand isOrdered = context.VectorExtract16(ordMask, 0);
|
Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
|
||||||
|
|
||||||
context.BranchIfFalse(lblNaN, isOrdered);
|
context.BranchIfFalse(lblNaN, isOrdered);
|
||||||
|
|
||||||
|
@ -481,9 +501,9 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
else /* if (op.Size == 1) */
|
else /* if (op.Size == 1) */
|
||||||
{
|
{
|
||||||
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const(cmpOrdered));
|
Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
|
||||||
|
|
||||||
Operand isOrdered = context.VectorExtract16(ordMask, 0);
|
Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
|
||||||
|
|
||||||
context.BranchIfFalse(lblNaN, isOrdered);
|
context.BranchIfFalse(lblNaN, isOrdered);
|
||||||
|
|
||||||
|
@ -653,18 +673,7 @@ namespace ARMeilleure.Instructions
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
|
|
||||||
private enum CmpCondition
|
private static void EmitCmpSseOrSse2OpF(ArmEmitterContext context, CmpCondition cond, bool scalar)
|
||||||
{
|
|
||||||
Equal = 0,
|
|
||||||
GreaterThanOrEqual = 5,
|
|
||||||
GreaterThan = 6
|
|
||||||
}
|
|
||||||
|
|
||||||
private static void EmitCmpSseOrSse2OpF(
|
|
||||||
ArmEmitterContext context,
|
|
||||||
CmpCondition cond,
|
|
||||||
bool scalar,
|
|
||||||
bool isLeOrLt = false)
|
|
||||||
{
|
{
|
||||||
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
||||||
|
|
||||||
|
@ -677,9 +686,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
|
Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps;
|
||||||
|
|
||||||
Operand res = isLeOrLt
|
Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
|
||||||
? context.AddIntrinsic(inst, m, n, Const((int)cond))
|
|
||||||
: context.AddIntrinsic(inst, n, m, Const((int)cond));
|
|
||||||
|
|
||||||
if (scalar)
|
if (scalar)
|
||||||
{
|
{
|
||||||
|
@ -696,9 +703,7 @@ namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
|
Intrinsic inst = scalar ? Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd;
|
||||||
|
|
||||||
Operand res = isLeOrLt
|
Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond));
|
||||||
? context.AddIntrinsic(inst, m, n, Const((int)cond))
|
|
||||||
: context.AddIntrinsic(inst, n, m, Const((int)cond));
|
|
||||||
|
|
||||||
if (scalar)
|
if (scalar)
|
||||||
{
|
{
|
||||||
|
|
|
@ -732,8 +732,7 @@ namespace ARMeilleure.Instructions
|
||||||
Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
|
Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
|
||||||
Debug.Assert((uint)size < 2);
|
Debug.Assert((uint)size < 2);
|
||||||
|
|
||||||
OperandType type = size == 0 ? OperandType.FP32
|
OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
|
||||||
: OperandType.FP64;
|
|
||||||
|
|
||||||
if (signed)
|
if (signed)
|
||||||
{
|
{
|
||||||
|
@ -837,15 +836,12 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand n = GetVec(op.Rn);
|
Operand n = GetVec(op.Rn);
|
||||||
|
|
||||||
const int cmpGreaterThanOrEqual = 5;
|
|
||||||
const int cmpOrdered = 7;
|
|
||||||
|
|
||||||
// sizeF == ((OpCodeSimdShImm64)op).Size - 2
|
// sizeF == ((OpCodeSimdShImm64)op).Size - 2
|
||||||
int sizeF = op.Size & 1;
|
int sizeF = op.Size & 1;
|
||||||
|
|
||||||
if (sizeF == 0)
|
if (sizeF == 0)
|
||||||
{
|
{
|
||||||
Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered));
|
Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
|
||||||
|
|
||||||
Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
|
Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
|
||||||
|
|
||||||
|
@ -867,7 +863,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
|
Operand mask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
|
||||||
|
|
||||||
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, mask, Const(cmpGreaterThanOrEqual));
|
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, mask, Const((int)CmpCondition.NotLessThan));
|
||||||
|
|
||||||
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
|
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
|
||||||
|
|
||||||
|
@ -884,7 +880,7 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
else /* if (sizeF == 1) */
|
else /* if (sizeF == 1) */
|
||||||
{
|
{
|
||||||
Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered));
|
Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
|
||||||
|
|
||||||
Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
|
Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
|
||||||
|
|
||||||
|
@ -920,7 +916,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
|
Operand mask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
|
||||||
|
|
||||||
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, mask, Const(cmpGreaterThanOrEqual));
|
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, mask, Const((int)CmpCondition.NotLessThan));
|
||||||
|
|
||||||
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
|
Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, mask2);
|
||||||
|
|
||||||
|
@ -939,16 +935,12 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand n = GetVec(op.Rn);
|
Operand n = GetVec(op.Rn);
|
||||||
|
|
||||||
const int cmpGreaterThanOrEqual = 5;
|
|
||||||
const int cmpGreaterThan = 6;
|
|
||||||
const int cmpOrdered = 7;
|
|
||||||
|
|
||||||
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
||||||
int sizeF = op.Size & 1;
|
int sizeF = op.Size & 1;
|
||||||
|
|
||||||
if (sizeF == 0)
|
if (sizeF == 0)
|
||||||
{
|
{
|
||||||
Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const(cmpOrdered));
|
Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
|
||||||
|
|
||||||
Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
|
Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
|
||||||
|
|
||||||
|
@ -966,7 +958,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode)));
|
Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundps, nScaled, Const(X86GetRoundControl(roundMode)));
|
||||||
|
|
||||||
Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, context.VectorZero(), Const(cmpGreaterThan));
|
Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nRnd, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
|
||||||
|
|
||||||
Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
|
Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
|
||||||
|
|
||||||
|
@ -976,13 +968,13 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res = context.AddIntrinsic(Intrinsic.X86Subps, nRndMasked, mask);
|
Operand res = context.AddIntrinsic(Intrinsic.X86Subps, nRndMasked, mask);
|
||||||
|
|
||||||
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, res, context.VectorZero(), Const(cmpGreaterThan));
|
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, res, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
|
||||||
|
|
||||||
Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
|
Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
|
||||||
|
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, resMasked);
|
res = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, resMasked);
|
||||||
|
|
||||||
Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmpps, resMasked, mask, Const(cmpGreaterThanOrEqual));
|
Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmpps, resMasked, mask, Const((int)CmpCondition.NotLessThan));
|
||||||
|
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
|
res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Paddd, res, nInt);
|
res = context.AddIntrinsic(Intrinsic.X86Paddd, res, nInt);
|
||||||
|
@ -1000,7 +992,7 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
else /* if (sizeF == 1) */
|
else /* if (sizeF == 1) */
|
||||||
{
|
{
|
||||||
Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const(cmpOrdered));
|
Operand nMask = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
|
||||||
|
|
||||||
Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
|
Operand nScaled = context.AddIntrinsic(Intrinsic.X86Pand, nMask, n);
|
||||||
|
|
||||||
|
@ -1018,7 +1010,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode)));
|
Operand nRnd = context.AddIntrinsic(Intrinsic.X86Roundpd, nScaled, Const(X86GetRoundControl(roundMode)));
|
||||||
|
|
||||||
Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, context.VectorZero(), Const(cmpGreaterThan));
|
Operand nRndMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nRnd, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
|
||||||
|
|
||||||
Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
|
Operand nRndMasked = context.AddIntrinsic(Intrinsic.X86Pand, nRnd, nRndMask);
|
||||||
|
|
||||||
|
@ -1042,7 +1034,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, nRndMasked, mask);
|
Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, nRndMasked, mask);
|
||||||
|
|
||||||
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, res, context.VectorZero(), Const(cmpGreaterThan));
|
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, res, context.VectorZero(), Const((int)CmpCondition.NotLessThanOrEqual));
|
||||||
|
|
||||||
Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
|
Operand resMasked = context.AddIntrinsic(Intrinsic.X86Pand, res, mask2);
|
||||||
|
|
||||||
|
@ -1056,7 +1048,7 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
res = EmitVectorLongCreate(context, low, high);
|
res = EmitVectorLongCreate(context, low, high);
|
||||||
|
|
||||||
Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmppd, resMasked, mask, Const(cmpGreaterThanOrEqual));
|
Operand mask3 = context.AddIntrinsic(Intrinsic.X86Cmppd, resMasked, mask, Const((int)CmpCondition.NotLessThan));
|
||||||
|
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
|
res = context.AddIntrinsic(Intrinsic.X86Pxor, res, mask3);
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Paddq, res, nInt);
|
res = context.AddIntrinsic(Intrinsic.X86Paddq, res, nInt);
|
||||||
|
|
|
@ -1108,6 +1108,21 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Comparison predicate values; callers cast a member to int and pass it
// as the immediate operand of the X86Cmpss/Cmpsd/Cmpps/Cmppd intrinsics.
public enum CmpCondition
{
    // Legacy Sse.
    Equal              = 0x00, // Ordered, non-signaling.
    LessThan           = 0x01, // Ordered, signaling.
    LessThanOrEqual    = 0x02, // Ordered, signaling.
    NotLessThan        = 0x05, // Unordered, signaling.
    NotLessThanOrEqual = 0x06, // Unordered, signaling.
    OrderedQ           = 0x07, // Non-signaling.

    // Vex.
    GreaterThanOrEqual = 0x0D, // Ordered, signaling.
    GreaterThan        = 0x0E, // Ordered, signaling.
    OrderedS           = 0x17  // Signaling.
}
|
||||||
|
|
||||||
[Flags]
|
[Flags]
|
||||||
public enum SaturatingFlags
|
public enum SaturatingFlags
|
||||||
|
|
|
@ -26,6 +26,7 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||||
X86Cvtps2pd,
|
X86Cvtps2pd,
|
||||||
X86Cvtsd2si,
|
X86Cvtsd2si,
|
||||||
X86Cvtsd2ss,
|
X86Cvtsd2ss,
|
||||||
|
X86Cvtsi2si,
|
||||||
X86Cvtss2sd,
|
X86Cvtss2sd,
|
||||||
X86Divpd,
|
X86Divpd,
|
||||||
X86Divps,
|
X86Divps,
|
||||||
|
|
|
@ -15,6 +15,7 @@ namespace ARMeilleure
|
||||||
public static bool UseSse41IfAvailable { get; set; } = true;
|
public static bool UseSse41IfAvailable { get; set; } = true;
|
||||||
public static bool UseSse42IfAvailable { get; set; } = true;
|
public static bool UseSse42IfAvailable { get; set; } = true;
|
||||||
public static bool UsePopCntIfAvailable { get; set; } = true;
|
public static bool UsePopCntIfAvailable { get; set; } = true;
|
||||||
|
public static bool UseAvxIfAvailable { get; set; } = true;
|
||||||
|
|
||||||
public static bool ForceLegacySse
|
public static bool ForceLegacySse
|
||||||
{
|
{
|
||||||
|
@ -29,5 +30,6 @@ namespace ARMeilleure
|
||||||
internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41;
|
internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41;
|
||||||
internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
|
internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
|
||||||
internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
|
internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
|
||||||
|
internal static bool UseAvx => UseAvxIfAvailable && HardwareCapabilities.SupportsAvx && !ForceLegacySse;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -22,7 +22,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.3.0" />
|
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.4.0" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.3.0" />
|
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.4.0" />
|
||||||
<PackageReference Include="NUnit" Version="3.12.0" />
|
<PackageReference Include="NUnit" Version="3.12.0" />
|
||||||
<PackageReference Include="NUnit3TestAdapter" Version="3.15.1" />
|
<PackageReference Include="NUnit3TestAdapter" Version="3.15.1" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
Loading…
Reference in a new issue