CPU: Implement VFNMA.F32 | F.64 (#1783)
* Implement VFNMA.F<32/64> * Update PTC Version * Update Implementation & Renames & Correct Order * Fix alignment * Update implementation to not trigger assert * Actually use the intrinsic that makes sense :)
This commit is contained in:
parent
567ea726e1
commit
36f6bbf5b9
9 changed files with 453 additions and 369 deletions
|
@ -273,10 +273,18 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfnmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38be, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfnmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38be, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
|
||||
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
|
||||
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||
|
|
|
@ -166,10 +166,18 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
|
||||
Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
|
||||
Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmsub231pd, new IntrinsicInfo(X86Instruction.Vfmsub231pd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmsub231ps, new IntrinsicInfo(X86Instruction.Vfmsub231ps, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfnmsub231pd, new IntrinsicInfo(X86Instruction.Vfnmsub231pd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfnmsub231ps, new IntrinsicInfo(X86Instruction.Vfnmsub231ps, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
|
||||
Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
|
||||
Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
|
||||
}
|
||||
|
|
|
@ -203,10 +203,18 @@ namespace ARMeilleure.CodeGen.X86
|
|||
Vblendvps,
|
||||
Vcvtph2ps,
|
||||
Vcvtps2ph,
|
||||
Vfmadd231pd,
|
||||
Vfmadd231ps,
|
||||
Vfmadd231sd,
|
||||
Vfmadd231ss,
|
||||
Vfmsub231ps,
|
||||
Vfmsub231pd,
|
||||
Vfmsub231ss,
|
||||
Vfmsub231sd,
|
||||
Vfnmsub231ps,
|
||||
Vfnmsub231pd,
|
||||
Vfnmsub231ss,
|
||||
Vfnmsub231sd,
|
||||
Vpblendvb,
|
||||
Xor,
|
||||
Xorpd,
|
||||
|
|
|
@ -821,6 +821,7 @@ namespace ARMeilleure.Decoders
|
|||
SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, OpCode32SimdExt.Create);
|
||||
SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create);
|
||||
SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create);
|
||||
SetA32("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create);
|
||||
SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create);
|
||||
SetA32("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create);
|
||||
SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);
|
||||
|
|
|
@ -284,6 +284,21 @@ namespace ARMeilleure.Instructions
|
|||
}
|
||||
}
|
||||
|
||||
public static void Vfnma_S(ArmEmitterContext context) // Fused.
|
||||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseFma)
|
||||
{
|
||||
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
|
||||
}
|
||||
else
|
||||
{
|
||||
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
|
||||
{
|
||||
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), context.Negate(op1), context.Negate(op2), op3);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public static void Vfnms_S(ArmEmitterContext context) // Fused.
|
||||
{
|
||||
if (Optimizations.FastFP && Optimizations.UseFma)
|
||||
|
|
|
@ -571,6 +571,7 @@ namespace ARMeilleure.Instructions
|
|||
Vext,
|
||||
Vfma,
|
||||
Vfms,
|
||||
Vfnma,
|
||||
Vfnms,
|
||||
Vhadd,
|
||||
Vld1,
|
||||
|
|
|
@ -155,10 +155,18 @@ namespace ARMeilleure.IntermediateRepresentation
|
|||
X86Unpcklps,
|
||||
X86Vcvtph2ps,
|
||||
X86Vcvtps2ph,
|
||||
X86Vfmadd231pd,
|
||||
X86Vfmadd231ps,
|
||||
X86Vfmadd231sd,
|
||||
X86Vfmadd231ss,
|
||||
X86Vfmsub231pd,
|
||||
X86Vfmsub231ps,
|
||||
X86Vfmsub231sd,
|
||||
X86Vfmsub231ss,
|
||||
X86Vfnmsub231pd,
|
||||
X86Vfnmsub231ps,
|
||||
X86Vfnmsub231sd,
|
||||
X86Vfnmsub231ss,
|
||||
X86Xorpd,
|
||||
X86Xorps
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ namespace ARMeilleure.Translation.PTC
|
|||
{
|
||||
private const string HeaderMagic = "PTChd";
|
||||
|
||||
private const int InternalVersion = 1631; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
private const int InternalVersion = 1783; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
|
||||
private const string ActualDir = "0";
|
||||
private const string BackupDir = "1";
|
||||
|
|
|
@ -293,6 +293,41 @@ namespace Ryujinx.Tests.Cpu
|
|||
CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv);
|
||||
}
|
||||
|
||||
[Test, Pairwise, Description("VFNMA.F<size> <Vd>, <Vn>, <Vm>")]
|
||||
public void Vfnma([Values(0u, 1u)] uint rd,
|
||||
[Values(0u, 1u)] uint rn,
|
||||
[Values(0u, 1u)] uint rm,
|
||||
[Values(2u, 3u)] uint size,
|
||||
[ValueSource("_2S_F_")] ulong z,
|
||||
[ValueSource("_2S_F_")] ulong a,
|
||||
[ValueSource("_2S_F_")] ulong b)
|
||||
{
|
||||
uint opcode = 0xe900840;
|
||||
|
||||
if (size == 2)
|
||||
{
|
||||
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
|
||||
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
|
||||
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
|
||||
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
|
||||
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
|
||||
}
|
||||
|
||||
opcode |= ((size & 3) << 8);
|
||||
|
||||
V128 v0 = MakeVectorE0E1(z, z);
|
||||
V128 v1 = MakeVectorE0E1(a, z);
|
||||
V128 v2 = MakeVectorE0E1(b, z);
|
||||
|
||||
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
|
||||
CompareAgainstUnicorn();
|
||||
}
|
||||
|
||||
[Test, Pairwise, Description("VFNMS.F<size> <Vd>, <Vn>, <Vm>")]
|
||||
public void Vfnms([Values(0u, 1u)] uint rd,
|
||||
[Values(0u, 1u)] uint rn,
|
||||
|
|
Reference in a new issue