0
0
Fork 0
mirror of https://github.com/GreemDev/Ryujinx.git synced 2025-01-24 21:42:00 +00:00

CPU: Implement VFNMA.F32 | F.64 (#1783)

* Implement VFNMA.F<32/64>

* Update PTC Version

* Update Implementation & Renames & Correct Order

* Fix alignment

* Update implementation to not trigger assert

* Actually use the intrinsic that makes sense :)
This commit is contained in:
sharmander 2020-12-07 19:04:01 -05:00 committed by GitHub
parent 567ea726e1
commit 36f6bbf5b9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 453 additions and 369 deletions

View file

@ -273,10 +273,18 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vfmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38be, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfnmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38be, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));

View file

@ -166,10 +166,18 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary)); Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary)); Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm)); Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231pd, new IntrinsicInfo(X86Instruction.Vfmsub231pd, IntrinsicType.Fma)); Add(Intrinsic.X86Vfmsub231pd, new IntrinsicInfo(X86Instruction.Vfmsub231pd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231ps, new IntrinsicInfo(X86Instruction.Vfmsub231ps, IntrinsicType.Fma)); Add(Intrinsic.X86Vfmsub231ps, new IntrinsicInfo(X86Instruction.Vfmsub231ps, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma)); Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma)); Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231pd, new IntrinsicInfo(X86Instruction.Vfnmsub231pd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231ps, new IntrinsicInfo(X86Instruction.Vfnmsub231ps, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary)); Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary)); Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
} }

View file

@ -203,10 +203,18 @@ namespace ARMeilleure.CodeGen.X86
Vblendvps, Vblendvps,
Vcvtph2ps, Vcvtph2ps,
Vcvtps2ph, Vcvtps2ph,
Vfmadd231pd,
Vfmadd231ps,
Vfmadd231sd,
Vfmadd231ss,
Vfmsub231ps, Vfmsub231ps,
Vfmsub231pd, Vfmsub231pd,
Vfmsub231ss, Vfmsub231ss,
Vfmsub231sd, Vfmsub231sd,
Vfnmsub231ps,
Vfnmsub231pd,
Vfnmsub231ss,
Vfnmsub231sd,
Vpblendvb, Vpblendvb,
Xor, Xor,
Xorpd, Xorpd,

View file

@ -821,6 +821,7 @@ namespace ARMeilleure.Decoders
SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, OpCode32SimdExt.Create); SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, OpCode32SimdExt.Create);
SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create); SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create);
SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create); SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create);
SetA32("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create);
SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create); SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create);
SetA32("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create); SetA32("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create);
SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create); SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create);

View file

@ -284,6 +284,21 @@ namespace ARMeilleure.Instructions
} }
} }
public static void Vfnma_S(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseFma)
{
EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd);
}
else
{
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), context.Negate(op1), context.Negate(op2), op3);
});
}
}
public static void Vfnms_S(ArmEmitterContext context) // Fused. public static void Vfnms_S(ArmEmitterContext context) // Fused.
{ {
if (Optimizations.FastFP && Optimizations.UseFma) if (Optimizations.FastFP && Optimizations.UseFma)

View file

@ -571,6 +571,7 @@ namespace ARMeilleure.Instructions
Vext, Vext,
Vfma, Vfma,
Vfms, Vfms,
Vfnma,
Vfnms, Vfnms,
Vhadd, Vhadd,
Vld1, Vld1,

View file

@ -155,10 +155,18 @@ namespace ARMeilleure.IntermediateRepresentation
X86Unpcklps, X86Unpcklps,
X86Vcvtph2ps, X86Vcvtph2ps,
X86Vcvtps2ph, X86Vcvtps2ph,
X86Vfmadd231pd,
X86Vfmadd231ps,
X86Vfmadd231sd,
X86Vfmadd231ss,
X86Vfmsub231pd, X86Vfmsub231pd,
X86Vfmsub231ps, X86Vfmsub231ps,
X86Vfmsub231sd, X86Vfmsub231sd,
X86Vfmsub231ss, X86Vfmsub231ss,
X86Vfnmsub231pd,
X86Vfnmsub231ps,
X86Vfnmsub231sd,
X86Vfnmsub231ss,
X86Xorpd, X86Xorpd,
X86Xorps X86Xorps
} }

View file

@ -21,7 +21,7 @@ namespace ARMeilleure.Translation.PTC
{ {
private const string HeaderMagic = "PTChd"; private const string HeaderMagic = "PTChd";
private const int InternalVersion = 1631; //! To be incremented manually for each change to the ARMeilleure project. private const int InternalVersion = 1783; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0"; private const string ActualDir = "0";
private const string BackupDir = "1"; private const string BackupDir = "1";

View file

@ -293,6 +293,41 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv); CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv);
} }
[Test, Pairwise, Description("VFNMA.F<size> <Vd>, <Vn>, <Vm>")]
public void Vfnma([Values(0u, 1u)] uint rd,
[Values(0u, 1u)] uint rn,
[Values(0u, 1u)] uint rm,
[Values(2u, 3u)] uint size,
[ValueSource("_2S_F_")] ulong z,
[ValueSource("_2S_F_")] ulong a,
[ValueSource("_2S_F_")] ulong b)
{
uint opcode = 0xe900840;
if (size == 2)
{
opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15);
}
else
{
opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
}
opcode |= ((size & 3) << 8);
V128 v0 = MakeVectorE0E1(z, z);
V128 v1 = MakeVectorE0E1(a, z);
V128 v2 = MakeVectorE0E1(b, z);
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
CompareAgainstUnicorn();
}
[Test, Pairwise, Description("VFNMS.F<size> <Vd>, <Vn>, <Vm>")] [Test, Pairwise, Description("VFNMS.F<size> <Vd>, <Vn>, <Vm>")]
public void Vfnms([Values(0u, 1u)] uint rd, public void Vfnms([Values(0u, 1u)] uint rd,
[Values(0u, 1u)] uint rn, [Values(0u, 1u)] uint rn,