From 88619d71b8e4840218c68b712aa184098d2dbccf Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Fri, 17 Jul 2020 06:21:40 +0200 Subject: [PATCH] CPU: A32: Add Vadd & Vsub Wide (S/U_8/16/32) Inst.s with Test. (#1390) --- ARMeilleure/Decoders/OpCode32SimdRegWide.cs | 17 ++++ ARMeilleure/Decoders/OpCodeTable.cs | 2 + .../Instructions/InstEmitSimdArithmetic32.cs | 14 +++ .../Instructions/InstEmitSimdHelper32.cs | 24 +++++ ARMeilleure/Instructions/InstName.cs | 2 + ARMeilleure/Translation/PTC/Ptc.cs | 2 +- Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs | 97 ++++++++----------- 7 files changed, 103 insertions(+), 55 deletions(-) create mode 100644 ARMeilleure/Decoders/OpCode32SimdRegWide.cs diff --git a/ARMeilleure/Decoders/OpCode32SimdRegWide.cs b/ARMeilleure/Decoders/OpCode32SimdRegWide.cs new file mode 100644 index 00000000..55384b2b --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdRegWide.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + sealed class OpCode32SimdRegWide : OpCode32SimdReg + { + public OpCode32SimdRegWide(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Q = false; + RegisterSize = RegisterSize.Simd64; + + // Subclasses have their own handling of Vx to account for before checking. + if (GetType() == typeof(OpCode32SimdRegWide) && DecoderHelper.VectorArgumentsInvalid(true, Vd, Vn)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index b98fcab1..59239415 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -803,6 +803,7 @@ namespace ARMeilleure.Decoders SetA32("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, typeof(OpCode32SimdReg)); SetA32("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, typeof(OpCode32SimdRegS)); SetA32("111100100x00xxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, typeof(OpCode32SimdReg)); + SetA32("1111001x1x< context.Add(op1, op2), !op.U); + } + public static void Vdup(ArmEmitterContext context) { OpCode32SimdDupGP op = (OpCode32SimdDupGP)context.CurrOp; @@ -1191,6 +1198,13 @@ namespace ARMeilleure.Instructions } } + public static void Vsubw_I(ArmEmitterContext context) + { + OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp; + + EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U); + } + private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar) { IOpCode32Simd op = (IOpCode32Simd)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index 9697715a..9753af66 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -281,6 +281,30 @@ namespace ARMeilleure.Instructions context.Copy(GetVecA32(op.Qd), res); } + public static void EmitVectorBinaryWideOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size + 1, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + if (op.Size == 2) + { + me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me); + } + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed) { OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index 69b5d3fc..28041874 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -545,6 +545,7 @@ namespace ARMeilleure.Instructions // FP & SIMD (AArch32) Vabs, Vadd, + Vaddw, Vand, Vbif, Vbit, @@ -611,6 +612,7 @@ namespace ARMeilleure.Instructions Vrsqrte, Vrsqrts, Vsub, + Vsubw, Vtbl, Vtrn, Vuzp, diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index b951caf8..d5fb8828 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC { private const string HeaderMagic = "PTChd"; - private const int InternalVersion = 10; //! To be incremented manually for each change to the ARMeilleure project. + private const int InternalVersion = 11; //! To be incremented manually for each change to the ARMeilleure project. private const string BaseDir = "Ryujinx"; diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs index 1581e850..dbe69124 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs @@ -13,6 +13,15 @@ namespace Ryujinx.Tests.Cpu #if SimdReg32 #region "ValueSource (Opcodes)" + private static uint[] _V_Add_Sub_Wide_I_() + { + return new uint[] + { + 0xf2800100u, // VADDW.S8 Q0, Q0, D0 + 0xf2800300u // VSUBW.S8 Q0, Q0, D0 + }; + } + private static uint[] _Vp_Add_Max_Min_F_() { return new uint[] @@ -38,60 +47,6 @@ namespace Ryujinx.Tests.Cpu #endregion #region "ValueSource (Types)" - private static ulong[] _1B1H1S1D_() - { - return new ulong[] { 0x0000000000000000ul, 0x000000000000007Ful, - 0x0000000000000080ul, 0x00000000000000FFul, - 0x0000000000007FFFul, 0x0000000000008000ul, - 0x000000000000FFFFul, 0x000000007FFFFFFFul, - 0x0000000080000000ul, 0x00000000FFFFFFFFul, - 0x7FFFFFFFFFFFFFFFul, 0x8000000000000000ul, - 0xFFFFFFFFFFFFFFFFul }; - } - - private static ulong[] _1D_() - { - return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, - 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; - } - - private static ulong[] _1H1S_() - { - return new ulong[] { 0x0000000000000000ul, 0x0000000000007FFFul, - 0x0000000000008000ul, 0x000000000000FFFFul, - 0x000000007FFFFFFFul, 0x0000000080000000ul, - 0x00000000FFFFFFFFul }; - } - - private static ulong[] _4H2S_() - { - return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, - 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, - 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; - } - - private static ulong[] _4H2S1D_() - { - return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, - 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, - 0x8000000080000000ul, 0x7FFFFFFFFFFFFFFFul, - 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; - } - - private static ulong[] _8B_() - { - return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, - 0x8080808080808080ul, 0xFFFFFFFFFFFFFFFFul }; - } - - private static ulong[] _8B4H2S_() - { - return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, - 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul, - 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, - 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; - } - private static ulong[] _8B4H2S1D_() { return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, @@ -267,6 +222,40 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise] + public void V_Add_Sub_Wide_I([ValueSource("_V_Add_Sub_Wide_I_")] uint opcode, + [Range(0u, 5u)] uint rd, + [Range(0u, 5u)] uint rn, + [Range(0u, 5u)] uint rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong a, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong b, + [Values(0u, 1u, 2u)] uint size, // + [Values] bool u) // + { + if (u) + { + opcode |= 1 << 24; + } + + rd >>= 1; rd <<= 1; + rn >>= 1; rn <<= 1; + + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); + + opcode |= (size & 0x3) << 20; + + V128 v0 = MakeVectorE0E1(z, ~z); + V128 v1 = MakeVectorE0E1(a, ~a); + V128 v2 = MakeVectorE0E1(b, ~b); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("VCMP.f Vd, Vm")] public void Vcmp([Values(2u, 3u)] uint size, [ValueSource("_1S_F_")] ulong a,