From 89ccec197ec9a5db2bb308ef3e9178910d1ab7a8 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Tue, 10 Mar 2020 02:17:30 -0300 Subject: [PATCH] Implement VMOVL and VORR.I32 AArch32 SIMD instructions (#960) * Implement VMOVL and VORR.I32 AArch32 SIMD instructions * Rename
to on test description * Rename Widen to Long and improve VMOVL implementation a bit --- ARMeilleure/Decoders/OpCode32SimdImm.cs | 6 +-- ARMeilleure/Decoders/OpCode32SimdLong.cs | 27 +++++++++++++ ARMeilleure/Decoders/OpCodeSimdHelper.cs | 2 +- ARMeilleure/Decoders/OpCodeTable.cs | 6 ++- .../Instructions/InstEmitSimdLogical32.cs | 39 +++++++++++++++++++ .../Instructions/InstEmitSimdMove32.cs | 30 ++++++++++++++ ARMeilleure/Instructions/InstName.cs | 4 +- Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs | 28 +++++++++++++ Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs | 30 ++++++++++++++ 9 files changed, 165 insertions(+), 7 deletions(-) create mode 100644 ARMeilleure/Decoders/OpCode32SimdLong.cs diff --git a/ARMeilleure/Decoders/OpCode32SimdImm.cs b/ARMeilleure/Decoders/OpCode32SimdImm.cs index 72fca59c..c6ae7ec5 100644 --- a/ARMeilleure/Decoders/OpCode32SimdImm.cs +++ b/ARMeilleure/Decoders/OpCode32SimdImm.cs @@ -1,11 +1,9 @@ namespace ARMeilleure.Decoders { - class OpCode32SimdImm : OpCode32, IOpCode32SimdImm + class OpCode32SimdImm : OpCode32SimdBase, IOpCode32SimdImm { - public int Vd { get; private set; } public bool Q { get; private set; } public long Immediate { get; private set; } - public int Size { get; private set; } public int Elems => GetBytesCount() >> Size; public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) @@ -24,7 +22,7 @@ imm |= ((uint)opCode >> 12) & 0x70; imm |= ((uint)opCode >> 17) & 0x80; - (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm, fpBaseSize: 2); + (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm); RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64; diff --git a/ARMeilleure/Decoders/OpCode32SimdLong.cs b/ARMeilleure/Decoders/OpCode32SimdLong.cs new file mode 100644 index 00000000..c4b18683 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdLong.cs @@ -0,0 +1,27 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdLong : OpCode32SimdBase + { + public bool U { get; private set; } + + public OpCode32SimdLong(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm3h = (opCode >> 19) & 0x7; + + // The value must be a power of 2, otherwise it is the encoding of another instruction. + switch (imm3h) + { + case 1: Size = 0; break; + case 2: Size = 1; break; + case 4: Size = 2; break; + } + + U = ((opCode >> 24) & 0x1) != 0; + + RegisterSize = RegisterSize.Simd64; + + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + } + } +} diff --git a/ARMeilleure/Decoders/OpCodeSimdHelper.cs b/ARMeilleure/Decoders/OpCodeSimdHelper.cs index 3e5a7f65..02f74d03 100644 --- a/ARMeilleure/Decoders/OpCodeSimdHelper.cs +++ b/ARMeilleure/Decoders/OpCodeSimdHelper.cs @@ -2,7 +2,7 @@ { public static class OpCodeSimdHelper { - public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm, int fpBaseSize = 0) + public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm) { int modeLow = cMode & 1; int modeHigh = cMode >> 1; diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 7b1ebbc7..eac31706 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -158,7 +158,7 @@ namespace ARMeilleure.Decoders SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, typeof(OpCodeAluBinary)); SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, typeof(OpCodeMul)); SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, typeof(OpCodeMul)); - SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smul__, InstEmit.Smulh, typeof(OpCodeMul)); + SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh, InstEmit.Smulh, typeof(OpCodeMul)); SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, typeof(OpCodeMemEx)); SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, typeof(OpCodeMemEx)); SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, typeof(OpCodeMemEx)); @@ -829,6 +829,9 @@ namespace ARMeilleure.Decoders SetA32("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q (dt - from cmode). SetA32("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q I64. SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, typeof(OpCode32SimdS)); + SetA32("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong)); + SetA32("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong)); + SetA32("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong)); SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, typeof(OpCode32SimdCmpZ)); SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial)); SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial)); @@ -845,6 +848,7 @@ namespace ARMeilleure.Decoders SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, typeof(OpCode32SimdRegS)); SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, typeof(OpCode32SimdRegS)); SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr, InstEmit32.Vorr_I, typeof(OpCode32SimdBinary)); + SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr, InstEmit32.Vorr_II, typeof(OpCode32SimdImm)); SetA32("111100100x<> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U); + + if (op.Size == 2) + { + if (op.U) + { + me = context.ZeroExtend32(OperandType.I64, me); + } + else + { + me = context.SignExtend32(OperandType.I64, me); + } + } + + res = EmitVectorInsert(context, res, me, index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + public static void Vtbl(ArmEmitterContext context) { OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index 049c956d..69969e9f 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -81,7 +81,7 @@ namespace ARMeilleure.Instructions Sdiv, Smaddl, Smsubl, - Smul__, + Smulh, Smull, Smulw_, Ssat, @@ -500,6 +500,7 @@ namespace ARMeilleure.Instructions Smlaw_, Smmla, Smmls, + Smul__, Smmul, Stl, Stlb, @@ -560,6 +561,7 @@ namespace ARMeilleure.Instructions Vmla, Vmls, Vmov, + Vmovl, Vmovn, Vmrs, Vmsr, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs index dfbd3b0b..459127de 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs @@ -56,6 +56,34 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + + [Test, Pairwise, Description("VORR.I32 , #")] + public void Vorr_II([Range(0u, 4u)] uint rd, + [Random(RndCnt)] ulong z, + [Random(RndCnt)] byte imm, + [Values(0u, 1u, 2u, 3u)] uint cMode, + [Values] bool q) + { + uint opcode = 0xf2800110u; // VORR.I32 D0, #0 + + if (q) + { + opcode |= 1 << 6; + rd <<= 1; + } + + opcode |= (uint)(imm & 0xf) << 0; + opcode |= (uint)(imm & 0x70) << 12; + opcode |= (uint)(imm & 0x80) << 17; + opcode |= (cMode & 0x3) << 9; + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + + V128 v0 = MakeVectorE0E1(z, z); + + SingleOpcode(opcode, v0: v0); + + CompareAgainstUnicorn(); + } #endif } } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs index 13d61078..8c9627ce 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdMov32.cs @@ -228,6 +228,36 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise, Description("VMOVL. , ")] + public void Vmovl([Values(0u, 1u, 2u, 3u)] uint vm, + [Values(0u, 2u, 4u, 6u)] uint vd, + [Values(1u, 2u, 4u)] uint imm3H, + [Values] bool u) + { + // This is not VMOVL because imm3H = 0, but once + // we shift in the imm3H value it turns into VMOVL. + uint opcode = 0xf2800a10u; // VMOV.I16 D0, #0 + + opcode |= (vm & 0x10) << 1; + opcode |= (vm & 0xf); + opcode |= (vd & 0x10) << 18; + opcode |= (vd & 0xf) << 12; + opcode |= (imm3H & 0x7) << 19; + if (u) + { + opcode |= 1 << 24; + } + + V128 v0 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v1 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v2 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + V128 v3 = new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong()); + + SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("VTRN. , ")] public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm, [Values(0u, 1u, 2u, 3u)] uint vd,