From 04e330cc004add7550eef8361cd490fac99255e2 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Tue, 13 Oct 2020 22:41:33 +0200 Subject: [PATCH] Add Umaal & Vabd_I, Vabdl_I, Vaddl_I, Vhadd, Vqshrn, Vshll inst.s (slow paths). (#1577) * Add Umaal & Vabd_I, Vabdl_I, Vaddl_I, Vhadd, Vqshrn, Vshll inst.s (slow paths). No test provided (i.e. draft). * Ptc InternalVersion = 1577 --- ARMeilleure/Decoders/OpCode32SimdRegLong.cs | 7 ++++ ARMeilleure/Decoders/OpCode32SimdRegWide.cs | 2 +- ARMeilleure/Decoders/OpCode32SimdShImm.cs | 25 ++++++----- ARMeilleure/Decoders/OpCode32SimdShImmLong.cs | 40 ++++++++++++++++++ ARMeilleure/Decoders/OpCodeTable.cs | 7 ++++ ARMeilleure/Instructions/InstEmitMul32.cs | 20 +++++++++ .../Instructions/InstEmitSimdArithmetic32.cs | 35 ++++++++++++++++ .../Instructions/InstEmitSimdShift32.cs | 41 +++++++++++++++++++ ARMeilleure/Instructions/InstName.cs | 7 ++++ ARMeilleure/Translation/PTC/Ptc.cs | 2 +- 10 files changed, 171 insertions(+), 15 deletions(-) create mode 100644 ARMeilleure/Decoders/OpCode32SimdShImmLong.cs diff --git a/ARMeilleure/Decoders/OpCode32SimdRegLong.cs b/ARMeilleure/Decoders/OpCode32SimdRegLong.cs index 24ae42d8..144824b3 100644 --- a/ARMeilleure/Decoders/OpCode32SimdRegLong.cs +++ b/ARMeilleure/Decoders/OpCode32SimdRegLong.cs @@ -8,7 +8,14 @@ { Q = false; RegisterSize = RegisterSize.Simd64; + Polynomial = ((opCode >> 9) & 0x1) != 0; + + // Subclasses have their own handling of Vx to account for before checking. + if (GetType() == typeof(OpCode32SimdRegLong) && DecoderHelper.VectorArgumentsInvalid(true, Vd)) + { + Instruction = InstDescriptor.Undefined; + } } } } diff --git a/ARMeilleure/Decoders/OpCode32SimdRegWide.cs b/ARMeilleure/Decoders/OpCode32SimdRegWide.cs index 55384b2b..93ef6949 100644 --- a/ARMeilleure/Decoders/OpCode32SimdRegWide.cs +++ b/ARMeilleure/Decoders/OpCode32SimdRegWide.cs @@ -1,6 +1,6 @@ namespace ARMeilleure.Decoders { - sealed class OpCode32SimdRegWide : OpCode32SimdReg + class OpCode32SimdRegWide : OpCode32SimdReg { public OpCode32SimdRegWide(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { diff --git a/ARMeilleure/Decoders/OpCode32SimdShImm.cs b/ARMeilleure/Decoders/OpCode32SimdShImm.cs index 53da6bfb..e40107a2 100644 --- a/ARMeilleure/Decoders/OpCode32SimdShImm.cs +++ b/ARMeilleure/Decoders/OpCode32SimdShImm.cs @@ -2,34 +2,33 @@ { class OpCode32SimdShImm : OpCode32Simd { - public int Immediate { get; private set; } public int Shift { get; private set; } public OpCode32SimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { - Immediate = (opCode >> 16) & 0x3f; - var limm = ((opCode >> 1) & 0x40) | Immediate; + int imm6 = (opCode >> 16) & 0x3f; + int limm6 = ((opCode >> 1) & 0x40) | imm6; - if ((limm & 0x40) == 0b1000000) + if ((limm6 & 0x40) == 0b1000000) { Size = 3; - Shift = Immediate; - } - else if ((limm & 0x60) == 0b0100000) + Shift = imm6; + } + else if ((limm6 & 0x60) == 0b0100000) { Size = 2; - Shift = Immediate - 32; + Shift = imm6 - 32; } - else if ((limm & 0x70) == 0b0010000) + else if ((limm6 & 0x70) == 0b0010000) { Size = 1; - Shift = Immediate - 16; + Shift = imm6 - 16; } - else if ((limm & 0x78) == 0b0001000) + else if ((limm6 & 0x78) == 0b0001000) { Size = 0; - Shift = Immediate - 8; - } + Shift = imm6 - 8; + } else { Instruction = InstDescriptor.Undefined; diff --git a/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs b/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs new file mode 100644 index 00000000..1e77b950 --- /dev/null +++ b/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs @@ -0,0 +1,40 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdShImmLong : OpCode32Simd + { + public int Shift { get; private set; } + + public OpCode32SimdShImmLong(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Q = false; + RegisterSize = RegisterSize.Simd64; + + int imm6 = (opCode >> 16) & 0x3f; + + if ((imm6 & 0x20) == 0b100000) + { + Size = 2; + Shift = imm6 - 32; + } + else if ((imm6 & 0x30) == 0b010000) + { + Size = 1; + Shift = imm6 - 16; + } + else if ((imm6 & 0x38) == 0b001000) + { + Size = 0; + Shift = imm6 - 8; + } + else + { + Instruction = InstDescriptor.Undefined; + } + + if (GetType() == typeof(OpCode32SimdShImmLong) && DecoderHelper.VectorArgumentsInvalid(true, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 15759814..903eaebd 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -787,6 +787,7 @@ namespace ARMeilleure.Decoders SetA32("<<<<00010001xxxx0000xxxx0xx1xxxx", InstName.Tst, InstEmit32.Tst, typeof(OpCode32AluRsReg)); SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx, InstEmit32.Ubfx, typeof(OpCode32AluBf)); SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv, InstEmit32.Udiv, typeof(OpCode32AluMla)); + SetA32("<<<<00000100xxxxxxxxxxxx1001xxxx", InstName.Umaal, InstEmit32.Umaal, typeof(OpCode32AluUmull)); SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, typeof(OpCode32AluUmull)); SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, typeof(OpCode32AluUmull)); SetA32("<<<<0110111xxxxxxxxxxxxxxx01xxxx", InstName.Usat, InstEmit32.Usat, typeof(OpCode32Sat)); @@ -800,12 +801,15 @@ namespace ARMeilleure.Decoders SetA32("111100111x110000xxx0001100x0xxx0", InstName.Aese_V, InstEmit32.Aese_V, typeof(OpCode32Simd)); SetA32("111100111x110000xxx0001111x0xxx0", InstName.Aesimc_V, InstEmit32.Aesimc_V, typeof(OpCode32Simd)); SetA32("111100111x110000xxx0001110x0xxx0", InstName.Aesmc_V, InstEmit32.Aesmc_V, typeof(OpCode32Simd)); + SetA32("1111001x0x<>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, typeof(OpCode32SimdShImmNarrow)); SetA32("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, typeof(OpCode32SimdShImmNarrow)); + SetA32("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn, InstEmit32.Vqshrn, typeof(OpCode32SimdShImmNarrow)); SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte)); SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, typeof(OpCode32SimdReg)); SetA32("111100111x11xx00xxxx000<>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, typeof(OpCode32SimdShImm)); SetA32("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, typeof(OpCode32SimdReg)); + SetA32("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, typeof(OpCode32SimdShImmLong)); // A1 encoding. SetA32("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, typeof(OpCode32SimdShImm)); SetA32("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, typeof(OpCode32SimdShImmNarrow)); SetA32("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, typeof(OpCode32SimdS)); diff --git a/ARMeilleure/Instructions/InstEmitMul32.cs b/ARMeilleure/Instructions/InstEmitMul32.cs index 6714c5fd..454d44a4 100644 --- a/ARMeilleure/Instructions/InstEmitMul32.cs +++ b/ARMeilleure/Instructions/InstEmitMul32.cs @@ -283,6 +283,26 @@ namespace ARMeilleure.Instructions EmitGenericAluStoreA32(context, op.Rd, false, res); } + public static void Umaal(ArmEmitterContext context) + { + OpCode32AluUmull op = (OpCode32AluUmull)context.CurrOp; + + Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + Operand dHi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)); + Operand dLo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo)); + + Operand res = context.Multiply(n, m); + res = context.Add(res, dHi); + res = context.Add(res, dLo); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + EmitGenericAluStoreA32(context, op.RdHi, false, hi); + EmitGenericAluStoreA32(context, op.RdLo, false, lo); + } + public static void Umlal(ArmEmitterContext context) { EmitMlal(context, false); diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index 57176794..0eeed5bb 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -14,6 +14,20 @@ namespace ARMeilleure.Instructions { static partial class InstEmit32 { + public static void Vabd_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorBinaryOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U); + } + + public static void Vabdl_I(ArmEmitterContext context) + { + OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp; + + EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U); + } + public static void Vabs_S(ArmEmitterContext context) { OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; @@ -107,6 +121,13 @@ namespace ARMeilleure.Instructions } } + public static void Vaddl_I(ArmEmitterContext context) + { + OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp; + + EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U); + } + public static void Vaddw_I(ArmEmitterContext context) { OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp; @@ -263,6 +284,20 @@ namespace ARMeilleure.Instructions } } + public static void Vhadd(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.ShiftRightUI(context.Add(op1, op2), Const(1))); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.ShiftRightSI(context.Add(op1, op2), Const(1))); + } + } + public static void Vmov_S(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse2) diff --git a/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/ARMeilleure/Instructions/InstEmitSimdShift32.cs index f3c002db..c904c0ee 100644 --- a/ARMeilleure/Instructions/InstEmitSimdShift32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdShift32.cs @@ -5,6 +5,8 @@ using System; using System.Diagnostics; using System.Reflection; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper32; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -24,6 +26,13 @@ namespace ARMeilleure.Instructions EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); } + public static void Vqshrn(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + EmitShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx); + } + public static void Vrshr(ArmEmitterContext context) { OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; @@ -105,6 +114,38 @@ namespace ARMeilleure.Instructions } } + public static void Vshll(ArmEmitterContext context) + { + OpCode32SimdShImmLong op = (OpCode32SimdShImmLong)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U); + + if (op.Size == 2) + { + if (op.U) + { + me = context.ZeroExtend32(OperandType.I64, me); + } + else + { + me = context.SignExtend32(OperandType.I64, me); + } + } + + me = context.ShiftLeft(me, Const(op.Shift)); + + res = EmitVectorInsert(context, res, me, index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + public static void Vshr(ArmEmitterContext context) { OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index 0d0c1264..c694bb06 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -536,6 +536,7 @@ namespace ARMeilleure.Instructions Trap, Tst, Ubfx, + Umaal, Umlal, Umull, Usat, @@ -545,8 +546,11 @@ namespace ARMeilleure.Instructions Uxth, // FP & SIMD (AArch32) + Vabd, + Vabdl, Vabs, Vadd, + Vaddl, Vaddw, Vand, Vbic, @@ -567,6 +571,7 @@ namespace ARMeilleure.Instructions Vext, Vfma, Vfms, + Vhadd, Vld1, Vld2, Vld3, @@ -598,11 +603,13 @@ namespace ARMeilleure.Instructions Vpmin, Vqrshrn, Vqrshrun, + Vqshrn, Vrev, Vrint, Vrshr, Vsel, Vshl, + Vshll, Vshr, Vshrn, Vst1, diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 26b00469..bf5fd838 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -21,7 +21,7 @@ namespace ARMeilleure.Translation.PTC { private const string HeaderMagic = "PTChd"; - private const int InternalVersion = 1447; //! To be incremented manually for each change to the ARMeilleure project. + private const int InternalVersion = 1577; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1";