Implement VMOVL and VORR.I32 AArch32 SIMD instructions (#960)
* Implement VMOVL and VORR.I32 AArch32 SIMD instructions * Rename <dt> to <size> on test description * Rename Widen to Long and improve VMOVL implementation a bit
This commit is contained in:
parent
08c0e3829b
commit
89ccec197e
9 changed files with 165 additions and 7 deletions
|
@ -1,11 +1,9 @@
|
||||||
namespace ARMeilleure.Decoders
|
namespace ARMeilleure.Decoders
|
||||||
{
|
{
|
||||||
class OpCode32SimdImm : OpCode32, IOpCode32SimdImm
|
class OpCode32SimdImm : OpCode32SimdBase, IOpCode32SimdImm
|
||||||
{
|
{
|
||||||
public int Vd { get; private set; }
|
|
||||||
public bool Q { get; private set; }
|
public bool Q { get; private set; }
|
||||||
public long Immediate { get; private set; }
|
public long Immediate { get; private set; }
|
||||||
public int Size { get; private set; }
|
|
||||||
public int Elems => GetBytesCount() >> Size;
|
public int Elems => GetBytesCount() >> Size;
|
||||||
|
|
||||||
public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
|
public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
|
||||||
|
@ -24,7 +22,7 @@
|
||||||
imm |= ((uint)opCode >> 12) & 0x70;
|
imm |= ((uint)opCode >> 12) & 0x70;
|
||||||
imm |= ((uint)opCode >> 17) & 0x80;
|
imm |= ((uint)opCode >> 17) & 0x80;
|
||||||
|
|
||||||
(Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm, fpBaseSize: 2);
|
(Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm);
|
||||||
|
|
||||||
RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
|
RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
|
||||||
|
|
||||||
|
|
27
ARMeilleure/Decoders/OpCode32SimdLong.cs
Normal file
27
ARMeilleure/Decoders/OpCode32SimdLong.cs
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
namespace ARMeilleure.Decoders
{
    /// <summary>
    /// Decoded AArch32 SIMD opcode whose element size is taken from the
    /// imm3H field (opcode bits 21:19). Registered in the opcode table for VMOVL.
    /// </summary>
    class OpCode32SimdLong : OpCode32SimdBase
    {
        /// <summary>
        /// State of opcode bit 24. The VMOVL emitter zero-extends when this is
        /// set and sign-extends otherwise.
        /// </summary>
        public bool U { get; private set; }

        /// <summary>
        /// Decodes the size, U flag and the Vd/Vm register numbers from <paramref name="opCode"/>.
        /// </summary>
        /// <param name="inst">Instruction descriptor, forwarded to the base class.</param>
        /// <param name="address">Instruction address, forwarded to the base class.</param>
        /// <param name="opCode">Raw 32-bit instruction encoding.</param>
        public OpCode32SimdLong(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
        {
            int imm3h = (opCode >> 19) & 0x7;

            // The value must be a power of 2, otherwise it is the encoding of another instruction.
            // For any other value Size is deliberately left as the base class initialised it.
            if (imm3h == 1)
            {
                Size = 0;
            }
            else if (imm3h == 2)
            {
                Size = 1;
            }
            else if (imm3h == 4)
            {
                Size = 2;
            }

            U = (opCode & (1 << 24)) != 0;

            RegisterSize = RegisterSize.Simd64;

            // Register numbers are 5 bits wide: a single high bit (bit 22 for Vd,
            // bit 5 for Vm) sits on top of a 4-bit low field.
            int vdHigh = (opCode >> 22) & 0x1;
            int vdLow  = (opCode >> 12) & 0xf;
            int vmHigh = (opCode >> 5) & 0x1;
            int vmLow  = opCode & 0xf;

            Vd = (vdHigh << 4) | vdLow;
            Vm = (vmHigh << 4) | vmLow;
        }
    }
}
|
|
@ -2,7 +2,7 @@
|
||||||
{
|
{
|
||||||
public static class OpCodeSimdHelper
|
public static class OpCodeSimdHelper
|
||||||
{
|
{
|
||||||
public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm, int fpBaseSize = 0)
|
public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm)
|
||||||
{
|
{
|
||||||
int modeLow = cMode & 1;
|
int modeLow = cMode & 1;
|
||||||
int modeHigh = cMode >> 1;
|
int modeHigh = cMode >> 1;
|
||||||
|
|
|
@ -158,7 +158,7 @@ namespace ARMeilleure.Decoders
|
||||||
SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, typeof(OpCodeAluBinary));
|
SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, typeof(OpCodeAluBinary));
|
||||||
SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, typeof(OpCodeMul));
|
SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, typeof(OpCodeMul));
|
||||||
SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, typeof(OpCodeMul));
|
SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, typeof(OpCodeMul));
|
||||||
SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smul__, InstEmit.Smulh, typeof(OpCodeMul));
|
SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh, InstEmit.Smulh, typeof(OpCodeMul));
|
||||||
SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, typeof(OpCodeMemEx));
|
SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, typeof(OpCodeMemEx));
|
||||||
SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, typeof(OpCodeMemEx));
|
SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, typeof(OpCodeMemEx));
|
||||||
SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, typeof(OpCodeMemEx));
|
SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, typeof(OpCodeMemEx));
|
||||||
|
@ -829,6 +829,9 @@ namespace ARMeilleure.Decoders
|
||||||
SetA32("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q (dt - from cmode).
|
SetA32("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q (dt - from cmode).
|
||||||
SetA32("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q I64.
|
SetA32("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q I64.
|
||||||
SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, typeof(OpCode32SimdS));
|
SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, typeof(OpCode32SimdS));
|
||||||
|
SetA32("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong));
|
||||||
|
SetA32("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong));
|
||||||
|
SetA32("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong));
|
||||||
SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, typeof(OpCode32SimdCmpZ));
|
SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, typeof(OpCode32SimdCmpZ));
|
||||||
SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial));
|
SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial));
|
||||||
SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial));
|
SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial));
|
||||||
|
@ -845,6 +848,7 @@ namespace ARMeilleure.Decoders
|
||||||
SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, typeof(OpCode32SimdRegS));
|
SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, typeof(OpCode32SimdRegS));
|
||||||
SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, typeof(OpCode32SimdRegS));
|
SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, typeof(OpCode32SimdRegS));
|
||||||
SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr, InstEmit32.Vorr_I, typeof(OpCode32SimdBinary));
|
SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr, InstEmit32.Vorr_I, typeof(OpCode32SimdBinary));
|
||||||
|
SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr, InstEmit32.Vorr_II, typeof(OpCode32SimdImm));
|
||||||
SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, typeof(OpCode32SimdReg));
|
SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, typeof(OpCode32SimdReg));
|
||||||
SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, typeof(OpCode32SimdReg));
|
SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, typeof(OpCode32SimdReg));
|
||||||
SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte));
|
SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte));
|
||||||
|
|
|
@ -2,7 +2,10 @@
|
||||||
using ARMeilleure.IntermediateRepresentation;
|
using ARMeilleure.IntermediateRepresentation;
|
||||||
using ARMeilleure.Translation;
|
using ARMeilleure.Translation;
|
||||||
|
|
||||||
|
using static ARMeilleure.Instructions.InstEmitHelper;
|
||||||
|
using static ARMeilleure.Instructions.InstEmitSimdHelper;
|
||||||
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
|
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
|
||||||
|
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
|
||||||
|
|
||||||
namespace ARMeilleure.Instructions
|
namespace ARMeilleure.Instructions
|
||||||
{
|
{
|
||||||
|
@ -64,6 +67,42 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Emits VORR (immediate): ORs the decoded immediate, replicated across
/// 64 bits, into the destination vector register.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCode32SimdImm</c>.</param>
public static void Vorr_II(ArmEmitterContext context)
{
    OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;

    // Replicate fields to fill the 64-bits, if size is < 64-bits.
    long replicated = op.Immediate;

    if (op.Size == 0)
    {
        replicated *= 0x0101010101010101L;
    }
    else if (op.Size == 1)
    {
        replicated *= 0x0001000100010001L;
    }
    else if (op.Size == 2)
    {
        replicated *= 0x0000000100000001L;
    }

    Operand mask   = Const(replicated);
    Operand result = GetVecA32(op.Qd);

    // Q form: both size-3 elements (0 and 1) of Qd are updated.
    // D form: only the element selected by the low bit of Vd
    // (presumably the half of Qd that holds the D register).
    int firstElem = op.Q ? 0 : (op.Vd & 1);
    int lastElem  = op.Q ? 1 : firstElem;

    for (int elem = firstElem; elem <= lastElem; elem++)
    {
        Operand current = EmitVectorExtractZx(context, op.Qd, elem, 3);
        Operand merged  = context.BitwiseOr(current, mask);

        result = EmitVectorInsert(context, result, merged, elem, 3);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
|
||||||
|
|
||||||
private static void EmitBifBit(ArmEmitterContext context, bool notRm)
|
private static void EmitBifBit(ArmEmitterContext context, bool notRm)
|
||||||
{
|
{
|
||||||
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
|
||||||
|
|
|
@ -139,6 +139,36 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Emits VMOVL: reads each source element, extends it (zero-extension when
/// the opcode's U flag is set, sign-extension otherwise) and inserts it into
/// the destination at the next larger element size.
/// </summary>
/// <param name="context">Emitter context; its current opcode must be an <c>OpCode32SimdLong</c>.</param>
public static void Vmovl(ArmEmitterContext context)
{
    OpCode32SimdLong op = (OpCode32SimdLong)context.CurrOp;

    Operand result = context.VectorZero();

    int count = op.GetBytesCount() >> op.Size;

    for (int i = 0; i < count; i++)
    {
        // The extract helper is asked for a signed read when U is clear.
        Operand element = EmitVectorExtract32(context, op.Qm, op.Im + i, op.Size, !op.U);

        // Size 2 elements are additionally widened to I64 before the insert.
        if (op.Size == 2)
        {
            element = op.U
                ? context.ZeroExtend32(OperandType.I64, element)
                : context.SignExtend32(OperandType.I64, element);
        }

        result = EmitVectorInsert(context, result, element, i, op.Size + 1);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
|
||||||
|
|
||||||
public static void Vtbl(ArmEmitterContext context)
|
public static void Vtbl(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;
|
OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;
|
||||||
|
|
|
@ -81,7 +81,7 @@ namespace ARMeilleure.Instructions
|
||||||
Sdiv,
|
Sdiv,
|
||||||
Smaddl,
|
Smaddl,
|
||||||
Smsubl,
|
Smsubl,
|
||||||
Smul__,
|
Smulh,
|
||||||
Smull,
|
Smull,
|
||||||
Smulw_,
|
Smulw_,
|
||||||
Ssat,
|
Ssat,
|
||||||
|
@ -500,6 +500,7 @@ namespace ARMeilleure.Instructions
|
||||||
Smlaw_,
|
Smlaw_,
|
||||||
Smmla,
|
Smmla,
|
||||||
Smmls,
|
Smmls,
|
||||||
|
Smul__,
|
||||||
Smmul,
|
Smmul,
|
||||||
Stl,
|
Stl,
|
||||||
Stlb,
|
Stlb,
|
||||||
|
@ -560,6 +561,7 @@ namespace ARMeilleure.Instructions
|
||||||
Vmla,
|
Vmla,
|
||||||
Vmls,
|
Vmls,
|
||||||
Vmov,
|
Vmov,
|
||||||
|
Vmovl,
|
||||||
Vmovn,
|
Vmovn,
|
||||||
Vmrs,
|
Vmrs,
|
||||||
Vmsr,
|
Vmsr,
|
||||||
|
|
|
@ -56,6 +56,34 @@ namespace Ryujinx.Tests.Cpu
|
||||||
|
|
||||||
CompareAgainstUnicorn();
|
CompareAgainstUnicorn();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a VORR (immediate) encoding from the pairwise parameters, runs it with
// v0 seeded from z, and compares the resulting state against Unicorn.
[Test, Pairwise, Description("VORR.I32 <Vd>, #<imm>")]
public void Vorr_II([Range(0u, 4u)] uint rd,
                    [Random(RndCnt)] ulong z,
                    [Random(RndCnt)] byte imm,
                    [Values(0u, 1u, 2u, 3u)] uint cMode,
                    [Values] bool q)
{
    uint opcode = 0xf2800110u; // VORR.I32 D0, #0

    if (q)
    {
        // Set bit 6 for the Q form and double the register number.
        opcode |= 1u << 6;
        rd <<= 1;
    }

    // The 8-bit immediate is scattered over three fields of the encoding.
    uint immLow  = (uint)(imm & 0xf);
    uint immMid  = (uint)(imm & 0x70) << 12;
    uint immHigh = (uint)(imm & 0x80) << 17;

    uint cModeField = (cMode & 0x3) << 9;
    uint rdField    = ((rd & 0xf) << 12) | ((rd & 0x10) << 18);

    opcode |= immLow | immMid | immHigh | cModeField | rdField;

    V128 v0 = MakeVectorE0E1(z, z);

    SingleOpcode(opcode, v0: v0);

    CompareAgainstUnicorn();
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -228,6 +228,36 @@ namespace Ryujinx.Tests.Cpu
|
||||||
CompareAgainstUnicorn();
|
CompareAgainstUnicorn();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a VMOVL encoding from the pairwise parameters, runs it with v0-v3
// filled with random data, and compares the resulting state against Unicorn.
[Test, Pairwise, Description("VMOVL.<size> <Qd>, <Dm>")]
public void Vmovl([Values(0u, 1u, 2u, 3u)] uint vm,
                  [Values(0u, 2u, 4u, 6u)] uint vd,
                  [Values(1u, 2u, 4u)] uint imm3H,
                  [Values] bool u)
{
    // This is not VMOVL because imm3H = 0, but once
    // we shift in the imm3H value it turns into VMOVL.
    uint opcode = 0xf2800a10u; // VMOV.I16 D0, #0

    uint vmField    = ((vm & 0x10) << 1) | (vm & 0xf);
    uint vdField    = ((vd & 0x10) << 18) | ((vd & 0xf) << 12);
    uint imm3HField = (imm3H & 0x7) << 19;
    uint uField     = u ? (1u << 24) : 0u;

    opcode |= vmField | vdField | imm3HField | uField;

    // Two draws per vector, taken in the order v0, v1, v2, v3.
    V128 NextRandomVector()
    {
        return new V128(TestContext.CurrentContext.Random.NextULong(), TestContext.CurrentContext.Random.NextULong());
    }

    V128 v0 = NextRandomVector();
    V128 v1 = NextRandomVector();
    V128 v2 = NextRandomVector();
    V128 v3 = NextRandomVector();

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3);

    CompareAgainstUnicorn();
}
|
||||||
|
|
||||||
[Test, Pairwise, Description("VTRN.<size> <Vd>, <Vm>")]
|
[Test, Pairwise, Description("VTRN.<size> <Vd>, <Vm>")]
|
||||||
public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm,
|
public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm,
|
||||||
[Values(0u, 1u, 2u, 3u)] uint vd,
|
[Values(0u, 1u, 2u, 3u)] uint vd,
|
||||||
|
|
Reference in a new issue