0
0
Fork 0

Implement VMOVL and VORR.I32 AArch32 SIMD instructions (#960)

* Implement VMOVL and VORR.I32 AArch32 SIMD instructions

* Rename <dt> to <size> on test description

* Rename Widen to Long and improve VMOVL implementation a bit
This commit is contained in:
gdkchan 2020-03-10 02:17:30 -03:00 committed by GitHub
parent 08c0e3829b
commit 89ccec197e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 165 additions and 7 deletions

View file

@ -1,11 +1,9 @@
namespace ARMeilleure.Decoders namespace ARMeilleure.Decoders
{ {
class OpCode32SimdImm : OpCode32, IOpCode32SimdImm class OpCode32SimdImm : OpCode32SimdBase, IOpCode32SimdImm
{ {
public int Vd { get; private set; }
public bool Q { get; private set; } public bool Q { get; private set; }
public long Immediate { get; private set; } public long Immediate { get; private set; }
public int Size { get; private set; }
public int Elems => GetBytesCount() >> Size; public int Elems => GetBytesCount() >> Size;
public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
@ -24,7 +22,7 @@
imm |= ((uint)opCode >> 12) & 0x70; imm |= ((uint)opCode >> 12) & 0x70;
imm |= ((uint)opCode >> 17) & 0x80; imm |= ((uint)opCode >> 17) & 0x80;
(Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm, fpBaseSize: 2); (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm);
RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64; RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;

View file

@ -0,0 +1,27 @@
namespace ARMeilleure.Decoders
{
    /// <summary>
    /// Decoder for AArch32 SIMD "long" opcodes: reads the element size, the U bit
    /// and the Vd/Vm register numbers out of the raw 32-bit encoding.
    /// </summary>
    class OpCode32SimdLong : OpCode32SimdBase
    {
        /// <summary>
        /// State of opcode bit 24 (the U bit).
        /// </summary>
        public bool U { get; private set; }

        /// <summary>
        /// Decodes the fields of <paramref name="opCode"/>.
        /// </summary>
        /// <param name="inst">Instruction descriptor forwarded to the base decoder.</param>
        /// <param name="address">Instruction address forwarded to the base decoder.</param>
        /// <param name="opCode">Raw 32-bit instruction encoding.</param>
        public OpCode32SimdLong(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
        {
            // imm3H (bits 21:19) is expected to be one-hot: any value that is not a
            // power of 2 encodes a different instruction, and Size is left untouched.
            int imm3h = (opCode >> 19) & 0x7;

            if (imm3h == 1)
            {
                Size = 0;
            }
            else if (imm3h == 2)
            {
                Size = 1;
            }
            else if (imm3h == 4)
            {
                Size = 2;
            }

            U = (opCode & (1 << 24)) != 0;

            RegisterSize = RegisterSize.Simd64;

            // Register numbers are 5 bits wide: the top bit (D = bit 22, M = bit 5)
            // is stored apart from the low nibble (bits 15:12 and bits 3:0).
            // NOTE(review): nothing here rejects an odd Vd — confirm whether that
            // case needs to be handled for the quadword destination.
            int dBit = (opCode >> 22) & 0x1;
            int mBit = (opCode >> 5) & 0x1;

            Vd = (dBit << 4) | ((opCode >> 12) & 0xf);
            Vm = (mBit << 4) | (opCode & 0xf);
        }
    }
}

View file

@ -2,7 +2,7 @@
{ {
public static class OpCodeSimdHelper public static class OpCodeSimdHelper
{ {
public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm, int fpBaseSize = 0) public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm)
{ {
int modeLow = cMode & 1; int modeLow = cMode & 1;
int modeHigh = cMode >> 1; int modeHigh = cMode >> 1;

View file

@ -158,7 +158,7 @@ namespace ARMeilleure.Decoders
SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, typeof(OpCodeAluBinary)); SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, typeof(OpCodeAluBinary));
SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, typeof(OpCodeMul)); SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, typeof(OpCodeMul));
SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, typeof(OpCodeMul)); SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, typeof(OpCodeMul));
SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smul__, InstEmit.Smulh, typeof(OpCodeMul)); SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh, InstEmit.Smulh, typeof(OpCodeMul));
SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, typeof(OpCodeMemEx)); SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, typeof(OpCodeMemEx));
SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, typeof(OpCodeMemEx)); SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, typeof(OpCodeMemEx));
SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, typeof(OpCodeMemEx)); SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, typeof(OpCodeMemEx));
@ -829,6 +829,9 @@ namespace ARMeilleure.Decoders
SetA32("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q (dt - from cmode). SetA32("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q (dt - from cmode).
SetA32("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q I64. SetA32("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov, InstEmit32.Vmov_I, typeof(OpCode32SimdImm)); // D/Q I64.
SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, typeof(OpCode32SimdS)); SetA32("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, typeof(OpCode32SimdS));
SetA32("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong));
SetA32("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong));
SetA32("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, typeof(OpCode32SimdLong));
SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, typeof(OpCode32SimdCmpZ)); SetA32("111100111x11xx10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, typeof(OpCode32SimdCmpZ));
SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial)); SetA32("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, typeof(OpCode32SimdSpecial));
SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial)); SetA32("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, typeof(OpCode32SimdSpecial));
@ -845,6 +848,7 @@ namespace ARMeilleure.Decoders
SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, typeof(OpCode32SimdRegS)); SetA32("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, typeof(OpCode32SimdRegS));
SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, typeof(OpCode32SimdRegS)); SetA32("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, typeof(OpCode32SimdRegS));
SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr, InstEmit32.Vorr_I, typeof(OpCode32SimdBinary)); SetA32("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr, InstEmit32.Vorr_I, typeof(OpCode32SimdBinary));
SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr, InstEmit32.Vorr_II, typeof(OpCode32SimdImm));
SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, typeof(OpCode32SimdReg)); SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, typeof(OpCode32SimdReg));
SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, typeof(OpCode32SimdReg)); SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, typeof(OpCode32SimdReg));
SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte)); SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte));

View file

@ -2,7 +2,10 @@
using ARMeilleure.IntermediateRepresentation; using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation; using ARMeilleure.Translation;
using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32; using static ARMeilleure.Instructions.InstEmitSimdHelper32;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
namespace ARMeilleure.Instructions namespace ARMeilleure.Instructions
{ {
@ -64,6 +67,42 @@ namespace ARMeilleure.Instructions
} }
} }
/// <summary>
/// Emits code for VORR (immediate): ORs the decoded immediate into the destination
/// register, 64 bits at a time, and writes the result back to the same register.
/// </summary>
/// <param name="context">Emitter context whose current opcode is an <see cref="OpCode32SimdImm"/>.</param>
public static void Vorr_II(ArmEmitterContext context)
{
    OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp;

    // Broadcast the element-sized immediate over the whole 64-bit lane.
    // Any other size value leaves the immediate as decoded.
    long pattern = op.Immediate;

    if (op.Size == 0)
    {
        pattern *= 0x0101010101010101L;
    }
    else if (op.Size == 1)
    {
        pattern *= 0x0001000100010001L;
    }
    else if (op.Size == 2)
    {
        pattern *= 0x0000000100000001L;
    }

    Operand orMask = Const(pattern);
    Operand result = GetVecA32(op.Qd);

    // The Q form updates both 64-bit halves of the quadword register; the D form
    // only updates the half selected by the low bit of Vd.
    int firstHalf = op.Q ? 0 : (op.Vd & 1);
    int lastHalf = op.Q ? 1 : firstHalf;

    for (int half = firstHalf; half <= lastHalf; half++)
    {
        Operand current = EmitVectorExtractZx(context, op.Qd, half, 3);
        Operand combined = context.BitwiseOr(current, orMask);

        result = EmitVectorInsert(context, result, combined, half, 3);
    }

    context.Copy(GetVecA32(op.Qd), result);
}
private static void EmitBifBit(ArmEmitterContext context, bool notRm) private static void EmitBifBit(ArmEmitterContext context, bool notRm)
{ {
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;

View file

@ -139,6 +139,36 @@ namespace ARMeilleure.Instructions
} }
} }
/// <summary>
/// Emits code for VMOVL: every source element is extended (sign-extended when U is
/// clear, zero-extended when U is set) to twice its width and stored in the
/// destination vector, which starts out zeroed.
/// </summary>
/// <param name="context">Emitter context whose current opcode is an <see cref="OpCode32SimdLong"/>.</param>
public static void Vmovl(ArmEmitterContext context)
{
    OpCode32SimdLong op = (OpCode32SimdLong)context.CurrOp;

    int sourceSize = op.Size;
    int destSize = sourceSize + 1;
    int count = op.GetBytesCount() >> sourceSize;
    bool signed = !op.U;

    Operand widened = context.VectorZero();

    for (int i = 0; i < count; i++)
    {
        Operand element = EmitVectorExtract32(context, op.Qm, op.Im + i, sourceSize, signed);

        // Only 32-bit source elements get an explicit extension to 64 bits here;
        // smaller elements are inserted exactly as the extract returned them.
        if (sourceSize == 2)
        {
            element = signed
                ? context.SignExtend32(OperandType.I64, element)
                : context.ZeroExtend32(OperandType.I64, element);
        }

        widened = EmitVectorInsert(context, widened, element, i, destSize);
    }

    context.Copy(GetVecA32(op.Qd), widened);
}
public static void Vtbl(ArmEmitterContext context) public static void Vtbl(ArmEmitterContext context)
{ {
OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp; OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp;

View file

@ -81,7 +81,7 @@ namespace ARMeilleure.Instructions
Sdiv, Sdiv,
Smaddl, Smaddl,
Smsubl, Smsubl,
Smul__, Smulh,
Smull, Smull,
Smulw_, Smulw_,
Ssat, Ssat,
@ -500,6 +500,7 @@ namespace ARMeilleure.Instructions
Smlaw_, Smlaw_,
Smmla, Smmla,
Smmls, Smmls,
Smul__,
Smmul, Smmul,
Stl, Stl,
Stlb, Stlb,
@ -560,6 +561,7 @@ namespace ARMeilleure.Instructions
Vmla, Vmla,
Vmls, Vmls,
Vmov, Vmov,
Vmovl,
Vmovn, Vmovn,
Vmrs, Vmrs,
Vmsr, Vmsr,

View file

@ -56,6 +56,34 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
[Test, Pairwise, Description("VORR.I32 <Vd>, #<imm>")]
public void Vorr_II([Range(0u, 4u)] uint rd,
                    [Random(RndCnt)] ulong z,
                    [Random(RndCnt)] byte imm,
                    [Values(0u, 1u, 2u, 3u)] uint cMode,
                    [Values] bool q)
{
    const uint BaseOpcode = 0xf2800110u; // VORR.I32 D0, #0

    // In the Q form, bit 6 is set and the register index is doubled before encoding.
    uint qBit = q ? (1u << 6) : 0u;
    uint vd = q ? (rd << 1) : rd;

    // The 8-bit immediate is scattered over three opcode fields.
    uint immLow = (uint)(imm & 0xf) << 0;
    uint immMid = (uint)(imm & 0x70) << 12;
    uint immHigh = (uint)(imm & 0x80) << 17;

    uint cModeBits = (cMode & 0x3) << 9;
    uint vdBits = ((vd & 0xf) << 12) | ((vd & 0x10) << 18);

    uint opcode = BaseOpcode | qBit | immLow | immMid | immHigh | cModeBits | vdBits;

    // Both 64-bit halves of V0 start out with the same random value.
    V128 v0 = MakeVectorE0E1(z, z);

    SingleOpcode(opcode, v0: v0);

    CompareAgainstUnicorn();
}
#endif #endif
} }
} }

View file

@ -228,6 +228,36 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(); CompareAgainstUnicorn();
} }
[Test, Pairwise, Description("VMOVL.<size> <Qd>, <Dm>")]
public void Vmovl([Values(0u, 1u, 2u, 3u)] uint vm,
                  [Values(0u, 2u, 4u, 6u)] uint vd,
                  [Values(1u, 2u, 4u)] uint imm3H,
                  [Values] bool u)
{
    // With imm3H = 0 this base encoding is not VMOVL yet; it only
    // becomes VMOVL once a non-zero imm3H is merged in below.
    const uint BaseOpcode = 0xf2800a10u; // VMOV.I16 D0, #0

    uint vmBits = ((vm & 0x10) << 1) | (vm & 0xf);
    uint vdBits = ((vd & 0x10) << 18) | ((vd & 0xf) << 12);
    uint sizeBits = (imm3H & 0x7) << 19;
    uint uBit = u ? (1u << 24) : 0u;

    uint opcode = BaseOpcode | vmBits | vdBits | sizeBits | uBit;

    // Fill V0-V3 with random data, drawing the values in register order.
    var random = TestContext.CurrentContext.Random;

    V128 v0 = new V128(random.NextULong(), random.NextULong());
    V128 v1 = new V128(random.NextULong(), random.NextULong());
    V128 v2 = new V128(random.NextULong(), random.NextULong());
    V128 v3 = new V128(random.NextULong(), random.NextULong());

    SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, v3: v3);

    CompareAgainstUnicorn();
}
[Test, Pairwise, Description("VTRN.<size> <Vd>, <Vm>")] [Test, Pairwise, Description("VTRN.<size> <Vd>, <Vm>")]
public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm, public void Vtrn([Values(0u, 1u, 2u, 3u)] uint vm,
[Values(0u, 1u, 2u, 3u)] uint vd, [Values(0u, 1u, 2u, 3u)] uint vd,