mirror of
https://github.com/GreemDev/Ryujinx.git
synced 2025-01-18 16:21:59 +00:00
Implement VRINT (vector) Arm32 NEON instructions (#3691)
This commit is contained in:
parent
1529e6cf0d
commit
4d69286a9c
4 changed files with 144 additions and 2 deletions
|
@ -791,7 +791,7 @@ namespace ARMeilleure.Decoders
|
||||||
SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCode32AluUx.Create);
|
SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCode32AluUx.Create);
|
||||||
SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16, InstEmit32.Uxtb16, OpCode32AluUx.Create);
|
SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16, InstEmit32.Uxtb16, OpCode32AluUx.Create);
|
||||||
SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, OpCode32AluUx.Create);
|
SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, OpCode32AluUx.Create);
|
||||||
|
|
||||||
// VFP
|
// VFP
|
||||||
SetVfp("<<<<11101x110000xxxx101x11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
|
SetVfp("<<<<11101x110000xxxx101x11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32);
|
||||||
SetVfp("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
|
SetVfp("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32);
|
||||||
|
@ -959,6 +959,10 @@ namespace ARMeilleure.Decoders
|
||||||
SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, OpCode32SimdReg.Create);
|
SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, OpCode32SimdReg.Create);
|
||||||
SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev, InstEmit32.Vrev, OpCode32SimdRev.Create);
|
SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev, InstEmit32.Vrev, OpCode32SimdRev.Create);
|
||||||
SetA32("1111001x0x<<xxxxxxxx0001xxx0xxxx", InstName.Vrhadd, InstEmit32.Vrhadd, OpCode32SimdReg.Create);
|
SetA32("1111001x0x<<xxxxxxxx0001xxx0xxxx", InstName.Vrhadd, InstEmit32.Vrhadd, OpCode32SimdReg.Create);
|
||||||
|
SetA32("111100111x111010xxxx01010xx0xxxx", InstName.Vrinta, InstEmit32.Vrinta_V, OpCode32SimdCmpZ.Create);
|
||||||
|
SetA32("111100111x111010xxxx01101xx0xxxx", InstName.Vrintm, InstEmit32.Vrintm_V, OpCode32SimdCmpZ.Create);
|
||||||
|
SetA32("111100111x111010xxxx01000xx0xxxx", InstName.Vrintn, InstEmit32.Vrintn_V, OpCode32SimdCmpZ.Create);
|
||||||
|
SetA32("111100111x111010xxxx01111xx0xxxx", InstName.Vrintp, InstEmit32.Vrintp_V, OpCode32SimdCmpZ.Create);
|
||||||
SetA32("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, OpCode32SimdShImm.Create);
|
SetA32("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, OpCode32SimdShImm.Create);
|
||||||
SetA32("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn, InstEmit32.Vrshrn, OpCode32SimdShImmNarrow.Create);
|
SetA32("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn, InstEmit32.Vrshrn, OpCode32SimdShImmNarrow.Create);
|
||||||
SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, OpCode32SimdSqrte.Create);
|
SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, OpCode32SimdSqrte.Create);
|
||||||
|
|
|
@ -323,6 +323,60 @@ namespace ARMeilleure.Instructions
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VRINTA (vector).
|
||||||
|
public static void Vrinta_V(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m));
|
||||||
|
}
|
||||||
|
|
||||||
|
// VRINTM (vector).
|
||||||
|
public static void Vrintm_V(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
if (Optimizations.UseSse2)
|
||||||
|
{
|
||||||
|
EmitVectorUnaryOpSimd32(context, (m) =>
|
||||||
|
{
|
||||||
|
return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// VRINTN (vector).
|
||||||
|
public static void Vrintn_V(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
if (Optimizations.UseSse2)
|
||||||
|
{
|
||||||
|
EmitVectorUnaryOpSimd32(context, (m) =>
|
||||||
|
{
|
||||||
|
return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// VRINTP (vector).
|
||||||
|
public static void Vrintp_V(ArmEmitterContext context)
|
||||||
|
{
|
||||||
|
if (Optimizations.UseSse2)
|
||||||
|
{
|
||||||
|
EmitVectorUnaryOpSimd32(context, (m) =>
|
||||||
|
{
|
||||||
|
return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
|
||||||
|
});
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// VRINTZ (floating-point).
|
// VRINTZ (floating-point).
|
||||||
public static void Vrint_Z(ArmEmitterContext context)
|
public static void Vrint_Z(ArmEmitterContext context)
|
||||||
{
|
{
|
||||||
|
|
|
@ -636,6 +636,10 @@ namespace ARMeilleure.Instructions
|
||||||
Vrev,
|
Vrev,
|
||||||
Vrhadd,
|
Vrhadd,
|
||||||
Vrint,
|
Vrint,
|
||||||
|
Vrinta,
|
||||||
|
Vrintm,
|
||||||
|
Vrintn,
|
||||||
|
Vrintp,
|
||||||
Vrintx,
|
Vrintx,
|
||||||
Vrshr,
|
Vrshr,
|
||||||
Vrshrn,
|
Vrshrn,
|
||||||
|
|
|
@ -13,6 +13,16 @@ namespace Ryujinx.Tests.Cpu
|
||||||
#if SimdCvt32
|
#if SimdCvt32
|
||||||
|
|
||||||
#region "ValueSource (Opcodes)"
|
#region "ValueSource (Opcodes)"
|
||||||
|
private static uint[] _Vrint_AMNP_V_F32_()
|
||||||
|
{
|
||||||
|
return new uint[]
|
||||||
|
{
|
||||||
|
0xf3ba0500u, // VRINTA.F32 Q0, Q0
|
||||||
|
0xf3ba0680u, // VRINTM.F32 Q0, Q0
|
||||||
|
0xf3ba0400u, // VRINTN.F32 Q0, Q0
|
||||||
|
0xf3ba0780u // VRINTP.F32 Q0, Q0
|
||||||
|
};
|
||||||
|
}
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
#region "ValueSource (Types)"
|
#region "ValueSource (Types)"
|
||||||
|
@ -64,6 +74,47 @@ namespace Ryujinx.Tests.Cpu
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static IEnumerable<ulong> _2S_F_()
|
||||||
|
{
|
||||||
|
yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue)
|
||||||
|
yield return 0x8080000080800000ul; // -Min Normal
|
||||||
|
yield return 0x807FFFFF807FFFFFul; // -Max Subnormal
|
||||||
|
yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon)
|
||||||
|
yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue)
|
||||||
|
yield return 0x0080000000800000ul; // +Min Normal
|
||||||
|
yield return 0x007FFFFF007FFFFFul; // +Max Subnormal
|
||||||
|
yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon)
|
||||||
|
|
||||||
|
if (!NoZeros)
|
||||||
|
{
|
||||||
|
yield return 0x8000000080000000ul; // -Zero
|
||||||
|
yield return 0x0000000000000000ul; // +Zero
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!NoInfs)
|
||||||
|
{
|
||||||
|
yield return 0xFF800000FF800000ul; // -Infinity
|
||||||
|
yield return 0x7F8000007F800000ul; // +Infinity
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!NoNaNs)
|
||||||
|
{
|
||||||
|
yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN)
|
||||||
|
yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload)
|
||||||
|
yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN)
|
||||||
|
yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload)
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int cnt = 1; cnt <= RndCnt; cnt++)
|
||||||
|
{
|
||||||
|
ulong rnd1 = GenNormalS();
|
||||||
|
ulong rnd2 = GenSubnormalS();
|
||||||
|
|
||||||
|
yield return (rnd1 << 32) | rnd1;
|
||||||
|
yield return (rnd2 << 32) | rnd2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static IEnumerable<ulong> _1D_F_()
|
private static IEnumerable<ulong> _1D_F_()
|
||||||
{
|
{
|
||||||
yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue)
|
yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue)
|
||||||
|
@ -224,6 +275,35 @@ namespace Ryujinx.Tests.Cpu
|
||||||
CompareAgainstUnicorn();
|
CompareAgainstUnicorn();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Test, Pairwise] [Explicit]
|
||||||
|
public void Vrint_AMNP_V_F32([ValueSource(nameof(_Vrint_AMNP_V_F32_))] uint opcode,
|
||||||
|
[Values(0u, 1u, 2u, 3u)] uint rd,
|
||||||
|
[Values(0u, 1u, 2u, 3u)] uint rm,
|
||||||
|
[ValueSource(nameof(_2S_F_))] ulong d0,
|
||||||
|
[ValueSource(nameof(_2S_F_))] ulong d1,
|
||||||
|
[ValueSource(nameof(_2S_F_))] ulong d2,
|
||||||
|
[ValueSource(nameof(_2S_F_))] ulong d3,
|
||||||
|
[Values] bool q)
|
||||||
|
{
|
||||||
|
if (q)
|
||||||
|
{
|
||||||
|
opcode |= 1 << 6;
|
||||||
|
|
||||||
|
rd >>= 1; rd <<= 1;
|
||||||
|
rm >>= 1; rm <<= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
|
||||||
|
opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
|
||||||
|
|
||||||
|
V128 v0 = MakeVectorE0E1(d0, d1);
|
||||||
|
V128 v1 = MakeVectorE0E1(d2, d3);
|
||||||
|
|
||||||
|
SingleOpcode(opcode, v0: v0, v1: v1);
|
||||||
|
|
||||||
|
CompareAgainstUnicorn();
|
||||||
|
}
|
||||||
|
|
||||||
[Test, Pairwise, Description("VRINTX.F<size> <Sd>, <Sm>")]
|
[Test, Pairwise, Description("VRINTX.F<size> <Sd>, <Sm>")]
|
||||||
public void Vrintx_S([Values(0u, 1u)] uint rd,
|
public void Vrintx_S([Values(0u, 1u)] uint rd,
|
||||||
[Values(0u, 1u)] uint rm,
|
[Values(0u, 1u)] uint rm,
|
||||||
|
@ -253,7 +333,7 @@ namespace Ryujinx.Tests.Cpu
|
||||||
}
|
}
|
||||||
|
|
||||||
opcode |= ((size & 3) << 8);
|
opcode |= ((size & 3) << 8);
|
||||||
|
|
||||||
int fpscr = (int)rMode << (int)Fpcr.RMode;
|
int fpscr = (int)rMode << (int)Fpcr.RMode;
|
||||||
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, fpscr: fpscr);
|
SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, fpscr: fpscr);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue