Implement VRINT (vector) Arm32 NEON instructions (#3691)

2024-12-22 22:25:44 +00:00 · 2022-09-11 12:44:27 -03:00 · 2022-09-11 12:44:27 -03:00 · 4d69286a9c
commit 4d69286a9c
parent 1529e6cf0d
4 changed files with 144 additions and 2 deletions
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@ -791,7 +791,7 @@ namespace ARMeilleure.Decoders
            SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb,    InstEmit32.Uxtb,    OpCode32AluUx.Create);
            SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16,  InstEmit32.Uxtb16,  OpCode32AluUx.Create);
            SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth,    InstEmit32.Uxth,    OpCode32AluUx.Create);
-            
+
            // VFP
            SetVfp("<<<<11101x110000xxxx101x11x0xxxx", InstName.Vabs,   InstEmit32.Vabs_S,   OpCode32SimdS.Create,           OpCode32SimdS.CreateT32);
            SetVfp("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd,   InstEmit32.Vadd_S,   OpCode32SimdRegS.Create,        OpCode32SimdRegS.CreateT32);
@ -959,6 +959,10 @@ namespace ARMeilleure.Decoders
            SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps,      InstEmit32.Vrecps,      OpCode32SimdReg.Create);
            SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev,        InstEmit32.Vrev,        OpCode32SimdRev.Create);
            SetA32("1111001x0x<<xxxxxxxx0001xxx0xxxx", InstName.Vrhadd,      InstEmit32.Vrhadd,      OpCode32SimdReg.Create);
+            SetA32("111100111x111010xxxx01010xx0xxxx", InstName.Vrinta,      InstEmit32.Vrinta_V,    OpCode32SimdCmpZ.Create);
+            SetA32("111100111x111010xxxx01101xx0xxxx", InstName.Vrintm,      InstEmit32.Vrintm_V,    OpCode32SimdCmpZ.Create);
+            SetA32("111100111x111010xxxx01000xx0xxxx", InstName.Vrintn,      InstEmit32.Vrintn_V,    OpCode32SimdCmpZ.Create);
+            SetA32("111100111x111010xxxx01111xx0xxxx", InstName.Vrintp,      InstEmit32.Vrintp_V,    OpCode32SimdCmpZ.Create);
            SetA32("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr,       InstEmit32.Vrshr,       OpCode32SimdShImm.Create);
            SetA32("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn,      InstEmit32.Vrshrn,      OpCode32SimdShImmNarrow.Create);
            SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte,     InstEmit32.Vrsqrte,     OpCode32SimdSqrte.Create);
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@ -323,6 +323,60 @@ namespace ARMeilleure.Instructions
            }
        }

+        // VRINTA (vector).
+        // No intrinsic fast path here: every call goes through the managed rounding helper,
+        // requesting MidpointRounding.AwayFromZero.
+        public static void Vrinta_V(ArmEmitterContext context)
+        {
+            EmitVectorUnaryOpF32(context, (m) =>
+            {
+                return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m);
+            });
+        }
+
+        // VRINTM (vector).
+        // Rounds towards minus infinity: ROUNDPS when the host supports it, otherwise a
+        // Math.Floor call through the generic vector unary helper.
+        public static void Vrintm_V(ArmEmitterContext context)
+        {
+            // ROUNDPS was introduced with SSE4.1; SSE2 support alone does not guarantee it,
+            // so the fast path must be gated on SSE4.1.
+            if (Optimizations.UseSse41)
+            {
+                EmitVectorUnaryOpSimd32(context, (m) =>
+                {
+                    return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity)));
+                });
+            }
+            else
+            {
+                EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m));
+            }
+        }
+
+        // VRINTN (vector).
+        // Rounds to nearest with ties to even: ROUNDPS when the host supports it, otherwise
+        // the managed rounding helper with MidpointRounding.ToEven.
+        public static void Vrintn_V(ArmEmitterContext context)
+        {
+            // ROUNDPS was introduced with SSE4.1; SSE2 support alone does not guarantee it,
+            // so the fast path must be gated on SSE4.1.
+            if (Optimizations.UseSse41)
+            {
+                EmitVectorUnaryOpSimd32(context, (m) =>
+                {
+                    return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
+                });
+            }
+            else
+            {
+                EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m));
+            }
+        }
+
+        // VRINTP (vector).
+        // Rounds towards plus infinity: ROUNDPS when the host supports it, otherwise a
+        // Math.Ceiling call through the generic vector unary helper.
+        public static void Vrintp_V(ArmEmitterContext context)
+        {
+            // ROUNDPS was introduced with SSE4.1; SSE2 support alone does not guarantee it,
+            // so the fast path must be gated on SSE4.1.
+            if (Optimizations.UseSse41)
+            {
+                EmitVectorUnaryOpSimd32(context, (m) =>
+                {
+                    return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity)));
+                });
+            }
+            else
+            {
+                EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m));
+            }
+        }
+
        // VRINTZ (floating-point).
        public static void Vrint_Z(ArmEmitterContext context)
        {
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@ -636,6 +636,10 @@ namespace ARMeilleure.Instructions
        Vrev,
        Vrhadd,
        Vrint,
+        Vrinta,
+        Vrintm,
+        Vrintn,
+        Vrintp,
        Vrintx,
        Vrshr,
        Vrshrn,
--- a/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs
@ -13,6 +13,16 @@ namespace Ryujinx.Tests.Cpu
 #if SimdCvt32

 #region "ValueSource (Opcodes)"
+        // Base A32 encodings for the vector VRINTA/VRINTM/VRINTN/VRINTP F32 instructions.
+        // Bit 6 (Q) and all register fields are zero in these values, i.e. they are the
+        // doubleword forms; Vrint_AMNP_V_F32 ORs in the Q bit and register numbers itself.
+        private static uint[] _Vrint_AMNP_V_F32_()
+        {
+            return new uint[]
+            {
+                0xf3ba0500u, // VRINTA.F32 D0, D0
+                0xf3ba0680u, // VRINTM.F32 D0, D0
+                0xf3ba0400u, // VRINTN.F32 D0, D0
+                0xf3ba0780u  // VRINTP.F32 D0, D0
+            };
+        }
 #endregion

 #region "ValueSource (Types)"
@ -64,6 +74,47 @@ namespace Ryujinx.Tests.Cpu
            }
        }

+        // Value source producing 64-bit patterns in which both 32-bit halves hold the same
+        // single-precision bit pattern: fixed boundary values first, then optional zeros,
+        // infinities and NaNs, then RndCnt pairs of generated normal/subnormal values.
+        private static IEnumerable<ulong> _2S_F_()
+        {
+            // Negative boundary values.
+            yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal    (float.MinValue)
+            yield return 0x8080000080800000ul; // -Min Normal
+            yield return 0x807FFFFF807FFFFFul; // -Max Subnormal
+            yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon)
+
+            // Positive boundary values.
+            yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal    (float.MaxValue)
+            yield return 0x0080000000800000ul; // +Min Normal
+            yield return 0x007FFFFF007FFFFFul; // +Max Subnormal
+            yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon)
+
+            if (!NoZeros)
+            {
+                yield return 0x8000000080000000ul; // -Zero
+                yield return 0x0000000000000000ul; // +Zero
+            }
+
+            if (!NoInfs)
+            {
+                yield return 0xFF800000FF800000ul; // -Infinity
+                yield return 0x7F8000007F800000ul; // +Infinity
+            }
+
+            if (!NoNaNs)
+            {
+                yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN)
+                yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones  payload)
+                yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN)
+                yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones  payload)
+            }
+
+            for (int i = 0; i < RndCnt; i++)
+            {
+                // Generator call order is kept: normal first, then subnormal.
+                ulong normal    = GenNormalS();
+                ulong subnormal = GenSubnormalS();
+
+                // Duplicate each generated value into the upper 32 bits.
+                yield return (normal << 32) | normal;
+                yield return (subnormal << 32) | subnormal;
+            }
+        }
+
        private static IEnumerable<ulong> _1D_F_()
        {
            yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal    (double.MinValue)
@ -224,6 +275,35 @@ namespace Ryujinx.Tests.Cpu
            CompareAgainstUnicorn();
        }

+        // Runs each vector VRINT{A,M,N,P}.F32 base opcode with the given registers and input
+        // lanes, optionally with the Q bit set, and compares the result against Unicorn.
+        [Test, Pairwise] [Explicit]
+        public void Vrint_AMNP_V_F32([ValueSource(nameof(_Vrint_AMNP_V_F32_))] uint opcode,
+                                     [Values(0u, 1u, 2u, 3u)] uint rd,
+                                     [Values(0u, 1u, 2u, 3u)] uint rm,
+                                     [ValueSource(nameof(_2S_F_))] ulong d0,
+                                     [ValueSource(nameof(_2S_F_))] ulong d1,
+                                     [ValueSource(nameof(_2S_F_))] ulong d2,
+                                     [ValueSource(nameof(_2S_F_))] ulong d3,
+                                     [Values] bool q)
+        {
+            if (q)
+            {
+                // Set the Q bit (bit 6) and force both register numbers to be even.
+                opcode |= 1u << 6;
+
+                rd &= ~1u;
+                rm &= ~1u;
+            }
+
+            // Low four bits of rd go to bits 15:12, bit 4 of rd goes to bit 22.
+            uint rdLowBits = (rd & 0xf) << 12;
+            uint rdHighBit = (rd & 0x10) << 18;
+
+            // Low four bits of rm go to bits 3:0, bit 4 of rm goes to bit 5.
+            uint rmLowBits = rm & 0xf;
+            uint rmHighBit = (rm & 0x10) << 1;
+
+            opcode |= rdLowBits | rdHighBit;
+            opcode |= rmLowBits | rmHighBit;
+
+            SingleOpcode(opcode, v0: MakeVectorE0E1(d0, d1), v1: MakeVectorE0E1(d2, d3));
+
+            CompareAgainstUnicorn();
+        }
+
        [Test, Pairwise, Description("VRINTX.F<size> <Sd>, <Sm>")]
        public void Vrintx_S([Values(0u, 1u)] uint rd,
                             [Values(0u, 1u)] uint rm,
@ -253,7 +333,7 @@ namespace Ryujinx.Tests.Cpu
            }

            opcode |= ((size & 3) << 8);
-            
+
            int fpscr = (int)rMode << (int)Fpcr.RMode;
            SingleOpcode(opcode, v0: v0, v1: v1, v2: v2, fpscr: fpscr);