From c7387be0d296f54a6ad5678d25e2c0d4910b7da4 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Mon, 17 Sep 2018 06:54:05 +0200 Subject: [PATCH] Fix/Add 1+12 [Saturating] [Rounded] Shift Right Narrow (imm.) Instructions; add 14 Tests. Add 6 Tests for PR#405. Add 2 Tests for PR#412. (#409) * Update AOpCodeTable.cs * Update AInstEmitSimdShift.cs * Update CpuTestSimdShImm.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdHelper.cs * Create CpuTestSimdIns.cs * Update CpuTest.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update CpuTest.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update CpuTest.cs * Update CpuTestSimdReg.cs * Update CpuTestSimd.cs --- ChocolArm64/AOpCodeTable.cs | 12 + .../Instruction/AInstEmitSimdArithmetic.cs | 12 +- .../Instruction/AInstEmitSimdHelper.cs | 46 +- ChocolArm64/Instruction/AInstEmitSimdShift.cs | 243 +++++++--- Ryujinx.Tests/Cpu/CpuTest.cs | 125 +++++- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 420 ++++++++++-------- Ryujinx.Tests/Cpu/CpuTestSimdIns.cs | 74 +++ Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 307 ++++++++++++- Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs | 318 +++++++++++++ 9 files changed, 1254 insertions(+), 303 deletions(-) create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdIns.cs diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index b053334f..fe3dce41 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -364,6 +364,7 @@ namespace ChocolArm64 SetA64("0x00111000100000000110xxxxxxxxxx", AInstEmit.Rev16_V, typeof(AOpCodeSimd)); SetA64("0x1011100x100000000010xxxxxxxxxx", AInstEmit.Rev32_V, typeof(AOpCodeSimd)); SetA64("0x001110<<100000000010xxxxxxxxxx", AInstEmit.Rev64_V, typeof(AOpCodeSimd)); + SetA64("0x00111100>>>xxx100011xxxxxxxxxx", AInstEmit.Rshrn_V, typeof(AOpCodeSimdShImm)); SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", AInstEmit.Rsubhn_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", AInstEmit.Saba_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", AInstEmit.Sabal_V, typeof(AOpCodeSimdReg)); @@ -409,7 +410,14 @@ namespace ChocolArm64 SetA64("01111110101xxxxx101101xxxxxxxxxx", AInstEmit.Sqrdmulh_S, typeof(AOpCodeSimdReg)); SetA64("0x101110011xxxxx101101xxxxxxxxxx", AInstEmit.Sqrdmulh_V, typeof(AOpCodeSimdReg)); SetA64("0x101110101xxxxx101101xxxxxxxxxx", AInstEmit.Sqrdmulh_V, typeof(AOpCodeSimdReg)); + SetA64("0101111100>>>xxx100111xxxxxxxxxx", AInstEmit.Sqrshrn_S, typeof(AOpCodeSimdShImm)); SetA64("0x00111100>>>xxx100111xxxxxxxxxx", AInstEmit.Sqrshrn_V, typeof(AOpCodeSimdShImm)); + SetA64("0111111100>>>xxx100011xxxxxxxxxx", AInstEmit.Sqrshrun_S, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100011xxxxxxxxxx", AInstEmit.Sqrshrun_V, typeof(AOpCodeSimdShImm)); + SetA64("0101111100>>>xxx100101xxxxxxxxxx", AInstEmit.Sqshrn_S, typeof(AOpCodeSimdShImm)); + SetA64("0x00111100>>>xxx100101xxxxxxxxxx", AInstEmit.Sqshrn_V, typeof(AOpCodeSimdShImm)); + SetA64("0111111100>>>xxx100001xxxxxxxxxx", AInstEmit.Sqshrun_S, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100001xxxxxxxxxx", AInstEmit.Sqshrun_V, typeof(AOpCodeSimdShImm)); SetA64("01011110xx1xxxxx001011xxxxxxxxxx", AInstEmit.Sqsub_S, typeof(AOpCodeSimdReg)); SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", AInstEmit.Sqsub_V, typeof(AOpCodeSimdReg)); SetA64("01011110<<100001010010xxxxxxxxxx", AInstEmit.Sqxtn_S, typeof(AOpCodeSimd)); @@ -476,6 +484,10 @@ namespace ChocolArm64 SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Umull_V, typeof(AOpCodeSimdReg)); SetA64("01111110xx1xxxxx000011xxxxxxxxxx", AInstEmit.Uqadd_S, typeof(AOpCodeSimdReg)); SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", AInstEmit.Uqadd_V, typeof(AOpCodeSimdReg)); + SetA64("0111111100>>>xxx100111xxxxxxxxxx", AInstEmit.Uqrshrn_S, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100111xxxxxxxxxx", AInstEmit.Uqrshrn_V, typeof(AOpCodeSimdShImm)); + SetA64("0111111100>>>xxx100101xxxxxxxxxx", AInstEmit.Uqshrn_S, typeof(AOpCodeSimdShImm)); + SetA64("0x10111100>>>xxx100101xxxxxxxxxx", AInstEmit.Uqshrn_V, typeof(AOpCodeSimdShImm)); SetA64("01111110xx1xxxxx001011xxxxxxxxxx", AInstEmit.Uqsub_S, typeof(AOpCodeSimdReg)); SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", AInstEmit.Uqsub_V, typeof(AOpCodeSimdReg)); SetA64("01111110<<100001010010xxxxxxxxxx", AInstEmit.Uqxtn_S, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index b9aedd07..27a86d84 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -1199,22 +1199,22 @@ namespace ChocolArm64.Instruction public static void Sqxtn_S(AILEmitterCtx Context) { - EmitScalarSaturatingNarrowOpSxSx(Context, () => { }); + EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.ScalarSxSx); } public static void Sqxtn_V(AILEmitterCtx Context) { - EmitVectorSaturatingNarrowOpSxSx(Context, () => { }); + EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.VectorSxSx); } public static void Sqxtun_S(AILEmitterCtx Context) { - EmitScalarSaturatingNarrowOpSxZx(Context, () => { }); + EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.ScalarSxZx); } public static void Sqxtun_V(AILEmitterCtx Context) { - EmitVectorSaturatingNarrowOpSxZx(Context, () => { }); + EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.VectorSxZx); } public static void Srhadd_V(AILEmitterCtx Context) @@ -1455,12 +1455,12 @@ namespace ChocolArm64.Instruction public static void Uqxtn_S(AILEmitterCtx Context) { - EmitScalarSaturatingNarrowOpZxZx(Context, () => { }); + EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.ScalarZxZx); } public static void Uqxtn_V(AILEmitterCtx Context) { - EmitVectorSaturatingNarrowOpZxZx(Context, () => { }); + EmitSaturatingNarrowOp(Context, SaturatingNarrowFlags.VectorZxZx); } public static void Urhadd_V(AILEmitterCtx Context) diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index cb884c1a..75a5a0d0 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -1004,56 +1004,14 @@ namespace ChocolArm64.Instruction ScalarSxSx = Scalar | SignedSrc | SignedDst, ScalarSxZx = Scalar | SignedSrc, - ScalarZxSx = Scalar | SignedDst, ScalarZxZx = Scalar, VectorSxSx = SignedSrc | SignedDst, VectorSxZx = SignedSrc, - VectorZxSx = SignedDst, VectorZxZx = 0 } - public static void EmitScalarSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) - { - EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarSxSx); - } - - public static void EmitScalarSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) - { - EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarSxZx); - } - - public static void EmitScalarSaturatingNarrowOpZxSx(AILEmitterCtx Context, Action Emit) - { - EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarZxSx); - } - - public static void EmitScalarSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) - { - EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.ScalarZxZx); - } - - public static void EmitVectorSaturatingNarrowOpSxSx(AILEmitterCtx Context, Action Emit) - { - EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorSxSx); - } - - public static void EmitVectorSaturatingNarrowOpSxZx(AILEmitterCtx Context, Action Emit) - { - EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorSxZx); - } - - public static void EmitVectorSaturatingNarrowOpZxSx(AILEmitterCtx Context, Action Emit) - { - EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorZxSx); - } - - public static void EmitVectorSaturatingNarrowOpZxZx(AILEmitterCtx Context, Action Emit) - { - EmitSaturatingNarrowOp(Context, Emit, SaturatingNarrowFlags.VectorZxZx); - } - - public static void EmitSaturatingNarrowOp(AILEmitterCtx Context, Action Emit, SaturatingNarrowFlags Flags) + public static void EmitSaturatingNarrowOp(AILEmitterCtx Context, SaturatingNarrowFlags Flags) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; @@ -1080,8 +1038,6 @@ namespace ChocolArm64.Instruction { EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); - Emit(); - EmitSatQ(Context, Op.Size, SignedSrc, SignedDst); EmitVectorInsertTmp(Context, Part + Index, Op.Size); diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs index 4dee53b9..127abf1d 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs @@ -10,6 +10,11 @@ namespace ChocolArm64.Instruction { static partial class AInstEmit { + public static void Rshrn_V(AILEmitterCtx Context) + { + EmitVectorShrImmNarrowOpZx(Context, Round: true); + } + public static void Shl_S(AILEmitterCtx Context) { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; @@ -45,9 +50,7 @@ namespace ChocolArm64.Instruction public static void Shrn_V(AILEmitterCtx Context) { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - - EmitVectorShImmNarrowBinaryZx(Context, () => Context.Emit(OpCodes.Shr_Un), GetImmShr(Op)); + EmitVectorShrImmNarrowOpZx(Context, Round: false); } public static void Sli_V(AILEmitterCtx Context) @@ -85,26 +88,44 @@ namespace ChocolArm64.Instruction } } + public static void Sqrshrn_S(AILEmitterCtx Context) + { + EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + public static void Sqrshrn_V(AILEmitterCtx Context) { - AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } - int Shift = GetImmShr(Op); + public static void Sqrshrun_S(AILEmitterCtx Context) + { + EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } - long RoundConst = 1L << (Shift - 1); + public static void Sqrshrun_V(AILEmitterCtx Context) + { + EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } - Action Emit = () => - { - Context.EmitLdc_I8(RoundConst); + public static void Sqshrn_S(AILEmitterCtx Context) + { + EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } - Context.Emit(OpCodes.Add); + public static void Sqshrn_V(AILEmitterCtx Context) + { + EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } - Context.EmitLdc_I4(Shift); + public static void Sqshrun_S(AILEmitterCtx Context) + { + EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } - Context.Emit(OpCodes.Shr); - }; - - EmitVectorSaturatingNarrowOpSxSx(Context, Emit); + public static void Sqshrun_V(AILEmitterCtx Context) + { + EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorSxZx); } public static void Srshr_S(AILEmitterCtx Context) @@ -159,6 +180,26 @@ namespace ChocolArm64.Instruction EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); } + public static void Uqrshrn_S(AILEmitterCtx Context) + { + EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqrshrn_V(AILEmitterCtx Context) + { + EmitRoundShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + + public static void Uqshrn_S(AILEmitterCtx Context) + { + EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + + public static void Uqshrn_V(AILEmitterCtx Context) + { + EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + public static void Urshr_S(AILEmitterCtx Context) { EmitScalarShrImmOpZx(Context, ShrImmFlags.Round); @@ -367,6 +408,138 @@ namespace ChocolArm64.Instruction } } + private static void EmitVectorShrImmNarrowOpZx(AILEmitterCtx Context, bool Round) + { + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + int Shift = GetImmShr(Op); + + long RoundConst = 1L << (Shift - 1); + + int Elems = 8 >> Op.Size; + + int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; + + if (Part != 0) + { + Context.EmitLdvec(Op.Rd); + Context.EmitStvectmp(); + } + + for (int Index = 0; Index < Elems; Index++) + { + EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1); + + if (Round) + { + Context.EmitLdc_I8(RoundConst); + + Context.Emit(OpCodes.Add); + } + + Context.EmitLdc_I4(Shift); + + Context.Emit(OpCodes.Shr_Un); + + EmitVectorInsertTmp(Context, Part + Index, Op.Size); + } + + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + + if (Part == 0) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + + [Flags] + private enum ShrImmSaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + Round = 1 << 3, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + private static void EmitRoundShrImmSaturatingNarrowOp(AILEmitterCtx Context, ShrImmSaturatingNarrowFlags Flags) + { + EmitShrImmSaturatingNarrowOp(Context, ShrImmSaturatingNarrowFlags.Round | Flags); + } + + private static void EmitShrImmSaturatingNarrowOp(AILEmitterCtx Context, ShrImmSaturatingNarrowFlags Flags) + { + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + bool Scalar = (Flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; + bool SignedSrc = (Flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; + bool SignedDst = (Flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; + bool Round = (Flags & ShrImmSaturatingNarrowFlags.Round) != 0; + + int Shift = GetImmShr(Op); + + long RoundConst = 1L << (Shift - 1); + + int Elems = !Scalar ? 8 >> Op.Size : 1; + + int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0; + + if (Scalar) + { + EmitVectorZeroLowerTmp(Context); + } + + if (Part != 0) + { + Context.EmitLdvec(Op.Rd); + Context.EmitStvectmp(); + } + + for (int Index = 0; Index < Elems; Index++) + { + EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, SignedSrc); + + if (Op.Size <= 1 || !Round) + { + if (Round) + { + Context.EmitLdc_I8(RoundConst); + + Context.Emit(OpCodes.Add); + } + + Context.EmitLdc_I4(Shift); + + Context.Emit(SignedSrc ? OpCodes.Shr : OpCodes.Shr_Un); + } + else /* if (Op.Size == 2 && Round) */ + { + EmitShrImm_64(Context, SignedSrc, RoundConst, Shift); // Shift <= 32 + } + + EmitSatQ(Context, Op.Size, SignedSrc, SignedDst); + + EmitVectorInsertTmp(Context, Part + Index, Op.Size); + } + + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + + if (Part == 0) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + // Dst_64 = (Int(Src_64, Signed) + RoundConst) >> Shift; private static void EmitShrImm_64( AILEmitterCtx Context, @@ -374,11 +547,6 @@ namespace ChocolArm64.Instruction long RoundConst, int Shift) { - if (((AOpCodeSimd)Context.CurrOp).Size < 3) - { - throw new InvalidOperationException(); - } - Context.EmitLdc_I8(RoundConst); Context.EmitLdc_I4(Shift); @@ -387,41 +555,6 @@ namespace ChocolArm64.Instruction : nameof(ASoftFallback.UnsignedShrImm_64)); } - private static void EmitVectorShImmNarrowBinarySx(AILEmitterCtx Context, Action Emit, int Imm) - { - EmitVectorShImmNarrowBinaryOp(Context, Emit, Imm, true); - } - - private static void EmitVectorShImmNarrowBinaryZx(AILEmitterCtx Context, Action Emit, int Imm) - { - EmitVectorShImmNarrowBinaryOp(Context, Emit, Imm, false); - } - - private static void EmitVectorShImmNarrowBinaryOp(AILEmitterCtx Context, Action Emit, int Imm, bool Signed) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int Elems = 8 >> Op.Size; - - int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; - - for (int Index = 0; Index < Elems; Index++) - { - EmitVectorExtract(Context, Op.Rn, Index, Op.Size + 1, Signed); - - Context.EmitLdc_I4(Imm); - - Emit(); - - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); - } - - if (Part == 0) - { - EmitVectorZeroUpper(Context, Op.Rd); - } - } - private static void EmitVectorShImmWidenBinarySx(AILEmitterCtx Context, Action Emit, int Imm) { EmitVectorShImmWidenBinaryOp(Context, Emit, Imm, true); diff --git a/Ryujinx.Tests/Cpu/CpuTest.cs b/Ryujinx.Tests/Cpu/CpuTest.cs index 1f7151ff..c69c7a02 100644 --- a/Ryujinx.Tests/Cpu/CpuTest.cs +++ b/Ryujinx.Tests/Cpu/CpuTest.cs @@ -93,6 +93,7 @@ namespace Ryujinx.Tests.Cpu Vector128 V0 = default(Vector128), Vector128 V1 = default(Vector128), Vector128 V2 = default(Vector128), + Vector128 V3 = default(Vector128), bool Overflow = false, bool Carry = false, bool Zero = false, bool Negative = false, int Fpcr = 0x0, int Fpsr = 0x0) { @@ -106,6 +107,7 @@ namespace Ryujinx.Tests.Cpu Thread.ThreadState.V0 = V0; Thread.ThreadState.V1 = V1; Thread.ThreadState.V2 = V2; + Thread.ThreadState.V3 = V3; Thread.ThreadState.Overflow = Overflow; Thread.ThreadState.Carry = Carry; @@ -127,6 +129,7 @@ namespace Ryujinx.Tests.Cpu UnicornEmu.Q[0] = V0; UnicornEmu.Q[1] = V1; UnicornEmu.Q[2] = V2; + UnicornEmu.Q[3] = V3; UnicornEmu.OverflowFlag = Overflow; UnicornEmu.CarryFlag = Carry; @@ -162,13 +165,14 @@ namespace Ryujinx.Tests.Cpu Vector128 V0 = default(Vector128), Vector128 V1 = default(Vector128), Vector128 V2 = default(Vector128), + Vector128 V3 = default(Vector128), bool Overflow = false, bool Carry = false, bool Zero = false, bool Negative = false, int Fpcr = 0x0, int Fpsr = 0x0) { this.Opcode(Opcode); this.Opcode(0xD4200000); // BRK #0 this.Opcode(0xD65F03C0); // RET - SetThreadState(X0, X1, X2, X3, X31, V0, V1, V2, Overflow, Carry, Zero, Negative, Fpcr, Fpsr); + SetThreadState(X0, X1, X2, X3, X31, V0, V1, V2, V3, Overflow, Carry, Zero, Negative, Fpcr, Fpsr); ExecuteOpcodes(); return GetThreadState(); @@ -195,13 +199,30 @@ namespace Ryujinx.Tests.Cpu QC = 1 << 27 } - protected void CompareAgainstUnicorn(FPSR FpsrMask = FPSR.None) + protected enum FpSkips { None, IfNaN_S, IfNaN_D }; + + protected enum FpUseTolerance { None, OneUlps_S, OneUlps_D }; + + protected void CompareAgainstUnicorn( + FPSR FpsrMask = FPSR.None, + FpSkips FpSkips = FpSkips.None, + FpUseTolerance FpUseTolerance = FpUseTolerance.None) { if (!UnicornAvailable) { return; } + if (FpSkips == FpSkips.IfNaN_S && float.IsNaN(VectorExtractSingle(UnicornEmu.Q[0], (byte)0))) + { + Assert.Ignore("NaN test."); + } + + if (FpSkips == FpSkips.IfNaN_D && double.IsNaN(VectorExtractDouble(UnicornEmu.Q[0], (byte)0))) + { + Assert.Ignore("NaN test."); + } + Assert.That(Thread.ThreadState.X0, Is.EqualTo(UnicornEmu.X[0])); Assert.That(Thread.ThreadState.X1, Is.EqualTo(UnicornEmu.X[1])); Assert.That(Thread.ThreadState.X2, Is.EqualTo(UnicornEmu.X[2])); @@ -236,7 +257,51 @@ namespace Ryujinx.Tests.Cpu Assert.That(Thread.ThreadState.X31, Is.EqualTo(UnicornEmu.SP)); - Assert.That(Thread.ThreadState.V0, Is.EqualTo(UnicornEmu.Q[0])); + if (FpUseTolerance == FpUseTolerance.None) + { + Assert.That(Thread.ThreadState.V0, Is.EqualTo(UnicornEmu.Q[0])); + } + else + { + if (!Is.EqualTo(UnicornEmu.Q[0]).ApplyTo(Thread.ThreadState.V0).IsSuccess) + { + if (FpUseTolerance == FpUseTolerance.OneUlps_S) + { + if (float.IsNormal (VectorExtractSingle(UnicornEmu.Q[0], (byte)0)) || + float.IsSubnormal(VectorExtractSingle(UnicornEmu.Q[0], (byte)0))) + { + Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)0), + Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)0)).Within(1).Ulps); + Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)1), + Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)1)).Within(1).Ulps); + Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)2), + Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)2)).Within(1).Ulps); + Assert.That (VectorExtractSingle(Thread.ThreadState.V0, (byte)3), + Is.EqualTo(VectorExtractSingle(UnicornEmu.Q[0], (byte)3)).Within(1).Ulps); + } + else + { + Assert.That(Thread.ThreadState.V0, Is.EqualTo(UnicornEmu.Q[0])); + } + } + + if (FpUseTolerance == FpUseTolerance.OneUlps_D) + { + if (double.IsNormal (VectorExtractDouble(UnicornEmu.Q[0], (byte)0)) || + double.IsSubnormal(VectorExtractDouble(UnicornEmu.Q[0], (byte)0))) + { + Assert.That (VectorExtractDouble(Thread.ThreadState.V0, (byte)0), + Is.EqualTo(VectorExtractDouble(UnicornEmu.Q[0], (byte)0)).Within(1).Ulps); + Assert.That (VectorExtractDouble(Thread.ThreadState.V0, (byte)1), + Is.EqualTo(VectorExtractDouble(UnicornEmu.Q[0], (byte)1)).Within(1).Ulps); + } + else + { + Assert.That(Thread.ThreadState.V0, Is.EqualTo(UnicornEmu.Q[0])); + } + } + } + } Assert.That(Thread.ThreadState.V1, Is.EqualTo(UnicornEmu.Q[1])); Assert.That(Thread.ThreadState.V2, Is.EqualTo(UnicornEmu.Q[2])); Assert.That(Thread.ThreadState.V3, Is.EqualTo(UnicornEmu.Q[3])); @@ -310,6 +375,18 @@ namespace Ryujinx.Tests.Cpu return Sse.StaticCast(Sse2.SetVector128(BitConverter.DoubleToInt64Bits(E1), 0)); } + protected static float VectorExtractSingle(Vector128 Vector, byte Index) + { + if (!Sse41.IsSupported) + { + throw new PlatformNotSupportedException(); + } + + int Value = Sse41.Extract(Sse.StaticCast(Vector), Index); + + return BitConverter.Int32BitsToSingle(Value); + } + protected static double VectorExtractDouble(Vector128 Vector, byte Index) { if (!Sse41.IsSupported) @@ -371,5 +448,47 @@ namespace Ryujinx.Tests.Cpu return Sse41.Extract(Sse.StaticCast(Vector), (byte)1); } + + protected static uint GenNormal_S() + { + uint Rnd; + + do Rnd = TestContext.CurrentContext.Random.NextUInt(); + while ((Rnd & 0x7F800000u) == 0u || + (Rnd & 0x7F800000u) == 0x7F800000u); + + return Rnd; + } + + protected static uint GenSubNormal_S() + { + uint Rnd; + + do Rnd = TestContext.CurrentContext.Random.NextUInt(); + while ((Rnd & 0x007FFFFFu) == 0u); + + return Rnd & 0x807FFFFFu; + } + + protected static ulong GenNormal_D() + { + ulong Rnd; + + do Rnd = TestContext.CurrentContext.Random.NextULong(); + while ((Rnd & 0x7FF0000000000000ul) == 0ul || + (Rnd & 0x7FF0000000000000ul) == 0x7FF0000000000000ul); + + return Rnd; + } + + protected static ulong GenSubNormal_D() + { + ulong Rnd; + + do Rnd = TestContext.CurrentContext.Random.NextULong(); + while ((Rnd & 0x000FFFFFFFFFFFFFul) == 0ul); + + return Rnd & 0x800FFFFFFFFFFFFFul; + } } } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index ec0cd104..b423b4de 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -4,6 +4,7 @@ using ChocolArm64.State; using NUnit.Framework; +using System.Collections.Generic; using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu @@ -13,7 +14,7 @@ namespace Ryujinx.Tests.Cpu { #if Simd -#region "ValueSource" +#region "ValueSource (Types)" private static ulong[] _1B1H1S1D_() { return new ulong[] { 0x0000000000000000ul, 0x000000000000007Ful, @@ -78,78 +79,183 @@ namespace Ryujinx.Tests.Cpu 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; } - private static ulong[] _1S_F_() + private static IEnumerable _1S_F_() { - return new ulong[] + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max SubNormal + yield return 0x0000000080000001ul; // -Min SubNormal + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0000000000800000ul; // +Min Normal + yield return 0x00000000007FFFFFul; // +Max SubNormal + yield return 0x0000000000000001ul; // +Min SubNormal + + if (!NoZeros) { - 0x00000000FFFFFFFFul, // -QNaN (all ones payload) - 0x00000000FFBFFFFFul, // -SNaN (all ones payload) - 0x00000000FF800000ul, // -INF - 0x00000000FF7FFFFFul, // -Max Normal, float.MinValue - 0x0000000080800000ul, // -Min Normal - 0x00000000807FFFFFul, // -Max SubNormal - 0x0000000080000001ul, // -Min SubNormal - 0x0000000080000000ul, // -0 - 0x0000000000000000ul, // +0 - 0x0000000000000001ul, // +Min SubNormal - 0x00000000007FFFFFul, // +Max SubNormal - 0x0000000000800000ul, // +Min Normal - 0x000000007F7FFFFFul, // +Max Normal, float.MaxValue - 0x000000007F800000ul, // +INF - 0x000000007FBFFFFFul, // +SNaN (all ones payload) - 0x000000007FFFFFFFul // +QNaN (all ones payload) + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0x00000000FFFFFFFFul; // -QNaN (all ones payload) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FFFFFFFul; // +QNaN (all ones payload) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + } + + for (int Cnt = 1; Cnt <= RndCnt; Cnt++) + { + ulong Grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong Rnd1 = GenNormal_S(); + ulong Rnd2 = GenSubNormal_S(); + + yield return (Grbg << 32) | Rnd1; + yield return (Grbg << 32) | Rnd2; + } + } + + private static IEnumerable _2S_F_() + { + yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x8080000080800000ul; // -Min Normal + yield return 0x807FFFFF807FFFFFul; // -Max SubNormal + yield return 0x8000000180000001ul; // -Min SubNormal + yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0080000000800000ul; // +Min Normal + yield return 0x007FFFFF007FFFFFul; // +Max SubNormal + yield return 0x0000000100000001ul; // +Min SubNormal + + if (!NoZeros) + { + yield return 0x8000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFF800000FF800000ul; // -Infinity + yield return 0x7F8000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFFFFFFFFFFFFFFFul; // -QNaN (all ones payload) + yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) + yield return 0x7FFFFFFF7FFFFFFFul; // +QNaN (all ones payload) + yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) + } + + for (int Cnt = 1; Cnt <= RndCnt; Cnt++) + { + ulong Rnd1 = GenNormal_S(); + ulong Rnd2 = GenSubNormal_S(); + + yield return (Rnd1 << 32) | Rnd1; + yield return (Rnd2 << 32) | Rnd2; + } + } + + private static IEnumerable _1D_F_() + { + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0x8010000000000000ul; // -Min Normal + yield return 0x800FFFFFFFFFFFFFul; // -Max SubNormal + yield return 0x8000000000000001ul; // -Min SubNormal + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x0010000000000000ul; // +Min Normal + yield return 0x000FFFFFFFFFFFFFul; // +Max SubNormal + yield return 0x0000000000000001ul; // +Min SubNormal + + if (!NoZeros) + { + yield return 0x8000000000000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFFF0000000000000ul; // -Infinity + yield return 0x7FF0000000000000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFFFFFFFFFFFFFFFul; // -QNaN (all ones payload) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FFFFFFFFFFFFFFFul; // +QNaN (all ones payload) + yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + } + + for (int Cnt = 1; Cnt <= RndCnt; Cnt++) + { + ulong Rnd1 = GenNormal_D(); + ulong Rnd2 = GenSubNormal_D(); + + yield return Rnd1; + yield return Rnd2; + } + } +#endregion + +#region "ValueSource (Opcodes)" + private static uint[] _F_Cvt_NZ_SU_S_S_() + { + return new uint[] + { + 0x5E21A820u, // FCVTNS S0, S1 + 0x7E21A820u, // FCVTNU S0, S1 + 0x5EA1B820u, // FCVTZS S0, S1 + 0x7EA1B820u // FCVTZU S0, S1 }; } - private static ulong[] _2S_F_() + private static uint[] _F_Cvt_NZ_SU_S_D_() { - return new ulong[] + return new uint[] { - 0xFFFFFFFFFFFFFFFFul, // -QNaN (all ones payload) - 0xFFBFFFFFFFBFFFFFul, // -SNaN (all ones payload) - 0xFF800000FF800000ul, // -INF - 0xFF7FFFFFFF7FFFFFul, // -Max Normal, float.MinValue - 0x8080000080800000ul, // -Min Normal - 0x807FFFFF807FFFFFul, // -Max SubNormal - 0x8000000180000001ul, // -Min SubNormal - 0x8000000080000000ul, // -0 - 0x0000000000000000ul, // +0 - 0x0000000100000001ul, // +Min SubNormal - 0x007FFFFF007FFFFFul, // +Max SubNormal - 0x0080000000800000ul, // +Min Normal - 0x7F7FFFFF7F7FFFFFul, // +Max Normal, float.MaxValue - 0x7F8000007F800000ul, // +INF - 0x7FBFFFFF7FBFFFFFul, // +SNaN (all ones payload) - 0x7FFFFFFF7FFFFFFFul // +QNaN (all ones payload) + 0x5E61A820u, // FCVTNS D0, D1 + 0x7E61A820u, // FCVTNU D0, D1 + 0x5EE1B820u, // FCVTZS D0, D1 + 0x7EE1B820u // FCVTZU D0, D1 }; } - private static ulong[] _1D_F_() + private static uint[] _F_Cvt_NZ_SU_V_2S_4S_() { - return new ulong[] + return new uint[] { - 0xFFFFFFFFFFFFFFFFul, // -QNaN (all ones payload) - 0xFFF7FFFFFFFFFFFFul, // -SNaN (all ones payload) - 0xFFF0000000000000ul, // -INF - 0xFFEFFFFFFFFFFFFFul, // -Max Normal, double.MinValue - 0x8010000000000000ul, // -Min Normal - 0x800FFFFFFFFFFFFFul, // -Max SubNormal - 0x8000000000000001ul, // -Min SubNormal - 0x8000000000000000ul, // -0 - 0x0000000000000000ul, // +0 - 0x0000000000000001ul, // +Min SubNormal - 0x000FFFFFFFFFFFFFul, // +Max SubNormal - 0x0010000000000000ul, // +Min Normal - 0x7FEFFFFFFFFFFFFFul, // +Max Normal, double.MaxValue - 0x7FF0000000000000ul, // +INF - 0x7FF7FFFFFFFFFFFFul, // +SNaN (all ones payload) - 0x7FFFFFFFFFFFFFFFul // +QNaN (all ones payload) + 0x0E21A800u, // FCVTNS V0.2S, V0.2S + 0x2E21A800u, // FCVTNU V0.2S, V0.2S + 0x0EA1B800u, // FCVTZS V0.2S, V0.2S + 0x2EA1B800u // FCVTZU V0.2S, V0.2S + }; + } + + private static uint[] _F_Cvt_NZ_SU_V_2D_() + { + return new uint[] + { + 0x4E61A800u, // FCVTNS V0.2D, V0.2D + 0x6E61A800u, // FCVTNU V0.2D, V0.2D + 0x4EE1B800u, // FCVTZS V0.2D, V0.2D + 0x6EE1B800u // FCVTZU V0.2D, V0.2D }; } #endregion private const int RndCnt = 2; + private static readonly bool NoZeros = false; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; + [Test, Pairwise, Description("ABS , ")] public void Abs_S_D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -645,176 +751,104 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise, Description("FCVTNS , ")] - public void Fcvtns_S_S([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [ValueSource("_1S_F_")] [Random(RndCnt)] ulong Z, - [ValueSource("_1S_F_")] [Random(RndCnt)] ulong A) + [Test, Pairwise, Description("FCVT
, ")] + public void Fcvt_S_SD([ValueSource("_1S_F_")] ulong A) { + //const int DNFlagBit = 25; // Default NaN mode control bit. //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - uint Opcode = 0x5E21A800; // FCVTNS S0, S0 - Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + uint Opcode = 0x1E22C020; // FCVT D0, S1 - //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE1(Z); + Vector128 V1 = MakeVectorE0(A); + //int Fpcr = 1 << DNFlagBit; // Any operation involving one or more NaNs returns the Default NaN. + //Fpcr |= 1 << FZFlagBit; // Flush-to-zero mode enabled. + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); + + CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IOC*/); + } + + [Test, Pairwise, Description("FCVT , ")] + public void Fcvt_S_DS([ValueSource("_1D_F_")] ulong A) + { + uint Opcode = 0x1E624020; // FCVT S0, D1 + + ulong Z = TestContext.CurrentContext.Random.NextULong(); Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void F_Cvt_NZ_SU_S_S([ValueSource("_F_Cvt_NZ_SU_S_S_")] uint Opcodes, + [ValueSource("_1S_F_")] ulong A) + { + //const int FZFlagBit = 24; // Flush-to-zero mode control bit. + + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1/*, Fpcr: Fpcr*/); CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IXC | FPSR.IOC*/); } - [Test, Pairwise, Description("FCVTNS , ")] - public void Fcvtns_S_D([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [ValueSource("_1D_F_")] [Random(RndCnt)] ulong Z, - [ValueSource("_1D_F_")] [Random(RndCnt)] ulong A) + [Test, Pairwise] + public void F_Cvt_NZ_SU_S_D([ValueSource("_F_Cvt_NZ_SU_S_D_")] uint Opcodes, + [ValueSource("_1D_F_")] ulong A) { - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - - uint Opcode = 0x5E61A800; // FCVTNS D0, D0 - Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); - - //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. - - Vector128 V0 = MakeVectorE0E1(Z, Z); + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE1(Z); Vector128 V1 = MakeVectorE0(A); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IXC | FPSR.IOC*/); + CompareAgainstUnicorn(); } - [Test, Pairwise, Description("FCVTNS ., .")] - public void Fcvtns_V_2S_4S([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [ValueSource("_2S_F_")] [Random(RndCnt)] ulong Z, - [ValueSource("_2S_F_")] [Random(RndCnt)] ulong A, - [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + [Test, Pairwise] + public void F_Cvt_NZ_SU_V_2S_4S([ValueSource("_F_Cvt_NZ_SU_V_2S_4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_2S_F_")] ulong Z, + [ValueSource("_2S_F_")] ulong A, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> { - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - - uint Opcode = 0x0E21A800; // FCVTNS V0.2S, V0.2S - Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); - Opcode |= ((Q & 1) << 30); - - //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= ((Q & 1) << 30); Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0E1(A, A * Q); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IXC | FPSR.IOC*/); + CompareAgainstUnicorn(); } - [Test, Pairwise, Description("FCVTNS ., .")] - public void Fcvtns_V_2D([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [ValueSource("_1D_F_")] [Random(RndCnt)] ulong Z, - [ValueSource("_1D_F_")] [Random(RndCnt)] ulong A) + [Test, Pairwise] + public void F_Cvt_NZ_SU_V_2D([ValueSource("_F_Cvt_NZ_SU_V_2D_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_F_")] ulong Z, + [ValueSource("_1D_F_")] ulong A) { - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - - uint Opcode = 0x4E61A800; // FCVTNS V0.2D, V0.2D - Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); - - //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0E1(A, A); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IXC | FPSR.IOC*/); - } - - [Test, Pairwise, Description("FCVTNU , ")] - public void Fcvtnu_S_S([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [ValueSource("_1S_F_")] [Random(RndCnt)] ulong Z, - [ValueSource("_1S_F_")] [Random(RndCnt)] ulong A) - { - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - - uint Opcode = 0x7E21A800; // FCVTNU S0, S0 - Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); - - //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. - - Vector128 V0 = MakeVectorE0E1(Z, Z); - Vector128 V1 = MakeVectorE0(A); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); - - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IXC | FPSR.IOC*/); - } - - [Test, Pairwise, Description("FCVTNU , ")] - public void Fcvtnu_S_D([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [ValueSource("_1D_F_")] [Random(RndCnt)] ulong Z, - [ValueSource("_1D_F_")] [Random(RndCnt)] ulong A) - { - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - - uint Opcode = 0x7E61A800; // FCVTNU D0, D0 - Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); - - //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. - - Vector128 V0 = MakeVectorE0E1(Z, Z); - Vector128 V1 = MakeVectorE0(A); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); - - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IXC | FPSR.IOC*/); - } - - [Test, Pairwise, Description("FCVTNU ., .")] - public void Fcvtnu_V_2S_4S([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [ValueSource("_2S_F_")] [Random(RndCnt)] ulong Z, - [ValueSource("_2S_F_")] [Random(RndCnt)] ulong A, - [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> - { - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - - uint Opcode = 0x2E21A800; // FCVTNU V0.2S, V0.2S - Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); - Opcode |= ((Q & 1) << 30); - - //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. - - Vector128 V0 = MakeVectorE0E1(Z, Z); - Vector128 V1 = MakeVectorE0E1(A, A * Q); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); - - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IXC | FPSR.IOC*/); - } - - [Test, Pairwise, Description("FCVTNU ., .")] - public void Fcvtnu_V_2D([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [ValueSource("_1D_F_")] [Random(RndCnt)] ulong Z, - [ValueSource("_1D_F_")] [Random(RndCnt)] ulong A) - { - //const int FZFlagBit = 24; // Flush-to-zero mode control bit. - - uint Opcode = 0x6E61A800; // FCVTNU V0.2D, V0.2D - Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); - - //int Fpcr = 1 << FZFlagBit; // Flush-to-zero mode enabled. - - Vector128 V0 = MakeVectorE0E1(Z, Z); - Vector128 V1 = MakeVectorE0E1(A, A); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1/*, Fpcr: Fpcr*/); - - CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IXC | FPSR.IOC*/); + CompareAgainstUnicorn(); } [Test, Pairwise, Description("NEG , ")] diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs new file mode 100644 index 00000000..387cdf5d --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs @@ -0,0 +1,74 @@ +#define SimdIns + +using ChocolArm64.State; + +using NUnit.Framework; + +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdIns")] // Tested: second half of 2018. + public sealed class CpuTestSimdIns : CpuTest + { +#if SimdIns + +#region "ValueSource" + private static uint[] _W_() + { + return new uint[] { 0x00000000u, 0x0000007Fu, + 0x00000080u, 0x000000FFu, + 0x00007FFFu, 0x00008000u, + 0x0000FFFFu, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu }; + } + + private static ulong[] _X_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } +#endregion + + private const int RndCnt = 2; + + [Test, Pairwise, Description("DUP ., ")] + public void Dup_Gp_W([Values(0u)] uint Rd, + [Values(1u, 31u)] uint Rn, + [ValueSource("_W_")] [Random(RndCnt)] uint Wn, + [Values(0, 1, 2)] int Size, // Q0: <8B, 4H, 2S> + [Values(0b0u, 0b1u)] uint Q) // Q1: <16B, 8H, 4S> + { + uint Imm5 = (1U << Size) & 0x1F; + + uint Opcode = 0x0E000C00; // RESERVED + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (Imm5 << 16); + Opcode |= ((Q & 1) << 30); + + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE0E1(Z, Z); + + AThreadState ThreadState = SingleOpcode(Opcode, X1: Wn, V0: V0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("DUP ., ")] + public void Dup_Gp_X([Values(0u)] uint Rd, + [Values(1u, 31u)] uint Rn, + [ValueSource("_X_")] [Random(RndCnt)] ulong Xn) + { + uint Opcode = 0x4E080C00; // DUP V0.2D, X0 + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE0E1(Z, Z); + + AThreadState ThreadState = SingleOpcode(Opcode, X1: Xn, V0: V0); + + CompareAgainstUnicorn(); + } +#endif + } +} diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index b7150db3..ae409a6d 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -4,6 +4,7 @@ using ChocolArm64.State; using NUnit.Framework; +using System.Collections.Generic; using System.Runtime.Intrinsics; namespace Ryujinx.Tests.Cpu @@ -13,7 +14,7 @@ namespace Ryujinx.Tests.Cpu { #if SimdReg -#region "ValueSource" +#region "ValueSource (Types)" private static ulong[] _1B1H1S1D_() { return new ulong[] { 0x0000000000000000ul, 0x000000000000007Ful, @@ -76,10 +77,188 @@ namespace Ryujinx.Tests.Cpu 0x8000000080000000ul, 0x7FFFFFFFFFFFFFFFul, 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; } + + private static IEnumerable _1S_F_() + { + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max SubNormal + yield return 0x0000000080000001ul; // -Min SubNormal + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0000000000800000ul; // +Min Normal + yield return 0x00000000007FFFFFul; // +Max SubNormal + yield return 0x0000000000000001ul; // +Min SubNormal + + if (!NoZeros) + { + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0x00000000FFFFFFFFul; // -QNaN (all ones payload) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FFFFFFFul; // +QNaN (all ones payload) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + } + + for (int Cnt = 1; Cnt <= RndCnt; Cnt++) + { + ulong Grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong Rnd1 = GenNormal_S(); + ulong Rnd2 = GenSubNormal_S(); + + yield return (Grbg << 32) | Rnd1; + yield return (Grbg << 32) | Rnd2; + } + } + + private static IEnumerable _2S_F_() + { + yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x8080000080800000ul; // -Min Normal + yield return 0x807FFFFF807FFFFFul; // -Max SubNormal + yield return 0x8000000180000001ul; // -Min SubNormal + yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0080000000800000ul; // +Min Normal + yield return 0x007FFFFF007FFFFFul; // +Max SubNormal + yield return 0x0000000100000001ul; // +Min SubNormal + + if (!NoZeros) + { + yield return 0x8000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFF800000FF800000ul; // -Infinity + yield return 0x7F8000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFFFFFFFFFFFFFFFul; // -QNaN (all ones payload) + yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) + yield return 0x7FFFFFFF7FFFFFFFul; // +QNaN (all ones payload) + yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) + } + + for (int Cnt = 1; Cnt <= RndCnt; Cnt++) + { + ulong Rnd1 = GenNormal_S(); + ulong Rnd2 = GenSubNormal_S(); + + yield return (Rnd1 << 32) | Rnd1; + yield return (Rnd2 << 32) | Rnd2; + } + } + + private static IEnumerable _1D_F_() + { + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0x8010000000000000ul; // -Min Normal + yield return 0x800FFFFFFFFFFFFFul; // -Max SubNormal + yield return 0x8000000000000001ul; // -Min SubNormal + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x0010000000000000ul; // +Min Normal + yield return 0x000FFFFFFFFFFFFFul; // +Max SubNormal + yield return 0x0000000000000001ul; // +Min SubNormal + + if (!NoZeros) + { + yield return 0x8000000000000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFFF0000000000000ul; // -Infinity + yield return 0x7FF0000000000000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFFFFFFFFFFFFFFFul; // -QNaN (all ones payload) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FFFFFFFFFFFFFFFul; // +QNaN (all ones payload) + yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + } + + for (int Cnt = 1; Cnt <= RndCnt; Cnt++) + { + ulong Rnd1 = GenNormal_D(); + ulong Rnd2 = GenSubNormal_D(); + + yield return Rnd1; + yield return Rnd2; + } + } +#endregion + +#region "ValueSource (Opcodes)" + private static uint[] _F_Max_Min_Nm_S_S_() + { + return new uint[] + { + 0x1E224820u, // FMAX S0, S1, S2 + 0x1E226820u, // FMAXNM S0, S1, S2 + 0x1E225820u, // FMIN S0, S1, S2 + 0x1E227820u // FMINNM S0, S1, S2 + }; + } + + private static uint[] _F_Max_Min_Nm_S_D_() + { + return new uint[] + { + 0x1E624820u, // FMAX D0, D1, D2 + 0x1E626820u, // FMAXNM D0, D1, D2 + 0x1E625820u, // FMIN D0, D1, D2 + 0x1E627820u // FMINNM D0, D1, D2 + }; + } + + private static uint[] _F_Max_Min_Nm_P_V_2S_4S_() + { + return new uint[] + { + 0x0E20F400u, // FMAX V0.2S, V0.2S, V0.2S + 0x0E20C400u, // FMAXNM V0.2S, V0.2S, V0.2S + 0x2E20F400u, // FMAXP V0.2S, V0.2S, V0.2S + 0x0EA0F400u, // FMIN V0.2S, V0.2S, V0.2S + 0x0EA0C400u, // FMINNM V0.2S, V0.2S, V0.2S + 0x2EA0F400u // FMINP V0.2S, V0.2S, V0.2S + }; + } + + private static uint[] _F_Max_Min_Nm_P_V_2D_() + { + return new uint[] + { + 0x4E60F400u, // FMAX V0.2D, V0.2D, V0.2D + 0x4E60C400u, // FMAXNM V0.2D, V0.2D, V0.2D + 0x6E60F400u, // FMAXP V0.2D, V0.2D, V0.2D + 0x4EE0F400u, // FMIN V0.2D, V0.2D, V0.2D + 0x4EE0C400u, // FMINNM V0.2D, V0.2D, V0.2D + 0x6EE0F400u // FMINP V0.2D, V0.2D, V0.2D + }; + } #endregion private const int RndCnt = 2; + private static readonly bool NoZeros = false; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; + [Test, Pairwise, Description("ADD , , ")] public void Add_S_D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -856,6 +1035,132 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + [Test, Pairwise, Description("FMADD , , , ")] + public void Fmadd_S_S([ValueSource("_1S_F_")] ulong A, + [ValueSource("_1S_F_")] ulong B, + [ValueSource("_1S_F_")] ulong C) + { + //const int DNFlagBit = 25; // Default NaN mode control bit. + //const int FZFlagBit = 24; // Flush-to-zero mode control bit. + + uint Opcode = 0x1F020C20; // FMADD S0, S1, S2, S3 + + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + Vector128 V3 = MakeVectorE0(C); + + //int Fpcr = 1 << DNFlagBit; // Any operation involving one or more NaNs returns the Default NaN. + //Fpcr |= 1 << FZFlagBit; // Flush-to-zero mode enabled. + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, V3: V3/*, Fpcr: Fpcr*/); + + CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IOC, */FpSkips: FpSkips.IfNaN_S/*, FpUseTolerance: FpUseTolerance.OneUlps_S*/); + } + + [Test, Pairwise, Description("FMADD
, , , ")] + public void Fmadd_S_D([ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B, + [ValueSource("_1D_F_")] ulong C) + { + uint Opcode = 0x1F420C20; // FMADD D0, D1, D2, D3 + + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE1(Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + Vector128 V3 = MakeVectorE0(C); + + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2, V3: V3); + + CompareAgainstUnicorn(FpSkips: FpSkips.IfNaN_D/*, FpUseTolerance: FpUseTolerance.OneUlps_D*/); + } + + [Test, Pairwise] + public void F_Max_Min_Nm_S_S([ValueSource("_F_Max_Min_Nm_S_S_")] uint Opcodes, + [ValueSource("_1S_F_")] ulong A, + [ValueSource("_1S_F_")] ulong B) + { + //const int DNFlagBit = 25; // Default NaN mode control bit. + //const int FZFlagBit = 24; // Flush-to-zero mode control bit. + + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + + //int Fpcr = 1 << DNFlagBit; // Any operation involving one or more NaNs returns the Default NaN. + //Fpcr |= 1 << FZFlagBit; // Flush-to-zero mode enabled. + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2/*, Fpcr: Fpcr*/); + + CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IOC*/); + } + + [Test, Pairwise] + public void F_Max_Min_Nm_S_D([ValueSource("_F_Max_Min_Nm_S_D_")] uint Opcodes, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B) + { + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE1(Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0(B); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void F_Max_Min_Nm_P_V_2S_4S([ValueSource("_F_Max_Min_Nm_P_V_2S_4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_2S_F_")] ulong Z, + [ValueSource("_2S_F_")] ulong A, + [ValueSource("_2S_F_")] ulong B, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + //const int DNFlagBit = 25; // Default NaN mode control bit. + //const int FZFlagBit = 24; // Flush-to-zero mode control bit. + + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + Vector128 V2 = MakeVectorE0E1(B, B * Q); + + //int Fpcr = 1 << DNFlagBit; // Any operation involving one or more NaNs returns the Default NaN. + //Fpcr |= 1 << FZFlagBit; // Flush-to-zero mode enabled. + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2/*, Fpcr: Fpcr*/); + + CompareAgainstUnicorn(/*FpsrMask: FPSR.IDC | FPSR.IOC*/); + } + + [Test, Pairwise] + public void F_Max_Min_Nm_P_V_2D([ValueSource("_F_Max_Min_Nm_P_V_2D_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_F_")] ulong Z, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B) + { + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); + + CompareAgainstUnicorn(); + } + [Test, Pairwise, Description("ORN ., ., .")] public void Orn_V_8B([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs index 77285222..edc50d4d 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdShImm.cs @@ -20,6 +20,18 @@ namespace Ryujinx.Tests.Cpu 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; } + private static ulong[] _1H_() + { + return new ulong[] { 0x0000000000000000ul, 0x0000000000007FFFul, + 0x0000000000008000ul, 0x000000000000FFFFul }; + } + + private static ulong[] _1S_() + { + return new ulong[] { 0x0000000000000000ul, 0x000000007FFFFFFFul, + 0x0000000080000000ul, 0x00000000FFFFFFFFul }; + } + private static ulong[] _2S_() { return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul, @@ -114,6 +126,111 @@ namespace Ryujinx.Tests.Cpu 0x6F401400u // USRA V0.2D, V0.2D, #64 }; } + + private static uint[] _ShrImmNarrow_V_8H8B_8H16B_() + { + return new uint[] + { + 0x0F088C00u, // RSHRN V0.8B, V0.8H, #8 + 0x0F088400u // SHRN V0.8B, V0.8H, #8 + }; + } + + private static uint[] _ShrImmNarrow_V_4S4H_4S8H_() + { + return new uint[] + { + 0x0F108C00u, // RSHRN V0.4H, V0.4S, #16 + 0x0F108400u // SHRN V0.4H, V0.4S, #16 + }; + } + + private static uint[] _ShrImmNarrow_V_2D2S_2D4S_() + { + return new uint[] + { + 0x0F208C00u, // RSHRN V0.2S, V0.2D, #32 + 0x0F208400u // SHRN V0.2S, V0.2D, #32 + }; + } + + private static uint[] _ShrImmSaturatingNarrow_S_HB_() + { + return new uint[] + { + 0x5F089C00u, // SQRSHRN B0, H0, #8 + 0x7F089C00u, // UQRSHRN B0, H0, #8 + 0x7F088C00u, // SQRSHRUN B0, H0, #8 + 0x5F089400u, // SQSHRN B0, H0, #8 + 0x7F089400u, // UQSHRN B0, H0, #8 + 0x7F088400u // SQSHRUN B0, H0, #8 + }; + } + + private static uint[] _ShrImmSaturatingNarrow_S_SH_() + { + return new uint[] + { + 0x5F109C00u, // SQRSHRN H0, S0, #16 + 0x7F109C00u, // UQRSHRN H0, S0, #16 + 0x7F108C00u, // SQRSHRUN H0, S0, #16 + 0x5F109400u, // SQSHRN H0, S0, #16 + 0x7F109400u, // UQSHRN H0, S0, #16 + 0x7F108400u // SQSHRUN H0, S0, #16 + }; + } + + private static uint[] _ShrImmSaturatingNarrow_S_DS_() + { + return new uint[] + { + 0x5F209C00u, // SQRSHRN S0, D0, #32 + 0x7F209C00u, // UQRSHRN S0, D0, #32 + 0x7F208C00u, // SQRSHRUN S0, D0, #32 + 0x5F209400u, // SQSHRN S0, D0, #32 + 0x7F209400u, // UQSHRN S0, D0, #32 + 0x7F208400u // SQSHRUN S0, D0, #32 + }; + } + + private static uint[] _ShrImmSaturatingNarrow_V_8H8B_8H16B_() + { + return new uint[] + { + 0x0F089C00u, // SQRSHRN V0.8B, V0.8H, #8 + 0x2F089C00u, // UQRSHRN V0.8B, V0.8H, #8 + 0x2F088C00u, // SQRSHRUN V0.8B, V0.8H, #8 + 0x0F089400u, // SQSHRN V0.8B, V0.8H, #8 + 0x2F089400u, // UQSHRN V0.8B, V0.8H, #8 + 0x2F088400u // SQSHRUN V0.8B, V0.8H, #8 + }; + } + + private static uint[] _ShrImmSaturatingNarrow_V_4S4H_4S8H_() + { + return new uint[] + { + 0x0F109C00u, // SQRSHRN V0.4H, V0.4S, #16 + 0x2F109C00u, // UQRSHRN V0.4H, V0.4S, #16 + 0x2F108C00u, // SQRSHRUN V0.4H, V0.4S, #16 + 0x0F109400u, // SQSHRN V0.4H, V0.4S, #16 + 0x2F109400u, // UQSHRN V0.4H, V0.4S, #16 + 0x2F108400u // SQSHRUN V0.4H, V0.4S, #16 + }; + } + + private static uint[] _ShrImmSaturatingNarrow_V_2D2S_2D4S_() + { + return new uint[] + { + 0x0F209C00u, // SQRSHRN V0.2S, V0.2D, #32 + 0x2F209C00u, // UQRSHRN V0.2S, V0.2D, #32 + 0x2F208C00u, // SQRSHRUN V0.2S, V0.2D, #32 + 0x0F209400u, // SQSHRN V0.2S, V0.2D, #32 + 0x2F209400u, // UQSHRN V0.2S, V0.2D, #32 + 0x2F208400u // SQSHRUN V0.2S, V0.2D, #32 + }; + } #endregion private const int RndCnt = 2; @@ -339,6 +456,207 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + + [Test, Pairwise] + public void ShrImmNarrow_V_8H8B_8H16B([ValueSource("_ShrImmNarrow_V_8H8B_8H16B_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong A, + [Range(1u, 8u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <8H8B, 8H16B> + { + uint ImmHB = (16 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void ShrImmNarrow_V_4S4H_4S8H([ValueSource("_ShrImmNarrow_V_4S4H_4S8H_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong A, + [Range(1u, 16u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <4S4H, 4S8H> + { + uint ImmHB = (32 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void ShrImmNarrow_V_2D2S_2D4S([ValueSource("_ShrImmNarrow_V_2D2S_2D4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [Range(1u, 32u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <2D2S, 2D4S> + { + uint ImmHB = (64 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] + public void ShrImmSaturatingNarrow_S_HB([ValueSource("_ShrImmSaturatingNarrow_S_HB_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1H_")] [Random(RndCnt)] ulong A, + [Range(1u, 8u)] uint Shift) + { + uint ImmHB = (16 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(FpsrMask: FPSR.QC); + } + + [Test, Pairwise] + public void ShrImmSaturatingNarrow_S_SH([ValueSource("_ShrImmSaturatingNarrow_S_SH_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1S_")] [Random(RndCnt)] ulong A, + [Range(1u, 16u)] uint Shift) + { + uint ImmHB = (32 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(FpsrMask: FPSR.QC); + } + + [Test, Pairwise] + public void ShrImmSaturatingNarrow_S_DS([ValueSource("_ShrImmSaturatingNarrow_S_DS_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [Range(1u, 32u)] uint Shift) + { + uint ImmHB = (64 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(FpsrMask: FPSR.QC); + } + + [Test, Pairwise] + public void ShrImmSaturatingNarrow_V_8H8B_8H16B([ValueSource("_ShrImmSaturatingNarrow_V_8H8B_8H16B_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong A, + [Range(1u, 8u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <8H8B, 8H16B> + { + uint ImmHB = (16 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(FpsrMask: FPSR.QC); + } + + [Test, Pairwise] + public void ShrImmSaturatingNarrow_V_4S4H_4S8H([ValueSource("_ShrImmSaturatingNarrow_V_4S4H_4S8H_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong A, + [Range(1u, 16u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <4S4H, 4S8H> + { + uint ImmHB = (32 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(FpsrMask: FPSR.QC); + } + + [Test, Pairwise] + public void ShrImmSaturatingNarrow_V_2D2S_2D4S([ValueSource("_ShrImmSaturatingNarrow_V_2D2S_2D4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [Range(1u, 32u)] uint Shift, + [Values(0b0u, 0b1u)] uint Q) // <2D2S, 2D4S> + { + uint ImmHB = (64 - Shift) & 0x7F; + + Opcodes |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (ImmHB << 16); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); + + CompareAgainstUnicorn(FpsrMask: FPSR.QC); + } #endif } }