diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs index 8cf5c2c5..fa9666eb 100644 --- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs @@ -2206,6 +2206,11 @@ namespace ChocolArm64.Instructions EmitAddLongPairwise(context, signed: true, accumulate: false); } + public static void Saddlv_V(ILEmitterCtx context) + { + EmitVectorLongAcrossVectorOpSx(context, () => context.Emit(OpCodes.Add)); + } + public static void Saddw_V(ILEmitterCtx context) { if (Optimizations.UseSse41) @@ -3041,21 +3046,7 @@ namespace ChocolArm64.Instructions public static void Uaddlv_V(ILEmitterCtx context) { - OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; - - int bytes = op.GetBitsCount() >> 3; - int elems = bytes >> op.Size; - - EmitVectorExtractZx(context, op.Rn, 0, op.Size); - - for (int index = 1; index < elems; index++) - { - EmitVectorExtractZx(context, op.Rn, index, op.Size); - - context.Emit(OpCodes.Add); - } - - EmitScalarSet(context, op.Rd, op.Size + 1); + EmitVectorLongAcrossVectorOpZx(context, () => context.Emit(OpCodes.Add)); } public static void Uaddw_V(ILEmitterCtx context) diff --git a/ChocolArm64/Instructions/InstEmitSimdHelper.cs b/ChocolArm64/Instructions/InstEmitSimdHelper.cs index 573b8040..c8c8df74 100644 --- a/ChocolArm64/Instructions/InstEmitSimdHelper.cs +++ b/ChocolArm64/Instructions/InstEmitSimdHelper.cs @@ -823,15 +823,29 @@ namespace ChocolArm64.Instructions public static void EmitVectorAcrossVectorOpSx(ILEmitterCtx context, Action emit) { - EmitVectorAcrossVectorOp(context, emit, true); + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false); } public static void EmitVectorAcrossVectorOpZx(ILEmitterCtx context, Action emit) { - EmitVectorAcrossVectorOp(context, emit, false); + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false); } - public static void EmitVectorAcrossVectorOp(ILEmitterCtx context, Action emit, bool signed) + public static void EmitVectorLongAcrossVectorOpSx(ILEmitterCtx context, Action emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true); + } + + public static void EmitVectorLongAcrossVectorOpZx(ILEmitterCtx context, Action emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true); + } + + public static void EmitVectorAcrossVectorOp( + ILEmitterCtx context, + Action emit, + bool signed, + bool isLong) { OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; @@ -847,7 +861,7 @@ namespace ChocolArm64.Instructions emit(); } - EmitScalarSet(context, op.Rd, op.Size); + EmitScalarSet(context, op.Rd, isLong ? op.Size + 1 : op.Size); } public static void EmitVectorPairwiseOpF(ILEmitterCtx context, Action emit) diff --git a/ChocolArm64/OpCodeTable.cs b/ChocolArm64/OpCodeTable.cs index 819881ed..94272d51 100644 --- a/ChocolArm64/OpCodeTable.cs +++ b/ChocolArm64/OpCodeTable.cs @@ -439,6 +439,8 @@ namespace ChocolArm64 SetA64("0x001110<<100000011010xxxxxxxxxx", InstEmit.Sadalp_V, typeof(OpCodeSimd64)); SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstEmit.Saddl_V, typeof(OpCodeSimdReg64)); SetA64("0x001110<<100000001010xxxxxxxxxx", InstEmit.Saddlp_V, typeof(OpCodeSimd64)); + SetA64("000011100x110000001110xxxxxxxxxx", InstEmit.Saddlv_V, typeof(OpCodeSimd64)); + SetA64("01001110<<110000001110xxxxxxxxxx", InstEmit.Saddlv_V, typeof(OpCodeSimd64)); SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstEmit.Saddw_V, typeof(OpCodeSimdReg64)); SetA64("x00111100x100010000000xxxxxxxxxx", InstEmit.Scvtf_Gp, typeof(OpCodeSimdCvt64)); SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstEmit.Scvtf_Gp_Fixed, typeof(OpCodeSimdCvt64)); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index a62845fe..b446d953 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -117,6 +117,12 @@ namespace Ryujinx.Tests.Cpu 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; } + private static ulong[] _4H_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0xFFFFFFFFFFFFFFFFul }; + } + private static ulong[] _4H2S1D_() { return new ulong[] { 0x0000000000000000ul, 0x7FFF7FFF7FFF7FFFul, @@ -155,73 +161,16 @@ namespace Ryujinx.Tests.Cpu 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; } - private static IEnumerable _GenLeadingSigns8B_() + private static uint[] _W_() { - for (int cnt = 0; cnt <= 7; cnt++) - { - ulong rnd1 = GenLeadingSignsMinus8(cnt); - ulong rnd2 = GenLeadingSignsPlus8(cnt); - - yield return (rnd1 << 56) | (rnd1 << 48) | (rnd1 << 40) | (rnd1 << 32) | - (rnd1 << 24) | (rnd1 << 16) | (rnd1 << 08) | rnd1; - yield return (rnd2 << 56) | (rnd2 << 48) | (rnd2 << 40) | (rnd2 << 32) | - (rnd2 << 24) | (rnd2 << 16) | (rnd2 << 08) | rnd2; - } + return new uint[] { 0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu }; } - private static IEnumerable _GenLeadingSigns4H_() + private static ulong[] _X_() { - for (int cnt = 0; cnt <= 15; cnt++) - { - ulong rnd1 = GenLeadingSignsMinus16(cnt); - ulong rnd2 = GenLeadingSignsPlus16(cnt); - - yield return (rnd1 << 48) | (rnd1 << 32) | (rnd1 << 16) | rnd1; - yield return (rnd2 << 48) | (rnd2 << 32) | (rnd2 << 16) | rnd2; - } - } - - private static IEnumerable _GenLeadingSigns2S_() - { - for (int cnt = 0; cnt <= 31; cnt++) - { - ulong rnd1 = GenLeadingSignsMinus32(cnt); - ulong rnd2 = GenLeadingSignsPlus32(cnt); - - yield return (rnd1 << 32) | rnd1; - yield return (rnd2 << 32) | rnd2; - } - } - - private static IEnumerable _GenLeadingZeros8B_() - { - for (int cnt = 0; cnt <= 8; cnt++) - { - ulong rnd = GenLeadingZeros8(cnt); - - yield return (rnd << 56) | (rnd << 48) | (rnd << 40) | (rnd << 32) | - (rnd << 24) | (rnd << 16) | (rnd << 08) | rnd; - } - } - - private static IEnumerable _GenLeadingZeros4H_() - { - for (int cnt = 0; cnt <= 16; cnt++) - { - ulong rnd = GenLeadingZeros16(cnt); - - yield return (rnd << 48) | (rnd << 32) | (rnd << 16) | rnd; - } - } - - private static IEnumerable _GenLeadingZeros2S_() - { - for (int cnt = 0; cnt <= 32; cnt++) - { - ulong rnd = GenLeadingZeros32(cnt); - - yield return (rnd << 32) | rnd; - } + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; } private static IEnumerable _1H_F_() @@ -619,16 +568,82 @@ namespace Ryujinx.Tests.Cpu } } - private static uint[] _W_() + private static IEnumerable _GenLeadingSigns8B_() { - return new uint[] { 0x00000000u, 0x7FFFFFFFu, - 0x80000000u, 0xFFFFFFFFu }; + for (int cnt = 0; cnt <= 7; cnt++) + { + ulong rnd1 = GenLeadingSignsMinus8(cnt); + ulong rnd2 = GenLeadingSignsPlus8(cnt); + + yield return (rnd1 << 56) | (rnd1 << 48) | (rnd1 << 40) | (rnd1 << 32) | + (rnd1 << 24) | (rnd1 << 16) | (rnd1 << 08) | rnd1; + yield return (rnd2 << 56) | (rnd2 << 48) | (rnd2 << 40) | (rnd2 << 32) | + (rnd2 << 24) | (rnd2 << 16) | (rnd2 << 08) | rnd2; + } } - private static ulong[] _X_() + private static IEnumerable _GenLeadingSigns4H_() { - return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, - 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + for (int cnt = 0; cnt <= 15; cnt++) + { + ulong rnd1 = GenLeadingSignsMinus16(cnt); + ulong rnd2 = GenLeadingSignsPlus16(cnt); + + yield return (rnd1 << 48) | (rnd1 << 32) | (rnd1 << 16) | rnd1; + yield return (rnd2 << 48) | (rnd2 << 32) | (rnd2 << 16) | rnd2; + } + } + + private static IEnumerable _GenLeadingSigns2S_() + { + for (int cnt = 0; cnt <= 31; cnt++) + { + ulong rnd1 = GenLeadingSignsMinus32(cnt); + ulong rnd2 = GenLeadingSignsPlus32(cnt); + + yield return (rnd1 << 32) | rnd1; + yield return (rnd2 << 32) | rnd2; + } + } + + private static IEnumerable _GenLeadingZeros8B_() + { + for (int cnt = 0; cnt <= 8; cnt++) + { + ulong rnd = GenLeadingZeros8(cnt); + + yield return (rnd << 56) | (rnd << 48) | (rnd << 40) | (rnd << 32) | + (rnd << 24) | (rnd << 16) | (rnd << 08) | rnd; + } + } + + private static IEnumerable _GenLeadingZeros4H_() + { + for (int cnt = 0; cnt <= 16; cnt++) + { + ulong rnd = GenLeadingZeros16(cnt); + + yield return (rnd << 48) | (rnd << 32) | (rnd << 16) | rnd; + } + } + + private static IEnumerable _GenLeadingZeros2S_() + { + for (int cnt = 0; cnt <= 32; cnt++) + { + ulong rnd = GenLeadingZeros32(cnt); + + yield return (rnd << 32) | rnd; + } + } + + private static IEnumerable _GenPopCnt8B_() + { + for (ulong cnt = 0ul; cnt <= 255ul; cnt++) + { + yield return (cnt << 56) | (cnt << 48) | (cnt << 40) | (cnt << 32) | + (cnt << 24) | (cnt << 16) | (cnt << 08) | cnt; + } } #endregion @@ -1264,7 +1279,7 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("CLS ., .")] public void Cls_V_8B_16B([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, - [ValueSource("_GenLeadingSigns8B_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong z, [ValueSource("_GenLeadingSigns8B_")] [Random(RndCnt)] ulong a, [Values(0b0u, 0b1u)] uint q) // <8B, 16B> { @@ -1283,7 +1298,7 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("CLS ., .")] public void Cls_V_4H_8H([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, - [ValueSource("_GenLeadingSigns4H_")] [Random(RndCnt)] ulong z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong z, [ValueSource("_GenLeadingSigns4H_")] [Random(RndCnt)] ulong a, [Values(0b0u, 0b1u)] uint q) // <4H, 8H> { @@ -1302,7 +1317,7 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("CLS ., .")] public void Cls_V_2S_4S([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, - [ValueSource("_GenLeadingSigns2S_")] [Random(RndCnt)] ulong z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong z, [ValueSource("_GenLeadingSigns2S_")] [Random(RndCnt)] ulong a, [Values(0b0u, 0b1u)] uint q) // <2S, 4S> { @@ -1321,7 +1336,7 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("CLZ ., .")] public void Clz_V_8B_16B([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, - [ValueSource("_GenLeadingZeros8B_")] [Random(RndCnt)] ulong z, + [ValueSource("_8B_")] [Random(RndCnt)] ulong z, [ValueSource("_GenLeadingZeros8B_")] [Random(RndCnt)] ulong a, [Values(0b0u, 0b1u)] uint q) // <8B, 16B> { @@ -1340,7 +1355,7 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("CLZ ., .")] public void Clz_V_4H_8H([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, - [ValueSource("_GenLeadingZeros4H_")] [Random(RndCnt)] ulong z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong z, [ValueSource("_GenLeadingZeros4H_")] [Random(RndCnt)] ulong a, [Values(0b0u, 0b1u)] uint q) // <4H, 8H> { @@ -1359,7 +1374,7 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("CLZ ., .")] public void Clz_V_2S_4S([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, - [ValueSource("_GenLeadingZeros2S_")] [Random(RndCnt)] ulong z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong z, [ValueSource("_GenLeadingZeros2S_")] [Random(RndCnt)] ulong a, [Values(0b0u, 0b1u)] uint q) // <2S, 4S> { @@ -1653,8 +1668,8 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("CNT ., .")] public void Cnt_V_8B([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, - [ValueSource("_8B_")] [Random(RndCnt)] ulong z, - [ValueSource("_8B_")] [Random(RndCnt)] ulong a) + [ValueSource("_8B_")] [Random(RndCnt)] ulong z, + [ValueSource("_GenPopCnt8B_")] [Random(RndCnt)] ulong a) { uint opcode = 0x0E205800; // CNT V0.8B, V0.8B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); @@ -1670,8 +1685,8 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("CNT ., .")] public void Cnt_V_16B([Values(0u)] uint rd, [Values(1u, 0u)] uint rn, - [ValueSource("_8B_")] [Random(RndCnt)] ulong z, - [ValueSource("_8B_")] [Random(RndCnt)] ulong a) + [ValueSource("_8B_")] [Random(RndCnt)] ulong z, + [ValueSource("_GenPopCnt8B_")] [Random(RndCnt)] ulong a) { uint opcode = 0x4E205800; // CNT V0.16B, V0.16B opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs index ff8e8027..8e205855 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt.cs @@ -14,6 +14,18 @@ namespace Ryujinx.Tests.Cpu #if SimdCvt #region "ValueSource (Types)" + private static uint[] _W_() + { + return new uint[] { 0x00000000u, 0x7FFFFFFFu, + 0x80000000u, 0xFFFFFFFFu }; + } + + private static ulong[] _X_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + private static IEnumerable _1S_F_WX_() { // int @@ -177,18 +189,6 @@ namespace Ryujinx.Tests.Cpu yield return rnd6; } } - - private static uint[] _W_() - { - return new uint[] { 0x00000000u, 0x7FFFFFFFu, - 0x80000000u, 0xFFFFFFFFu }; - } - - private static ulong[] _X_() - { - return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, - 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; - } #endregion #region "ValueSource (Opcodes)" diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs index 15581d69..ea372704 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs @@ -84,10 +84,11 @@ namespace Ryujinx.Tests.Cpu opcode |= (imm5 << 16); opcode |= ((q & 1) << 30); - ulong z = TestContext.CurrentContext.Random.NextULong(); + uint w31 = TestContext.CurrentContext.Random.NextUInt(); + ulong z = TestContext.CurrentContext.Random.NextULong(); Vector128 v0 = MakeVectorE0E1(z, z); - SingleOpcode(opcode, x1: wn, v0: v0); + SingleOpcode(opcode, x1: wn, x31: w31, v0: v0); CompareAgainstUnicorn(); } @@ -100,10 +101,11 @@ namespace Ryujinx.Tests.Cpu uint opcode = 0x4E080C00; // DUP V0.2D, X0 opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); - ulong z = TestContext.CurrentContext.Random.NextULong(); + ulong x31 = TestContext.CurrentContext.Random.NextULong(); + ulong z = TestContext.CurrentContext.Random.NextULong(); Vector128 v0 = MakeVectorE0E1(z, z); - SingleOpcode(opcode, x1: xn, v0: v0); + SingleOpcode(opcode, x1: xn, x31: x31, v0: v0); CompareAgainstUnicorn(); } @@ -303,9 +305,10 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= (imm5 << 16); + uint w31 = TestContext.CurrentContext.Random.NextUInt(); Vector128 v0 = MakeVectorE0E1(z, z); - SingleOpcode(opcode, x1: wn, v0: v0); + SingleOpcode(opcode, x1: wn, x31: w31, v0: v0); CompareAgainstUnicorn(); } @@ -325,9 +328,10 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= (imm5 << 16); + uint w31 = TestContext.CurrentContext.Random.NextUInt(); Vector128 v0 = MakeVectorE0E1(z, z); - SingleOpcode(opcode, x1: wn, v0: v0); + SingleOpcode(opcode, x1: wn, x31: w31, v0: v0); CompareAgainstUnicorn(); } @@ -347,9 +351,10 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= (imm5 << 16); + uint w31 = TestContext.CurrentContext.Random.NextUInt(); Vector128 v0 = MakeVectorE0E1(z, z); - SingleOpcode(opcode, x1: wn, v0: v0); + SingleOpcode(opcode, x1: wn, x31: w31, v0: v0); CompareAgainstUnicorn(); } @@ -369,9 +374,10 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rn & 31) << 5) | ((rd & 31) << 0); opcode |= (imm5 << 16); + ulong x31 = TestContext.CurrentContext.Random.NextULong(); Vector128 v0 = MakeVectorE0E1(z, z); - SingleOpcode(opcode, x1: xn, v0: v0); + SingleOpcode(opcode, x1: xn, x31: x31, v0: v0); CompareAgainstUnicorn(); } diff --git a/Ryujinx.Tests/Ryujinx.Tests.csproj b/Ryujinx.Tests/Ryujinx.Tests.csproj index 9ddeb314..fd305a4a 100644 --- a/Ryujinx.Tests/Ryujinx.Tests.csproj +++ b/Ryujinx.Tests/Ryujinx.Tests.csproj @@ -27,8 +27,8 @@ - - + +