diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 4010e3ab..f255b528 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -752,6 +752,10 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, typeof(OpCode32AluUx));
 
             // FP & SIMD
+            SetA32("111100111x110000xxx0001101x0xxx0", InstName.Aesd_V, InstEmit32.Aesd_V, typeof(OpCode32Simd)); // e.g. 0xf3b00340 = AESD.8 Q0, Q0
+            SetA32("111100111x110000xxx0001100x0xxx0", InstName.Aese_V, InstEmit32.Aese_V, typeof(OpCode32Simd)); // e.g. 0xf3b00300 = AESE.8 Q0, Q0
+            SetA32("111100111x110000xxx0001111x0xxx0", InstName.Aesimc_V, InstEmit32.Aesimc_V, typeof(OpCode32Simd)); // e.g. 0xf3b003c0 = AESIMC.8 Q0, Q0
+            SetA32("111100111x110000xxx0001110x0xxx0", InstName.Aesmc_V, InstEmit32.Aesmc_V, typeof(OpCode32Simd)); // e.g. 0xf3b00380 = AESMC.8 Q0, Q0
             SetA32("<<<<11101x110000xxxx10xx11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, typeof(OpCode32SimdRegS));
             SetA32("111100111x11xx01xxxx0x110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, typeof(OpCode32SimdReg));
             SetA32("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, typeof(OpCode32SimdReg));
@@ -782,6 +786,7 @@ namespace ARMeilleure.Decoders
             SetA32("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, typeof(OpCode32SimdRegS));
             SetA32("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, typeof(OpCode32SimdDupGP));
             SetA32("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, typeof(OpCode32SimdDupElem));
+            SetA32("111100110x00xxxxxxxx0001xxx1xxxx", InstName.Veor, InstEmit32.Veor_I, typeof(OpCode32SimdBinary)); // e.g. 0xf3000110 = VEOR D0, D0, D0
             SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, typeof(OpCode32SimdExt));
             SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle));
             SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 1.
diff --git a/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs b/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs
new file mode 100644
index 00000000..1cfce3ad
--- /dev/null
+++ b/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs
@@ -0,0 +1,53 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+
+namespace ARMeilleure.Instructions
+{
+    partial class InstEmit32
+    {
+        // AESD: calls SoftFallback.Decrypt with (Qd, Qm) and copies the result back into Qd.
+        public static void Aesd_V(ArmEmitterContext context)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            Operand d = GetVecA32(op.Qd);
+            Operand m = GetVecA32(op.Qm);
+
+            context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Decrypt), d, m));
+        }
+
+        // AESE: calls SoftFallback.Encrypt with (Qd, Qm) and copies the result back into Qd.
+        public static void Aese_V(ArmEmitterContext context)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            Operand d = GetVecA32(op.Qd);
+            Operand m = GetVecA32(op.Qm);
+
+            context.Copy(d, context.Call(new _V128_V128_V128(SoftFallback.Encrypt), d, m));
+        }
+
+        // AESIMC: calls SoftFallback.InverseMixColumns with Qm and copies the result into Qd.
+        public static void Aesimc_V(ArmEmitterContext context)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            Operand m = GetVecA32(op.Qm);
+
+            context.Copy(GetVecA32(op.Qd), context.Call(new _V128_V128(SoftFallback.InverseMixColumns), m));
+        }
+
+        // AESMC: calls SoftFallback.MixColumns with Qm and copies the result into Qd.
+        public static void Aesmc_V(ArmEmitterContext context)
+        {
+            OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+            Operand m = GetVecA32(op.Qm);
+
+            context.Copy(GetVecA32(op.Qd), context.Call(new _V128_V128(SoftFallback.MixColumns), m));
+        }
+    }
+}
diff --git a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
index 3698f332..6505e834 100644
--- a/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdLogical32.cs
@@ -55,6 +55,19 @@ namespace ARMeilleure.Instructions
             }
         }
 
+        // VEOR: emits X86Pxor when Optimizations.UseSse2 is set, otherwise BitwiseExclusiveOr via EmitVectorBinaryOpZx32.
+        public static void Veor_I(ArmEmitterContext context)
+        {
+            if (Optimizations.UseSse2)
+            {
+                EmitVectorBinaryOpF32(context, Intrinsic.X86Pxor, Intrinsic.X86Pxor);
+            }
+            else
+            {
+                EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2));
+            }
+        }
+
         public static void Vorr_I(ArmEmitterContext context)
         {
             if (Optimizations.UseSse2)
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index 5e92da0a..e217c6ec 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -547,6 +547,7 @@ namespace ARMeilleure.Instructions
         Vcvt,
         Vdiv,
         Vdup,
+        Veor,
         Vext,
         Vld1,
         Vld2,
diff --git a/Ryujinx.Tests/Cpu/CpuTest32.cs b/Ryujinx.Tests/Cpu/CpuTest32.cs
index a039d280..07354442 100644
--- a/Ryujinx.Tests/Cpu/CpuTest32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTest32.cs
@@ -164,11 +164,11 @@ namespace Ryujinx.Tests.Cpu
             }
         }
 
-        protected void ExecuteOpcodes()
+        protected void ExecuteOpcodes(bool runUnicorn = true)
         {
             _translator.Execute(_context, _entryPoint);
 
-            if (_unicornAvailable)
+            if (_unicornAvailable && runUnicorn)
             {
                 _unicornEmu.RunForCount((ulong)(_currAddress - _entryPoint - 4) / 4);
             }
@@ -193,7 +193,8 @@ namespace Ryujinx.Tests.Cpu
                                                 bool zero = false,
                                                 bool negative = false,
                                                 int fpscr = 0,
-                                                bool copyFpFlags = false)
+                                                bool copyFpFlags = false,
+                                                bool runUnicorn = true)
         {
             Opcode(opcode);
             if (copyFpFlags)
@@ -202,7 +203,7 @@ namespace Ryujinx.Tests.Cpu
             }
             Opcode(0xe12fff1e); // BX LR
             SetContext(r0, r1, r2, r3, sp, v0, v1, v2, v3, v4, v5, v14, v15, overflow, carry, zero, negative, fpscr);
-            ExecuteOpcodes();
+            ExecuteOpcodes(runUnicorn);
 
             return GetContext();
         }
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCrypto32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto32.cs
new file mode 100644
index 00000000..0bcb7a1f
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdCrypto32.cs
@@ -0,0 +1,159 @@
+// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf
+
+using ARMeilleure.State;
+
+using NUnit.Framework;
+
+namespace Ryujinx.Tests.Cpu
+{
+    public class CpuTestSimdCrypto32 : CpuTest32
+    {
+        [Test, Description("AESD.8 <Qd>, <Qm>")]
+        public void Aesd_V([Values(0u)] uint rd,
+                           [Values(2u)] uint rm,
+                           [Values(0x7B5B546573745665ul)] ulong valueH,
+                           [Values(0x63746F725D53475Dul)] ulong valueL,
+                           [Random(2)] ulong roundKeyH,
+                           [Random(2)] ulong roundKeyL,
+                           [Values(0x8DCAB9BC035006BCul)] ulong resultH,
+                           [Values(0x8F57161E00CAFD8Dul)] ulong resultL)
+        {
+            uint opcode = 0xf3b00340; // AESD.8 Q0, Q0
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+
+            // v0 holds the value pre-XORed with the round key held in v1; the expected result is fixed even though the key is random.
+            V128 v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH);
+            V128 v1 = MakeVectorE0E1(roundKeyL, roundKeyH);
+
+            ExecutionContext context = SingleOpcode(opcode, v0: v0, v1: v1, runUnicorn: false);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL));
+                Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH));
+            });
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(roundKeyL));
+                Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(roundKeyH));
+            });
+
+            // Unicorn does not yet support crypto instructions in A32.
+            // CompareAgainstUnicorn();
+        }
+
+        [Test, Description("AESE.8 <Qd>, <Qm>")]
+        public void Aese_V([Values(0u)] uint rd,
+                           [Values(2u)] uint rm,
+                           [Values(0x7B5B546573745665ul)] ulong valueH,
+                           [Values(0x63746F725D53475Dul)] ulong valueL,
+                           [Random(2)] ulong roundKeyH,
+                           [Random(2)] ulong roundKeyL,
+                           [Values(0x8F92A04DFBED204Dul)] ulong resultH,
+                           [Values(0x4C39B1402192A84Cul)] ulong resultL)
+        {
+            uint opcode = 0xf3b00300; // AESE.8 Q0, Q0
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+
+            // v0 holds the value pre-XORed with the round key held in v1; the expected result is fixed even though the key is random.
+            V128 v0 = MakeVectorE0E1(roundKeyL ^ valueL, roundKeyH ^ valueH);
+            V128 v1 = MakeVectorE0E1(roundKeyL, roundKeyH);
+
+            ExecutionContext context = SingleOpcode(opcode, v0: v0, v1: v1, runUnicorn: false);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL));
+                Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH));
+            });
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(roundKeyL));
+                Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(roundKeyH));
+            });
+
+            // Unicorn does not yet support crypto instructions in A32.
+            // CompareAgainstUnicorn();
+        }
+
+        [Test, Description("AESIMC.8 <Qd>, <Qm>")]
+        public void Aesimc_V([Values(0u)] uint rd,
+                             [Values(2u, 0u)] uint rm,
+                             [Values(0x8DCAB9DC035006BCul)] ulong valueH,
+                             [Values(0x8F57161E00CAFD8Dul)] ulong valueL,
+                             [Values(0xD635A667928B5EAEul)] ulong resultH,
+                             [Values(0xEEC9CC3BC55F5777ul)] ulong resultL)
+        {
+            uint opcode = 0xf3b003c0; // AESIMC.8 Q0, Q0
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+
+            V128 v = MakeVectorE0E1(valueL, valueH);
+
+            // The input goes into v0 when rm == 0 and into v1 when rm == 2; the other vector is default(V128).
+            ExecutionContext context = SingleOpcode(
+                opcode,
+                v0: rm == 0u ? v : default(V128),
+                v1: rm == 2u ? v : default(V128),
+                runUnicorn: false);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL));
+                Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH));
+            });
+            if (rm == 2u)
+            {
+                Assert.Multiple(() =>
+                {
+                    Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(valueL));
+                    Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(valueH));
+                });
+            }
+
+            // Unicorn does not yet support crypto instructions in A32.
+            // CompareAgainstUnicorn();
+        }
+
+        [Test, Description("AESMC.8 <Qd>, <Qm>")]
+        public void Aesmc_V([Values(0u)] uint rd,
+                            [Values(2u, 0u)] uint rm,
+                            [Values(0x627A6F6644B109C8ul)] ulong valueH,
+                            [Values(0x2B18330A81C3B3E5ul)] ulong valueL,
+                            [Values(0x7B5B546573745665ul)] ulong resultH,
+                            [Values(0x63746F725D53475Dul)] ulong resultL)
+        {
+            uint opcode = 0xf3b00380; // AESMC.8 Q0, Q0
+            opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1);
+            opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18);
+
+            V128 v = MakeVectorE0E1(valueL, valueH);
+
+            // The input goes into v0 when rm == 0 and into v1 when rm == 2; the other vector is default(V128).
+            ExecutionContext context = SingleOpcode(
+                opcode,
+                v0: rm == 0u ? v : default(V128),
+                v1: rm == 2u ? v : default(V128),
+                runUnicorn: false);
+
+            Assert.Multiple(() =>
+            {
+                Assert.That(GetVectorE0(context.GetV(0)), Is.EqualTo(resultL));
+                Assert.That(GetVectorE1(context.GetV(0)), Is.EqualTo(resultH));
+            });
+            if (rm == 2u)
+            {
+                Assert.Multiple(() =>
+                {
+                    Assert.That(GetVectorE0(context.GetV(1)), Is.EqualTo(valueL));
+                    Assert.That(GetVectorE1(context.GetV(1)), Is.EqualTo(valueH));
+                });
+            }
+
+            // Unicorn does not yet support crypto instructions in A32.
+            // CompareAgainstUnicorn();
+        }
+    }
+}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs
index 459127de..b6c05b10 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdLogical32.cs
@@ -12,14 +12,16 @@ namespace Ryujinx.Tests.Cpu
 #if SimdLogical32
 
 #region "ValueSource (Opcodes)"
-        private static uint[] _Vbif_Vbit_Vbsl_Vand_()
+        private static uint[] _Vbif_Vbit_Vbsl_Vand_Vorr_Veor_()
         {
             return new uint[]
             {
                 0xf3300110u, // VBIF D0, D0, D0
                 0xf3200110u, // VBIT D0, D0, D0
                 0xf3100110u, // VBSL D0, D0, D0
-                0xf2000110u // VAND D0, D0, D0
+                0xf2000110u, // VAND D0, D0, D0
+                0xf2200110u, // VORR D0, D0, D0
+                0xf3000110u // VEOR D0, D0, D0
             };
         }
 #endregion
@@ -27,14 +29,14 @@ namespace Ryujinx.Tests.Cpu
         private const int RndCnt = 2;
 
         [Test, Pairwise]
-        public void Vbif_Vbit_Vbsl_Vand([ValueSource("_Vbif_Vbit_Vbsl_Vand_")] uint opcode,
-                                        [Range(0u, 4u)] uint rd,
-                                        [Range(0u, 4u)] uint rn,
-                                        [Range(0u, 4u)] uint rm,
-                                        [Random(RndCnt)] ulong z,
-                                        [Random(RndCnt)] ulong a,
-                                        [Random(RndCnt)] ulong b,
-                                        [Values] bool q)
+        public void Vbif_Vbit_Vbsl_Vand_Vorr_Veor([ValueSource("_Vbif_Vbit_Vbsl_Vand_Vorr_Veor_")] uint opcode,
+                                                  [Range(0u, 4u)] uint rd,
+                                                  [Range(0u, 4u)] uint rn,
+                                                  [Range(0u, 4u)] uint rm,
+                                                  [Random(RndCnt)] ulong z,
+                                                  [Random(RndCnt)] ulong a,
+                                                  [Random(RndCnt)] ulong b,
+                                                  [Values] bool q)
         {
             if (q)
             {