Add intrinsics support (#121)

* Initial intrinsics support * Update tests to work with the new Vector128 type and intrinsics * Drop SSE4.1 requirement * Fix copy-paste mistake
2018-05-11 20:10:27 -03:00 · 2018-05-11 20:10:27 -03:00 · f9f111bc85
commit f9f111bc85
parent 8e306b3ac1
36 changed files with 1658 additions and 1111 deletions
--- a/ChocolArm64/AOptimizations.cs
+++ b/ChocolArm64/AOptimizations.cs
@ -1,6 +1,12 @@
+using System.Runtime.Intrinsics.X86;
+
 public static class AOptimizations
 {
    public static bool DisableMemoryChecks = false;

    public static bool GenerateCallStack = true;
+
+    // Public switch for the SSE2 code paths; it only matters when Sse2.IsSupported is true.
+    public static bool UseSse2IfAvailable = true;
+
+    // Evaluated on every read. A static field initializer would run once during type
+    // initialization, so a later write to UseSse2IfAvailable would never be observed.
+    internal static bool UseSse2 => UseSse2IfAvailable && Sse2.IsSupported;
 }
--- a/ChocolArm64/ChocolArm64.csproj
+++ b/ChocolArm64/ChocolArm64.csproj
@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
-    <TargetFramework>netcoreapp2.0</TargetFramework>
+    <TargetFramework>netcoreapp2.1</TargetFramework>
  </PropertyGroup>

  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
@ -12,4 +12,8 @@
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
  </PropertyGroup>

+  <ItemGroup>
+    <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
+  </ItemGroup>
+
 </Project>
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@ -4,6 +4,7 @@ using ChocolArm64.Translation;
 using System;
 using System.Reflection;
 using System.Reflection.Emit;
+using System.Runtime.Intrinsics.X86;

 using static ChocolArm64.Instruction.AInstEmitSimdHelper;

@ -41,7 +42,14 @@ namespace ChocolArm64.Instruction

        public static void Add_V(AILEmitterCtx Context)
        {
-            EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Add));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2Call(Context, nameof(Sse2.Add));
+            }
+            else
+            {
+                EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Add));
+            }
        }

        public static void Addhn_V(AILEmitterCtx Context)
@ -158,7 +166,7 @@ namespace ChocolArm64.Instruction

                Context.Emit(OpCodes.Conv_U1);

-                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
+                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.CountSetBits8));

                Context.Emit(OpCodes.Conv_U8);

@ -303,12 +311,26 @@ namespace ChocolArm64.Instruction

        public static void Fadd_S(AILEmitterCtx Context)
        {
-            EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.AddScalar));
+            }
+            else
+            {
+                EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
+            }
        }

        public static void Fadd_V(AILEmitterCtx Context)
        {
-            EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.Add));
+            }
+            else
+            {
+                EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Add));
+            }
        }

        public static void Faddp_V(AILEmitterCtx Context)
@ -345,12 +367,26 @@ namespace ChocolArm64.Instruction

        public static void Fdiv_S(AILEmitterCtx Context)
        {
-            EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.DivideScalar));
+            }
+            else
+            {
+                EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
+            }
        }

        public static void Fdiv_V(AILEmitterCtx Context)
        {
-            EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.Divide));
+            }
+            else
+            {
+                EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Div));
+            }
        }

        public static void Fmadd_S(AILEmitterCtx Context)
@ -370,11 +406,11 @@ namespace ChocolArm64.Instruction
            {
                if (Op.Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
                }
                else if (Op.Size == 1)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
                }
                else
                {
@ -391,11 +427,11 @@ namespace ChocolArm64.Instruction
            {
                if (Op.Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF));
                }
                else if (Op.Size == 1)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max));
                }
                else
                {
@ -412,11 +448,11 @@ namespace ChocolArm64.Instruction
            {
                if (Op.Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
                }
                else if (Op.Size == 1)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
                }
                else
                {
@ -435,11 +471,11 @@ namespace ChocolArm64.Instruction
            {
                if (SizeF == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF));
                }
                else if (SizeF == 1)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min));
                }
                else
                {
@ -505,7 +541,14 @@ namespace ChocolArm64.Instruction

        public static void Fmul_S(AILEmitterCtx Context)
        {
-            EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.MultiplyScalar));
+            }
+            else
+            {
+                EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
+            }
        }

        public static void Fmul_Se(AILEmitterCtx Context)
@ -515,7 +558,14 @@ namespace ChocolArm64.Instruction

        public static void Fmul_V(AILEmitterCtx Context)
        {
-            EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.Multiply));
+            }
+            else
+            {
+                EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul));
+            }
        }

        public static void Fmul_Ve(AILEmitterCtx Context)
@ -716,11 +766,11 @@ namespace ChocolArm64.Instruction

                if (Op.Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
                }
                else if (Op.Size == 1)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
                }
                else
                {
@ -743,11 +793,11 @@ namespace ChocolArm64.Instruction

                if (SizeF == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
                }
                else if (SizeF == 1)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
                }
                else
                {
@ -819,11 +869,11 @@ namespace ChocolArm64.Instruction

                if (Op.Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
                }
                else if (Op.Size == 1)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
                }
                else
                {
@ -844,11 +894,11 @@ namespace ChocolArm64.Instruction

                if (Op.Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF));
                }
                else if (Op.Size == 1)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round));
                }
                else
                {
@ -947,12 +997,26 @@ namespace ChocolArm64.Instruction

        public static void Fsub_S(AILEmitterCtx Context)
        {
-            EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.SubtractScalar));
+            }
+            else
+            {
+                EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
+            }
        }

        public static void Fsub_V(AILEmitterCtx Context)
        {
-            EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.Subtract));
+            }
+            else
+            {
+                EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Sub));
+            }
        }

        public static void Mla_V(AILEmitterCtx Context)
@ -1066,7 +1130,14 @@ namespace ChocolArm64.Instruction

        public static void Sub_V(AILEmitterCtx Context)
        {
-            EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2Call(Context, nameof(Sse2.Subtract));
+            }
+            else
+            {
+                EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub));
+            }
        }

        public static void Subhn_V(AILEmitterCtx Context)
--- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
@ -3,6 +3,7 @@ using ChocolArm64.State;
 using ChocolArm64.Translation;
 using System;
 using System.Reflection.Emit;
+using System.Runtime.Intrinsics.X86;

 using static ChocolArm64.Instruction.AInstEmitAluHelper;
 using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@ -13,17 +14,38 @@ namespace ChocolArm64.Instruction
    {
        public static void Cmeq_V(AILEmitterCtx Context)
        {
-            EmitVectorCmp(Context, OpCodes.Beq_S);
+            // Sse2.CompareEqual has no 64-bit integer overload (that one needs SSE4.1), so only
+            // register-form compares with 8/16/32-bit lanes take the intrinsic path; everything
+            // else keeps using the IL compare.
+            if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg Op && Op.Size < 3)
+            {
+                EmitSse2Call(Context, nameof(Sse2.CompareEqual));
+            }
+            else
+            {
+                EmitVectorCmp(Context, OpCodes.Beq_S);
+            }
        }

        public static void Cmge_V(AILEmitterCtx Context)
        {
+            // Sse2 exposes CompareGreaterThanOrEqual only for Vector128<double>; there is no
+            // integer overload for EmitSse2Call to resolve, so this op stays on the IL path.
            EmitVectorCmp(Context, OpCodes.Bge_S);
        }

        public static void Cmgt_V(AILEmitterCtx Context)
        {
-            EmitVectorCmp(Context, OpCodes.Bgt_S);
+            // Sse2.CompareGreaterThan covers signed 8/16/32-bit lanes only (the 64-bit form
+            // needs SSE4.2), so 64-bit lanes and non-register forms keep using the IL compare.
+            if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg Op && Op.Size < 3)
+            {
+                EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan));
+            }
+            else
+            {
+                EmitVectorCmp(Context, OpCodes.Bgt_S);
+            }
        }

        public static void Cmhi_V(AILEmitterCtx Context)
@ -112,32 +134,74 @@ namespace ChocolArm64.Instruction

        public static void Fcmeq_S(AILEmitterCtx Context)
        {
-            EmitScalarFcmp(Context, OpCodes.Beq_S);
+            if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.CompareEqualScalar));
+            }
+            else
+            {
+                EmitScalarFcmp(Context, OpCodes.Beq_S);
+            }
        }

        public static void Fcmeq_V(AILEmitterCtx Context)
        {
-            EmitVectorFcmp(Context, OpCodes.Beq_S);
+            if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.CompareEqual));
+            }
+            else
+            {
+                EmitVectorFcmp(Context, OpCodes.Beq_S);
+            }
        }

        public static void Fcmge_S(AILEmitterCtx Context)
        {
-            EmitScalarFcmp(Context, OpCodes.Bge_S);
+            if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqualScalar));
+            }
+            else
+            {
+                EmitScalarFcmp(Context, OpCodes.Bge_S);
+            }
        }

        public static void Fcmge_V(AILEmitterCtx Context)
        {
-            EmitVectorFcmp(Context, OpCodes.Bge_S);
+            if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqual));
+            }
+            else
+            {
+                EmitVectorFcmp(Context, OpCodes.Bge_S);
+            }
        }

        public static void Fcmgt_S(AILEmitterCtx Context)
        {
-            EmitScalarFcmp(Context, OpCodes.Bgt_S);
+            if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanScalar));
+            }
+            else
+            {
+                EmitScalarFcmp(Context, OpCodes.Bgt_S);
+            }
        }

        public static void Fcmgt_V(AILEmitterCtx Context)
        {
-            EmitVectorFcmp(Context, OpCodes.Bgt_S);
+            if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg)
+            {
+                EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThan));
+            }
+            else
+            {
+                EmitVectorFcmp(Context, OpCodes.Bgt_S);
+            }
        }

        public static void Fcmle_S(AILEmitterCtx Context)
--- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
@ -382,15 +382,15 @@ namespace ChocolArm64.Instruction

                if (SizeF == 0)
                {
-                    ASoftFallback.EmitCall(Context, Signed
-                        ? nameof(ASoftFallback.SatF32ToS32)
-                        : nameof(ASoftFallback.SatF32ToU32));
+                    AVectorHelper.EmitCall(Context, Signed
+                        ? nameof(AVectorHelper.SatF32ToS32)
+                        : nameof(AVectorHelper.SatF32ToU32));
                }
                else /* if (SizeF == 1) */
                {
-                    ASoftFallback.EmitCall(Context, Signed
-                        ? nameof(ASoftFallback.SatF64ToS64)
-                        : nameof(ASoftFallback.SatF64ToU64));
+                    AVectorHelper.EmitCall(Context, Signed
+                        ? nameof(AVectorHelper.SatF64ToS64)
+                        : nameof(AVectorHelper.SatF64ToU64));
                }

                if (SizeF == 0)
@ -420,22 +420,22 @@ namespace ChocolArm64.Instruction
            {
                if (Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS32));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS32));
                }
                else /* if (Size == 1) */
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS32));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS32));
                }
            }
            else
            {
                if (Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS64));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS64));
                }
                else /* if (Size == 1) */
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS64));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS64));
                }
            }
        }
@ -453,22 +453,22 @@ namespace ChocolArm64.Instruction
            {
                if (Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU32));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU32));
                }
                else /* if (Size == 1) */
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU32));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU32));
                }
            }
            else
            {
                if (Size == 0)
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU64));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU64));
                }
                else /* if (Size == 1) */
                {
-                    ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU64));
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU64));
                }
            }
        }
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@ -3,6 +3,8 @@ using ChocolArm64.State;
 using ChocolArm64.Translation;
 using System;
 using System.Reflection;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;

 namespace ChocolArm64.Instruction
 {
@ -32,6 +34,129 @@ namespace ChocolArm64.Instruction
            return (8 << (Op.Size + 1)) - Op.Imm;
        }

+        // Emits a call to the Sse2 intrinsic called Name for the current integer SIMD opcode.
+        // Rn (plus Rm when the opcode is an AOpCodeSimdReg) is loaded and passed through the
+        // AVectorHelper.VectorSingleTo* helper selected by Op.Size, the Sse2 overload taking
+        // the matching Vector128<T> arguments is called, and the result goes through the
+        // inverse helper before being stored to Rd. For SIMD64 opcodes EmitVectorZeroUpper is
+        // then applied to Rd.
+        // Throws InvalidOperationException when Sse2 has no overload of Name for the lane type.
+        public static void EmitSse2Call(AILEmitterCtx Context, string Name)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            void Ldvec(int Reg)
+            {
+                Context.EmitLdvec(Reg);
+
+                switch (Op.Size)
+                {
+                    case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToSByte)); break;
+                    case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt16)); break;
+                    case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt32)); break;
+                    case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt64)); break;
+                }
+            }
+
+            Ldvec(Op.Rn);
+
+            Type BaseType = null;
+
+            Type[] Types;
+
+            switch (Op.Size)
+            {
+                case 0: BaseType = typeof(Vector128<sbyte>); break;
+                case 1: BaseType = typeof(Vector128<short>); break;
+                case 2: BaseType = typeof(Vector128<int>);   break;
+                case 3: BaseType = typeof(Vector128<long>);  break;
+            }
+
+            if (Op is AOpCodeSimdReg BinOp)
+            {
+                Ldvec(BinOp.Rm);
+
+                Types = new Type[] { BaseType, BaseType };
+            }
+            else
+            {
+                Types = new Type[] { BaseType };
+            }
+
+            // Type.GetMethod returns null when no overload matches the argument types; not
+            // every Sse2 intrinsic exists for every lane type, so fail with a clear message
+            // instead of handing a null MethodInfo to the emitter.
+            MethodInfo MthdInfo = typeof(Sse2).GetMethod(Name, Types);
+
+            if (MthdInfo == null)
+            {
+                throw new InvalidOperationException($"Sse2.{Name} has no overload taking {BaseType}.");
+            }
+
+            Context.EmitCall(MthdInfo);
+
+            switch (Op.Size)
+            {
+                case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSByteToSingle)); break;
+                case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt16ToSingle)); break;
+                case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt32ToSingle)); break;
+                case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt64ToSingle)); break;
+            }
+
+            Context.EmitStvec(Op.Rd);
+
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
+        }
+
+        // Floating-point counterpart of EmitSse2Call. Bit 0 of Op.Size selects the lane type:
+        // 0 uses Vector128<float> and looks Name up on Sse, 1 uses Vector128<double> and looks
+        // Name up on Sse2 (operands and result then also go through the
+        // AVectorHelper.VectorSingleToDouble / VectorDoubleToSingle helpers). Rn, plus Rm for
+        // AOpCodeSimdReg opcodes, are the arguments; the result is stored to Rd, and for SIMD64
+        // opcodes EmitVectorZeroUpper is then applied to Rd.
+        // Throws InvalidOperationException when the intrinsic class has no matching overload.
+        // NOTE(review): the *Scalar intrinsics copy the lanes above the scalar from the first
+        // operand, and only the SIMD64 clean-up below runs afterwards - confirm that this
+        // matches what the guest expects in the upper lanes of scalar results.
+        public static void EmitSse2CallF(AILEmitterCtx Context, string Name)
+        {
+            AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
+
+            int SizeF = Op.Size & 1;
+
+            void Ldvec(int Reg)
+            {
+                Context.EmitLdvec(Reg);
+
+                if (SizeF == 1)
+                {
+                    AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToDouble));
+                }
+            }
+
+            Ldvec(Op.Rn);
+
+            Type BaseType = SizeF == 0
+                ? typeof(Vector128<float>)
+                : typeof(Vector128<double>);
+
+            Type[] Types;
+
+            if (Op is AOpCodeSimdReg BinOp)
+            {
+                Ldvec(BinOp.Rm);
+
+                Types = new Type[] { BaseType, BaseType };
+            }
+            else
+            {
+                Types = new Type[] { BaseType };
+            }
+
+            // Single-precision intrinsics live on Sse, double-precision ones on Sse2.
+            Type IntrinsicType = SizeF == 0 ? typeof(Sse) : typeof(Sse2);
+
+            // Type.GetMethod returns null when no overload matches; report that clearly
+            // instead of handing a null MethodInfo to the emitter.
+            MethodInfo MthdInfo = IntrinsicType.GetMethod(Name, Types);
+
+            if (MthdInfo == null)
+            {
+                throw new InvalidOperationException($"{IntrinsicType.Name}.{Name} has no overload taking {BaseType}.");
+            }
+
+            Context.EmitCall(MthdInfo);
+
+            if (SizeF == 1)
+            {
+                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle));
+            }
+
+            Context.EmitStvec(Op.Rd);
+
+            if (Op.RegisterSize == ARegisterSize.SIMD64)
+            {
+                EmitVectorZeroUpper(Context, Op.Rd);
+            }
+        }
+
        public static void EmitUnaryMathCall(AILEmitterCtx Context, string Name)
        {
            IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;
@ -596,9 +721,9 @@ namespace ChocolArm64.Instruction
            Context.EmitLdc_I4(Index);
            Context.EmitLdc_I4(Size);

-            ASoftFallback.EmitCall(Context, Signed
-                ? nameof(ASoftFallback.VectorExtractIntSx)
-                : nameof(ASoftFallback.VectorExtractIntZx));
+            AVectorHelper.EmitCall(Context, Signed
+                ? nameof(AVectorHelper.VectorExtractIntSx)
+                : nameof(AVectorHelper.VectorExtractIntZx));
        }

        public static void EmitVectorExtractF(AILEmitterCtx Context, int Reg, int Index, int Size)
@ -610,11 +735,11 @@ namespace ChocolArm64.Instruction

            if (Size == 0)
            {
-                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractSingle));
+                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractSingle));
            }
            else if (Size == 1)
            {
-                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractDouble));
+                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractDouble));
            }
            else
            {
@ -646,7 +771,7 @@ namespace ChocolArm64.Instruction
            Context.EmitLdc_I4(Index);
            Context.EmitLdc_I4(Size);

-            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
+            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));

            Context.EmitStvec(Reg);
        }
@ -659,7 +784,7 @@ namespace ChocolArm64.Instruction
            Context.EmitLdc_I4(Index);
            Context.EmitLdc_I4(Size);

-            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
+            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));

            Context.EmitStvectmp();
        }
@ -673,7 +798,7 @@ namespace ChocolArm64.Instruction
            Context.EmitLdc_I4(Index);
            Context.EmitLdc_I4(Size);

-            ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt));
+            AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt));

            Context.EmitStvec(Reg);
        }
@ -687,11 +812,11 @@ namespace ChocolArm64.Instruction

            if (Size == 0)
            {
-                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle));
+                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
            }
            else if (Size == 1)
            {
-                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble));
+                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
            }
            else
            {
@ -710,11 +835,11 @@ namespace ChocolArm64.Instruction

            if (Size == 0)
            {
-                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle));
+                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle));
            }
            else if (Size == 1)
            {
-                ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble));
+                AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble));
            }
            else
            {
--- a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
@ -2,6 +2,7 @@ using ChocolArm64.Decoder;
 using ChocolArm64.State;
 using ChocolArm64.Translation;
 using System.Reflection.Emit;
+using System.Runtime.Intrinsics.X86;

 using static ChocolArm64.Instruction.AInstEmitSimdHelper;

@ -11,7 +12,14 @@ namespace ChocolArm64.Instruction
    {
        public static void And_V(AILEmitterCtx Context)
        {
-            EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.And));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2Call(Context, nameof(Sse2.And));
+            }
+            else
+            {
+                EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.And));
+            }
        }

        public static void Bic_V(AILEmitterCtx Context)
@ -95,7 +103,14 @@ namespace ChocolArm64.Instruction

        public static void Eor_V(AILEmitterCtx Context)
        {
-            EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Xor));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2Call(Context, nameof(Sse2.Xor));
+            }
+            else
+            {
+                EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Xor));
+            }
        }

        public static void Not_V(AILEmitterCtx Context)
@ -114,7 +129,14 @@ namespace ChocolArm64.Instruction

        public static void Orr_V(AILEmitterCtx Context)
        {
-            EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Or));
+            if (AOptimizations.UseSse2)
+            {
+                EmitSse2Call(Context, nameof(Sse2.Or));
+            }
+            else
+            {
+                EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Or));
+            }
        }

        public static void Orr_Vi(AILEmitterCtx Context)
--- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs
@ -234,21 +234,21 @@ namespace ChocolArm64.Instruction

            switch (Op.Size)
            {
-                case 1: ASoftFallback.EmitCall(Context,
-                    nameof(ASoftFallback.Tbl1_V64),
-                    nameof(ASoftFallback.Tbl1_V128)); break;
+                case 1: AVectorHelper.EmitCall(Context,
+                    nameof(AVectorHelper.Tbl1_V64),
+                    nameof(AVectorHelper.Tbl1_V128)); break;

-                case 2: ASoftFallback.EmitCall(Context,
-                    nameof(ASoftFallback.Tbl2_V64),
-                    nameof(ASoftFallback.Tbl2_V128)); break;
+                case 2: AVectorHelper.EmitCall(Context,
+                    nameof(AVectorHelper.Tbl2_V64),
+                    nameof(AVectorHelper.Tbl2_V128)); break;

-                case 3: ASoftFallback.EmitCall(Context,
-                    nameof(ASoftFallback.Tbl3_V64),
-                    nameof(ASoftFallback.Tbl3_V128)); break;
+                case 3: AVectorHelper.EmitCall(Context,
+                    nameof(AVectorHelper.Tbl3_V64),
+                    nameof(AVectorHelper.Tbl3_V128)); break;

-                case 4: ASoftFallback.EmitCall(Context,
-                    nameof(ASoftFallback.Tbl4_V64),
-                    nameof(ASoftFallback.Tbl4_V128)); break;
+                case 4: AVectorHelper.EmitCall(Context,
+                    nameof(AVectorHelper.Tbl4_V64),
+                    nameof(AVectorHelper.Tbl4_V128)); break;

                default: throw new InvalidOperationException();
            }
--- a/ChocolArm64/Instruction/ASoftFallback.cs
+++ b/ChocolArm64/Instruction/ASoftFallback.cs
@ -1,20 +1,11 @@
-using ChocolArm64.State;
 using ChocolArm64.Translation;
 using System;
 using System.Numerics;
-using System.Runtime.CompilerServices;

 namespace ChocolArm64.Instruction
 {
    static class ASoftFallback
    {
-        public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
-        {
-            bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;
-
-            Context.EmitCall(typeof(ASoftFallback), IsSimd64 ? Name64 : Name128);
-        }
-
        public static void EmitCall(AILEmitterCtx Context, string MthdName)
        {
            Context.EmitCall(typeof(ASoftFallback), MthdName);
@ -160,78 +151,6 @@ namespace ChocolArm64.Instruction
            throw new ArgumentException(nameof(Size));
        }

-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static int SatF32ToS32(float Value)
-        {
-            if (float.IsNaN(Value)) return 0;
-
-            return Value > int.MaxValue ? int.MaxValue :
-                   Value < int.MinValue ? int.MinValue : (int)Value;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static long SatF32ToS64(float Value)
-        {
-            if (float.IsNaN(Value)) return 0;
-
-            return Value > long.MaxValue ? long.MaxValue :
-                   Value < long.MinValue ? long.MinValue : (long)Value;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static uint SatF32ToU32(float Value)
-        {
-            if (float.IsNaN(Value)) return 0;
-
-            return Value > uint.MaxValue ? uint.MaxValue :
-                   Value < uint.MinValue ? uint.MinValue : (uint)Value;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static ulong SatF32ToU64(float Value)
-        {
-            if (float.IsNaN(Value)) return 0;
-
-            return Value > ulong.MaxValue ? ulong.MaxValue :
-                   Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static int SatF64ToS32(double Value)
-        {
-            if (double.IsNaN(Value)) return 0;
-
-            return Value > int.MaxValue ? int.MaxValue :
-                   Value < int.MinValue ? int.MinValue : (int)Value;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static long SatF64ToS64(double Value)
-        {
-            if (double.IsNaN(Value)) return 0;
-
-            return Value > long.MaxValue ? long.MaxValue :
-                   Value < long.MinValue ? long.MinValue : (long)Value;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static uint SatF64ToU32(double Value)
-        {
-            if (double.IsNaN(Value)) return 0;
-
-            return Value > uint.MaxValue ? uint.MaxValue :
-                   Value < uint.MinValue ? uint.MinValue : (uint)Value;
-        }
-
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static ulong SatF64ToU64(double Value)
-        {
-            if (double.IsNaN(Value)) return 0;
-
-            return Value > ulong.MaxValue ? ulong.MaxValue :
-                   Value < ulong.MinValue ? ulong.MinValue : (ulong)Value;
-        }
-
        public static long SMulHi128(long LHS, long RHS)
        {
            return (long)(BigInteger.Multiply(LHS, RHS) >> 64);
@ -241,239 +160,5 @@ namespace ChocolArm64.Instruction
        {
            return (ulong)(BigInteger.Multiply(LHS, RHS) >> 64);
        }
-
-        public static int CountSetBits8(byte Value)
-        {
-            return ((Value >> 0) & 1) + ((Value >> 1) & 1) +
-                   ((Value >> 2) & 1) + ((Value >> 3) & 1) +
-                   ((Value >> 4) & 1) + ((Value >> 5) & 1) +
-                   ((Value >> 6) & 1) +  (Value >> 7);
-        }
-
-        public static float MaxF(float val1, float val2)
-        {
-            if (val1 == 0.0 && val2 == 0.0)
-            {
-                if (BitConverter.SingleToInt32Bits(val1) < 0 && BitConverter.SingleToInt32Bits(val2) < 0)
-                    return -0.0f;
-                
-                return 0.0f;
-            }
-
-            if (val1 > val2) 
-                return val1; 
-     
-            if (float.IsNaN(val1)) 
-                return val1;
-     
-            return val2;
-        }
-
-        public static double Max(double val1, double val2)
-        {
-            if (val1 == 0.0 && val2 == 0.0)
-            {
-                if (BitConverter.DoubleToInt64Bits(val1) < 0 && BitConverter.DoubleToInt64Bits(val2) < 0)
-                    return -0.0;
-
-                return 0.0;
-            }
-
-            if (val1 > val2) 
-                return val1; 
-     
-            if (double.IsNaN(val1)) 
-                return val1;
-     
-            return val2;
-        }
-
-        public static float MinF(float val1, float val2)
-        {
-            if (val1 == 0.0 && val2 == 0.0)
-            {
-                if (BitConverter.SingleToInt32Bits(val1) < 0 || BitConverter.SingleToInt32Bits(val2) < 0)
-                    return -0.0f;
-                
-                return 0.0f;
-            }
-
-            if (val1 < val2) 
-                return val1; 
-     
-            if (float.IsNaN(val1)) 
-                return val1;
-     
-            return val2;
-        }
-
-        public static double Min(double val1, double val2)
-        {
-            if (val1 == 0.0 && val2 == 0.0)
-            {
-                if (BitConverter.DoubleToInt64Bits(val1) < 0 || BitConverter.DoubleToInt64Bits(val2) < 0)
-                    return -0.0;
-                
-                return 0.0;
-            }
-
-            if (val1 < val2) 
-                return val1; 
-     
-            if (double.IsNaN(val1)) 
-                return val1;
-     
-            return val2;
-        }
-
-        public static float RoundF(float Value, int Fpcr)
-        {
-            switch ((ARoundMode)((Fpcr >> 22) & 3))
-            {
-                case ARoundMode.ToNearest:            return MathF.Round   (Value);
-                case ARoundMode.TowardsPlusInfinity:  return MathF.Ceiling (Value);
-                case ARoundMode.TowardsMinusInfinity: return MathF.Floor   (Value);
-                case ARoundMode.TowardsZero:          return MathF.Truncate(Value);
-            }
-
-            throw new InvalidOperationException();
-        }
-
-        public static double Round(double Value, int Fpcr)
-        {
-            switch ((ARoundMode)((Fpcr >> 22) & 3))
-            {
-                case ARoundMode.ToNearest:            return Math.Round   (Value);
-                case ARoundMode.TowardsPlusInfinity:  return Math.Ceiling (Value);
-                case ARoundMode.TowardsMinusInfinity: return Math.Floor   (Value);
-                case ARoundMode.TowardsZero:          return Math.Truncate(Value);
-            }
-
-            throw new InvalidOperationException();
-        }
-
-        public static AVec Tbl1_V64(AVec Vector, AVec Tb0)
-        {
-            return Tbl(Vector, 8, Tb0);
-        }
-
-        public static AVec Tbl1_V128(AVec Vector, AVec Tb0)
-        {
-            return Tbl(Vector, 16, Tb0);
-        }
-
-        public static AVec Tbl2_V64(AVec Vector, AVec Tb0, AVec Tb1)
-        {
-            return Tbl(Vector, 8, Tb0, Tb1);
-        }
-
-        public static AVec Tbl2_V128(AVec Vector, AVec Tb0, AVec Tb1)
-        {
-            return Tbl(Vector, 16, Tb0, Tb1);
-        }
-
-        public static AVec Tbl3_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2)
-        {
-            return Tbl(Vector, 8, Tb0, Tb1, Tb2);
-        }
-
-        public static AVec Tbl3_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2)
-        {
-            return Tbl(Vector, 16, Tb0, Tb1, Tb2);
-        }
-
-        public static AVec Tbl4_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3)
-        {
-            return Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3);
-        }
-
-        public static AVec Tbl4_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3)
-        {
-            return Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3);
-        }
-
-        private static AVec Tbl(AVec Vector, int Bytes, params AVec[] Tb)
-        {
-            AVec Res = new AVec();
-
-            byte[] Table = new byte[Tb.Length * 16];
-
-            for (int Index  = 0; Index  < Tb.Length; Index++)
-            for (int Index2 = 0; Index2 < 16;        Index2++)
-            {
-                Table[Index * 16 + Index2] = (byte)VectorExtractIntZx(Tb[Index], Index2, 0);
-            }
-
-            for (int Index = 0; Index < Bytes; Index++)
-            {
-                byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0);
-
-                if (TblIdx < Table.Length)
-                {
-                    Res = VectorInsertInt(Table[TblIdx], Res, Index, 0);
-                }
-            }
-
-            return Res;
-        }
-
-        public static ulong VectorExtractIntZx(AVec Vector, int Index, int Size)
-        {
-            switch (Size)
-            {
-                case 0: return Vector.ExtractByte  (Index);
-                case 1: return Vector.ExtractUInt16(Index);
-                case 2: return Vector.ExtractUInt32(Index);
-                case 3: return Vector.ExtractUInt64(Index);
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(Size));
-        }
-
-        public static long VectorExtractIntSx(AVec Vector, int Index, int Size)
-        {
-            switch (Size)
-            {
-                case 0: return (sbyte)Vector.ExtractByte  (Index);
-                case 1: return (short)Vector.ExtractUInt16(Index);
-                case 2: return   (int)Vector.ExtractUInt32(Index);
-                case 3: return  (long)Vector.ExtractUInt64(Index);
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(Size));
-        }
-
-        public static float VectorExtractSingle(AVec Vector, int Index)
-        {
-            return Vector.ExtractSingle(Index);
-        }
-
-        public static double VectorExtractDouble(AVec Vector, int Index)
-        {
-            return Vector.ExtractDouble(Index);
-        }
-
-        public static AVec VectorInsertSingle(float Value, AVec Vector, int Index)
-        {
-            return AVec.InsertSingle(Vector, Index, Value);
-        }
-
-        public static AVec VectorInsertDouble(double Value, AVec Vector, int Index)
-        {
-            return AVec.InsertDouble(Vector, Index, Value);
-        }
-
-        public static AVec VectorInsertInt(ulong Value, AVec Vector, int Index, int Size)
-        {
-            switch (Size)
-            {
-                case 0: return AVec.InsertByte  (Vector, Index,   (byte)Value);
-                case 1: return AVec.InsertUInt16(Vector, Index, (ushort)Value);
-                case 2: return AVec.InsertUInt32(Vector, Index,   (uint)Value);
-                case 3: return AVec.InsertUInt64(Vector, Index,  (ulong)Value);
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(Size));
-        }
    }
 }
--- a/ChocolArm64/Instruction/AVectorHelper.cs
+++ b/ChocolArm64/Instruction/AVectorHelper.cs
@ -0,0 +1,626 @@
+using ChocolArm64.State;
+using ChocolArm64.Translation;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace ChocolArm64.Instruction
+{
+    static class AVectorHelper
+    {
+        /// <summary>
+        /// Emits a call to one of two <see cref="AVectorHelper"/> methods, chosen by
+        /// the register size of the instruction currently being translated.
+        /// </summary>
+        /// <param name="Context">Emitter context; its current op supplies the register size.</param>
+        /// <param name="Name64">Method name used when the register size is SIMD64.</param>
+        /// <param name="Name128">Method name used for every other register size.</param>
+        public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
+        {
+            string MthdName = Name128;
+
+            if (Context.CurrOp.RegisterSize == ARegisterSize.SIMD64)
+            {
+                MthdName = Name64;
+            }
+
+            Context.EmitCall(typeof(AVectorHelper), MthdName);
+        }
+
+        /// <summary>
+        /// Emits a call to the <see cref="AVectorHelper"/> method named
+        /// <paramref name="MthdName"/>.
+        /// </summary>
+        public static void EmitCall(AILEmitterCtx Context, string MthdName)
+        {
+            Type HelperType = typeof(AVectorHelper);
+
+            Context.EmitCall(HelperType, MthdName);
+        }
+
+        /// <summary>
+        /// Converts a single-precision value to a signed 32-bit integer, truncating
+        /// towards zero, clamping to the range of <see cref="int"/> and mapping NaN to 0.
+        /// </summary>
+        /// <param name="Value">Value to convert.</param>
+        /// <returns>The saturated integer result.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int SatF32ToS32(float Value)
+        {
+            if (float.IsNaN(Value)) return 0;
+
+            // int.MaxValue is not representable as a float and rounds up to 2^31, so the
+            // upper bound must be inclusive: with ">" an input of exactly 2^31 would reach
+            // the cast below, which is out of range and has an unspecified result.
+            return Value >= int.MaxValue ? int.MaxValue :
+                   Value <  int.MinValue ? int.MinValue : (int)Value;
+        }
+
+        /// <summary>
+        /// Converts a single-precision value to a signed 64-bit integer, truncating
+        /// towards zero, clamping to the range of <see cref="long"/> and mapping NaN to 0.
+        /// </summary>
+        /// <param name="Value">Value to convert.</param>
+        /// <returns>The saturated integer result.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static long SatF32ToS64(float Value)
+        {
+            if (float.IsNaN(Value)) return 0;
+
+            // long.MaxValue rounds up to 2^63 as a float, so the upper bound must be
+            // inclusive; otherwise exactly 2^63 reaches the out-of-range cast below.
+            return Value >= long.MaxValue ? long.MaxValue :
+                   Value <  long.MinValue ? long.MinValue : (long)Value;
+        }
+
+        /// <summary>
+        /// Converts a single-precision value to an unsigned 32-bit integer, truncating
+        /// towards zero, clamping to the range of <see cref="uint"/> and mapping NaN to 0.
+        /// </summary>
+        /// <param name="Value">Value to convert.</param>
+        /// <returns>The saturated integer result.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static uint SatF32ToU32(float Value)
+        {
+            if (float.IsNaN(Value)) return 0;
+
+            // uint.MaxValue rounds up to 2^32 as a float, so the upper bound must be
+            // inclusive; otherwise exactly 2^32 reaches the out-of-range cast below.
+            return Value >= uint.MaxValue ? uint.MaxValue :
+                   Value <  uint.MinValue ? uint.MinValue : (uint)Value;
+        }
+
+        /// <summary>
+        /// Converts a single-precision value to an unsigned 64-bit integer, truncating
+        /// towards zero, clamping to the range of <see cref="ulong"/> and mapping NaN to 0.
+        /// </summary>
+        /// <param name="Value">Value to convert.</param>
+        /// <returns>The saturated integer result.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static ulong SatF32ToU64(float Value)
+        {
+            if (float.IsNaN(Value)) return 0;
+
+            // ulong.MaxValue rounds up to 2^64 as a float, so the upper bound must be
+            // inclusive; otherwise exactly 2^64 reaches the out-of-range cast below.
+            return Value >= ulong.MaxValue ? ulong.MaxValue :
+                   Value <  ulong.MinValue ? ulong.MinValue : (ulong)Value;
+        }
+
+        /// <summary>
+        /// Converts a double-precision value to a signed 32-bit integer, truncating
+        /// towards zero, clamping to the range of <see cref="int"/> and mapping NaN to 0.
+        /// </summary>
+        /// <param name="Value">Value to convert.</param>
+        /// <returns>The saturated integer result.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static int SatF64ToS32(double Value)
+        {
+            if (double.IsNaN(Value))
+            {
+                return 0;
+            }
+
+            if (Value > int.MaxValue)
+            {
+                return int.MaxValue;
+            }
+
+            if (Value < int.MinValue)
+            {
+                return int.MinValue;
+            }
+
+            return (int)Value;
+        }
+
+        /// <summary>
+        /// Converts a double-precision value to a signed 64-bit integer, truncating
+        /// towards zero, clamping to the range of <see cref="long"/> and mapping NaN to 0.
+        /// </summary>
+        /// <param name="Value">Value to convert.</param>
+        /// <returns>The saturated integer result.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static long SatF64ToS64(double Value)
+        {
+            if (double.IsNaN(Value)) return 0;
+
+            // long.MaxValue rounds up to 2^63 as a double, so the upper bound must be
+            // inclusive; otherwise exactly 2^63 reaches the out-of-range cast below.
+            return Value >= long.MaxValue ? long.MaxValue :
+                   Value <  long.MinValue ? long.MinValue : (long)Value;
+        }
+
+        /// <summary>
+        /// Converts a double-precision value to an unsigned 32-bit integer, truncating
+        /// towards zero, clamping to the range of <see cref="uint"/> and mapping NaN to 0.
+        /// </summary>
+        /// <param name="Value">Value to convert.</param>
+        /// <returns>The saturated integer result.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static uint SatF64ToU32(double Value)
+        {
+            if (double.IsNaN(Value))
+            {
+                return 0;
+            }
+
+            if (Value > uint.MaxValue)
+            {
+                return uint.MaxValue;
+            }
+
+            if (Value < uint.MinValue)
+            {
+                return uint.MinValue;
+            }
+
+            return (uint)Value;
+        }
+
+        /// <summary>
+        /// Converts a double-precision value to an unsigned 64-bit integer, truncating
+        /// towards zero, clamping to the range of <see cref="ulong"/> and mapping NaN to 0.
+        /// </summary>
+        /// <param name="Value">Value to convert.</param>
+        /// <returns>The saturated integer result.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static ulong SatF64ToU64(double Value)
+        {
+            if (double.IsNaN(Value)) return 0;
+
+            // ulong.MaxValue rounds up to 2^64 as a double, so the upper bound must be
+            // inclusive; otherwise exactly 2^64 reaches the out-of-range cast below.
+            return Value >= ulong.MaxValue ? ulong.MaxValue :
+                   Value <  ulong.MinValue ? ulong.MinValue : (ulong)Value;
+        }
+
+        /// <summary>
+        /// Counts the bits that are set in an 8-bit value.
+        /// </summary>
+        /// <param name="Value">Byte to inspect.</param>
+        /// <returns>Number of set bits, from 0 to 8.</returns>
+        public static int CountSetBits8(byte Value)
+        {
+            int Count = 0;
+
+            for (int Bit = 0; Bit < 8; Bit++)
+            {
+                Count += (Value >> Bit) & 1;
+            }
+
+            return Count;
+        }
+
+        /// <summary>
+        /// Returns the larger of two doubles, distinguishing the sign of zero.
+        /// When both operands compare equal to zero the result is -0.0 only if both
+        /// are negative zero. Otherwise <paramref name="LHS"/> is returned when it is
+        /// greater or NaN, and <paramref name="RHS"/> in every other case (so a NaN
+        /// in either operand yields NaN).
+        /// </summary>
+        public static double Max(double LHS, double RHS)
+        {
+            bool BothZero = LHS == 0.0 && RHS == 0.0;
+
+            if (BothZero)
+            {
+                // The AND of the raw bits is negative only when both sign bits are set.
+                long SignBits = BitConverter.DoubleToInt64Bits(LHS) &
+                                BitConverter.DoubleToInt64Bits(RHS);
+
+                return SignBits < 0 ? -0.0 : 0.0;
+            }
+
+            return (LHS > RHS || double.IsNaN(LHS)) ? LHS : RHS;
+        }
+
+        /// <summary>
+        /// Returns the larger of two floats, distinguishing the sign of zero.
+        /// When both operands compare equal to zero the result is -0.0f only if both
+        /// are negative zero. Otherwise <paramref name="LHS"/> is returned when it is
+        /// greater or NaN, and <paramref name="RHS"/> in every other case (so a NaN
+        /// in either operand yields NaN).
+        /// </summary>
+        public static float MaxF(float LHS, float RHS)
+        {
+            bool BothZero = LHS == 0.0 && RHS == 0.0;
+
+            if (BothZero)
+            {
+                // The AND of the raw bits is negative only when both sign bits are set.
+                int SignBits = BitConverter.SingleToInt32Bits(LHS) &
+                               BitConverter.SingleToInt32Bits(RHS);
+
+                return SignBits < 0 ? -0.0f : 0.0f;
+            }
+
+            return (LHS > RHS || float.IsNaN(LHS)) ? LHS : RHS;
+        }
+
+        /// <summary>
+        /// Returns the smaller of two doubles, distinguishing the sign of zero.
+        /// When both operands compare equal to zero the result is -0.0 if either is
+        /// negative zero. Otherwise <paramref name="LHS"/> is returned when it is
+        /// smaller or NaN, and <paramref name="RHS"/> in every other case (so a NaN
+        /// in either operand yields NaN).
+        /// </summary>
+        public static double Min(double LHS, double RHS)
+        {
+            bool BothZero = LHS == 0.0 && RHS == 0.0;
+
+            if (BothZero)
+            {
+                // The OR of the raw bits is negative when at least one sign bit is set.
+                long SignBits = BitConverter.DoubleToInt64Bits(LHS) |
+                                BitConverter.DoubleToInt64Bits(RHS);
+
+                return SignBits < 0 ? -0.0 : 0.0;
+            }
+
+            return (LHS < RHS || double.IsNaN(LHS)) ? LHS : RHS;
+        }
+
+        /// <summary>
+        /// Returns the smaller of two floats, distinguishing the sign of zero.
+        /// When both operands compare equal to zero the result is -0.0f if either is
+        /// negative zero. Otherwise <paramref name="LHS"/> is returned when it is
+        /// smaller or NaN, and <paramref name="RHS"/> in every other case (so a NaN
+        /// in either operand yields NaN).
+        /// </summary>
+        public static float MinF(float LHS, float RHS)
+        {
+            bool BothZero = LHS == 0.0 && RHS == 0.0;
+
+            if (BothZero)
+            {
+                // The OR of the raw bits is negative when at least one sign bit is set.
+                int SignBits = BitConverter.SingleToInt32Bits(LHS) |
+                               BitConverter.SingleToInt32Bits(RHS);
+
+                return SignBits < 0 ? -0.0f : 0.0f;
+            }
+
+            return (LHS < RHS || float.IsNaN(LHS)) ? LHS : RHS;
+        }
+
+        /// <summary>
+        /// Rounds a double to an integral value using the rounding mode encoded in
+        /// bits 23:22 of <paramref name="Fpcr"/>.
+        /// </summary>
+        /// <param name="Value">Value to round.</param>
+        /// <param name="Fpcr">Control word; only bits 23:22 are read.</param>
+        /// <exception cref="InvalidOperationException">
+        /// The decoded mode matches none of the four handled <c>ARoundMode</c> values.
+        /// </exception>
+        public static double Round(double Value, int Fpcr)
+        {
+            ARoundMode Mode = (ARoundMode)((Fpcr >> 22) & 3);
+
+            if (Mode == ARoundMode.ToNearest)
+            {
+                return Math.Round(Value);
+            }
+
+            if (Mode == ARoundMode.TowardsPlusInfinity)
+            {
+                return Math.Ceiling(Value);
+            }
+
+            if (Mode == ARoundMode.TowardsMinusInfinity)
+            {
+                return Math.Floor(Value);
+            }
+
+            if (Mode == ARoundMode.TowardsZero)
+            {
+                return Math.Truncate(Value);
+            }
+
+            throw new InvalidOperationException();
+        }
+
+        /// <summary>
+        /// Rounds a float to an integral value using the rounding mode encoded in
+        /// bits 23:22 of <paramref name="Fpcr"/>.
+        /// </summary>
+        /// <param name="Value">Value to round.</param>
+        /// <param name="Fpcr">Control word; only bits 23:22 are read.</param>
+        /// <exception cref="InvalidOperationException">
+        /// The decoded mode matches none of the four handled <c>ARoundMode</c> values.
+        /// </exception>
+        public static float RoundF(float Value, int Fpcr)
+        {
+            ARoundMode Mode = (ARoundMode)((Fpcr >> 22) & 3);
+
+            if (Mode == ARoundMode.ToNearest)
+            {
+                return MathF.Round(Value);
+            }
+
+            if (Mode == ARoundMode.TowardsPlusInfinity)
+            {
+                return MathF.Ceiling(Value);
+            }
+
+            if (Mode == ARoundMode.TowardsMinusInfinity)
+            {
+                return MathF.Floor(Value);
+            }
+
+            if (Mode == ARoundMode.TowardsZero)
+            {
+                return MathF.Truncate(Value);
+            }
+
+            throw new InvalidOperationException();
+        }
+
+        /// <summary>
+        /// Table lookup over one table register, producing 8 result bytes
+        /// (forwards to <c>Tbl</c> with a byte count of 8).
+        /// </summary>
+        public static Vector128<float> Tbl1_V64(
+            Vector128<float> Vector,
+            Vector128<float> Tb0)
+        {
+            const int Bytes = 8;
+
+            return Tbl(Vector, Bytes, Tb0);
+        }
+
+        /// <summary>
+        /// Table lookup over one table register, producing 16 result bytes
+        /// (forwards to <c>Tbl</c> with a byte count of 16).
+        /// </summary>
+        public static Vector128<float> Tbl1_V128(
+            Vector128<float> Vector,
+            Vector128<float> Tb0)
+        {
+            const int Bytes = 16;
+
+            return Tbl(Vector, Bytes, Tb0);
+        }
+
+        /// <summary>
+        /// Table lookup over two table registers, producing 8 result bytes
+        /// (forwards to <c>Tbl</c> with a byte count of 8).
+        /// </summary>
+        public static Vector128<float> Tbl2_V64(
+            Vector128<float> Vector,
+            Vector128<float> Tb0,
+            Vector128<float> Tb1)
+        {
+            const int Bytes = 8;
+
+            return Tbl(Vector, Bytes, Tb0, Tb1);
+        }
+
+        /// <summary>
+        /// Table lookup over two table registers, producing 16 result bytes
+        /// (forwards to <c>Tbl</c> with a byte count of 16).
+        /// </summary>
+        public static Vector128<float> Tbl2_V128(
+            Vector128<float> Vector,
+            Vector128<float> Tb0,
+            Vector128<float> Tb1)
+        {
+            const int Bytes = 16;
+
+            return Tbl(Vector, Bytes, Tb0, Tb1);
+        }
+
+        /// <summary>
+        /// Table lookup over three table registers, producing 8 result bytes
+        /// (forwards to <c>Tbl</c> with a byte count of 8).
+        /// </summary>
+        public static Vector128<float> Tbl3_V64(
+            Vector128<float> Vector,
+            Vector128<float> Tb0,
+            Vector128<float> Tb1,
+            Vector128<float> Tb2)
+        {
+            const int Bytes = 8;
+
+            return Tbl(Vector, Bytes, Tb0, Tb1, Tb2);
+        }
+
+        /// <summary>
+        /// Table lookup over three table registers, producing 16 result bytes
+        /// (forwards to <c>Tbl</c> with a byte count of 16).
+        /// </summary>
+        public static Vector128<float> Tbl3_V128(
+            Vector128<float> Vector,
+            Vector128<float> Tb0,
+            Vector128<float> Tb1,
+            Vector128<float> Tb2)
+        {
+            const int Bytes = 16;
+
+            return Tbl(Vector, Bytes, Tb0, Tb1, Tb2);
+        }
+
+        /// <summary>
+        /// Table lookup over four table registers, producing 8 result bytes
+        /// (forwards to <c>Tbl</c> with a byte count of 8).
+        /// </summary>
+        public static Vector128<float> Tbl4_V64(
+            Vector128<float> Vector,
+            Vector128<float> Tb0,
+            Vector128<float> Tb1,
+            Vector128<float> Tb2,
+            Vector128<float> Tb3)
+        {
+            const int Bytes = 8;
+
+            return Tbl(Vector, Bytes, Tb0, Tb1, Tb2, Tb3);
+        }
+
+        /// <summary>
+        /// Table lookup over four table registers, producing 16 result bytes
+        /// (forwards to <c>Tbl</c> with a byte count of 16).
+        /// </summary>
+        public static Vector128<float> Tbl4_V128(
+            Vector128<float> Vector,
+            Vector128<float> Tb0,
+            Vector128<float> Tb1,
+            Vector128<float> Tb2,
+            Vector128<float> Tb3)
+        {
+            const int Bytes = 16;
+
+            return Tbl(Vector, Bytes, Tb0, Tb1, Tb2, Tb3);
+        }
+
+        /// <summary>
+        /// Shared implementation of the TblN helpers. Each of the first
+        /// <paramref name="Bytes"/> bytes of <paramref name="Vector"/> is used as an
+        /// index into the table formed by concatenating the <paramref name="Tb"/>
+        /// registers (16 bytes each); the byte found there is written to the same
+        /// position of the result. Out-of-range indices and the result bytes at or
+        /// beyond <paramref name="Bytes"/> stay zero.
+        /// </summary>
+        /// <param name="Vector">Vector holding one table index per byte.</param>
+        /// <param name="Bytes">Number of low result bytes to produce.</param>
+        /// <param name="Tb">Table registers, lowest table bytes first.</param>
+        /// <returns>The vector of looked-up bytes.</returns>
+        private static Vector128<float> Tbl(Vector128<float> Vector, int Bytes, params Vector128<float>[] Tb)
+        {
+            Vector128<float> Res = new Vector128<float>();
+
+            for (byte Index = 0; Index < Bytes; Index++)
+            {
+                byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0);
+
+                // Every table register contributes 16 bytes, so the high nibble of the
+                // index selects the register and the low nibble the byte inside it.
+                // Reading the register directly avoids allocating and filling a
+                // flattened byte[] copy of the whole table on every call.
+                int TbIdx = TblIdx >> 4;
+
+                if (TbIdx < Tb.Length)
+                {
+                    byte Value = (byte)VectorExtractIntZx(Tb[TbIdx], (byte)(TblIdx & 0xf), 0);
+
+                    Res = VectorInsertInt(Value, Res, Index, 0);
+                }
+            }
+
+            return Res;
+        }
+
+        /// <summary>
+        /// Reads the 64-bit element at <paramref name="Index"/> and returns its bits
+        /// interpreted as a double.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static double VectorExtractDouble(Vector128<float> Vector, byte Index)
+        {
+            long Bits = VectorExtractIntSx(Vector, Index, 3);
+
+            return BitConverter.Int64BitsToDouble(Bits);
+        }
+
+        /// <summary>
+        /// Extracts one integer element from a vector and sign-extends it to 64 bits.
+        /// </summary>
+        /// <param name="Vector">Source vector.</param>
+        /// <param name="Index">Element index, counted in elements of the selected width.</param>
+        /// <param name="Size">Element width selector: 0 = 8, 1 = 16, 2 = 32, 3 = 64 bits.</param>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="Size"/> is not 0..3.</exception>
+        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static long VectorExtractIntSx(Vector128<float> Vector, byte Index, int Size)
+        {
+            if (Sse41.IsSupported)
+            {
+                // Each case narrows to the signed type of the element width so that the
+                // implicit conversion to long performs the sign extension.
+                switch (Size)
+                {
+                    case 0:
+                        return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
+
+                    case 1:
+                        return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
+
+                    case 2:
+                        return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index);
+
+                    case 3:
+                        return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
+                }
+
+                throw new ArgumentOutOfRangeException(nameof(Size));
+            }
+            else if (Sse2.IsSupported)
+            {
+                // Without SSE4.1 the element is fetched by the zero-extending helper
+                // and then narrowed to the signed type to recover the sign.
+                switch (Size)
+                {
+                    case 0:
+                        return (sbyte)VectorExtractIntZx(Vector, Index, Size);
+
+                    case 1:
+                        return (short)VectorExtractIntZx(Vector, Index, Size);
+
+                    case 2:
+                        return (int)VectorExtractIntZx(Vector, Index, Size);
+
+                    case 3:
+                        return (long)VectorExtractIntZx(Vector, Index, Size);
+                }
+
+                throw new ArgumentOutOfRangeException(nameof(Size));
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Extracts one integer element from a vector and zero-extends it to 64 bits.
+        /// </summary>
+        /// <param name="Vector">Source vector.</param>
+        /// <param name="Index">Element index, counted in elements of the selected width.</param>
+        /// <param name="Size">Element width selector: 0 = 8, 1 = 16, 2 = 32, 3 = 64 bits.</param>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="Size"/> is not 0..3.</exception>
+        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static ulong VectorExtractIntZx(Vector128<float> Vector, byte Index, int Size)
+        {
+            if (Sse41.IsSupported)
+            {
+                switch (Size)
+                {
+                    case 0:
+                        return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
+
+                    case 1:
+                        return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
+
+                    case 2:
+                        return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index);
+
+                    case 3:
+                        return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index);
+                }
+
+                throw new ArgumentOutOfRangeException(nameof(Size));
+            }
+            else if (Sse2.IsSupported)
+            {
+                // The SSE2 path only uses 16-bit lane extracts. ShortIdx is the index
+                // of the first 16-bit lane that holds (part of) the requested element:
+                // two bytes share a lane, wider elements span 2 or 4 consecutive lanes.
+                int ShortIdx = Size == 0
+                    ? Index >> 1
+                    : Index << (Size - 1);
+
+                ushort Value = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
+
+                switch (Size)
+                {
+                    case 0:
+                        // Odd byte indices live in the upper half of the lane.
+                        return (byte)(Value >> (Index & 1) * 8);
+
+                    case 1:
+                        return Value;
+
+                    case 2:
+                    case 3:
+                    {
+                        ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1));
+
+                        if (Size == 2)
+                        {
+                            // The cast to uint keeps the result zero-extended even when
+                            // the shifted int has its top bit set.
+                            return (uint)(Value | (Value1 << 16));
+                        }
+
+                        ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2));
+                        ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3));
+
+                        // Lanes are combined lowest lane first (little-endian order).
+                        return ((ulong)Value  <<  0) |
+                               ((ulong)Value1 << 16) |
+                               ((ulong)Value2 << 32) |
+                               ((ulong)Value3 << 48);
+                    }
+                }
+
+                throw new ArgumentOutOfRangeException(nameof(Size));
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Extracts the single-precision element at <paramref name="Index"/>.
+        /// </summary>
+        /// <param name="Vector">Source vector.</param>
+        /// <param name="Index">Index of the 32-bit element to read.</param>
+        /// <returns>The selected element.</returns>
+        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static float VectorExtractSingle(Vector128<float> Vector, byte Index)
+        {
+            if (Sse41.IsSupported)
+            {
+                return Sse41.Extract(Vector, Index);
+            }
+            else if (Sse2.IsSupported)
+            {
+                // SSE2 fallback, matching the integer extract/insert helpers: rebuild
+                // the 32-bit element from its two 16-bit lanes and reinterpret the bits.
+                Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector);
+
+                int Low  = Sse2.Extract(ShortVector, (byte)(Index * 2 + 0));
+                int High = Sse2.Extract(ShortVector, (byte)(Index * 2 + 1));
+
+                return BitConverter.Int32BitsToSingle(Low | (High << 16));
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Writes the raw bits of <paramref name="Value"/> into the 64-bit element at
+        /// <paramref name="Index"/> and returns the updated vector.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> VectorInsertDouble(double Value, Vector128<float> Vector, byte Index)
+        {
+            ulong Bits = (ulong)BitConverter.DoubleToInt64Bits(Value);
+
+            return VectorInsertInt(Bits, Vector, Index, 3);
+        }
+
+        /// <summary>
+        /// Returns a copy of <paramref name="Vector"/> in which one integer element
+        /// has been replaced by the low bits of <paramref name="Value"/>.
+        /// </summary>
+        /// <param name="Value">New element value; truncated to the element width.</param>
+        /// <param name="Vector">Source vector.</param>
+        /// <param name="Index">Element index, counted in elements of the selected width.</param>
+        /// <param name="Size">Element width selector: 0 = 8, 1 = 16, 2 = 32, 3 = 64 bits.</param>
+        /// <exception cref="ArgumentOutOfRangeException"><paramref name="Size"/> is not 0..3.</exception>
+        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> VectorInsertInt(ulong Value, Vector128<float> Vector, byte Index, int Size)
+        {
+            if (Sse41.IsSupported)
+            {
+                switch (Size)
+                {
+                    case 0:
+                        return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index));
+
+                    case 1:
+                        return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
+
+                    case 2:
+                        return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index));
+
+                    case 3:
+                        return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index));
+                }
+
+                throw new ArgumentOutOfRangeException(nameof(Size));
+            }
+            else if (Sse2.IsSupported)
+            {
+                // The SSE2 path only uses 16-bit lane inserts; wider elements are
+                // written as 2 or 4 consecutive lanes, bytes by patching half a lane.
+                Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector);
+
+                // Index of the first 16-bit lane that holds (part of) the element.
+                int ShortIdx = Size == 0
+                    ? Index >> 1
+                    : Index << (Size - 1);
+
+                switch (Size)
+                {
+                    case 0:
+                    {
+                        ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
+
+                        int Shift = (Index & 1) * 8;
+
+                        // Keep the byte that is NOT being replaced: 0xff00 for an even
+                        // index (low byte replaced), 0x00ff for an odd one.
+                        ShortVal &= (ushort)(0xff00 >> Shift);
+
+                        ShortVal |= (ushort)((byte)Value << Shift);
+
+                        return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx));
+                    }
+
+                    case 1:
+                        return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
+
+                    case 2:
+                    case 3:
+                    {
+                        // Lanes are written lowest 16 bits first (little-endian order).
+                        ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >>  0), (byte)(ShortIdx + 0));
+                        ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1));
+
+                        if (Size == 3)
+                        {
+                            ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2));
+                            ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3));
+                        }
+
+                        return Sse.StaticCast<ushort, float>(ShortVector);
+                    }
+                }
+
+                throw new ArgumentOutOfRangeException(nameof(Size));
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Returns a copy of <paramref name="Vector"/> in which the single-precision
+        /// element at <paramref name="Index"/> has been replaced by <paramref name="Value"/>.
+        /// </summary>
+        /// <param name="Value">New element value.</param>
+        /// <param name="Vector">Source vector.</param>
+        /// <param name="Index">Index of the 32-bit element to replace.</param>
+        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> VectorInsertSingle(float Value, Vector128<float> Vector, byte Index)
+        {
+            if (Sse41.IsSupported)
+            {
+                // The destination element index is passed in bits 5:4 of the immediate.
+                return Sse41.Insert(Vector, Value, (byte)(Index << 4));
+            }
+            else if (Sse2.IsSupported)
+            {
+                // SSE2 fallback, matching the integer extract/insert helpers: write the
+                // raw bits of the value as two 16-bit lanes, low half first.
+                int IntValue = BitConverter.SingleToInt32Bits(Value);
+
+                Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector);
+
+                ShortVector = Sse2.Insert(ShortVector, (ushort)(IntValue >>  0), (byte)(Index * 2 + 0));
+                ShortVector = Sse2.Insert(ShortVector, (ushort)(IntValue >> 16), (byte)(Index * 2 + 1));
+
+                return Sse.StaticCast<ushort, float>(ShortVector);
+            }
+
+            throw new PlatformNotSupportedException();
+        }
+
+        /// <summary>
+        /// Retypes a float vector as a vector of sbyte elements through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<float, sbyte>(Vector);
+        }
+
+        /// <summary>
+        /// Retypes a float vector as a vector of short elements through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<short> VectorSingleToInt16(Vector128<float> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<float, short>(Vector);
+        }
+
+        /// <summary>
+        /// Retypes a float vector as a vector of int elements through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<int> VectorSingleToInt32(Vector128<float> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<float, int>(Vector);
+        }
+
+        /// <summary>
+        /// Retypes a float vector as a vector of long elements through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<long> VectorSingleToInt64(Vector128<float> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<float, long>(Vector);
+        }
+
+        /// <summary>
+        /// Retypes a float vector as a vector of double elements through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<double> VectorSingleToDouble(Vector128<float> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<float, double>(Vector);
+        }
+
+        /// <summary>
+        /// Retypes a vector of sbyte elements as a float vector through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> VectorSByteToSingle(Vector128<sbyte> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<sbyte, float>(Vector);
+        }
+
+        /// <summary>
+        /// Retypes a vector of short elements as a float vector through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> VectorInt16ToSingle(Vector128<short> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<short, float>(Vector);
+        }
+
+        /// <summary>
+        /// Retypes a vector of int elements as a float vector through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> VectorInt32ToSingle(Vector128<int> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<int, float>(Vector);
+        }
+
+        /// <summary>
+        /// Retypes a vector of long elements as a float vector through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> VectorInt64ToSingle(Vector128<long> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<long, float>(Vector);
+        }
+
+        /// <summary>
+        /// Retypes a vector of double elements as a float vector through
+        /// <c>Sse.StaticCast</c>; throws <see cref="PlatformNotSupportedException"/>
+        /// when SSE is not supported.
+        /// </summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<float> VectorDoubleToSingle(Vector128<double> Vector)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            return Sse.StaticCast<double, float>(Vector);
+        }
+    }
+}
--- a/ChocolArm64/Memory/AMemory.cs
+++ b/ChocolArm64/Memory/AMemory.cs
@ -3,6 +3,8 @@ using ChocolArm64.State;
 using System;
 using System.Collections.Generic;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;

 namespace ChocolArm64.Memory
 {
@ -189,33 +191,73 @@ namespace ChocolArm64.Memory
            return ReadUInt64Unchecked(Position);
        }

-        public AVec ReadVector8(long Position)
+        // Reads one byte through ReadByte and returns it as byte element 0 of a vector
+        // whose other fifteen byte elements are zero.
+        // Throws PlatformNotSupportedException when SSE2 is not available.
+        public Vector128<float> ReadVector8(long Position)
        {
-            return new AVec() { B0 = ReadByte(Position) };
-        }
-
-        public AVec ReadVector16(long Position)
-        {
-            return new AVec() { H0 = ReadUInt16(Position) };
-        }
-
-        public AVec ReadVector32(long Position)
-        {
-            return new AVec() { W0 = ReadUInt32(Position) };
-        }
-
-        public AVec ReadVector64(long Position)
-        {
-            return new AVec() { X0 = ReadUInt64(Position) };
-        }
-
-        public AVec ReadVector128(long Position)
-        {
-            return new AVec()
+            if (!Sse2.IsSupported)
            {
-                X0 = ReadUInt64(Position + 0),
-                X1 = ReadUInt64(Position + 8)
-            };
+                throw new PlatformNotSupportedException();
+            }
+
+            byte Value = ReadByte(Position);
+
+            // SetVector128 takes elements from highest to lowest, so the last argument is element 0.
+            return Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Value));
+        }
+
+        // Reads a 16-bit value through ReadUInt16 and inserts it as 16-bit element 0
+        // of an otherwise zero vector.
+        // Throws PlatformNotSupportedException when SSE2 is not available.
+        public Vector128<float> ReadVector16(long Position)
+        {
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            Vector128<ushort> Zero = Sse2.SetZeroVector128<ushort>();
+
+            return Sse.StaticCast<ushort, float>(Sse2.Insert(Zero, ReadUInt16(Position), 0));
+        }
+
+        // Loads the 32-bit scalar at Position into the low element of a vector
+        // (scalar load: the remaining elements are zero).
+        // The first and last byte of the access are passed to EnsureAccessIsValid
+        // before the SSE support check.
+        public Vector128<float> ReadVector32(long Position)
+        {
+            EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
+            EnsureAccessIsValid(Position + 3, AMemoryPerm.Read);
+
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            float* Source = (float*)(RamPtr + (uint)Position);
+
+            return Sse.LoadScalarVector128(Source);
+        }
+
+        // Loads the 64-bit scalar at Position into the low half of a vector
+        // (scalar load: the upper half is zero) and re-types it as Vector128<float>.
+        // The first and last byte of the access are passed to EnsureAccessIsValid
+        // before the SSE2 support check.
+        public Vector128<float> ReadVector64(long Position)
+        {
+            EnsureAccessIsValid(Position + 0, AMemoryPerm.Read);
+            EnsureAccessIsValid(Position + 7, AMemoryPerm.Read);
+
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            double* Source = (double*)(RamPtr + (uint)Position);
+
+            return Sse.StaticCast<double, float>(Sse2.LoadScalarVector128(Source));
+        }
+
+        // Loads the 16 bytes at Position as a full 128-bit vector.
+        // The first and last byte of the access are passed to EnsureAccessIsValid
+        // before the SSE support check.
+        public Vector128<float> ReadVector128(long Position)
+        {
+            EnsureAccessIsValid(Position + 0,  AMemoryPerm.Read);
+            EnsureAccessIsValid(Position + 15, AMemoryPerm.Read);
+
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            float* Source = (float*)(RamPtr + (uint)Position);
+
+            return Sse.LoadVector128(Source);
        }

        public sbyte ReadSByteUnchecked(long Position)
@ -258,33 +300,64 @@ namespace ChocolArm64.Memory
            return *((ulong*)(RamPtr + (uint)Position));
        }

-        public AVec ReadVector8Unchecked(long Position)
+        // Unchecked variant of ReadVector8: reads one byte through ReadByteUnchecked and
+        // returns it as byte element 0 of a vector whose other fifteen byte elements are zero.
+        // Throws PlatformNotSupportedException when SSE2 is not available.
+        public Vector128<float> ReadVector8Unchecked(long Position)
        {
-            return new AVec() { B0 = ReadByteUnchecked(Position) };
-        }
-
-        public AVec ReadVector16Unchecked(long Position)
-        {
-            return new AVec() { H0 = ReadUInt16Unchecked(Position) };
-        }
-
-        public AVec ReadVector32Unchecked(long Position)
-        {
-            return new AVec() { W0 = ReadUInt32Unchecked(Position) };
-        }
-
-        public AVec ReadVector64Unchecked(long Position)
-        {
-            return new AVec() { X0 = ReadUInt64Unchecked(Position) };
-        }
-
-        public AVec ReadVector128Unchecked(long Position)
-        {
-            return new AVec()
+            if (Sse2.IsSupported)
            {
-                X0 = ReadUInt64Unchecked(Position + 0),
-                X1 = ReadUInt64Unchecked(Position + 8)
-            };
+                // Use the unchecked byte reader, as the code this replaces did and as the other
+                // *Unchecked vector readers do; calling ReadByte here would make this the checked path.
+                // SetVector128 takes elements from highest to lowest, so the last argument is element 0.
+                return Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ReadByteUnchecked(Position)));
+            }
+            else
+            {
+                throw new PlatformNotSupportedException();
+            }
+        }
+
+        // Reads a 16-bit value through ReadUInt16Unchecked and inserts it as 16-bit
+        // element 0 of an otherwise zero vector.
+        // Throws PlatformNotSupportedException when SSE2 is not available.
+        public Vector128<float> ReadVector16Unchecked(long Position)
+        {
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            Vector128<ushort> Zero = Sse2.SetZeroVector128<ushort>();
+
+            return Sse.StaticCast<ushort, float>(Sse2.Insert(Zero, ReadUInt16Unchecked(Position), 0));
+        }
+
+        // Loads the 32-bit scalar at Position into the low element of a vector
+        // (scalar load: the remaining elements are zero). No EnsureAccessIsValid call is made here.
+        // Throws PlatformNotSupportedException when SSE is not available.
+        public Vector128<float> ReadVector32Unchecked(long Position)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            float* Source = (float*)(RamPtr + (uint)Position);
+
+            return Sse.LoadScalarVector128(Source);
+        }
+
+        // Loads the 64-bit scalar at Position into the low half of a vector
+        // (scalar load: the upper half is zero) and re-types it as Vector128<float>.
+        // No EnsureAccessIsValid call is made here.
+        // Throws PlatformNotSupportedException when SSE2 is not available.
+        public Vector128<float> ReadVector64Unchecked(long Position)
+        {
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            double* Source = (double*)(RamPtr + (uint)Position);
+
+            return Sse.StaticCast<double, float>(Sse2.LoadScalarVector128(Source));
+        }
+
+        // Loads the 16 bytes at Position as a full 128-bit vector.
+        // No EnsureAccessIsValid call is made here.
+        // Throws PlatformNotSupportedException when SSE is not available.
+        public Vector128<float> ReadVector128Unchecked(long Position)
+        {
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            float* Source = (float*)(RamPtr + (uint)Position);
+
+            return Sse.LoadVector128(Source);
        }

        public void WriteSByte(long Position, sbyte Value)
@ -338,30 +411,77 @@ namespace ChocolArm64.Memory
            WriteUInt64Unchecked(Position, Value);
        }

-        public void WriteVector8(long Position, AVec Value)
+        // Writes byte element 0 of Value to Position through WriteByte.
+        // Uses the SSE4.1 byte extract when available, otherwise extracts 16-bit element 0
+        // with SSE2 and keeps its low byte.
+        // Throws PlatformNotSupportedException when neither is available.
+        public void WriteVector8(long Position, Vector128<float> Value)
        {
-            WriteByte(Position, Value.B0);
+            byte Lane0;
+
+            if (Sse41.IsSupported)
+            {
+                Lane0 = Sse41.Extract(Sse.StaticCast<float, byte>(Value), 0);
+            }
+            else if (Sse2.IsSupported)
+            {
+                Lane0 = (byte)Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0);
+            }
+            else
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            WriteByte(Position, Lane0);
        }

-        public void WriteVector16(long Position, AVec Value)
+        // Writes 16-bit element 0 of Value to Position through WriteUInt16.
+        // Throws PlatformNotSupportedException when SSE2 is not available.
+        public void WriteVector16(long Position, Vector128<float> Value)
        {
-            WriteUInt16(Position, Value.H0);
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            Vector128<ushort> Lanes = Sse.StaticCast<float, ushort>(Value);
+
+            WriteUInt16(Position, Sse2.Extract(Lanes, 0));
        }

-        public void WriteVector32(long Position, AVec Value)
+        // Stores the low 32-bit element of Value at Position.
+        // The first and last byte of the access are passed to EnsureAccessIsValid
+        // before the SSE support check.
+        public void WriteVector32(long Position, Vector128<float> Value)
        {
-            WriteUInt32(Position, Value.W0);
+            EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
+            EnsureAccessIsValid(Position + 3, AMemoryPerm.Write);
+
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            float* Target = (float*)(RamPtr + (uint)Position);
+
+            Sse.StoreScalar(Target, Value);
        }

-        public void WriteVector64(long Position, AVec Value)
+        // Stores the low 64 bits of Value at Position.
+        // The first and last byte of the access are passed to EnsureAccessIsValid
+        // before the SSE2 support check.
+        public void WriteVector64(long Position, Vector128<float> Value)
        {
-            WriteUInt64(Position, Value.X0);
+            EnsureAccessIsValid(Position + 0, AMemoryPerm.Write);
+            EnsureAccessIsValid(Position + 7, AMemoryPerm.Write);
+
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            double* Target = (double*)(RamPtr + (uint)Position);
+
+            Sse2.StoreScalar(Target, Sse.StaticCast<float, double>(Value));
        }

-        public void WriteVector128(long Position, AVec Value)
+        // Stores all 16 bytes of Value at Position.
+        // The first and last byte of the access are passed to EnsureAccessIsValid
+        // before the SSE support check.
+        public void WriteVector128(long Position, Vector128<float> Value)
        {
-            WriteUInt64(Position + 0, Value.X0);
-            WriteUInt64(Position + 8, Value.X1);
+            EnsureAccessIsValid(Position + 0,  AMemoryPerm.Write);
+            EnsureAccessIsValid(Position + 15, AMemoryPerm.Write);
+
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            float* Target = (float*)(RamPtr + (uint)Position);
+
+            Sse.Store(Target, Value);
        }

        public void WriteSByteUnchecked(long Position, sbyte Value)
@ -404,30 +524,68 @@ namespace ChocolArm64.Memory
            *((ulong*)(RamPtr + (uint)Position)) = Value;
        }

-        public void WriteVector8Unchecked(long Position, AVec Value)
+        // Writes byte element 0 of Value to Position through WriteByteUnchecked.
+        // Uses the SSE4.1 byte extract when available, otherwise extracts 16-bit element 0
+        // with SSE2 and keeps its low byte.
+        // Throws PlatformNotSupportedException when neither is available.
+        public void WriteVector8Unchecked(long Position, Vector128<float> Value)
        {
-            WriteByteUnchecked(Position, Value.B0);
+            byte Lane0;
+
+            if (Sse41.IsSupported)
+            {
+                Lane0 = Sse41.Extract(Sse.StaticCast<float, byte>(Value), 0);
+            }
+            else if (Sse2.IsSupported)
+            {
+                Lane0 = (byte)Sse2.Extract(Sse.StaticCast<float, ushort>(Value), 0);
+            }
+            else
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            WriteByteUnchecked(Position, Lane0);
        }

-        public void WriteVector16Unchecked(long Position, AVec Value)
+        // Writes 16-bit element 0 of Value to Position through WriteUInt16Unchecked.
+        // Throws PlatformNotSupportedException when SSE2 is not available.
+        public void WriteVector16Unchecked(long Position, Vector128<float> Value)
        {
-            WriteUInt16Unchecked(Position, Value.H0);
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            Vector128<ushort> Lanes = Sse.StaticCast<float, ushort>(Value);
+
+            WriteUInt16Unchecked(Position, Sse2.Extract(Lanes, 0));
        }

-        public void WriteVector32Unchecked(long Position, AVec Value)
+        // Stores the low 32-bit element of Value at Position.
+        // No EnsureAccessIsValid call is made here.
+        // Throws PlatformNotSupportedException when SSE is not available.
+        public void WriteVector32Unchecked(long Position, Vector128<float> Value)
        {
-            WriteUInt32Unchecked(Position, Value.W0);
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            float* Target = (float*)(RamPtr + (uint)Position);
+
+            Sse.StoreScalar(Target, Value);
        }

-        public void WriteVector64Unchecked(long Position, AVec Value)
+        // Stores the low 64 bits of Value at Position.
+        // No EnsureAccessIsValid call is made here.
+        // Throws PlatformNotSupportedException when SSE2 is not available.
+        public void WriteVector64Unchecked(long Position, Vector128<float> Value)
        {
-            WriteUInt64Unchecked(Position, Value.X0);
+            if (!Sse2.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            double* Target = (double*)(RamPtr + (uint)Position);
+
+            Sse2.StoreScalar(Target, Sse.StaticCast<float, double>(Value));
        }

-        public void WriteVector128Unchecked(long Position, AVec Value)
+        // Stores all 16 bytes of Value at Position.
+        // No EnsureAccessIsValid call is made here.
+        // Throws PlatformNotSupportedException when SSE is not available.
+        public void WriteVector128Unchecked(long Position, Vector128<float> Value)
        {
-            WriteUInt64Unchecked(Position + 0, Value.X0);
-            WriteUInt64Unchecked(Position + 8, Value.X1);
+            if (!Sse.IsSupported)
+            {
+                throw new PlatformNotSupportedException();
+            }
+
+            // Position is truncated to 32 bits to form the offset from RamPtr.
+            float* Target = (float*)(RamPtr + (uint)Position);
+
+            Sse.Store(Target, Value);
        }

        private void EnsureAccessIsValid(long Position, AMemoryPerm Perm)
--- a/ChocolArm64/State/AThreadState.cs
+++ b/ChocolArm64/State/AThreadState.cs
@ -2,6 +2,7 @@ using ChocolArm64.Events;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.Runtime.Intrinsics;

 namespace ChocolArm64.State
 {
@ -18,10 +19,10 @@ namespace ChocolArm64.State
                     X16, X17, X18, X19, X20, X21, X22, X23,
                     X24, X25, X26, X27, X28, X29, X30, X31;

-        public AVec V0,  V1,  V2,  V3,  V4,  V5,  V6,  V7,
-                    V8,  V9,  V10, V11, V12, V13, V14, V15,
-                    V16, V17, V18, V19, V20, V21, V22, V23,
-                    V24, V25, V26, V27, V28, V29, V30, V31;
+        public Vector128<float> V0,  V1,  V2,  V3,  V4,  V5,  V6,  V7,
+                                V8,  V9,  V10, V11, V12, V13, V14, V15,
+                                V16, V17, V18, V19, V20, V21, V22, V23,
+                                V24, V25, V26, V27, V28, V29, V30, V31;

        public bool Overflow;
        public bool Carry;
--- a/ChocolArm64/State/AVec.cs
+++ b/ChocolArm64/State/AVec.cs
@ -1,243 +0,0 @@
-using System;
-using System.Runtime.InteropServices;
-
-namespace ChocolArm64.State
-{
-    [StructLayout(LayoutKind.Explicit, Size = 16)]
-    public struct AVec
-    {
-        [FieldOffset(0x0)] public byte B0;
-        [FieldOffset(0x1)] public byte B1;
-        [FieldOffset(0x2)] public byte B2;
-        [FieldOffset(0x3)] public byte B3;
-        [FieldOffset(0x4)] public byte B4;
-        [FieldOffset(0x5)] public byte B5;
-        [FieldOffset(0x6)] public byte B6;
-        [FieldOffset(0x7)] public byte B7;
-        [FieldOffset(0x8)] public byte B8;
-        [FieldOffset(0x9)] public byte B9;
-        [FieldOffset(0xa)] public byte B10;
-        [FieldOffset(0xb)] public byte B11;
-        [FieldOffset(0xc)] public byte B12;
-        [FieldOffset(0xd)] public byte B13;
-        [FieldOffset(0xe)] public byte B14;
-        [FieldOffset(0xf)] public byte B15;
-
-        [FieldOffset(0x0)] public ushort H0;
-        [FieldOffset(0x2)] public ushort H1;
-        [FieldOffset(0x4)] public ushort H2;
-        [FieldOffset(0x6)] public ushort H3;
-        [FieldOffset(0x8)] public ushort H4;
-        [FieldOffset(0xa)] public ushort H5;
-        [FieldOffset(0xc)] public ushort H6;
-        [FieldOffset(0xe)] public ushort H7;
-
-        [FieldOffset(0x0)] public uint W0;
-        [FieldOffset(0x4)] public uint W1;
-        [FieldOffset(0x8)] public uint W2;
-        [FieldOffset(0xc)] public uint W3;
-
-        [FieldOffset(0x0)] public float S0;
-        [FieldOffset(0x4)] public float S1;
-        [FieldOffset(0x8)] public float S2;
-        [FieldOffset(0xc)] public float S3;
-
-        [FieldOffset(0x0)] public ulong X0;
-        [FieldOffset(0x8)] public ulong X1;
-
-        [FieldOffset(0x0)] public double D0;
-        [FieldOffset(0x8)] public double D1;
-
-        public byte ExtractByte(int Index)
-        {
-            switch (Index)
-            {
-                case 0:  return B0;
-                case 1:  return B1;
-                case 2:  return B2;
-                case 3:  return B3;
-                case 4:  return B4;
-                case 5:  return B5;
-                case 6:  return B6;
-                case 7:  return B7;
-                case 8:  return B8;
-                case 9:  return B9;
-                case 10: return B10;
-                case 11: return B11;
-                case 12: return B12;
-                case 13: return B13;
-                case 14: return B14;
-                case 15: return B15;
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(Index));
-        }
-
-        public ushort ExtractUInt16(int Index)
-        {
-            switch (Index)
-            {
-                case 0: return H0;
-                case 1: return H1;
-                case 2: return H2;
-                case 3: return H3;
-                case 4: return H4;
-                case 5: return H5;
-                case 6: return H6;
-                case 7: return H7;
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(Index));
-        }
-
-        public uint ExtractUInt32(int Index)
-        {
-            switch (Index)
-            {
-                case 0: return W0;
-                case 1: return W1;
-                case 2: return W2;
-                case 3: return W3;
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(Index));
-        }
-
-        public float ExtractSingle(int Index)
-        {
-            switch (Index)
-            {
-                case 0: return S0;
-                case 1: return S1;
-                case 2: return S2;
-                case 3: return S3;
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(Index));
-        }
-
-        public ulong ExtractUInt64(int Index)
-        {
-            switch (Index)
-            {
-                case 0: return X0;
-                case 1: return X1;
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(Index));
-        }
-
-        public double ExtractDouble(int Index)
-        {
-            switch (Index)
-            {
-                case 0: return D0;
-                case 1: return D1;
-            }
-
-            throw new ArgumentOutOfRangeException(nameof(Index));
-        }
-
-        public static AVec InsertByte(AVec Vec, int Index, byte Value)
-        {
-            switch (Index)
-            {
-                case 0:  Vec.B0  = Value; break;
-                case 1:  Vec.B1  = Value; break;
-                case 2:  Vec.B2  = Value; break;
-                case 3:  Vec.B3  = Value; break;
-                case 4:  Vec.B4  = Value; break;
-                case 5:  Vec.B5  = Value; break;
-                case 6:  Vec.B6  = Value; break;
-                case 7:  Vec.B7  = Value; break;
-                case 8:  Vec.B8  = Value; break;
-                case 9:  Vec.B9  = Value; break;
-                case 10: Vec.B10 = Value; break;
-                case 11: Vec.B11 = Value; break;
-                case 12: Vec.B12 = Value; break;
-                case 13: Vec.B13 = Value; break;
-                case 14: Vec.B14 = Value; break;
-                case 15: Vec.B15 = Value; break;
-
-                default: throw new ArgumentOutOfRangeException(nameof(Index));
-            }
-
-            return Vec;
-        }
-
-        public static AVec InsertUInt16(AVec Vec, int Index, ushort Value)
-        {
-            switch (Index)
-            {
-                case 0: Vec.H0 = Value; break;
-                case 1: Vec.H1 = Value; break;
-                case 2: Vec.H2 = Value; break;
-                case 3: Vec.H3 = Value; break;
-                case 4: Vec.H4 = Value; break;
-                case 5: Vec.H5 = Value; break;
-                case 6: Vec.H6 = Value; break;
-                case 7: Vec.H7 = Value; break;
-
-                default: throw new ArgumentOutOfRangeException(nameof(Index));
-            }
-
-            return Vec;
-        }
-
-        public static AVec InsertUInt32(AVec Vec, int Index, uint Value)
-        {
-            switch (Index)
-            {
-                case 0: Vec.W0 = Value; break;
-                case 1: Vec.W1 = Value; break;
-                case 2: Vec.W2 = Value; break;
-                case 3: Vec.W3 = Value; break;
-
-                default: throw new ArgumentOutOfRangeException(nameof(Index));
-            }
-
-            return Vec;
-        }
-
-        public static AVec InsertSingle(AVec Vec, int Index, float Value)
-        {
-            switch (Index)
-            {
-                case 0: Vec.S0 = Value; break;
-                case 1: Vec.S1 = Value; break;
-                case 2: Vec.S2 = Value; break;
-                case 3: Vec.S3 = Value; break;
-
-                default: throw new ArgumentOutOfRangeException(nameof(Index));
-            }
-
-            return Vec;
-        }
-
-        public static AVec InsertUInt64(AVec Vec, int Index, ulong Value)
-        {
-            switch (Index)
-            {
-                case 0: Vec.X0 = Value; break;
-                case 1: Vec.X1 = Value; break;
-
-                default: throw new ArgumentOutOfRangeException(nameof(Index));
-            }
-
-            return Vec;
-        }
-
-        public static AVec InsertDouble(AVec Vec, int Index, double Value)
-        {
-            switch (Index)
-            {
-                case 0: Vec.D0 = Value; break;
-                case 1: Vec.D1 = Value; break;
-
-                default: throw new ArgumentOutOfRangeException(nameof(Index));
-            }
-
-            return Vec;
-        }
-    }
-}
--- a/ChocolArm64/Translation/AILEmitter.cs
+++ b/ChocolArm64/Translation/AILEmitter.cs
@ -3,6 +3,7 @@ using ChocolArm64.State;
 using System;
 using System.Collections.Generic;
 using System.Reflection.Emit;
+using System.Runtime.Intrinsics;

 namespace ChocolArm64.Translation
 {
@ -157,7 +158,7 @@ namespace ChocolArm64.Translation
            {
                case ARegisterType.Flag:   return typeof(bool);
                case ARegisterType.Int:    return typeof(ulong);
-                case ARegisterType.Vector: return typeof(AVec);
+                case ARegisterType.Vector: return typeof(Vector128<float>);
            }

            throw new ArgumentException(nameof(RegType));
--- a/Ryujinx.Audio/Ryujinx.Audio.csproj
+++ b/Ryujinx.Audio/Ryujinx.Audio.csproj
@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
-    <TargetFramework>netcoreapp2.0</TargetFramework>
+    <TargetFramework>netcoreapp2.1</TargetFramework>
  </PropertyGroup>

  <ItemGroup>
--- a/Ryujinx.Core/Ryujinx.Core.csproj
+++ b/Ryujinx.Core/Ryujinx.Core.csproj
@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
-    <TargetFramework>netcoreapp2.0</TargetFramework>
+    <TargetFramework>netcoreapp2.1</TargetFramework>
  </PropertyGroup>

  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
--- a/Ryujinx.Graphics/Ryujinx.Graphics.csproj
+++ b/Ryujinx.Graphics/Ryujinx.Graphics.csproj
@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
-    <TargetFramework>netcoreapp2.0</TargetFramework>
+    <TargetFramework>netcoreapp2.1</TargetFramework>
  </PropertyGroup>

  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
--- a/Ryujinx.Tests/Cpu/CpuTest.cs
+++ b/Ryujinx.Tests/Cpu/CpuTest.cs
@ -4,7 +4,10 @@ using ChocolArm64.State;

 using NUnit.Framework;

+using System;
 using System.Threading;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;

 namespace Ryujinx.Tests.Cpu
 {
@ -54,7 +57,9 @@ namespace Ryujinx.Tests.Cpu
        }

        protected void SetThreadState(ulong X0 = 0, ulong X1 = 0, ulong X2 = 0, ulong X3 = 0, ulong X31 = 0,
-                                      AVec V0 = default(AVec), AVec V1 = default(AVec), AVec V2 = default(AVec),
+                                      Vector128<float> V0 = default(Vector128<float>),
+                                      Vector128<float> V1 = default(Vector128<float>),
+                                      Vector128<float> V2 = default(Vector128<float>),
                                      bool Overflow = false, bool Carry = false, bool Zero = false, bool Negative = false,
                                      int Fpcr = 0x0, int Fpsr = 0x0)
        {
@ -93,7 +98,9 @@ namespace Ryujinx.Tests.Cpu

        protected AThreadState SingleOpcode(uint Opcode,
                                            ulong X0 = 0, ulong X1 = 0, ulong X2 = 0, ulong X3 = 0, ulong X31 = 0,
-                                            AVec V0 = default(AVec), AVec V1 = default(AVec), AVec V2 = default(AVec),
+                                            Vector128<float> V0 = default(Vector128<float>),
+                                            Vector128<float> V1 = default(Vector128<float>),
+                                            Vector128<float> V2 = default(Vector128<float>),
                                            bool Overflow = false, bool Carry = false, bool Zero = false, bool Negative = false,
                                            int Fpcr = 0x0, int Fpsr = 0x0)
        {
@ -105,5 +112,42 @@ namespace Ryujinx.Tests.Cpu

            return GetThreadState();
        }
+
+        // Returns 64-bit element Index of Vector, with its bits reinterpreted as a double.
+        protected static double VectorExtractDouble(Vector128<float> Vector, byte Index)
+        {
+            Vector128<long> Lanes = Sse.StaticCast<float, long>(Vector);
+
+            return BitConverter.Int64BitsToDouble(Sse41.Extract(Lanes, Index));
+        }
+
+        // Builds a vector whose 64-bit element 0 holds the bit pattern of A and whose element 1 is zero.
+        protected static Vector128<float> MakeVectorE0(double A)
+        {
+            long Bits = BitConverter.DoubleToInt64Bits(A);
+
+            return Sse.StaticCast<long, float>(Sse2.SetVector128(0, Bits));
+        }
+
+        // Builds a vector whose 64-bit element 0 is A and whose element 1 is zero.
+        protected static Vector128<float> MakeVectorE0(ulong A)
+        {
+            Vector128<ulong> Lanes = Sse2.SetVector128(0, A);
+
+            return Sse.StaticCast<ulong, float>(Lanes);
+        }
+
+        // Builds a vector whose 64-bit element 0 is A and whose element 1 is B.
+        protected static Vector128<float> MakeVectorE0E1(ulong A, ulong B)
+        {
+            // SetVector128 takes the upper element first.
+            Vector128<ulong> Lanes = Sse2.SetVector128(B, A);
+
+            return Sse.StaticCast<ulong, float>(Lanes);
+        }
+
+        // Builds a vector whose 64-bit element 1 is B and whose element 0 is zero.
+        protected static Vector128<float> MakeVectorE1(ulong B)
+        {
+            Vector128<ulong> Lanes = Sse2.SetVector128(B, 0);
+
+            return Sse.StaticCast<ulong, float>(Lanes);
+        }
+
+        // Returns 64-bit element 0 of Vector as an unsigned integer.
+        protected static ulong GetVectorE0(Vector128<float> Vector)
+        {
+            Vector128<ulong> Lanes = Sse.StaticCast<float, ulong>(Vector);
+
+            return Sse41.Extract(Lanes, 0);
+        }
+
+        // Returns 64-bit element 1 of Vector as an unsigned integer.
+        protected static ulong GetVectorE1(Vector128<float> Vector)
+        {
+            Vector128<ulong> Lanes = Sse.StaticCast<float, ulong>(Vector);
+
+            return Sse41.Extract(Lanes, 1);
+        }
    }
 }
--- a/Ryujinx.Tests/Cpu/CpuTestAlu.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestAlu.cs
@ -1,14 +1,9 @@
 //#define Alu

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("Alu"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestAlu : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestAluImm.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestAluImm.cs
@ -1,14 +1,9 @@
 //#define AluImm

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("AluImm"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestAluImm : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestAluRs.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestAluRs.cs
@ -1,14 +1,9 @@
 //#define AluRs

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("AluRs"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestAluRs : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestAluRx.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestAluRx.cs
@ -1,14 +1,9 @@
 //#define AluRx

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("AluRx"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestAluRx : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestBfm.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestBfm.cs
@ -1,14 +1,9 @@
 //#define Bfm

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("Bfm"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestBfm : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestCcmpImm.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestCcmpImm.cs
@ -1,14 +1,9 @@
 //#define CcmpImm

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("CcmpImm"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestCcmpImm : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestCcmpReg.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestCcmpReg.cs
@ -1,14 +1,9 @@
 //#define CcmpReg

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("CcmpReg"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestCcmpReg : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestCsel.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestCsel.cs
@ -1,14 +1,9 @@
 //#define Csel

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("Csel"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestCsel : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestMisc.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestMisc.cs
@ -2,6 +2,8 @@ using ChocolArm64.State;

 using NUnit.Framework;

+using System.Runtime.Intrinsics.X86;
+
 namespace Ryujinx.Tests.Cpu
 {
    [Category("Misc"), Explicit]
@ -73,7 +75,9 @@ namespace Ryujinx.Tests.Cpu
            RET
            */

-            SetThreadState(V0: new AVec { S0 = A }, V1: new AVec { S0 = B });
+            SetThreadState(
+                V0: Sse.SetScalarVector128(A),
+                V1: Sse.SetScalarVector128(B));
            Opcode(0x1E2E1002);
            Opcode(0x1E201840);
            Opcode(0x1E211841);
@ -84,7 +88,7 @@ namespace Ryujinx.Tests.Cpu
            Opcode(0xD65F03C0);
            ExecuteOpcodes();

-            Assert.That(GetThreadState().V0.S0, Is.EqualTo(16f));
+            Assert.That(Sse41.Extract(GetThreadState().V0, 0), Is.EqualTo(16f));
        }

        [TestCase(-20d,  -5d)] // 18 integer solutions.
@ -120,7 +124,9 @@ namespace Ryujinx.Tests.Cpu
            RET
            */

-            SetThreadState(V0: new AVec { D0 = A }, V1: new AVec { D0 = B });
+            SetThreadState(
+                V0: Sse.StaticCast<double, float>(Sse2.SetScalarVector128(A)),
+                V1: Sse.StaticCast<double, float>(Sse2.SetScalarVector128(B)));
            Opcode(0x1E6E1002);
            Opcode(0x1E601840);
            Opcode(0x1E611841);
@ -131,7 +137,7 @@ namespace Ryujinx.Tests.Cpu
            Opcode(0xD65F03C0);
            ExecuteOpcodes();

-            Assert.That(GetThreadState().V0.D0, Is.EqualTo(16d));
+            Assert.That(VectorExtractDouble(GetThreadState().V0, 0), Is.EqualTo(16d));
        }

        [Test]
--- a/Ryujinx.Tests/Cpu/CpuTestMov.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestMov.cs
@ -1,14 +1,9 @@
 //#define Mov

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("Mov"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestMov : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestMul.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestMul.cs
@ -1,14 +1,9 @@
 //#define Mul

-using ChocolArm64.State;
-
 using NUnit.Framework;

 namespace Ryujinx.Tests.Cpu
 {
-    using Tester;
-    using Tester.Types;
-
    [Category("Mul"), Ignore("Tested: first half of 2018.")]
    public sealed class CpuTestMul : CpuTest
    {
--- a/Ryujinx.Tests/Cpu/CpuTestScalar.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestScalar.cs
@ -1,6 +1,9 @@
 using ChocolArm64.State;
+
 using NUnit.Framework;

+using System.Runtime.Intrinsics.X86;
+
 namespace Ryujinx.Tests.Cpu
 {
    public class CpuTestScalar : CpuTest
@ -25,8 +28,10 @@ namespace Ryujinx.Tests.Cpu
        public void Fmax_S(uint Opcode, ulong A, ulong B, ulong Result)
        {
            // FMAX S0, S1, S2
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: new AVec { X0 = A }, V2: new AVec { X0 = B });
-            Assert.AreEqual(Result, ThreadState.V0.X0);
+            // A and B go into 64-bit element 0 of V1 and V2 (element 1 zero) through the shared
+            // CpuTest helpers; 64-bit element 0 of the resulting V0 is compared with Result.
+            AThreadState ThreadState = SingleOpcode(Opcode, V1: MakeVectorE0(A), V2: MakeVectorE0(B));
+
+            Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
        }

        [TestCase(0x1E225820u, 0x0000000000000000ul, 0x0000000080000000ul, 0x0000000080000000ul)]
@ -49,8 +54,10 @@ namespace Ryujinx.Tests.Cpu
        public void Fmin_S(uint Opcode, ulong A, ulong B, ulong Result)
        {
            // FMIN S0, S1, S2
-            AThreadState ThreadState = SingleOpcode(Opcode, V1: new AVec { X0 = A }, V2: new AVec { X0 = B });
-            Assert.AreEqual(Result, ThreadState.V0.X0);
+            // A and B are raw register bit patterns. They go in through the shared
+            // MakeVectorE0 helper, the same replacement the other Cpu tests use for
+            // the old AVec { X0 = ... } initializers.
+            AThreadState ThreadState = SingleOpcode(Opcode,
+                V1: MakeVectorE0(A),
+                V2: MakeVectorE0(B));
+
+            // Read the result back with GetVectorE0 (the replacement for V0.X0)
+            // instead of calling Sse41.Extract directly, so this test contains no
+            // direct SSE4.1 intrinsic call.
+            Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
        }
    }
 }
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@ -4,6 +4,8 @@ using ChocolArm64.State;

 using NUnit.Framework;

+using System.Runtime.Intrinsics;
+
 namespace Ryujinx.Tests.Cpu
 {
    using Tester;
@ -74,8 +76,8 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x5EE0B820; // ABS D0, D1
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.V(1, new Bits(A));
@ -83,8 +85,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -96,8 +98,8 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.V(1, new Bits(A));
@ -105,8 +107,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -119,7 +121,7 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -128,8 +130,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -140,8 +142,8 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x5EF1B820; // ADDP D0, V1.2D
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -150,8 +152,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -163,9 +165,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = TestContext.CurrentContext.Random.NextULong(),
-                                 X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
+                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong()));
@ -174,8 +176,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -188,9 +190,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = TestContext.CurrentContext.Random.NextULong(),
-                                 X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
+                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong()));
@ -200,8 +202,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -213,8 +215,8 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.V(1, new Bits(A));
@ -222,8 +224,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -236,7 +238,7 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -245,8 +247,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -258,8 +260,8 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.V(1, new Bits(A));
@ -267,8 +269,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -281,7 +283,7 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -290,8 +292,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -301,8 +303,8 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x7EE0B820; // NEG D0, D1
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.V(1, new Bits(A));
@ -310,8 +312,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -323,8 +325,8 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.V(1, new Bits(A));
@ -332,8 +334,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -346,7 +348,7 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -355,8 +357,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -368,9 +370,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = TestContext.CurrentContext.Random.NextULong(),
-                                 X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
+                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong()));
@ -379,8 +381,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); // FIXME: Temporary solution.
        }
@ -394,8 +396,8 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -404,8 +406,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); // FIXME: Temporary solution.
        }
@ -420,8 +422,8 @@ namespace Ryujinx.Tests.Cpu
            Bits Op = new Bits(Opcode);

            ulong _X0 = TestContext.CurrentContext.Random.NextULong();
-            AVec V0 = new AVec { X0 = _X0 };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V0 = MakeVectorE0(_X0);
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -430,8 +432,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(_X0));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_X0));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); // FIXME: Temporary solution.
        }
@ -444,9 +446,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = TestContext.CurrentContext.Random.NextULong(),
-                                 X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V0 = MakeVectorE0E1(TestContext.CurrentContext.Random.NextULong(),
+                                                 TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.Vpart(0, 0, new Bits(TestContext.CurrentContext.Random.NextULong()));
@ -455,8 +457,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); // FIXME: Temporary solution.
        }
@ -470,8 +472,8 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -480,8 +482,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); // FIXME: Temporary solution.
        }
@ -496,8 +498,8 @@ namespace Ryujinx.Tests.Cpu
            Bits Op = new Bits(Opcode);

            ulong _X0 = TestContext.CurrentContext.Random.NextULong();
-            AVec V0 = new AVec { X0 = _X0 };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
+            Vector128<float> V0 = MakeVectorE0(_X0);
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -506,8 +508,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(_X0));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_X0));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
            Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); // FIXME: Temporary solution.
        }
--- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
@ -1,6 +1,10 @@
 using ChocolArm64.State;
+
 using NUnit.Framework;

+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
 namespace Ryujinx.Tests.Cpu
 {
    public class CpuTestSimdArithmetic : CpuTest
@ -35,13 +39,13 @@ namespace Ryujinx.Tests.Cpu
        [TestCase(0x4EE28420u,  0x0102030405060708ul, 0xAAAAAAAAAAAAAAAAul, 0x0807060504030201ul, 0x2222222222222222ul, 0x0909090909090909ul, 0xCCCCCCCCCCCCCCCCul)]
        public void Add_V(uint Opcode, ulong A0, ulong A1, ulong B0, ulong B1, ulong Result0, ulong Result1)
        {
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1); // first operand; replaces AVec { X0 = A0, X1 = A1 }
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1); // second operand; replaces AVec { X0 = B0, X1 = B1 }
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
            Assert.Multiple(() =>
            {
-                Assert.AreEqual(Result0, ThreadState.V0.X0);
-                Assert.AreEqual(Result1, ThreadState.V0.X1);
+                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0)); // replaces the old V0.X0 check
+                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0)); // replaces the old V0.X1 check
            });
        }

@ -61,13 +65,13 @@ namespace Ryujinx.Tests.Cpu
        public void Fmax_V(uint A, uint B, uint C, uint D, uint Result0, uint Result1)
        {
            uint Opcode = 0x4E22F420;
-            AVec V1 = new AVec { X0 = A, X1 = B };
-            AVec V2 = new AVec { X0 = C, X1 = D };
+            Vector128<float> V1 = MakeVectorE0E1(A, B); // uint inputs are widened here, as with the old X0/X1 fields; presumably each pattern ends up in the low 32 bits of its 64-bit half - confirm against MakeVectorE0E1
+            Vector128<float> V2 = MakeVectorE0E1(C, D); // second operand, built the same way
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
            Assert.Multiple(() =>
            {
-                Assert.AreEqual(Result0, ThreadState.V0.X0);
-                Assert.AreEqual(Result1, ThreadState.V0.X1);
+                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0)); // uint expectation vs. the value that replaced V0.X0
+                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0)); // uint expectation vs. the value that replaced V0.X1
            });
        }

@ -87,63 +91,68 @@ namespace Ryujinx.Tests.Cpu
        public void Fmin_V(uint A, uint B, uint C, uint D, uint Result0, uint Result1)
        {
            uint Opcode = 0x4EA2F420;
-            AVec V1 = new AVec { X0 = A, X1 = B };
-            AVec V2 = new AVec { X0 = C, X1 = D };
+            Vector128<float> V1 = MakeVectorE0E1(A, B); // uint inputs are widened here, as with the old X0/X1 fields; presumably each pattern ends up in the low 32 bits of its 64-bit half - confirm against MakeVectorE0E1
+            Vector128<float> V2 = MakeVectorE0E1(C, D); // second operand, built the same way
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
            Assert.Multiple(() =>
            {
-                Assert.AreEqual(Result0, ThreadState.V0.X0);
-                Assert.AreEqual(Result1, ThreadState.V0.X1);
+                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0)); // uint expectation vs. the value that replaced V0.X0
+                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0)); // uint expectation vs. the value that replaced V0.X1
            });
        }

        [Test, Description("fmul s6, s1, v0.s[2]")]
        public void Fmul_Se([Random(10)] float A, [Random(10)] float B)
        {
-            AThreadState ThreadState = SingleOpcode(0x5F809826, V1: new AVec { S0 = A }, V0: new AVec { S2 = B });
+            AThreadState ThreadState = SingleOpcode(0x5F809826,
+                V1: Sse.SetVector128(0, 0, 0, A), // replaces AVec { S0 = A }: the last argument is the lowest element
+                V0: Sse.SetVector128(0, B, 0, 0)); // replaces AVec { S2 = B }: the second argument is element 2

-            Assert.That(ThreadState.V6.S0, Is.EqualTo(A * B));
+            Assert.That(Sse41.Extract(ThreadState.V6, (byte)0), Is.EqualTo(A * B)); // NOTE(review): direct Sse41 call - presumably needs SSE4.1 on the test host; confirm this is intended given "Drop SSE4.1 requirement"
        }

        [Test, Description("frecpe v2.4s, v0.4s")]
        public void Frecpe_V([Random(100)] float A)
        {
-            AThreadState ThreadState = SingleOpcode(0x4EA1D802, V0: new AVec { S0 = A, S1 = A, S2 = A, S3 = A });
+            AThreadState ThreadState = SingleOpcode(0x4EA1D802, V0: Sse.SetAllVector128(A)); // replaces the initializer that set S0..S3 to A

-            Assert.That(ThreadState.V2.S0, Is.EqualTo(1 / A));
-            Assert.That(ThreadState.V2.S1, Is.EqualTo(1 / A));
-            Assert.That(ThreadState.V2.S2, Is.EqualTo(1 / A));
-            Assert.That(ThreadState.V2.S3, Is.EqualTo(1 / A));
+            Assert.That(Sse41.Extract(ThreadState.V2, (byte)0), Is.EqualTo(1 / A)); // NOTE(review): direct Sse41 call - presumably needs SSE4.1 on the test host; confirm this is intended
+            Assert.That(Sse41.Extract(ThreadState.V2, (byte)1), Is.EqualTo(1 / A)); // element 1, was V2.S1
+            Assert.That(Sse41.Extract(ThreadState.V2, (byte)2), Is.EqualTo(1 / A)); // element 2, was V2.S2
+            Assert.That(Sse41.Extract(ThreadState.V2, (byte)3), Is.EqualTo(1 / A)); // element 3, was V2.S3
        }

        [Test, Description("frecpe d0, d1")]
        public void Frecpe_S([Random(100)] double A)
        {
-            AThreadState ThreadState = SingleOpcode(0x5EE1D820, V1: new AVec { D0 = A });
+            AThreadState ThreadState = SingleOpcode(0x5EE1D820, V1: MakeVectorE0(A)); // replaces AVec { D0 = A }; A is a double here, so this presumably resolves to a double overload of MakeVectorE0 - confirm

-            Assert.That(ThreadState.V0.D0, Is.EqualTo(1 / A));
+            Assert.That(VectorExtractDouble(ThreadState.V0, 0), Is.EqualTo(1 / A)); // replaces the old V0.D0 read; compared exactly against 1 / A
        }

        [Test, Description("frecps v4.4s, v2.4s, v0.4s")]
        public void Frecps_V([Random(10)] float A, [Random(10)] float B)
        {
-            AThreadState ThreadState = SingleOpcode(0x4E20FC44, V2: new AVec { S0 = A, S1 = A, S2 = A, S3 = A },
-                                                                V0: new AVec { S0 = B, S1 = B, S2 = B, S3 = B });
+            AThreadState ThreadState = SingleOpcode(0x4E20FC44,
+                V2: Sse.SetAllVector128(A), // replaces the initializer that set S0..S3 to A
+                V0: Sse.SetAllVector128(B)); // replaces the initializer that set S0..S3 to B

-            Assert.That(ThreadState.V4.S0, Is.EqualTo(2 - (A * B)));
-            Assert.That(ThreadState.V4.S1, Is.EqualTo(2 - (A * B)));
-            Assert.That(ThreadState.V4.S2, Is.EqualTo(2 - (A * B)));
-            Assert.That(ThreadState.V4.S3, Is.EqualTo(2 - (A * B)));
+            Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(2 - (A * B))); // NOTE(review): direct Sse41 call - presumably needs SSE4.1 on the test host; confirm this is intended
+            Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(2 - (A * B))); // element 1, was V4.S1
+            Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(2 - (A * B))); // element 2, was V4.S2
+            Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(2 - (A * B))); // element 3, was V4.S3
        }

        [Test, Description("frecps d0, d1, d2")]
        public void Frecps_S([Random(10)] double A, [Random(10)] double B)
        {
-            AThreadState ThreadState = SingleOpcode(0x5E62FC20, V1: new AVec { D0 = A }, V2: new AVec { D0 = B });
+            AThreadState ThreadState = SingleOpcode(0x5E62FC20,
+                V1: MakeVectorE0(A), // replaces AVec { D0 = A }; presumably a double overload of MakeVectorE0 - confirm
+                V2: MakeVectorE0(B)); // replaces AVec { D0 = B }

-            Assert.That(ThreadState.V0.D0, Is.EqualTo(2 - (A * B)));
+            Assert.That(VectorExtractDouble(ThreadState.V0, 0), Is.EqualTo(2 - (A * B))); // replaces the old V0.D0 read; compared exactly against 2 - (A * B)
        }
-    
+
        [TestCase(0x3FE66666u, false, 0x40000000u)]
        [TestCase(0x3F99999Au, false, 0x3F800000u)]
        [TestCase(0x404CCCCDu, false, 0x40400000u)]
@ -189,17 +198,17 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp = 0x2000000;
            }
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(0x1E264020, V1: V1, Fpcr: FpcrTemp);
-            Assert.AreEqual(Result, ThreadState.V0.X0);
+            Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
        }

        [TestCase(0x6E618820u, 0x3FF3333333333333ul, 0x3FF3333333333333ul, false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
        [TestCase(0x6E618820u, 0x3FFCCCCCCCCCCCCDul, 0x3FFCCCCCCCCCCCCDul, false, 0x4000000000000000ul, 0x4000000000000000ul)]
        [TestCase(0x6E618820u, 0x3FF8000000000000ul, 0x3FF8000000000000ul, false, 0x4000000000000000ul, 0x4000000000000000ul)]
        [TestCase(0x6E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f80000040000000ul, 0x3f80000040000000ul)]
-        [TestCase(0x6E219820u, 0x3fc000003fc00000ul, 0x3fc000003fc00000ul, false, 0x4000000040000000ul, 0x4000000040000000ul)]    
-        [TestCase(0x2E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f80000040000000ul, 0x0000000000000000ul)]    
+        [TestCase(0x6E219820u, 0x3fc000003fc00000ul, 0x3fc000003fc00000ul, false, 0x4000000040000000ul, 0x4000000040000000ul)]
+        [TestCase(0x2E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f80000040000000ul, 0x0000000000000000ul)]
        [TestCase(0x2E219820u, 0x3fc000003fc00000ul, 0x3fc000003fc00000ul, false, 0x4000000040000000ul, 0x0000000000000000ul)]
        [TestCase(0x2E218820u, 0x0000000080000000ul, 0x0000000000000000ul, false, 0x0000000080000000ul, 0x0000000000000000ul)]
        [TestCase(0x2E218820u, 0x7F800000FF800000ul, 0x0000000000000000ul, false, 0x7F800000FF800000ul, 0x0000000000000000ul)]
@ -212,12 +221,12 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp = 0x2000000;
            }
-            AVec V1 = new AVec { X0 = A, X1 = B };
+            Vector128<float> V1 = MakeVectorE0E1(A, B);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, Fpcr: FpcrTemp);
            Assert.Multiple(() =>
            {
-                Assert.AreEqual(Result0, ThreadState.V0.X0);
-                Assert.AreEqual(Result1, ThreadState.V0.X1);
+                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0));
+                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0));
            });
        }

@ -286,9 +295,9 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp |= 1 << 25;
            }
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(0x1E27C020, V1: V1, Fpcr: FpcrTemp);
-            Assert.AreEqual(Result, ThreadState.V0.X0);
+            Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
        }

        [TestCase(0x6EE19820u, 0x3FF3333333333333ul, 0x3FF3333333333333ul, 'N', false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
@ -300,11 +309,11 @@ namespace Ryujinx.Tests.Cpu
        [TestCase(0x6EE19820u, 0x3FF3333333333333ul, 0x3FF3333333333333ul, 'Z', false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
        [TestCase(0x6EE19820u, 0x3FFCCCCCCCCCCCCDul, 0x3FFCCCCCCCCCCCCDul, 'Z', false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
        [TestCase(0x6EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'N', false, 0x3f80000040000000ul, 0x3f80000040000000ul)]
-        [TestCase(0x6EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'P', false, 0x4000000040000000ul, 0x4000000040000000ul)]    
+        [TestCase(0x6EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'P', false, 0x4000000040000000ul, 0x4000000040000000ul)]
        [TestCase(0x6EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'M', false, 0x3f8000003f800000ul, 0x3f8000003f800000ul)]
        [TestCase(0x6EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'Z', false, 0x3f8000003f800000ul, 0x3f8000003f800000ul)]
        [TestCase(0x2EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'N', false, 0x3f80000040000000ul, 0x0000000000000000ul)]
-        [TestCase(0x2EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'P', false, 0x4000000040000000ul, 0x0000000000000000ul)]    
+        [TestCase(0x2EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'P', false, 0x4000000040000000ul, 0x0000000000000000ul)]
        [TestCase(0x2EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'M', false, 0x3f8000003f800000ul, 0x0000000000000000ul)]
        [TestCase(0x2EA19820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'Z', false, 0x3f8000003f800000ul, 0x0000000000000000ul)]
        [TestCase(0x2EA19820u, 0x0000000080000000ul, 0x0000000000000000ul, 'N', false, 0x0000000080000000ul, 0x0000000000000000ul)]
@ -348,12 +357,12 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp |= 1 << 25;
            }
-            AVec V1 = new AVec { X0 = A, X1 = B };
+            Vector128<float> V1 = MakeVectorE0E1(A, B);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, Fpcr: FpcrTemp);
            Assert.Multiple(() =>
            {
-                Assert.AreEqual(Result0, ThreadState.V0.X0);
-                Assert.AreEqual(Result1, ThreadState.V0.X1);
+                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0));
+                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0));
            });
        }

@ -402,15 +411,15 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp = 0x2000000;
            }
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(0x1E254020, V1: V1, Fpcr: FpcrTemp);
-            Assert.AreEqual(Result, ThreadState.V0.X0);
+            Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
        }

        [TestCase(0x4E619820u, 0x3FF3333333333333ul, 0x3FF3333333333333ul, false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
        [TestCase(0x4E619820u, 0x3FFCCCCCCCCCCCCDul, 0x3FFCCCCCCCCCCCCDul, false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
-        [TestCase(0x4E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f8000003f800000ul, 0x3f8000003f800000ul)]    
-        [TestCase(0xE219820u,  0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f8000003f800000ul, 0x0000000000000000ul)]    
+        [TestCase(0x4E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f8000003f800000ul, 0x3f8000003f800000ul)]
+        [TestCase(0xE219820u,  0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f8000003f800000ul, 0x0000000000000000ul)]
        [TestCase(0xE219820u,  0x0000000080000000ul, 0x0000000000000000ul, false, 0x0000000080000000ul, 0x0000000000000000ul)]
        [TestCase(0xE219820u,  0x7F800000FF800000ul, 0x0000000000000000ul, false, 0x7F800000FF800000ul, 0x0000000000000000ul)]
        [TestCase(0xE219820u,  0xFF8000017FC00002ul, 0x0000000000000000ul, false, 0xFFC000017FC00002ul, 0x0000000000000000ul, Ignore = "NaN test.")]
@ -422,12 +431,12 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp = 0x2000000;
            }
-            AVec V1 = new AVec { X0 = A, X1 = B };
+            Vector128<float> V1 = MakeVectorE0E1(A, B);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, Fpcr: FpcrTemp);
            Assert.Multiple(() =>
            {
-                Assert.AreEqual(Result0, ThreadState.V0.X0);
-                Assert.AreEqual(Result1, ThreadState.V0.X1);
+                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0));
+                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0));
            });
        }

@ -476,17 +485,17 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp = 0x2000000;
            }
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(0x1E264020, V1: V1, Fpcr: FpcrTemp);
-            Assert.AreEqual(Result, ThreadState.V0.X0);
+            Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
        }

        [TestCase(0x4E618820u, 0x3FF3333333333333ul, 0x3FF3333333333333ul, false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
        [TestCase(0x4E618820u, 0x3FFCCCCCCCCCCCCDul, 0x3FFCCCCCCCCCCCCDul, false, 0x4000000000000000ul, 0x4000000000000000ul)]
        [TestCase(0x4E618820u, 0x3FF8000000000000ul, 0x3FF8000000000000ul, false, 0x4000000000000000ul, 0x4000000000000000ul)]
        [TestCase(0x4E218820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f80000040000000ul, 0x3f80000040000000ul)]
-        [TestCase(0x4E218820u, 0x3fc000003fc00000ul, 0x3fc000003fc00000ul, false, 0x4000000040000000ul, 0x4000000040000000ul)]    
-        [TestCase(0xE218820u,  0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f80000040000000ul, 0x0000000000000000ul)]    
+        [TestCase(0x4E218820u, 0x3fc000003fc00000ul, 0x3fc000003fc00000ul, false, 0x4000000040000000ul, 0x4000000040000000ul)]
+        [TestCase(0xE218820u,  0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x3f80000040000000ul, 0x0000000000000000ul)]
        [TestCase(0xE218820u,  0x3fc000003fc00000ul, 0x3fc000003fc00000ul, false, 0x4000000040000000ul, 0x0000000000000000ul)]
        [TestCase(0xE218820u,  0x0000000080000000ul, 0x0000000000000000ul, false, 0x0000000080000000ul, 0x0000000000000000ul)]
        [TestCase(0xE218820u,  0x7F800000FF800000ul, 0x0000000000000000ul, false, 0x7F800000FF800000ul, 0x0000000000000000ul)]
@ -499,12 +508,12 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp = 0x2000000;
            }
-            AVec V1 = new AVec { X0 = A, X1 = B };
+            Vector128<float> V1 = MakeVectorE0E1(A, B);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, Fpcr: FpcrTemp);
            Assert.Multiple(() =>
            {
-                Assert.AreEqual(Result0, ThreadState.V0.X0);
-                Assert.AreEqual(Result1, ThreadState.V0.X1);
+                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0));
+                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0));
            });
        }

@ -553,15 +562,15 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp = 0x2000000;
            }
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(0x1E24C020, V1: V1, Fpcr: FpcrTemp);
-            Assert.AreEqual(Result, ThreadState.V0.X0);
+            Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
        }

        [TestCase(0x4EE18820u, 0x3FF3333333333333ul, 0x3FF3333333333333ul, false, 0x4000000000000000ul, 0x4000000000000000ul)]
        [TestCase(0x4EE18820u, 0x3FFCCCCCCCCCCCCDul, 0x3FFCCCCCCCCCCCCDul, false, 0x4000000000000000ul, 0x4000000000000000ul)]
-        [TestCase(0x4EA18820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x4000000040000000ul, 0x4000000040000000ul)]    
-        [TestCase(0xEA18820u,  0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x4000000040000000ul, 0x0000000000000000ul)]    
+        [TestCase(0x4EA18820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x4000000040000000ul, 0x4000000040000000ul)]
+        [TestCase(0xEA18820u,  0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, false, 0x4000000040000000ul, 0x0000000000000000ul)]
        [TestCase(0xEA18820u,  0x0000000080000000ul, 0x0000000000000000ul, false, 0x0000000080000000ul, 0x0000000000000000ul)]
        [TestCase(0xEA18820u,  0x7F800000FF800000ul, 0x0000000000000000ul, false, 0x7F800000FF800000ul, 0x0000000000000000ul)]
        [TestCase(0xEA18820u,  0xFF8000017FC00002ul, 0x0000000000000000ul, false, 0xFFC000017FC00002ul, 0x0000000000000000ul, Ignore = "NaN test.")]
@ -573,12 +582,12 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp = 0x2000000;
            }
-            AVec V1 = new AVec { X0 = A, X1 = B };
+            Vector128<float> V1 = MakeVectorE0E1(A, B);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, Fpcr: FpcrTemp);
            Assert.Multiple(() =>
            {
-                Assert.AreEqual(Result0, ThreadState.V0.X0);
-                Assert.AreEqual(Result1, ThreadState.V0.X1);
+                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0));
+                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0));
            });
        }

@ -647,9 +656,9 @@ namespace Ryujinx.Tests.Cpu
        	{
        		FpcrTemp |= 1 << 25;
        	}
-        	AVec V1 = new AVec { X0 = A };
+        	Vector128<float> V1 = MakeVectorE0(A);
        	AThreadState ThreadState = SingleOpcode(0x1E274020, V1: V1, Fpcr: FpcrTemp);
-        	Assert.AreEqual(Result, ThreadState.V0.X0);
+        	Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
        }

        [TestCase(0x6E619820u, 0x3FF3333333333333ul, 0x3FF3333333333333ul, 'N', false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
@ -661,11 +670,11 @@ namespace Ryujinx.Tests.Cpu
        [TestCase(0x6E619820u, 0x3FF3333333333333ul, 0x3FF3333333333333ul, 'Z', false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
        [TestCase(0x6E619820u, 0x3FFCCCCCCCCCCCCDul, 0x3FFCCCCCCCCCCCCDul, 'Z', false, 0x3FF0000000000000ul, 0x3FF0000000000000ul)]
        [TestCase(0x6E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'N', false, 0x3f80000040000000ul, 0x3f80000040000000ul)]
-        [TestCase(0x6E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'P', false, 0x4000000040000000ul, 0x4000000040000000ul)]    
+        [TestCase(0x6E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'P', false, 0x4000000040000000ul, 0x4000000040000000ul)]
        [TestCase(0x6E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'M', false, 0x3f8000003f800000ul, 0x3f8000003f800000ul)]
        [TestCase(0x6E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'Z', false, 0x3f8000003f800000ul, 0x3f8000003f800000ul)]
        [TestCase(0x2E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'N', false, 0x3f80000040000000ul, 0x0000000000000000ul)]
-        [TestCase(0x2E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'P', false, 0x4000000040000000ul, 0x0000000000000000ul)]    
+        [TestCase(0x2E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'P', false, 0x4000000040000000ul, 0x0000000000000000ul)]
        [TestCase(0x2E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'M', false, 0x3f8000003f800000ul, 0x0000000000000000ul)]
        [TestCase(0x2E219820u, 0x3f99999a3fe66666ul, 0x3f99999a3fe66666ul, 'Z', false, 0x3f8000003f800000ul, 0x0000000000000000ul)]
        [TestCase(0x2E219820u, 0x0000000080000000ul, 0x0000000000000000ul, 'N', false, 0x0000000080000000ul, 0x0000000000000000ul)]
@ -709,21 +718,21 @@ namespace Ryujinx.Tests.Cpu
            {
                FpcrTemp |= 1 << 25;
            }
-            AVec V1 = new AVec { X0 = A, X1 = B };
+            Vector128<float> V1 = MakeVectorE0E1(A, B);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, Fpcr: FpcrTemp);
            Assert.Multiple(() =>
            {
-                Assert.AreEqual(Result0, ThreadState.V0.X0);
-                Assert.AreEqual(Result1, ThreadState.V0.X1);
+                Assert.AreEqual(Result0, GetVectorE0(ThreadState.V0));
+                Assert.AreEqual(Result1, GetVectorE1(ThreadState.V0));
            });
        }

        [TestCase(0x41200000u, 0x3EA18000u)]
        public void Frsqrte_S(uint A, uint Result)
        {
-            AVec V1 = new AVec { X0 = A };
+            Vector128<float> V1 = MakeVectorE0(A);
            AThreadState ThreadState = SingleOpcode(0x7EA1D820, V1: V1);
-            Assert.AreEqual(Result, ThreadState.V0.X0);
+            Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
        }
    }
 }
--- a/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdMove.cs
@ -1,6 +1,10 @@
 using ChocolArm64.State;
+
 using NUnit.Framework;

+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
 namespace Ryujinx.Tests.Cpu
 {
    public class CpuTestSimdMove : CpuTest
@ -10,15 +14,15 @@ namespace Ryujinx.Tests.Cpu
                              [Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3)
        {
            uint Opcode = 0x4E822820;
-            AVec V1 = new AVec { W0 = A0, W1 = A1, W2 = A2, W3 = A3 };
-            AVec V2 = new AVec { W0 = B0, W1 = B1, W2 = B2, W3 = B3 };
+            Vector128<float> V1 = Sse.StaticCast<uint, float>(Sse2.SetVector128(A3, A2, A1, A0));
+            Vector128<float> V2 = Sse.StaticCast<uint, float>(Sse2.SetVector128(B3, B2, B1, B0));

            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

-            Assert.That(ThreadState.V0.W0, Is.EqualTo(A0));
-            Assert.That(ThreadState.V0.W1, Is.EqualTo(B0));
-            Assert.That(ThreadState.V0.W2, Is.EqualTo(A2));
-            Assert.That(ThreadState.V0.W3, Is.EqualTo(B2));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)0), Is.EqualTo(A0));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)1), Is.EqualTo(B0));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)2), Is.EqualTo(A2));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)3), Is.EqualTo(B2));
        }

        [Test, Description("trn1 v0.8b, v1.8b, v2.8b")]
@ -28,19 +32,19 @@ namespace Ryujinx.Tests.Cpu
                              [Random(2)] byte B4, [Random(1)] byte B5, [Random(2)] byte B6, [Random(1)] byte B7)
        {
            uint Opcode = 0x0E022820;
-            AVec V1 = new AVec { B0 = A0, B1 = A1, B2 = A2, B3 = A3, B4 = A4, B5 = A5, B6 = A6, B7 = A7 };
-            AVec V2 = new AVec { B0 = B0, B1 = B1, B2 = B2, B3 = B3, B4 = B4, B5 = B5, B6 = B6, B7 = B7 };
+            Vector128<float> V1 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, A7, A6, A5, A4, A3, A2, A1, A0));
+            Vector128<float> V2 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, B7, B6, B5, B4, B3, B2, B1, B0));

            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

-            Assert.That(ThreadState.V0.B0, Is.EqualTo(A0));
-            Assert.That(ThreadState.V0.B1, Is.EqualTo(B0));
-            Assert.That(ThreadState.V0.B2, Is.EqualTo(A2));
-            Assert.That(ThreadState.V0.B3, Is.EqualTo(B2));
-            Assert.That(ThreadState.V0.B4, Is.EqualTo(A4));
-            Assert.That(ThreadState.V0.B5, Is.EqualTo(B4));
-            Assert.That(ThreadState.V0.B6, Is.EqualTo(A6));
-            Assert.That(ThreadState.V0.B7, Is.EqualTo(B6));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)0), Is.EqualTo(A0));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)1), Is.EqualTo(B0));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)2), Is.EqualTo(A2));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)3), Is.EqualTo(B2));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)4), Is.EqualTo(A4));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)5), Is.EqualTo(B4));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)6), Is.EqualTo(A6));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)7), Is.EqualTo(B6));
        }

        [Test, Description("trn2 v0.4s, v1.4s, v2.4s")]
@ -48,15 +52,15 @@ namespace Ryujinx.Tests.Cpu
                              [Random(2)] uint B0, [Random(2)] uint B1, [Random(2)] uint B2, [Random(2)] uint B3)
        {
            uint Opcode = 0x4E826820;
-            AVec V1 = new AVec { W0 = A0, W1 = A1, W2 = A2, W3 = A3 };
-            AVec V2 = new AVec { W0 = B0, W1 = B1, W2 = B2, W3 = B3 };
+            Vector128<float> V1 = Sse.StaticCast<uint, float>(Sse2.SetVector128(A3, A2, A1, A0));
+            Vector128<float> V2 = Sse.StaticCast<uint, float>(Sse2.SetVector128(B3, B2, B1, B0));

            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

-            Assert.That(ThreadState.V0.W0, Is.EqualTo(A1));
-            Assert.That(ThreadState.V0.W1, Is.EqualTo(B1));
-            Assert.That(ThreadState.V0.W2, Is.EqualTo(A3));
-            Assert.That(ThreadState.V0.W3, Is.EqualTo(B3));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)0), Is.EqualTo(A1));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)1), Is.EqualTo(B1));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)2), Is.EqualTo(A3));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, uint>(ThreadState.V0), (byte)3), Is.EqualTo(B3));
        }

        [Test, Description("trn2 v0.8b, v1.8b, v2.8b")]
@ -66,19 +70,19 @@ namespace Ryujinx.Tests.Cpu
                              [Random(1)] byte B4, [Random(2)] byte B5, [Random(1)] byte B6, [Random(2)] byte B7)
        {
            uint Opcode = 0x0E026820;
-            AVec V1 = new AVec { B0 = A0, B1 = A1, B2 = A2, B3 = A3, B4 = A4, B5 = A5, B6 = A6, B7 = A7 };
-            AVec V2 = new AVec { B0 = B0, B1 = B1, B2 = B2, B3 = B3, B4 = B4, B5 = B5, B6 = B6, B7 = B7 };
+            Vector128<float> V1 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, A7, A6, A5, A4, A3, A2, A1, A0));
+            Vector128<float> V2 = Sse.StaticCast<byte, float>(Sse2.SetVector128(0, 0, 0, 0, 0, 0, 0, 0, B7, B6, B5, B4, B3, B2, B1, B0));

            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

-            Assert.That(ThreadState.V0.B0, Is.EqualTo(A1));
-            Assert.That(ThreadState.V0.B1, Is.EqualTo(B1));
-            Assert.That(ThreadState.V0.B2, Is.EqualTo(A3));
-            Assert.That(ThreadState.V0.B3, Is.EqualTo(B3));
-            Assert.That(ThreadState.V0.B4, Is.EqualTo(A5));
-            Assert.That(ThreadState.V0.B5, Is.EqualTo(B5));
-            Assert.That(ThreadState.V0.B6, Is.EqualTo(A7));
-            Assert.That(ThreadState.V0.B7, Is.EqualTo(B7));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)0), Is.EqualTo(A1));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)1), Is.EqualTo(B1));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)2), Is.EqualTo(A3));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)3), Is.EqualTo(B3));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)4), Is.EqualTo(A5));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)5), Is.EqualTo(B5));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)6), Is.EqualTo(A7));
+            Assert.That(Sse41.Extract(Sse.StaticCast<float, byte>(ThreadState.V0), (byte)7), Is.EqualTo(B7));
        }

        [TestCase(0u, 0u, 0x2313221221112010ul, 0x0000000000000000ul)]
@ -92,11 +96,11 @@ namespace Ryujinx.Tests.Cpu
        {
            // ZIP1 V0.<T>, V1.<T>, V2.<T>
            uint Opcode = 0x0E023820 | (Q << 30) | (size << 22);
-            AVec V1 = new AVec { X0 = 0x1716151413121110, X1 = 0x1F1E1D1C1B1A1918 };
-            AVec V2 = new AVec { X0 = 0x2726252423222120, X1 = 0x2F2E2D2C2B2A2928 };
+            Vector128<float> V1 = MakeVectorE0E1(0x1716151413121110, 0x1F1E1D1C1B1A1918);
+            Vector128<float> V2 = MakeVectorE0E1(0x2726252423222120, 0x2F2E2D2C2B2A2928);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
-            Assert.AreEqual(Result_0, ThreadState.V0.X0);
-            Assert.AreEqual(Result_1, ThreadState.V0.X1);
+            Assert.AreEqual(Result_0, GetVectorE0(ThreadState.V0));
+            Assert.AreEqual(Result_1, GetVectorE1(ThreadState.V0));
        }

        [TestCase(0u, 0u, 0x2717261625152414ul, 0x0000000000000000ul)]
@ -110,11 +114,11 @@ namespace Ryujinx.Tests.Cpu
        {
            // ZIP2 V0.<T>, V1.<T>, V2.<T>
            uint Opcode = 0x0E027820 | (Q << 30) | (size << 22);
-            AVec V1 = new AVec { X0 = 0x1716151413121110, X1 = 0x1F1E1D1C1B1A1918 };
-            AVec V2 = new AVec { X0 = 0x2726252423222120, X1 = 0x2F2E2D2C2B2A2928 };
+            Vector128<float> V1 = MakeVectorE0E1(0x1716151413121110, 0x1F1E1D1C1B1A1918);
+            Vector128<float> V2 = MakeVectorE0E1(0x2726252423222120, 0x2F2E2D2C2B2A2928);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);
-            Assert.AreEqual(Result_0, ThreadState.V0.X0);
-            Assert.AreEqual(Result_1, ThreadState.V0.X1);
+            Assert.AreEqual(Result_0, GetVectorE0(ThreadState.V0));
+            Assert.AreEqual(Result_1, GetVectorE1(ThreadState.V0));
        }
    }
 }
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
@ -4,6 +4,8 @@ using ChocolArm64.State;

 using NUnit.Framework;

+using System.Runtime.Intrinsics;
+
 namespace Ryujinx.Tests.Cpu
 {
    using Tester;
@ -65,9 +67,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x5EE28420; // ADD D0, D1, D2
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.V(1, new Bits(A));
@ -76,8 +78,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -90,9 +92,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.V(1, new Bits(A));
@ -101,8 +103,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -117,8 +119,8 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -129,8 +131,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -145,9 +147,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -158,8 +160,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -175,9 +177,9 @@ namespace Ryujinx.Tests.Cpu
            Bits Op = new Bits(Opcode);

            ulong _X0 = TestContext.CurrentContext.Random.NextULong();
-            AVec V0 = new AVec { X0 = _X0 };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE0(_X0);
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -188,8 +190,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(_X0));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_X0));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -202,9 +204,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.V(1, new Bits(A));
@ -213,8 +215,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -229,8 +231,8 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -241,8 +243,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -253,9 +255,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x0E221C20; // AND V0.8B, V1.8B, V2.8B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.V(1, new Bits(A));
@ -264,8 +266,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -278,8 +280,8 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x4E221C20; // AND V0.16B, V1.16B, V2.16B
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -290,8 +292,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -302,9 +304,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x0E621C20; // BIC V0.8B, V1.8B, V2.8B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.V(1, new Bits(A));
@ -313,8 +315,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -327,8 +329,8 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x4E621C20; // BIC V0.16B, V1.16B, V2.16B
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -339,8 +341,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -352,9 +354,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x2EE21C20; // BIF V0.8B, V1.8B, V2.8B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = _Z, X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(0, 0, new Bits(_Z));
@ -364,8 +366,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -380,9 +382,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x6EE21C20; // BIF V0.16B, V1.16B, V2.16B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = _Z0, X1 = _Z1 };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(0, 0, new Bits(_Z0));
@ -395,8 +397,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -408,9 +410,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x2EA21C20; // BIT V0.8B, V1.8B, V2.8B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = _Z, X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(0, 0, new Bits(_Z));
@ -420,8 +422,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -436,9 +438,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x6EA21C20; // BIT V0.16B, V1.16B, V2.16B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = _Z0, X1 = _Z1 };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(0, 0, new Bits(_Z0));
@ -451,8 +453,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -464,9 +466,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x2E621C20; // BSL V0.8B, V1.8B, V2.8B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = _Z, X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE0E1(_Z, TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(0, 0, new Bits(_Z));
@ -476,8 +478,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -492,9 +494,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x6E621C20; // BSL V0.16B, V1.16B, V2.16B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X0 = _Z0, X1 = _Z1 };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE0E1(_Z0, _Z1);
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(0, 0, new Bits(_Z0));
@ -507,8 +509,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -519,9 +521,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x0EE21C20; // ORN V0.8B, V1.8B, V2.8B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.V(1, new Bits(A));
@ -530,8 +532,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -544,8 +546,8 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x4EE21C20; // ORN V0.16B, V1.16B, V2.16B
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -556,8 +558,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -568,9 +570,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x0EA21C20; // ORR V0.8B, V1.8B, V2.8B
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.V(1, new Bits(A));
@ -579,8 +581,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -593,8 +595,8 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x4EA21C20; // ORR V0.16B, V1.16B, V2.16B
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -605,8 +607,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -621,9 +623,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -634,8 +636,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -651,9 +653,9 @@ namespace Ryujinx.Tests.Cpu
            Bits Op = new Bits(Opcode);

            ulong _X0 = TestContext.CurrentContext.Random.NextULong();
-            AVec V0 = new AVec { X0 = _X0 };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE0(_X0);
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -664,8 +666,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(_X0));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_X0));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -680,9 +682,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -693,8 +695,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -710,9 +712,9 @@ namespace Ryujinx.Tests.Cpu
            Bits Op = new Bits(Opcode);

            ulong _X0 = TestContext.CurrentContext.Random.NextULong();
-            AVec V0 = new AVec { X0 = _X0 };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE0(_X0);
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -723,8 +725,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(_X0));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_X0));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -735,9 +737,9 @@ namespace Ryujinx.Tests.Cpu
            uint Opcode = 0x7EE28420; // SUB D0, D1, D2
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.V(1, new Bits(A));
@ -746,8 +748,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -760,9 +762,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A };
-            AVec V2 = new AVec { X0 = B };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0(A);
+            Vector128<float> V2 = MakeVectorE0(B);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.V(1, new Bits(A));
@ -771,8 +773,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -787,8 +789,8 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -799,8 +801,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }

@ -815,9 +817,9 @@ namespace Ryujinx.Tests.Cpu
            Opcode |= ((size & 3) << 22);
            Bits Op = new Bits(Opcode);

-            AVec V0 = new AVec { X1 = TestContext.CurrentContext.Random.NextULong() };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE1(TestContext.CurrentContext.Random.NextULong());
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -828,8 +830,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
-                Assert.That(ThreadState.V0.X1, Is.Zero);
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.V(64, 0).ToUInt64()));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.Zero);
            });
        }

@ -845,9 +847,9 @@ namespace Ryujinx.Tests.Cpu
            Bits Op = new Bits(Opcode);

            ulong _X0 = TestContext.CurrentContext.Random.NextULong();
-            AVec V0 = new AVec { X0 = _X0 };
-            AVec V1 = new AVec { X0 = A0, X1 = A1 };
-            AVec V2 = new AVec { X0 = B0, X1 = B1 };
+            Vector128<float> V0 = MakeVectorE0(_X0);
+            Vector128<float> V1 = MakeVectorE0E1(A0, A1);
+            Vector128<float> V2 = MakeVectorE0E1(B0, B1);
            AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);

            AArch64.Vpart(1, 0, new Bits(A0));
@ -858,8 +860,8 @@ namespace Ryujinx.Tests.Cpu

            Assert.Multiple(() =>
            {
-                Assert.That(ThreadState.V0.X0, Is.EqualTo(_X0));
-                Assert.That(ThreadState.V0.X1, Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
+                Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(_X0));
+                Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64()));
            });
        }
 #endif
--- a/Ryujinx.Tests/Ryujinx.Tests.csproj
+++ b/Ryujinx.Tests/Ryujinx.Tests.csproj
@ -1,6 +1,6 @@
 <Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
-    <TargetFramework>netcoreapp2.0</TargetFramework>
+    <TargetFramework>netcoreapp2.1</TargetFramework>
    <RuntimeIdentifier>win10-x64</RuntimeIdentifier>
    <OutputType>Exe</OutputType>
    <IsPackable>false</IsPackable>
@ -12,6 +12,7 @@
    <PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.7.0" />
    <PackageReference Include="NUnit" Version="3.10.1" />
    <PackageReference Include="NUnit3TestAdapter" Version="3.10.0" />
+    <PackageReference Include="System.Runtime.Intrinsics.Experimental" Version="4.5.0-rc1" />
  </ItemGroup>
  <ItemGroup>
    <ProjectReference Include="..\ChocolArm64\ChocolArm64.csproj" />
--- a/Ryujinx/Ryujinx.csproj
+++ b/Ryujinx/Ryujinx.csproj
@ -1,7 +1,7 @@
 <Project Sdk="Microsoft.NET.Sdk">
  <PropertyGroup>
    <OutputType>Exe</OutputType>
-    <TargetFramework>netcoreapp2.0</TargetFramework>
+    <TargetFramework>netcoreapp2.1</TargetFramework>
    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
    <RuntimeIdentifiers>win10-x64;osx-x64</RuntimeIdentifiers>
  </PropertyGroup>