Arm64: Cpu feature detection (#4264)
* Arm64: Cpu feature detection * Ptc: Add Arm64 feature info * nits * simplify CheckSysctlName * restore some macos flags * feedback
This commit is contained in:
parent
fd36c8deca
commit
a11784fcbf
4 changed files with 237 additions and 31 deletions
185
ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
Normal file
185
ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
Normal file
|
@ -0,0 +1,185 @@
|
|||
using System;
|
||||
using System.Linq;
|
||||
using System.Reflection;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Runtime.Intrinsics.Arm;
|
||||
using System.Runtime.Versioning;
|
||||
|
||||
namespace ARMeilleure.CodeGen.Arm64
|
||||
{
|
||||
static partial class HardwareCapabilities
|
||||
{
|
||||
static HardwareCapabilities()
|
||||
{
|
||||
if (!ArmBase.Arm64.IsSupported)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if (OperatingSystem.IsLinux())
|
||||
{
|
||||
LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)getauxval(AT_HWCAP);
|
||||
LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)getauxval(AT_HWCAP2);
|
||||
}
|
||||
|
||||
if (OperatingSystem.IsMacOS())
|
||||
{
|
||||
for (int i = 0; i < _sysctlNames.Length; i++)
|
||||
{
|
||||
if (CheckSysctlName(_sysctlNames[i]))
|
||||
{
|
||||
MacOsFeatureInfo |= (MacOsFeatureFlags)(1 << i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#region Linux
|
||||
|
||||
private const ulong AT_HWCAP = 16;
|
||||
private const ulong AT_HWCAP2 = 26;
|
||||
|
||||
[LibraryImport("libc", SetLastError = true)]
|
||||
private static partial ulong getauxval(ulong type);
|
||||
|
||||
[Flags]
|
||||
public enum LinuxFeatureFlagsHwCap : ulong
|
||||
{
|
||||
Fp = 1 << 0,
|
||||
Asimd = 1 << 1,
|
||||
Evtstrm = 1 << 2,
|
||||
Aes = 1 << 3,
|
||||
Pmull = 1 << 4,
|
||||
Sha1 = 1 << 5,
|
||||
Sha2 = 1 << 6,
|
||||
Crc32 = 1 << 7,
|
||||
Atomics = 1 << 8,
|
||||
FpHp = 1 << 9,
|
||||
AsimdHp = 1 << 10,
|
||||
CpuId = 1 << 11,
|
||||
AsimdRdm = 1 << 12,
|
||||
Jscvt = 1 << 13,
|
||||
Fcma = 1 << 14,
|
||||
Lrcpc = 1 << 15,
|
||||
DcpOp = 1 << 16,
|
||||
Sha3 = 1 << 17,
|
||||
Sm3 = 1 << 18,
|
||||
Sm4 = 1 << 19,
|
||||
AsimdDp = 1 << 20,
|
||||
Sha512 = 1 << 21,
|
||||
Sve = 1 << 22,
|
||||
AsimdFhm = 1 << 23,
|
||||
Dit = 1 << 24,
|
||||
Uscat = 1 << 25,
|
||||
Ilrcpc = 1 << 26,
|
||||
FlagM = 1 << 27,
|
||||
Ssbs = 1 << 28,
|
||||
Sb = 1 << 29,
|
||||
Paca = 1 << 30,
|
||||
Pacg = 1UL << 31
|
||||
}
|
||||
|
||||
[Flags]
|
||||
public enum LinuxFeatureFlagsHwCap2 : ulong
|
||||
{
|
||||
Dcpodp = 1 << 0,
|
||||
Sve2 = 1 << 1,
|
||||
SveAes = 1 << 2,
|
||||
SvePmull = 1 << 3,
|
||||
SveBitperm = 1 << 4,
|
||||
SveSha3 = 1 << 5,
|
||||
SveSm4 = 1 << 6,
|
||||
FlagM2 = 1 << 7,
|
||||
Frint = 1 << 8,
|
||||
SveI8mm = 1 << 9,
|
||||
SveF32mm = 1 << 10,
|
||||
SveF64mm = 1 << 11,
|
||||
SveBf16 = 1 << 12,
|
||||
I8mm = 1 << 13,
|
||||
Bf16 = 1 << 14,
|
||||
Dgh = 1 << 15,
|
||||
Rng = 1 << 16,
|
||||
Bti = 1 << 17,
|
||||
Mte = 1 << 18,
|
||||
Ecv = 1 << 19,
|
||||
Afp = 1 << 20,
|
||||
Rpres = 1 << 21,
|
||||
Mte3 = 1 << 22,
|
||||
Sme = 1 << 23,
|
||||
Sme_i16i64 = 1 << 24,
|
||||
Sme_f64f64 = 1 << 25,
|
||||
Sme_i8i32 = 1 << 26,
|
||||
Sme_f16f32 = 1 << 27,
|
||||
Sme_b16f32 = 1 << 28,
|
||||
Sme_f32f32 = 1 << 29,
|
||||
Sme_fa64 = 1 << 30,
|
||||
Wfxt = 1UL << 31,
|
||||
Ebf16 = 1UL << 32,
|
||||
Sve_Ebf16 = 1UL << 33,
|
||||
Cssc = 1UL << 34,
|
||||
Rprfm = 1UL << 35,
|
||||
Sve2p1 = 1UL << 36
|
||||
}
|
||||
|
||||
public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; } = 0;
|
||||
public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; } = 0;
|
||||
|
||||
#endregion
|
||||
|
||||
#region macOS
|
||||
|
||||
[LibraryImport("libSystem.dylib", SetLastError = true)]
|
||||
private static unsafe partial int sysctlbyname([MarshalAs(UnmanagedType.LPStr)] string name, out int oldValue, ref ulong oldSize, IntPtr newValue, ulong newValueSize);
|
||||
|
||||
[SupportedOSPlatform("macos")]
|
||||
private static bool CheckSysctlName(string name)
|
||||
{
|
||||
ulong size = sizeof(int);
|
||||
if (sysctlbyname(name, out int val, ref size, IntPtr.Zero, 0) == 0 && size == sizeof(int))
|
||||
{
|
||||
return val != 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private static string[] _sysctlNames = new string[]
|
||||
{
|
||||
"hw.optional.floatingpoint",
|
||||
"hw.optional.AdvSIMD",
|
||||
"hw.optional.arm.FEAT_FP16",
|
||||
"hw.optional.arm.FEAT_AES",
|
||||
"hw.optional.arm.FEAT_PMULL",
|
||||
"hw.optional.arm.FEAT_LSE",
|
||||
"hw.optional.armv8_crc32",
|
||||
"hw.optional.arm.FEAT_SHA1",
|
||||
"hw.optional.arm.FEAT_SHA256"
|
||||
};
|
||||
|
||||
[Flags]
|
||||
public enum MacOsFeatureFlags
|
||||
{
|
||||
Fp = 1 << 0,
|
||||
AdvSimd = 1 << 1,
|
||||
Fp16 = 1 << 2,
|
||||
Aes = 1 << 3,
|
||||
Pmull = 1 << 4,
|
||||
Lse = 1 << 5,
|
||||
Crc32 = 1 << 6,
|
||||
Sha1 = 1 << 7,
|
||||
Sha256 = 1 << 8
|
||||
}
|
||||
|
||||
public static MacOsFeatureFlags MacOsFeatureInfo { get; } = 0;
|
||||
|
||||
#endregion
|
||||
|
||||
public static bool SupportsAdvSimd => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Asimd) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.AdvSimd);
|
||||
public static bool SupportsAes => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Aes) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Aes);
|
||||
public static bool SupportsPmull => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Pmull) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Pmull);
|
||||
public static bool SupportsLse => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Atomics) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Lse);
|
||||
public static bool SupportsCrc32 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Crc32) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Crc32);
|
||||
public static bool SupportsSha1 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha1) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha1);
|
||||
public static bool SupportsSha256 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha2) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha256);
|
||||
}
|
||||
}
|
|
@ -2556,7 +2556,7 @@ namespace ARMeilleure.Instructions
|
|||
{
|
||||
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
|
||||
|
||||
if (Optimizations.UseAdvSimd && false) // Not supported by all Arm CPUs.
|
||||
if (Optimizations.UseArm64Pmull)
|
||||
{
|
||||
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV);
|
||||
}
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
using ARMeilleure.CodeGen.X86;
|
||||
using System.Runtime.Intrinsics.Arm;
|
||||
|
||||
namespace ARMeilleure
|
||||
{
|
||||
using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
|
||||
using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
|
||||
|
||||
public static class Optimizations
|
||||
{
|
||||
public static bool FastFP { get; set; } = true;
|
||||
|
@ -11,6 +13,7 @@ namespace ARMeilleure
|
|||
public static bool UseUnmanagedDispatchLoop { get; set; } = true;
|
||||
|
||||
public static bool UseAdvSimdIfAvailable { get; set; } = true;
|
||||
public static bool UseArm64PmullIfAvailable { get; set; } = true;
|
||||
|
||||
public static bool UseSseIfAvailable { get; set; } = true;
|
||||
public static bool UseSse2IfAvailable { get; set; } = true;
|
||||
|
@ -29,25 +32,26 @@ namespace ARMeilleure
|
|||
|
||||
public static bool ForceLegacySse
|
||||
{
|
||||
get => HardwareCapabilities.ForceLegacySse;
|
||||
set => HardwareCapabilities.ForceLegacySse = value;
|
||||
get => X86HardwareCapabilities.ForceLegacySse;
|
||||
set => X86HardwareCapabilities.ForceLegacySse = value;
|
||||
}
|
||||
|
||||
internal static bool UseAdvSimd => UseAdvSimdIfAvailable && AdvSimd.IsSupported;
|
||||
internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;
|
||||
internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
|
||||
|
||||
internal static bool UseSse => UseSseIfAvailable && HardwareCapabilities.SupportsSse;
|
||||
internal static bool UseSse2 => UseSse2IfAvailable && HardwareCapabilities.SupportsSse2;
|
||||
internal static bool UseSse3 => UseSse3IfAvailable && HardwareCapabilities.SupportsSse3;
|
||||
internal static bool UseSsse3 => UseSsse3IfAvailable && HardwareCapabilities.SupportsSsse3;
|
||||
internal static bool UseSse41 => UseSse41IfAvailable && HardwareCapabilities.SupportsSse41;
|
||||
internal static bool UseSse42 => UseSse42IfAvailable && HardwareCapabilities.SupportsSse42;
|
||||
internal static bool UsePopCnt => UsePopCntIfAvailable && HardwareCapabilities.SupportsPopcnt;
|
||||
internal static bool UseAvx => UseAvxIfAvailable && HardwareCapabilities.SupportsAvx && !ForceLegacySse;
|
||||
internal static bool UseF16c => UseF16cIfAvailable && HardwareCapabilities.SupportsF16c;
|
||||
internal static bool UseFma => UseFmaIfAvailable && HardwareCapabilities.SupportsFma;
|
||||
internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
|
||||
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq;
|
||||
internal static bool UseSha => UseShaIfAvailable && HardwareCapabilities.SupportsSha;
|
||||
internal static bool UseGfni => UseGfniIfAvailable && HardwareCapabilities.SupportsGfni;
|
||||
internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;
|
||||
internal static bool UseSse2 => UseSse2IfAvailable && X86HardwareCapabilities.SupportsSse2;
|
||||
internal static bool UseSse3 => UseSse3IfAvailable && X86HardwareCapabilities.SupportsSse3;
|
||||
internal static bool UseSsse3 => UseSsse3IfAvailable && X86HardwareCapabilities.SupportsSsse3;
|
||||
internal static bool UseSse41 => UseSse41IfAvailable && X86HardwareCapabilities.SupportsSse41;
|
||||
internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42;
|
||||
internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt;
|
||||
internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse;
|
||||
internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c;
|
||||
internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma;
|
||||
internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni;
|
||||
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq;
|
||||
internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha;
|
||||
internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni;
|
||||
}
|
||||
}
|
|
@ -1,7 +1,6 @@
|
|||
using ARMeilleure.CodeGen;
|
||||
using ARMeilleure.CodeGen.Linking;
|
||||
using ARMeilleure.CodeGen.Unwinding;
|
||||
using ARMeilleure.CodeGen.X86;
|
||||
using ARMeilleure.Common;
|
||||
using ARMeilleure.Memory;
|
||||
using Ryujinx.Common;
|
||||
|
@ -22,12 +21,15 @@ using static ARMeilleure.Translation.PTC.PtcFormatter;
|
|||
|
||||
namespace ARMeilleure.Translation.PTC
|
||||
{
|
||||
using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities;
|
||||
using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities;
|
||||
|
||||
class Ptc : IPtcLoadState
|
||||
{
|
||||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||
|
||||
private const uint InternalVersion = 4114; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
private const uint InternalVersion = 4264; //! To be incremented manually for each change to the ARMeilleure project.
|
||||
|
||||
private const string ActualDir = "0";
|
||||
private const string BackupDir = "1";
|
||||
|
@ -951,12 +953,27 @@ namespace ARMeilleure.Translation.PTC
|
|||
}
|
||||
|
||||
private static FeatureInfo GetFeatureInfo()
|
||||
{
|
||||
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
|
||||
{
|
||||
return new FeatureInfo(
|
||||
(uint)HardwareCapabilities.FeatureInfo1Ecx,
|
||||
(uint)HardwareCapabilities.FeatureInfo1Edx,
|
||||
(uint)HardwareCapabilities.FeatureInfo7Ebx,
|
||||
(uint)HardwareCapabilities.FeatureInfo7Ecx);
|
||||
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap,
|
||||
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2,
|
||||
(ulong)Arm64HardwareCapabilities.MacOsFeatureInfo,
|
||||
0);
|
||||
}
|
||||
else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
|
||||
{
|
||||
return new FeatureInfo(
|
||||
(ulong)X86HardwareCapabilities.FeatureInfo1Ecx,
|
||||
(ulong)X86HardwareCapabilities.FeatureInfo1Edx,
|
||||
(ulong)X86HardwareCapabilities.FeatureInfo7Ebx,
|
||||
(ulong)X86HardwareCapabilities.FeatureInfo7Ecx);
|
||||
}
|
||||
else
|
||||
{
|
||||
return new FeatureInfo(0, 0, 0, 0);
|
||||
}
|
||||
}
|
||||
|
||||
private byte GetMemoryManagerMode()
|
||||
|
@ -976,7 +993,7 @@ namespace ARMeilleure.Translation.PTC
|
|||
return osPlatform;
|
||||
}
|
||||
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 58*/)]
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 74*/)]
|
||||
private struct OuterHeader
|
||||
{
|
||||
public ulong Magic;
|
||||
|
@ -1007,8 +1024,8 @@ namespace ARMeilleure.Translation.PTC
|
|||
}
|
||||
}
|
||||
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 16*/)]
|
||||
private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2, uint FeatureInfo3);
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 32*/)]
|
||||
private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3);
|
||||
|
||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
|
||||
private struct InnerHeader
|
||||
|
|
Reference in a new issue