0
0
Fork 0

ARMeilleure: Add initial support for AVX512 (EVEX encoding) (cont) (#4147)

* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection

Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as
short-hands for `F+VL` and `F+VL+DQ`.

* ARMeilleure: Add initial support for EVEX instruction encoding

Does not implement rounding, or exception controls.

* ARMeilleure: Add `X86Vpternlogd`

Accelerates the vector-`Not` instruction.

* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}

* ARMeilleure: Add check for `XCR0` flags

Add XCR0 register checks for AVX and AVX512F, following the guidelines
from section 14.3 and 15.2 from the Intel Architecture Software
Developer's Manual.

* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting

* ARMeilleure: Move XCR0 procedure to GetXcr0Eax

* ARMeilleure: Add `XCR0` to `FeatureInfo` structure

* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly

Avoids an additional allocation

* ARMeilleure: Formatting fixes

* ARMeilleure: Fix EVEX encoding src2 register index

> Just like in VEX prefix, vvvv is provided in inverted form.

* ARMeilleure: Add `X86Vpternlogd` acceleration to `Vmvn_I`

Passes unit tests, verified instruction utilization

* ARMeilleure: Fix EVEX register operand designations

Operand 2 was being sourced improperly.

EVEX encoded instructions source their operands like so:
Operand 1: ModRM:reg
Operand 2: EVEX.vvvvv
Operand 3: ModRM:r/m
Operand 4: Imm

This fixes the improper register designations when emitting vpternlog.
Now "dest", "src1", "src2" arguments emit in the proper order in EVEX instructions.

* ARMeilleure: Add `X86Vpternlogd` acceleration to `Orn_V`

* ARMeilleure: PTC version bump

* ARMeilleure: Update EVEX encoding Debug.Assert to Debug.Fail

* ARMeilleure: Update EVEX encoding comment capitalization
This commit is contained in:
Wunk 2023-03-20 12:09:24 -07:00 committed by GitHub
parent 9f1cf6458c
commit 17620d18db
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 226 additions and 11 deletions

View file

@ -7,6 +7,7 @@
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" /> <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
<ProjectReference Include="..\Ryujinx.Memory\Ryujinx.Memory.csproj" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>

View file

@ -1034,7 +1034,13 @@ namespace ARMeilleure.CodeGen.X86
Debug.Assert(opCode != BadOp, "Invalid opcode value."); Debug.Assert(opCode != BadOp, "Invalid opcode value.");
if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
{
WriteEvexInst(dest, src1, src2, type, flags, opCode);
opCode &= 0xff;
}
else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
{ {
// In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits. // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits.
@ -1153,6 +1159,103 @@ namespace ARMeilleure.CodeGen.X86
} }
} }
private void WriteEvexInst(
Operand dest,
Operand src1,
Operand src2,
OperandType type,
InstructionFlags flags,
int opCode,
bool broadcast = false,
int registerWidth = 128,
int maskRegisterIdx = 0,
bool zeroElements = false)
{
int op1Idx = dest.GetRegister().Index;
int op2Idx = src1.GetRegister().Index;
int op3Idx = src2.GetRegister().Index;
WriteByte(0x62);
// P0
// Extend operand 1 register
bool r = (op1Idx & 8) == 0;
// Extend operand 3 register
bool x = (op3Idx & 16) == 0;
// Extend operand 3 register
bool b = (op3Idx & 8) == 0;
// Extend operand 1 register
bool rp = (op1Idx & 16) == 0;
// Escape code index
byte mm = 0b00;
switch ((ushort)(opCode >> 8))
{
case 0xf00: mm = 0b01; break;
case 0xf38: mm = 0b10; break;
case 0xf3a: mm = 0b11; break;
default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
}
WriteByte(
(byte)(
(r ? 0x80 : 0) |
(x ? 0x40 : 0) |
(b ? 0x20 : 0) |
(rp ? 0x10 : 0) |
mm));
// P1
// Specify 64-bit lane mode
bool w = Is64Bits(type);
// Operand 2 register index
byte vvvv = (byte)(~op2Idx & 0b1111);
// Opcode prefix
byte pp = (flags & InstructionFlags.PrefixMask) switch
{
InstructionFlags.Prefix66 => 0b01,
InstructionFlags.PrefixF3 => 0b10,
InstructionFlags.PrefixF2 => 0b11,
_ => 0
};
WriteByte(
(byte)(
(w ? 0x80 : 0) |
(vvvv << 3) |
0b100 |
pp));
// P2
// Mask register determines what elements to zero, rather than what elements to merge
bool z = zeroElements;
// Specifies register-width
byte ll = 0b00;
switch (registerWidth)
{
case 128: ll = 0b00; break;
case 256: ll = 0b01; break;
case 512: ll = 0b10; break;
default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
}
// Embedded broadcast in the case of a memory operand
bool bcast = broadcast;
// Extend operand 2 register
bool vp = (op2Idx & 16) == 0;
// Mask register index
Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
byte aaa = (byte)(maskRegisterIdx & 0b111);
WriteByte(
(byte)(
(z ? 0x80 : 0) |
(ll << 5) |
(bcast ? 0x10 : 0) |
(vp ? 8 : 0) |
aaa));
}
private void WriteCompactInst(Operand operand, int opCode) private void WriteCompactInst(Operand operand, int opCode)
{ {
int regIndex = operand.GetRegister().Index; int regIndex = operand.GetRegister().Index;

View file

@ -20,6 +20,7 @@ namespace ARMeilleure.CodeGen.X86
Reg8Dest = 1 << 2, Reg8Dest = 1 << 2,
RexW = 1 << 3, RexW = 1 << 3,
Vex = 1 << 4, Vex = 1 << 4,
Evex = 1 << 5,
PrefixBit = 16, PrefixBit = 16,
PrefixMask = 7 << PrefixBit, PrefixMask = 7 << PrefixBit,
@ -278,6 +279,7 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66));
Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex)); Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));

View file

@ -1,10 +1,14 @@
using Ryujinx.Memory;
using System; using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86; using System.Runtime.Intrinsics.X86;
namespace ARMeilleure.CodeGen.X86 namespace ARMeilleure.CodeGen.X86
{ {
static class HardwareCapabilities static class HardwareCapabilities
{ {
private delegate uint GetXcr0();
static HardwareCapabilities() static HardwareCapabilities()
{ {
if (!X86Base.IsSupported) if (!X86Base.IsSupported)
@ -24,6 +28,28 @@ namespace ARMeilleure.CodeGen.X86
FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7; FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7; FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7;
} }
Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax();
}
private static uint GetXcr0Eax()
{
ReadOnlySpan<byte> asmGetXcr0 = new byte[]
{
0x31, 0xc9, // xor ecx, ecx
0xf, 0x01, 0xd0, // xgetbv
0xc3, // ret
};
using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length);
memGetXcr0.Write(0, asmGetXcr0);
memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute);
var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer);
return fGetXcr0();
} }
[Flags] [Flags]
@ -44,6 +70,7 @@ namespace ARMeilleure.CodeGen.X86
Sse42 = 1 << 20, Sse42 = 1 << 20,
Popcnt = 1 << 23, Popcnt = 1 << 23,
Aes = 1 << 25, Aes = 1 << 25,
Osxsave = 1 << 27,
Avx = 1 << 28, Avx = 1 << 28,
F16c = 1 << 29 F16c = 1 << 29
} }
@ -52,7 +79,11 @@ namespace ARMeilleure.CodeGen.X86
public enum FeatureFlags7Ebx public enum FeatureFlags7Ebx
{ {
Avx2 = 1 << 5, Avx2 = 1 << 5,
Sha = 1 << 29 Avx512f = 1 << 16,
Avx512dq = 1 << 17,
Sha = 1 << 29,
Avx512bw = 1 << 30,
Avx512vl = 1 << 31
} }
[Flags] [Flags]
@ -61,10 +92,21 @@ namespace ARMeilleure.CodeGen.X86
Gfni = 1 << 8, Gfni = 1 << 8,
} }
[Flags]
public enum Xcr0FlagsEax
{
Sse = 1 << 1,
YmmHi128 = 1 << 2,
Opmask = 1 << 5,
ZmmHi256 = 1 << 6,
Hi16Zmm = 1 << 7
}
public static FeatureFlags1Edx FeatureInfo1Edx { get; } public static FeatureFlags1Edx FeatureInfo1Edx { get; }
public static FeatureFlags1Ecx FeatureInfo1Ecx { get; } public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0; public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0; public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0;
public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0;
public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse); public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2); public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
@ -76,8 +118,13 @@ namespace ARMeilleure.CodeGen.X86
public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42); public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt); public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes); public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx); public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128);
public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx; public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Osxsave)
&& Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm);
public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F;
public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F;
public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F;
public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c); public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha); public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni); public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni);
@ -85,5 +132,6 @@ namespace ARMeilleure.CodeGen.X86
public static bool ForceLegacySse { get; set; } public static bool ForceLegacySse { get; set; }
public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse; public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse;
} }
} }

View file

@ -180,6 +180,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma)); Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma)); Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma)); Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
Add(Intrinsic.X86Vpternlogd, new IntrinsicInfo(X86Instruction.Vpternlogd, IntrinsicType.TernaryImm));
Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary)); Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary)); Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
} }

View file

@ -219,6 +219,7 @@ namespace ARMeilleure.CodeGen.X86
Vfnmsub231sd, Vfnmsub231sd,
Vfnmsub231ss, Vfnmsub231ss,
Vpblendvb, Vpblendvb,
Vpternlogd,
Xor, Xor,
Xorpd, Xorpd,
Xorps, Xorps,

View file

@ -254,7 +254,22 @@ namespace ARMeilleure.Instructions
public static void Not_V(ArmEmitterContext context) public static void Not_V(ArmEmitterContext context)
{ {
if (Optimizations.UseSse2) if (Optimizations.UseAvx512Ortho)
{
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(~0b10101010));
if (op.RegisterSize == RegisterSize.Simd64)
{
res = context.VectorZeroUpper64(res);
}
context.Copy(GetVec(op.Rd), res);
}
else if (Optimizations.UseSse2)
{ {
OpCodeSimd op = (OpCodeSimd)context.CurrOp; OpCodeSimd op = (OpCodeSimd)context.CurrOp;
@ -283,6 +298,22 @@ namespace ARMeilleure.Instructions
{ {
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV); InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV);
} }
else if (Optimizations.UseAvx512Ortho)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
if (op.RegisterSize == RegisterSize.Simd64)
{
res = context.VectorZeroUpper64(res);
}
context.Copy(GetVec(op.Rd), res);
}
else if (Optimizations.UseSse2) else if (Optimizations.UseSse2)
{ {
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;

View file

@ -151,6 +151,13 @@ namespace ARMeilleure.Instructions
{ {
InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m)); InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m));
} }
else if (Optimizations.UseAvx512Ortho)
{
EmitVectorBinaryOpSimd32(context, (n, m) =>
{
return context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010));
});
}
else if (Optimizations.UseSse2) else if (Optimizations.UseSse2)
{ {
Operand mask = context.VectorOne(); Operand mask = context.VectorOne();

View file

@ -34,7 +34,14 @@ namespace ARMeilleure.Instructions
public static void Vmvn_I(ArmEmitterContext context) public static void Vmvn_I(ArmEmitterContext context)
{ {
if (Optimizations.UseSse2) if (Optimizations.UseAvx512Ortho)
{
EmitVectorUnaryOpSimd32(context, (op1) =>
{
return context.AddIntrinsic(Intrinsic.X86Vpternlogd, op1, op1, Const(0b01010101));
});
}
else if (Optimizations.UseSse2)
{ {
EmitVectorUnaryOpSimd32(context, (op1) => EmitVectorUnaryOpSimd32(context, (op1) =>
{ {

View file

@ -173,6 +173,7 @@ namespace ARMeilleure.IntermediateRepresentation
X86Vfnmadd231ss, X86Vfnmadd231ss,
X86Vfnmsub231sd, X86Vfnmsub231sd,
X86Vfnmsub231ss, X86Vfnmsub231ss,
X86Vpternlogd,
X86Xorpd, X86Xorpd,
X86Xorps, X86Xorps,

View file

@ -23,6 +23,10 @@ namespace ARMeilleure
public static bool UseSse42IfAvailable { get; set; } = true; public static bool UseSse42IfAvailable { get; set; } = true;
public static bool UsePopCntIfAvailable { get; set; } = true; public static bool UsePopCntIfAvailable { get; set; } = true;
public static bool UseAvxIfAvailable { get; set; } = true; public static bool UseAvxIfAvailable { get; set; } = true;
public static bool UseAvx512FIfAvailable { get; set; } = true;
public static bool UseAvx512VlIfAvailable { get; set; } = true;
public static bool UseAvx512BwIfAvailable { get; set; } = true;
public static bool UseAvx512DqIfAvailable { get; set; } = true;
public static bool UseF16cIfAvailable { get; set; } = true; public static bool UseF16cIfAvailable { get; set; } = true;
public static bool UseFmaIfAvailable { get; set; } = true; public static bool UseFmaIfAvailable { get; set; } = true;
public static bool UseAesniIfAvailable { get; set; } = true; public static bool UseAesniIfAvailable { get; set; } = true;
@ -47,11 +51,18 @@ namespace ARMeilleure
internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42; internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42;
internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt; internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt;
internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse; internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse;
internal static bool UseAvx512F => UseAvx512FIfAvailable && X86HardwareCapabilities.SupportsAvx512F && !ForceLegacySse;
internal static bool UseAvx512Vl => UseAvx512VlIfAvailable && X86HardwareCapabilities.SupportsAvx512Vl && !ForceLegacySse;
internal static bool UseAvx512Bw => UseAvx512BwIfAvailable && X86HardwareCapabilities.SupportsAvx512Bw && !ForceLegacySse;
internal static bool UseAvx512Dq => UseAvx512DqIfAvailable && X86HardwareCapabilities.SupportsAvx512Dq && !ForceLegacySse;
internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c; internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c;
internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma; internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma;
internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni; internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni;
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq; internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq;
internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha; internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha;
internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni; internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni;
internal static bool UseAvx512Ortho => UseAvx512F && UseAvx512Vl;
internal static bool UseAvx512OrthoFloat => UseAvx512Ortho && UseAvx512Dq;
} }
} }

View file

@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 4484; //! To be incremented manually for each change to the ARMeilleure project. private const uint InternalVersion = 4485; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0"; private const string ActualDir = "0";
private const string BackupDir = "1"; private const string BackupDir = "1";
@ -969,6 +969,7 @@ namespace ARMeilleure.Translation.PTC
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap, (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap,
(ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2, (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2,
(ulong)Arm64HardwareCapabilities.MacOsFeatureInfo, (ulong)Arm64HardwareCapabilities.MacOsFeatureInfo,
0,
0); 0);
} }
else if (RuntimeInformation.ProcessArchitecture == Architecture.X64) else if (RuntimeInformation.ProcessArchitecture == Architecture.X64)
@ -977,11 +978,12 @@ namespace ARMeilleure.Translation.PTC
(ulong)X86HardwareCapabilities.FeatureInfo1Ecx, (ulong)X86HardwareCapabilities.FeatureInfo1Ecx,
(ulong)X86HardwareCapabilities.FeatureInfo1Edx, (ulong)X86HardwareCapabilities.FeatureInfo1Edx,
(ulong)X86HardwareCapabilities.FeatureInfo7Ebx, (ulong)X86HardwareCapabilities.FeatureInfo7Ebx,
(ulong)X86HardwareCapabilities.FeatureInfo7Ecx); (ulong)X86HardwareCapabilities.FeatureInfo7Ecx,
(ulong)X86HardwareCapabilities.Xcr0InfoEax);
} }
else else
{ {
return new FeatureInfo(0, 0, 0, 0); return new FeatureInfo(0, 0, 0, 0, 0);
} }
} }
@ -1002,7 +1004,7 @@ namespace ARMeilleure.Translation.PTC
return osPlatform; return osPlatform;
} }
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 78*/)] [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 86*/)]
private struct OuterHeader private struct OuterHeader
{ {
public ulong Magic; public ulong Magic;
@ -1034,8 +1036,8 @@ namespace ARMeilleure.Translation.PTC
} }
} }
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 32*/)] [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 40*/)]
private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3); private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3, ulong FeatureInfo4);
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)] [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
private struct InnerHeader private struct InnerHeader