mirror of
https://github.com/ryujinx-mirror/ryujinx.git
synced 2025-01-10 21:41:59 +00:00
ARMeilleure: Hardware accelerate SHA256 (#3585)
* ARMeilleure/HardwareCapabilities: Add Sha
* ARMeilleure/Intrinsic: Add X86Sha256Rnds2
* ARMeilleure: Hardware accelerate SHA256H/SHA256H2
* ARMeilleure/Intrinsic: Add X86Sha256Msg1, X86Sha256Msg2
* ARMeilleure/Intrinsic: Add X86Palignr
* ARMeilleure: Hardware accelerate SHA256SU0, SHA256SU1
* PTC: Bump InternalVersion
This commit is contained in:
parent
eba682b767
commit
f5235fff29
12 changed files with 136 additions and 37 deletions
|
@ -157,6 +157,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
|
Add(X86Instruction.Palignr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
|
@ -239,6 +240,9 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
|
||||||
Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
|
Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
|
||||||
Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
|
Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
|
||||||
|
Add(X86Instruction.Sha256Msg1, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cc, InstructionFlags.None));
|
||||||
|
Add(X86Instruction.Sha256Msg2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cd, InstructionFlags.None));
|
||||||
|
Add(X86Instruction.Sha256Rnds2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cb, InstructionFlags.None));
|
||||||
Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
|
Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
|
||||||
Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
|
Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
|
||||||
Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
|
||||||
|
|
|
@ -12,21 +12,28 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
(_, _, int ecx, int edx) = X86Base.CpuId(0x00000001, 0x00000000);
|
(int maxNum, _, _, _) = X86Base.CpuId(0x00000000, 0x00000000);
|
||||||
|
|
||||||
FeatureInfoEdx = (FeatureFlagsEdx)edx;
|
(_, _, int ecx1, int edx1) = X86Base.CpuId(0x00000001, 0x00000000);
|
||||||
FeatureInfoEcx = (FeatureFlagsEcx)ecx;
|
FeatureInfo1Edx = (FeatureFlags1Edx)edx1;
|
||||||
|
FeatureInfo1Ecx = (FeatureFlags1Ecx)ecx1;
|
||||||
|
|
||||||
|
if (maxNum >= 7)
|
||||||
|
{
|
||||||
|
(_, int ebx7, _, _) = X86Base.CpuId(0x00000007, 0x00000000);
|
||||||
|
FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[Flags]
|
[Flags]
|
||||||
public enum FeatureFlagsEdx
|
public enum FeatureFlags1Edx
|
||||||
{
|
{
|
||||||
Sse = 1 << 25,
|
Sse = 1 << 25,
|
||||||
Sse2 = 1 << 26
|
Sse2 = 1 << 26
|
||||||
}
|
}
|
||||||
|
|
||||||
[Flags]
|
[Flags]
|
||||||
public enum FeatureFlagsEcx
|
public enum FeatureFlags1Ecx
|
||||||
{
|
{
|
||||||
Sse3 = 1 << 0,
|
Sse3 = 1 << 0,
|
||||||
Pclmulqdq = 1 << 1,
|
Pclmulqdq = 1 << 1,
|
||||||
|
@ -40,21 +47,31 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
F16c = 1 << 29
|
F16c = 1 << 29
|
||||||
}
|
}
|
||||||
|
|
||||||
public static FeatureFlagsEdx FeatureInfoEdx { get; }
|
[Flags]
|
||||||
public static FeatureFlagsEcx FeatureInfoEcx { get; }
|
public enum FeatureFlags7Ebx
|
||||||
|
{
|
||||||
|
Avx2 = 1 << 5,
|
||||||
|
Sha = 1 << 29
|
||||||
|
}
|
||||||
|
|
||||||
public static bool SupportsSse => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse);
|
public static FeatureFlags1Edx FeatureInfo1Edx { get; }
|
||||||
public static bool SupportsSse2 => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse2);
|
public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
|
||||||
public static bool SupportsSse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse3);
|
public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
|
||||||
public static bool SupportsPclmulqdq => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Pclmulqdq);
|
|
||||||
public static bool SupportsSsse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Ssse3);
|
public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
|
||||||
public static bool SupportsFma => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Fma);
|
public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
|
||||||
public static bool SupportsSse41 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse41);
|
public static bool SupportsSse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse3);
|
||||||
public static bool SupportsSse42 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse42);
|
public static bool SupportsPclmulqdq => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Pclmulqdq);
|
||||||
public static bool SupportsPopcnt => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Popcnt);
|
public static bool SupportsSsse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Ssse3);
|
||||||
public static bool SupportsAesni => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Aes);
|
public static bool SupportsFma => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Fma);
|
||||||
public static bool SupportsAvx => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Avx);
|
public static bool SupportsSse41 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse41);
|
||||||
public static bool SupportsF16c => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.F16c);
|
public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
|
||||||
|
public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
|
||||||
|
public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
|
||||||
|
public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx);
|
||||||
|
public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
|
||||||
|
public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
|
||||||
|
public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
|
||||||
|
|
||||||
public static bool ForceLegacySse { get; set; }
|
public static bool ForceLegacySse { get; set; }
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
|
Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
|
||||||
Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
|
Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
|
||||||
Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
|
Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
|
||||||
|
Add(Intrinsic.X86Palignr, new IntrinsicInfo(X86Instruction.Palignr, IntrinsicType.TernaryImm));
|
||||||
Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
|
Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
|
||||||
Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
|
Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
|
||||||
Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
|
Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
|
||||||
|
@ -151,6 +152,9 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
|
Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
|
||||||
Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
|
Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
|
||||||
Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
|
Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
|
||||||
|
Add(Intrinsic.X86Sha256Msg1, new IntrinsicInfo(X86Instruction.Sha256Msg1, IntrinsicType.Binary));
|
||||||
|
Add(Intrinsic.X86Sha256Msg2, new IntrinsicInfo(X86Instruction.Sha256Msg2, IntrinsicType.Binary));
|
||||||
|
Add(Intrinsic.X86Sha256Rnds2, new IntrinsicInfo(X86Instruction.Sha256Rnds2, IntrinsicType.Ternary));
|
||||||
Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
|
Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
|
||||||
Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
|
Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
|
||||||
Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));
|
Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));
|
||||||
|
|
|
@ -308,11 +308,13 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
|
|
||||||
case Instruction.Extended:
|
case Instruction.Extended:
|
||||||
{
|
{
|
||||||
// BLENDVPD, BLENDVPS, PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
|
bool isBlend = node.Intrinsic == Intrinsic.X86Blendvpd ||
|
||||||
if ((node.Intrinsic == Intrinsic.X86Blendvpd ||
|
|
||||||
node.Intrinsic == Intrinsic.X86Blendvps ||
|
node.Intrinsic == Intrinsic.X86Blendvps ||
|
||||||
node.Intrinsic == Intrinsic.X86Pblendvb) &&
|
node.Intrinsic == Intrinsic.X86Pblendvb;
|
||||||
!HardwareCapabilities.SupportsVexEncoding)
|
|
||||||
|
// BLENDVPD, BLENDVPS, PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
|
||||||
|
// SHA256RNDS2 always has an implied XMM0 as a last operand.
|
||||||
|
if ((isBlend && !HardwareCapabilities.SupportsVexEncoding) || node.Intrinsic == Intrinsic.X86Sha256Rnds2)
|
||||||
{
|
{
|
||||||
Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
|
Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
|
||||||
|
|
||||||
|
|
|
@ -98,6 +98,7 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Paddd,
|
Paddd,
|
||||||
Paddq,
|
Paddq,
|
||||||
Paddw,
|
Paddw,
|
||||||
|
Palignr,
|
||||||
Pand,
|
Pand,
|
||||||
Pandn,
|
Pandn,
|
||||||
Pavgb,
|
Pavgb,
|
||||||
|
@ -180,6 +181,9 @@ namespace ARMeilleure.CodeGen.X86
|
||||||
Rsqrtss,
|
Rsqrtss,
|
||||||
Sar,
|
Sar,
|
||||||
Setcc,
|
Setcc,
|
||||||
|
Sha256Msg1,
|
||||||
|
Sha256Msg2,
|
||||||
|
Sha256Rnds2,
|
||||||
Shl,
|
Shl,
|
||||||
Shr,
|
Shr,
|
||||||
Shufpd,
|
Shufpd,
|
||||||
|
|
|
@ -100,7 +100,7 @@ namespace ARMeilleure.Instructions
|
||||||
Operand n = GetVec(op.Rn);
|
Operand n = GetVec(op.Rn);
|
||||||
Operand m = GetVec(op.Rm);
|
Operand m = GetVec(op.Rm);
|
||||||
|
|
||||||
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashLower)), d, n, m);
|
Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
|
||||||
|
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
|
@ -113,7 +113,7 @@ namespace ARMeilleure.Instructions
|
||||||
Operand n = GetVec(op.Rn);
|
Operand n = GetVec(op.Rn);
|
||||||
Operand m = GetVec(op.Rm);
|
Operand m = GetVec(op.Rm);
|
||||||
|
|
||||||
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper)), d, n, m);
|
Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
|
||||||
|
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
|
@ -125,7 +125,7 @@ namespace ARMeilleure.Instructions
|
||||||
Operand d = GetVec(op.Rd);
|
Operand d = GetVec(op.Rd);
|
||||||
Operand n = GetVec(op.Rn);
|
Operand n = GetVec(op.Rn);
|
||||||
|
|
||||||
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), d, n);
|
Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, n);
|
||||||
|
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
|
@ -138,7 +138,7 @@ namespace ARMeilleure.Instructions
|
||||||
Operand n = GetVec(op.Rn);
|
Operand n = GetVec(op.Rn);
|
||||||
Operand m = GetVec(op.Rm);
|
Operand m = GetVec(op.Rm);
|
||||||
|
|
||||||
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), d, n, m);
|
Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
|
||||||
|
|
||||||
context.Copy(GetVec(op.Rd), res);
|
context.Copy(GetVec(op.Rd), res);
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ namespace ARMeilleure.Instructions
|
||||||
Operand n = GetVecA32(op.Qn);
|
Operand n = GetVecA32(op.Qn);
|
||||||
Operand m = GetVecA32(op.Qm);
|
Operand m = GetVecA32(op.Qm);
|
||||||
|
|
||||||
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashLower)), d, n, m);
|
Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
|
||||||
|
|
||||||
context.Copy(GetVecA32(op.Qd), res);
|
context.Copy(GetVecA32(op.Qd), res);
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,7 @@ namespace ARMeilleure.Instructions
|
||||||
Operand n = GetVecA32(op.Qn);
|
Operand n = GetVecA32(op.Qn);
|
||||||
Operand m = GetVecA32(op.Qm);
|
Operand m = GetVecA32(op.Qm);
|
||||||
|
|
||||||
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper)), d, n, m);
|
Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
|
||||||
|
|
||||||
context.Copy(GetVecA32(op.Qd), res);
|
context.Copy(GetVecA32(op.Qd), res);
|
||||||
}
|
}
|
||||||
|
@ -42,7 +42,7 @@ namespace ARMeilleure.Instructions
|
||||||
Operand d = GetVecA32(op.Qd);
|
Operand d = GetVecA32(op.Qd);
|
||||||
Operand m = GetVecA32(op.Qm);
|
Operand m = GetVecA32(op.Qm);
|
||||||
|
|
||||||
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), d, m);
|
Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, m);
|
||||||
|
|
||||||
context.Copy(GetVecA32(op.Qd), res);
|
context.Copy(GetVecA32(op.Qd), res);
|
||||||
}
|
}
|
||||||
|
@ -55,7 +55,7 @@ namespace ARMeilleure.Instructions
|
||||||
Operand n = GetVecA32(op.Qn);
|
Operand n = GetVecA32(op.Qn);
|
||||||
Operand m = GetVecA32(op.Qm);
|
Operand m = GetVecA32(op.Qm);
|
||||||
|
|
||||||
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), d, n, m);
|
Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
|
||||||
|
|
||||||
context.Copy(GetVecA32(op.Qd), res);
|
context.Copy(GetVecA32(op.Qd), res);
|
||||||
}
|
}
|
||||||
|
|
56
ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
Normal file
56
ARMeilleure/Instructions/InstEmitSimdHashHelper.cs
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
using ARMeilleure.IntermediateRepresentation;
|
||||||
|
using ARMeilleure.Translation;
|
||||||
|
using System;
|
||||||
|
|
||||||
|
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
|
||||||
|
|
||||||
|
namespace ARMeilleure.Instructions
|
||||||
|
{
|
||||||
|
/// <summary>
/// Emitters shared by the SHA-256 SIMD instruction handlers. Each one emits x86 SHA intrinsics when
/// <c>Optimizations.UseSha</c> permits it, and otherwise emits a call to the matching
/// <c>SoftFallback</c> method.
/// </summary>
static class InstEmitSimdHashHelper
{
    /// <summary>
    /// Emits the SHA-256 hash update: two SHA256RNDS2 intrinsics when the SHA fast path is enabled,
    /// or a call to <c>SoftFallback.HashLower</c> / <c>SoftFallback.HashUpper</c> otherwise.
    /// </summary>
    /// <param name="context">Emitter context that receives the generated operations.</param>
    /// <param name="x">First state vector, passed first to the software fallback.</param>
    /// <param name="y">Second state vector, passed second to the software fallback.</param>
    /// <param name="w">Third vector operand, passed last to the software fallback.</param>
    /// <param name="part2">Selects <c>HashUpper</c> (true) or <c>HashLower</c> (false) and the final shuffle mask.</param>
    /// <returns>The operand holding the result.</returns>
    public static Operand EmitSha256h(ArmEmitterContext context, Operand x, Operand y, Operand w, bool part2)
    {
        if (!Optimizations.UseSha)
        {
            // Software path: pick the fallback by name and call it with the operands unchanged.
            string fallbackName = part2 ? nameof(SoftFallback.HashUpper) : nameof(SoftFallback.HashLower);

            return context.Call(typeof(SoftFallback).GetMethod(fallbackName), x, y, w);
        }

        // NOTE(review): the two shuffles presumably rearrange the state vectors into the lane layout
        // SHA256RNDS2 expects - confirm against the instruction reference.
        Operand shuffledBb = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0xbb));
        Operand shuffled11 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0x11));
        Operand wUnpacked = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, w, w);

        // The second SHA256RNDS2 consumes the result of the first together with the unpacked w.
        Operand firstRounds = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, shuffledBb, shuffled11, w);
        Operand secondRounds = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, shuffled11, firstRounds, wUnpacked);

        // The final shuffle mask depends on which half the caller asked for.
        int resultMask = part2 ? 0x11 : 0xbb;

        return context.AddIntrinsic(Intrinsic.X86Shufps, secondRounds, firstRounds, Const(resultMask));
    }

    /// <summary>
    /// Emits the first SHA-256 schedule update: a single SHA256MSG1 intrinsic when the SHA fast path
    /// is enabled, or a call to <c>SoftFallback.Sha256SchedulePart1</c> otherwise.
    /// </summary>
    /// <param name="context">Emitter context that receives the generated operations.</param>
    /// <param name="x">First vector operand.</param>
    /// <param name="y">Second vector operand.</param>
    /// <returns>The operand holding the result.</returns>
    public static Operand EmitSha256su0(ArmEmitterContext context, Operand x, Operand y)
    {
        return Optimizations.UseSha
            ? context.AddIntrinsic(Intrinsic.X86Sha256Msg1, x, y)
            : context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), x, y);
    }

    /// <summary>
    /// Emits the second SHA-256 schedule update: PALIGNR, PADDD and SHA256MSG2 intrinsics when both
    /// <c>Optimizations.UseSha</c> and <c>Optimizations.UseSsse3</c> are set, or a call to
    /// <c>SoftFallback.Sha256SchedulePart2</c> otherwise.
    /// </summary>
    /// <param name="context">Emitter context that receives the generated operations.</param>
    /// <param name="x">First vector operand; added to the PALIGNR result on the fast path.</param>
    /// <param name="y">Second vector operand; second source of the PALIGNR.</param>
    /// <param name="z">Third vector operand; first source of the PALIGNR and second source of SHA256MSG2.</param>
    /// <returns>The operand holding the result.</returns>
    public static Operand EmitSha256su1(ArmEmitterContext context, Operand x, Operand y, Operand z)
    {
        bool useHardware = Optimizations.UseSha && Optimizations.UseSsse3;

        if (!useHardware)
        {
            return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), x, y, z);
        }

        Operand aligned = context.AddIntrinsic(Intrinsic.X86Palignr, z, y, Const(4));
        Operand sum = context.AddIntrinsic(Intrinsic.X86Paddd, aligned, x);

        return context.AddIntrinsic(Intrinsic.X86Sha256Msg2, sum, z);
    }
}
|
||||||
|
}
|
|
@ -1129,7 +1129,7 @@ namespace ARMeilleure.Instructions
|
||||||
return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
|
return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static V128 HashUpper(V128 hash_efgh, V128 hash_abcd, V128 wk)
|
public static V128 HashUpper(V128 hash_abcd, V128 hash_efgh, V128 wk)
|
||||||
{
|
{
|
||||||
return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
|
return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -71,6 +71,7 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||||
X86Paddd,
|
X86Paddd,
|
||||||
X86Paddq,
|
X86Paddq,
|
||||||
X86Paddw,
|
X86Paddw,
|
||||||
|
X86Palignr,
|
||||||
X86Pand,
|
X86Pand,
|
||||||
X86Pandn,
|
X86Pandn,
|
||||||
X86Pavgb,
|
X86Pavgb,
|
||||||
|
@ -140,6 +141,9 @@ namespace ARMeilleure.IntermediateRepresentation
|
||||||
X86Roundss,
|
X86Roundss,
|
||||||
X86Rsqrtps,
|
X86Rsqrtps,
|
||||||
X86Rsqrtss,
|
X86Rsqrtss,
|
||||||
|
X86Sha256Msg1,
|
||||||
|
X86Sha256Msg2,
|
||||||
|
X86Sha256Rnds2,
|
||||||
X86Shufpd,
|
X86Shufpd,
|
||||||
X86Shufps,
|
X86Shufps,
|
||||||
X86Sqrtpd,
|
X86Sqrtpd,
|
||||||
|
|
|
@ -21,6 +21,7 @@ namespace ARMeilleure
|
||||||
public static bool UseFmaIfAvailable { get; set; } = true;
|
public static bool UseFmaIfAvailable { get; set; } = true;
|
||||||
public static bool UseAesniIfAvailable { get; set; } = true;
|
public static bool UseAesniIfAvailable { get; set; } = true;
|
||||||
public static bool UsePclmulqdqIfAvailable { get; set; } = true;
|
public static bool UsePclmulqdqIfAvailable { get; set; } = true;
|
||||||
|
public static bool UseShaIfAvailable { get; set; } = true;
|
||||||
|
|
||||||
public static bool ForceLegacySse
|
public static bool ForceLegacySse
|
||||||
{
|
{
|
||||||
|
@ -40,5 +41,6 @@ namespace ARMeilleure
|
||||||
internal static bool UseFma => UseFmaIfAvailable && HardwareCapabilities.SupportsFma;
|
internal static bool UseFma => UseFmaIfAvailable && HardwareCapabilities.SupportsFma;
|
||||||
internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
|
internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
|
||||||
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq;
|
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq;
|
||||||
|
internal static bool UseSha => UseShaIfAvailable && HardwareCapabilities.SupportsSha;
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||||
|
|
||||||
private const uint InternalVersion = 3439; //! To be incremented manually for each change to the ARMeilleure project.
|
private const uint InternalVersion = 3585; //! To be incremented manually for each change to the ARMeilleure project.
|
||||||
|
|
||||||
private const string ActualDir = "0";
|
private const string ActualDir = "0";
|
||||||
private const string BackupDir = "1";
|
private const string BackupDir = "1";
|
||||||
|
@ -946,9 +946,12 @@ namespace ARMeilleure.Translation.PTC
|
||||||
return BitConverter.IsLittleEndian;
|
return BitConverter.IsLittleEndian;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static ulong GetFeatureInfo()
|
private static FeatureInfo GetFeatureInfo()
|
||||||
{
|
{
|
||||||
return (ulong)HardwareCapabilities.FeatureInfoEdx << 32 | (uint)HardwareCapabilities.FeatureInfoEcx;
|
return new FeatureInfo(
|
||||||
|
(uint)HardwareCapabilities.FeatureInfo1Ecx,
|
||||||
|
(uint)HardwareCapabilities.FeatureInfo1Edx,
|
||||||
|
(uint)HardwareCapabilities.FeatureInfo7Ebx);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static byte GetMemoryManagerMode()
|
private static byte GetMemoryManagerMode()
|
||||||
|
@ -968,7 +971,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
return osPlatform;
|
return osPlatform;
|
||||||
}
|
}
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 50*/)]
|
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 54*/)]
|
||||||
private struct OuterHeader
|
private struct OuterHeader
|
||||||
{
|
{
|
||||||
public ulong Magic;
|
public ulong Magic;
|
||||||
|
@ -976,7 +979,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
public uint CacheFileVersion;
|
public uint CacheFileVersion;
|
||||||
|
|
||||||
public bool Endianness;
|
public bool Endianness;
|
||||||
public ulong FeatureInfo;
|
public FeatureInfo FeatureInfo;
|
||||||
public byte MemoryManagerMode;
|
public byte MemoryManagerMode;
|
||||||
public uint OSPlatform;
|
public uint OSPlatform;
|
||||||
|
|
||||||
|
@ -999,6 +1002,9 @@ namespace ARMeilleure.Translation.PTC
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Three CPU feature words stored in the outer header's FeatureInfo field.
// GetFeatureInfo fills them, in order, from HardwareCapabilities.FeatureInfo1Ecx,
// FeatureInfo1Edx and FeatureInfo7Ebx.
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 12*/)]
|
||||||
|
private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2);
|
||||||
|
|
||||||
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
|
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
|
||||||
private struct InnerHeader
|
private struct InnerHeader
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue