ARMeilleure: Hardware accelerate SHA256 (#3585)

* ARMeilleure/HardwareCapabilities: Add Sha

* ARMeilleure/Intrinsic: Add X86Sha256Rnds2

* ARmeilleure: Hardware accelerate SHA256H/SHA256H2

* ARMeilleure/Intrinsic: Add X86Sha256Msg1, X86Sha256Msg2

* ARMeilleure/Intrinsic: Add X86Palignr

* ARMeilleure: Hardware accelerate SHA256SU0, SHA256SU1

* PTC: Bump InternalVersion
This commit is contained in:
merry 2022-08-25 11:12:13 +01:00 committed by GitHub
parent eba682b767
commit f5235fff29
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 136 additions and 37 deletions

View file

@ -157,6 +157,7 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Palignr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
@ -239,6 +240,9 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
Add(X86Instruction.Sha256Msg1, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cc, InstructionFlags.None));
Add(X86Instruction.Sha256Msg2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cd, InstructionFlags.None));
Add(X86Instruction.Sha256Rnds2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cb, InstructionFlags.None));
Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));

View file

@ -12,21 +12,28 @@ namespace ARMeilleure.CodeGen.X86
return;
}
(_, _, int ecx, int edx) = X86Base.CpuId(0x00000001, 0x00000000);
(int maxNum, _, _, _) = X86Base.CpuId(0x00000000, 0x00000000);
FeatureInfoEdx = (FeatureFlagsEdx)edx;
FeatureInfoEcx = (FeatureFlagsEcx)ecx;
(_, _, int ecx1, int edx1) = X86Base.CpuId(0x00000001, 0x00000000);
FeatureInfo1Edx = (FeatureFlags1Edx)edx1;
FeatureInfo1Ecx = (FeatureFlags1Ecx)ecx1;
if (maxNum >= 7)
{
(_, int ebx7, _, _) = X86Base.CpuId(0x00000007, 0x00000000);
FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
}
}
[Flags]
public enum FeatureFlagsEdx
public enum FeatureFlags1Edx
{
Sse = 1 << 25,
Sse2 = 1 << 26
}
[Flags]
public enum FeatureFlagsEcx
public enum FeatureFlags1Ecx
{
Sse3 = 1 << 0,
Pclmulqdq = 1 << 1,
@ -40,21 +47,31 @@ namespace ARMeilleure.CodeGen.X86
F16c = 1 << 29
}
public static FeatureFlagsEdx FeatureInfoEdx { get; }
public static FeatureFlagsEcx FeatureInfoEcx { get; }
[Flags]
public enum FeatureFlags7Ebx
{
Avx2 = 1 << 5,
Sha = 1 << 29
}
public static bool SupportsSse => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse);
public static bool SupportsSse2 => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse2);
public static bool SupportsSse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse3);
public static bool SupportsPclmulqdq => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Pclmulqdq);
public static bool SupportsSsse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Ssse3);
public static bool SupportsFma => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Fma);
public static bool SupportsSse41 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse41);
public static bool SupportsSse42 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse42);
public static bool SupportsPopcnt => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Popcnt);
public static bool SupportsAesni => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Aes);
public static bool SupportsAvx => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Avx);
public static bool SupportsF16c => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.F16c);
public static FeatureFlags1Edx FeatureInfo1Edx { get; }
public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
public static bool SupportsSse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse3);
public static bool SupportsPclmulqdq => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Pclmulqdq);
public static bool SupportsSsse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Ssse3);
public static bool SupportsFma => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Fma);
public static bool SupportsSse41 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse41);
public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx);
public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
public static bool ForceLegacySse { get; set; }

View file

@ -82,6 +82,7 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
Add(Intrinsic.X86Palignr, new IntrinsicInfo(X86Instruction.Palignr, IntrinsicType.TernaryImm));
Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
@ -151,6 +152,9 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
Add(Intrinsic.X86Sha256Msg1, new IntrinsicInfo(X86Instruction.Sha256Msg1, IntrinsicType.Binary));
Add(Intrinsic.X86Sha256Msg2, new IntrinsicInfo(X86Instruction.Sha256Msg2, IntrinsicType.Binary));
Add(Intrinsic.X86Sha256Rnds2, new IntrinsicInfo(X86Instruction.Sha256Rnds2, IntrinsicType.Ternary));
Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));

View file

@ -308,11 +308,13 @@ namespace ARMeilleure.CodeGen.X86
case Instruction.Extended:
{
bool isBlend = node.Intrinsic == Intrinsic.X86Blendvpd ||
node.Intrinsic == Intrinsic.X86Blendvps ||
node.Intrinsic == Intrinsic.X86Pblendvb;
// BLENDVPD, BLENDVPS, PBLENDVB last operand is always implied to be XMM0 when VEX is not supported.
if ((node.Intrinsic == Intrinsic.X86Blendvpd ||
node.Intrinsic == Intrinsic.X86Blendvps ||
node.Intrinsic == Intrinsic.X86Pblendvb) &&
!HardwareCapabilities.SupportsVexEncoding)
// SHA256RNDS2 always has an implied XMM0 as a last operand.
if ((isBlend && !HardwareCapabilities.SupportsVexEncoding) || node.Intrinsic == Intrinsic.X86Sha256Rnds2)
{
Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);

View file

@ -98,6 +98,7 @@ namespace ARMeilleure.CodeGen.X86
Paddd,
Paddq,
Paddw,
Palignr,
Pand,
Pandn,
Pavgb,
@ -180,6 +181,9 @@ namespace ARMeilleure.CodeGen.X86
Rsqrtss,
Sar,
Setcc,
Sha256Msg1,
Sha256Msg2,
Sha256Rnds2,
Shl,
Shr,
Shufpd,

View file

@ -100,7 +100,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashLower)), d, n, m);
Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
context.Copy(GetVec(op.Rd), res);
}
@ -113,7 +113,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper)), d, n, m);
Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
context.Copy(GetVec(op.Rd), res);
}
@ -125,7 +125,7 @@ namespace ARMeilleure.Instructions
Operand d = GetVec(op.Rd);
Operand n = GetVec(op.Rn);
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), d, n);
Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, n);
context.Copy(GetVec(op.Rd), res);
}
@ -138,7 +138,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVec(op.Rn);
Operand m = GetVec(op.Rm);
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), d, n, m);
Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
context.Copy(GetVec(op.Rd), res);
}

View file

@ -17,7 +17,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashLower)), d, n, m);
Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false);
context.Copy(GetVecA32(op.Qd), res);
}
@ -30,7 +30,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper)), d, n, m);
Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true);
context.Copy(GetVecA32(op.Qd), res);
}
@ -42,7 +42,7 @@ namespace ARMeilleure.Instructions
Operand d = GetVecA32(op.Qd);
Operand m = GetVecA32(op.Qm);
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), d, m);
Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, m);
context.Copy(GetVecA32(op.Qd), res);
}
@ -55,7 +55,7 @@ namespace ARMeilleure.Instructions
Operand n = GetVecA32(op.Qn);
Operand m = GetVecA32(op.Qm);
Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), d, n, m);
Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m);
context.Copy(GetVecA32(op.Qd), res);
}

View file

@ -0,0 +1,56 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
namespace ARMeilleure.Instructions
{
static class InstEmitSimdHashHelper
{
public static Operand EmitSha256h(ArmEmitterContext context, Operand x, Operand y, Operand w, bool part2)
{
if (Optimizations.UseSha)
{
Operand src1 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0xbb));
Operand src2 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0x11));
Operand w2 = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, w, w);
Operand round2 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src1, src2, w);
Operand round4 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src2, round2, w2);
Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, round4, round2, Const(part2 ? 0x11 : 0xbb));
return res;
}
String method = part2 ? nameof(SoftFallback.HashUpper) : nameof(SoftFallback.HashLower);
return context.Call(typeof(SoftFallback).GetMethod(method), x, y, w);
}
public static Operand EmitSha256su0(ArmEmitterContext context, Operand x, Operand y)
{
if (Optimizations.UseSha)
{
return context.AddIntrinsic(Intrinsic.X86Sha256Msg1, x, y);
}
return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), x, y);
}
public static Operand EmitSha256su1(ArmEmitterContext context, Operand x, Operand y, Operand z)
{
if (Optimizations.UseSha && Optimizations.UseSsse3)
{
Operand extr = context.AddIntrinsic(Intrinsic.X86Palignr, z, y, Const(4));
Operand tmp = context.AddIntrinsic(Intrinsic.X86Paddd, extr, x);
Operand res = context.AddIntrinsic(Intrinsic.X86Sha256Msg2, tmp, z);
return res;
}
return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), x, y, z);
}
}
}

View file

@ -1129,7 +1129,7 @@ namespace ARMeilleure.Instructions
return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true);
}
public static V128 HashUpper(V128 hash_efgh, V128 hash_abcd, V128 wk)
public static V128 HashUpper(V128 hash_abcd, V128 hash_efgh, V128 wk)
{
return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false);
}

View file

@ -71,6 +71,7 @@ namespace ARMeilleure.IntermediateRepresentation
X86Paddd,
X86Paddq,
X86Paddw,
X86Palignr,
X86Pand,
X86Pandn,
X86Pavgb,
@ -140,6 +141,9 @@ namespace ARMeilleure.IntermediateRepresentation
X86Roundss,
X86Rsqrtps,
X86Rsqrtss,
X86Sha256Msg1,
X86Sha256Msg2,
X86Sha256Rnds2,
X86Shufpd,
X86Shufps,
X86Sqrtpd,

View file

@ -21,6 +21,7 @@ namespace ARMeilleure
public static bool UseFmaIfAvailable { get; set; } = true;
public static bool UseAesniIfAvailable { get; set; } = true;
public static bool UsePclmulqdqIfAvailable { get; set; } = true;
public static bool UseShaIfAvailable { get; set; } = true;
public static bool ForceLegacySse
{
@ -40,5 +41,6 @@ namespace ARMeilleure
internal static bool UseFma => UseFmaIfAvailable && HardwareCapabilities.SupportsFma;
internal static bool UseAesni => UseAesniIfAvailable && HardwareCapabilities.SupportsAesni;
internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && HardwareCapabilities.SupportsPclmulqdq;
internal static bool UseSha => UseShaIfAvailable && HardwareCapabilities.SupportsSha;
}
}

View file

@ -27,7 +27,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 3439; //! To be incremented manually for each change to the ARMeilleure project.
private const uint InternalVersion = 3585; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0";
private const string BackupDir = "1";
@ -946,9 +946,12 @@ namespace ARMeilleure.Translation.PTC
return BitConverter.IsLittleEndian;
}
private static ulong GetFeatureInfo()
private static FeatureInfo GetFeatureInfo()
{
return (ulong)HardwareCapabilities.FeatureInfoEdx << 32 | (uint)HardwareCapabilities.FeatureInfoEcx;
return new FeatureInfo(
(uint)HardwareCapabilities.FeatureInfo1Ecx,
(uint)HardwareCapabilities.FeatureInfo1Edx,
(uint)HardwareCapabilities.FeatureInfo7Ebx);
}
private static byte GetMemoryManagerMode()
@ -968,7 +971,7 @@ namespace ARMeilleure.Translation.PTC
return osPlatform;
}
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 50*/)]
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 54*/)]
private struct OuterHeader
{
public ulong Magic;
@ -976,7 +979,7 @@ namespace ARMeilleure.Translation.PTC
public uint CacheFileVersion;
public bool Endianness;
public ulong FeatureInfo;
public FeatureInfo FeatureInfo;
public byte MemoryManagerMode;
public uint OSPlatform;
@ -999,6 +1002,9 @@ namespace ARMeilleure.Translation.PTC
}
}
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 12*/)]
private record struct FeatureInfo(uint FeatureInfo0, uint FeatureInfo1, uint FeatureInfo2);
[StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)]
private struct InnerHeader
{