Implement fast path for AES crypto instructions on Arm64 (#5281)

* Implement fast path for AES crypto instructions on Arm64

* PPTC version bump

* Use AES HW feature check
This commit is contained in:
gdkchan 2023-06-10 21:51:35 -03:00 committed by GitHub
parent eb0bb36bbf
commit 193ca3c9a2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 79 additions and 16 deletions

View file

@ -168,8 +168,6 @@ namespace ARMeilleure.CodeGen.Arm64
Logger.StartPass(PassName.CodeGeneration); Logger.StartPass(PassName.CodeGeneration);
//Console.Error.WriteLine(IRDumper.GetDump(cfg));
bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0; bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable); CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);

View file

@ -179,6 +179,35 @@ namespace ARMeilleure.CodeGen.Arm64
(uint)operation.GetSource(2).AsInt32()); (uint)operation.GetSource(2).AsInt32());
break; break;
case IntrinsicType.Vector128Unary:
GenerateVectorUnary(
context,
1,
0,
info.Inst,
operation.Destination,
operation.GetSource(0));
break;
case IntrinsicType.Vector128Binary:
GenerateVectorBinary(
context,
1,
0,
info.Inst,
operation.Destination,
operation.GetSource(0),
operation.GetSource(1));
break;
case IntrinsicType.Vector128BinaryRd:
GenerateVectorUnary(
context,
1,
0,
info.Inst,
operation.Destination,
operation.GetSource(1));
break;
case IntrinsicType.VectorUnary: case IntrinsicType.VectorUnary:
GenerateVectorUnary( GenerateVectorUnary(
context, context,

View file

@ -19,8 +19,8 @@ namespace ARMeilleure.CodeGen.Arm64
Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary)); Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary)); Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary)); Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary)); Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128BinaryRd));
Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary)); Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128BinaryRd));
Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary)); Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary)); Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise)); Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));

View file

@ -23,6 +23,10 @@ namespace ARMeilleure.CodeGen.Arm64
ScalarTernaryShlRd, ScalarTernaryShlRd,
ScalarTernaryShrRd, ScalarTernaryShrRd,
Vector128Unary,
Vector128Binary,
Vector128BinaryRd,
VectorUnary, VectorUnary,
VectorUnaryBitwise, VectorUnaryBitwise,
VectorUnaryByElem, VectorUnaryByElem,
@ -50,9 +54,6 @@ namespace ARMeilleure.CodeGen.Arm64
VectorTernaryShlRd, VectorTernaryShlRd,
VectorTernaryShrRd, VectorTernaryShrRd,
Vector128Unary,
Vector128Binary,
GetRegister, GetRegister,
SetRegister SetRegister
} }

View file

@ -746,6 +746,7 @@ namespace ARMeilleure.CodeGen.Arm64
info.Type == IntrinsicType.ScalarTernaryFPRdByElem || info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
info.Type == IntrinsicType.ScalarTernaryShlRd || info.Type == IntrinsicType.ScalarTernaryShlRd ||
info.Type == IntrinsicType.ScalarTernaryShrRd || info.Type == IntrinsicType.ScalarTernaryShrRd ||
info.Type == IntrinsicType.Vector128BinaryRd ||
info.Type == IntrinsicType.VectorBinaryRd || info.Type == IntrinsicType.VectorBinaryRd ||
info.Type == IntrinsicType.VectorInsertByElem || info.Type == IntrinsicType.VectorInsertByElem ||
info.Type == IntrinsicType.VectorTernaryRd || info.Type == IntrinsicType.VectorTernaryRd ||

View file

@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
Operand res; Operand res;
if (Optimizations.UseAesni) if (Optimizations.UseArm64Aes)
{
res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
}
else if (Optimizations.UseAesni)
{ {
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
} }
@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
Operand res; Operand res;
if (Optimizations.UseAesni) if (Optimizations.UseArm64Aes)
{
res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
}
else if (Optimizations.UseAesni)
{ {
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
} }
@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions
Operand res; Operand res;
if (Optimizations.UseAesni) if (Optimizations.UseArm64Aes)
{
res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
}
else if (Optimizations.UseAesni)
{ {
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n); res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
} }
@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
Operand res; Operand res;
if (Optimizations.UseAesni) if (Optimizations.UseArm64Aes)
{
res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
}
else if (Optimizations.UseAesni)
{ {
Operand roundKey = context.VectorZero(); Operand roundKey = context.VectorZero();

View file

@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
Operand res; Operand res;
if (Optimizations.UseAesni) if (Optimizations.UseArm64Aes)
{
res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
}
else if (Optimizations.UseAesni)
{ {
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
} }
@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
Operand res; Operand res;
if (Optimizations.UseAesni) if (Optimizations.UseArm64Aes)
{
res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
}
else if (Optimizations.UseAesni)
{ {
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
} }
@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions
Operand res; Operand res;
if (Optimizations.UseAesni) if (Optimizations.UseArm64Aes)
{
res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
}
else if (Optimizations.UseAesni)
{ {
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n); res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
} }
@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
Operand res; Operand res;
if (Optimizations.UseAesni) if (Optimizations.UseArm64Aes)
{
res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
}
else if (Optimizations.UseAesni)
{ {
Operand roundKey = context.VectorZero(); Operand roundKey = context.VectorZero();

View file

@ -13,6 +13,7 @@ namespace ARMeilleure
public static bool UseUnmanagedDispatchLoop { get; set; } = true; public static bool UseUnmanagedDispatchLoop { get; set; } = true;
public static bool UseAdvSimdIfAvailable { get; set; } = true; public static bool UseAdvSimdIfAvailable { get; set; } = true;
public static bool UseArm64AesIfAvailable { get; set; } = true;
public static bool UseArm64PmullIfAvailable { get; set; } = true; public static bool UseArm64PmullIfAvailable { get; set; } = true;
public static bool UseSseIfAvailable { get; set; } = true; public static bool UseSseIfAvailable { get; set; } = true;
@ -41,6 +42,7 @@ namespace ARMeilleure
} }
internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd; internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;
internal static bool UseArm64Aes => UseArm64AesIfAvailable && Arm64HardwareCapabilities.SupportsAes;
internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull; internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse; internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;

View file

@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC
private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string OuterHeaderMagicString = "PTCohd\0\0";
private const string InnerHeaderMagicString = "PTCihd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0";
private const uint InternalVersion = 4661; //! To be incremented manually for each change to the ARMeilleure project. private const uint InternalVersion = 5281; //! To be incremented manually for each change to the ARMeilleure project.
private const string ActualDir = "0"; private const string ActualDir = "0";
private const string BackupDir = "1"; private const string BackupDir = "1";