From 193ca3c9a28b63613407c2923f5903e1dd33fdce Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sat, 10 Jun 2023 21:51:35 -0300 Subject: [PATCH] Implement fast path for AES crypto instructions on Arm64 (#5281) * Implement fast path for AES crypto instructions on Arm64 * PPTC version bump * Use AES HW feature check --- .../CodeGen/Arm64/CodeGenerator.cs | 2 -- .../CodeGen/Arm64/CodeGeneratorIntrinsic.cs | 29 +++++++++++++++++++ .../CodeGen/Arm64/IntrinsicTable.cs | 4 +-- .../CodeGen/Arm64/IntrinsicType.cs | 7 +++-- src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs | 1 + .../Instructions/InstEmitSimdCrypto.cs | 24 ++++++++++++--- .../Instructions/InstEmitSimdCrypto32.cs | 24 ++++++++++++--- src/ARMeilleure/Optimizations.cs | 2 ++ src/ARMeilleure/Translation/PTC/Ptc.cs | 2 +- 9 files changed, 79 insertions(+), 16 deletions(-) diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs index fc4fa976e..927198505 100644 --- a/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs +++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs @@ -168,8 +168,6 @@ namespace ARMeilleure.CodeGen.Arm64 Logger.StartPass(PassName.CodeGeneration); - //Console.Error.WriteLine(IRDumper.GetDump(cfg)); - bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0; CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable); diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs index aaa00bb65..1309404a8 100644 --- a/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs +++ b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs @@ -179,6 +179,35 @@ namespace ARMeilleure.CodeGen.Arm64 (uint)operation.GetSource(2).AsInt32()); break; + case IntrinsicType.Vector128Unary: + GenerateVectorUnary( + context, + 1, + 0, + info.Inst, + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.Vector128Binary: + GenerateVectorBinary( + context, + 1, + 0, + info.Inst, + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.Vector128BinaryRd: + GenerateVectorUnary( + context, + 1, + 0, + info.Inst, + operation.Destination, + operation.GetSource(1)); + break; + case IntrinsicType.VectorUnary: GenerateVectorUnary( context, diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs index a309d56d9..c2bd0bd51 100644 --- a/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs +++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs @@ -19,8 +19,8 @@ namespace ARMeilleure.CodeGen.Arm64 Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary)); Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary)); Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary)); - Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary)); - Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128BinaryRd)); + Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128BinaryRd)); Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary)); Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary)); Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise)); diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs index 800eca93c..df61ea1eb 100644 --- a/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs +++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs @@ -23,6 +23,10 @@ namespace ARMeilleure.CodeGen.Arm64 ScalarTernaryShlRd, ScalarTernaryShrRd, + Vector128Unary, + Vector128Binary, + Vector128BinaryRd, + VectorUnary, VectorUnaryBitwise, VectorUnaryByElem, @@ -50,9 +54,6 @@ namespace ARMeilleure.CodeGen.Arm64 VectorTernaryShlRd, VectorTernaryShrRd, - Vector128Unary, - Vector128Binary, - GetRegister, SetRegister } diff --git a/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs index 6ea9d2397..74f80e0fb 100644 --- a/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs +++ b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs @@ -746,6 +746,7 @@ namespace ARMeilleure.CodeGen.Arm64 info.Type == IntrinsicType.ScalarTernaryFPRdByElem || info.Type == IntrinsicType.ScalarTernaryShlRd || info.Type == IntrinsicType.ScalarTernaryShrRd || + info.Type == IntrinsicType.Vector128BinaryRd || info.Type == IntrinsicType.VectorBinaryRd || info.Type == IntrinsicType.VectorInsertByElem || info.Type == IntrinsicType.VectorTernaryRd || diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs index db24e0290..6226e35ae 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs @@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions Operand res; - if (Optimizations.UseAesni) + if (Optimizations.UseArm64Aes) + { + res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n); + } + else if (Optimizations.UseAesni) { res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); } @@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions Operand res; - if (Optimizations.UseAesni) + if (Optimizations.UseArm64Aes) + { + res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n); + } + else if (Optimizations.UseAesni) { res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); } @@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions Operand res; - if (Optimizations.UseAesni) + if (Optimizations.UseArm64Aes) + { + res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n); + } + else if (Optimizations.UseAesni) { res = context.AddIntrinsic(Intrinsic.X86Aesimc, n); } @@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions Operand res; - if (Optimizations.UseAesni) + if (Optimizations.UseArm64Aes) + { + res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n); + } + else if (Optimizations.UseAesni) { Operand roundKey = context.VectorZero(); diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs index f713a388c..24fd7c87e 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs @@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions Operand res; - if (Optimizations.UseAesni) + if (Optimizations.UseArm64Aes) + { + res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n); + } + else if (Optimizations.UseAesni) { res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); } @@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions Operand res; - if (Optimizations.UseAesni) + if (Optimizations.UseArm64Aes) + { + res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n); + } + else if (Optimizations.UseAesni) { res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); } @@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions Operand res; - if (Optimizations.UseAesni) + if (Optimizations.UseArm64Aes) + { + res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n); + } + else if (Optimizations.UseAesni) { res = context.AddIntrinsic(Intrinsic.X86Aesimc, n); } @@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions Operand res; - if (Optimizations.UseAesni) + if (Optimizations.UseArm64Aes) + { + res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n); + } + else if (Optimizations.UseAesni) { Operand roundKey = context.VectorZero(); diff --git a/src/ARMeilleure/Optimizations.cs b/src/ARMeilleure/Optimizations.cs index a84a4dc4f..13348cec2 100644 --- a/src/ARMeilleure/Optimizations.cs +++ b/src/ARMeilleure/Optimizations.cs @@ -13,6 +13,7 @@ namespace ARMeilleure public static bool UseUnmanagedDispatchLoop { get; set; } = true; public static bool UseAdvSimdIfAvailable { get; set; } = true; + public static bool UseArm64AesIfAvailable { get; set; } = true; public static bool UseArm64PmullIfAvailable { get; set; } = true; public static bool UseSseIfAvailable { get; set; } = true; @@ -41,6 +42,7 @@ namespace ARMeilleure } internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd; + internal static bool UseArm64Aes => UseArm64AesIfAvailable && Arm64HardwareCapabilities.SupportsAes; internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull; internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse; diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs index ea4e715b5..366dea6bf 100644 --- a/src/ARMeilleure/Translation/PTC/Ptc.cs +++ b/src/ARMeilleure/Translation/PTC/Ptc.cs @@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 4661; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 5281; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1";