1
0
Fork 0
mirror of https://git.tukaani.org/xz.git synced 2024-04-04 12:36:23 +02:00

liblzma: Set the MSVC optimization fix to only cover lzma_crc64_clmul().

Testing a 32-bit Release build on MSVC showed that only lzma_crc64_clmul()
has the bug. crc_simd_body() and lzma_crc32_clmul() do not need the
optimizations disabled.
This commit is contained in:
Jia Tan 2023-10-18 19:57:10 +08:00
parent 5ce0f7a48b
commit 1c8884f0af

View file

@@ -42,21 +42,6 @@
MASK_H(in, mask, high)
// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC
// code when optimizations are enabled (release build). According to the bug
// report, the ebx register is corrupted and the calculated result is wrong.
// Trying to work around the problem with "__asm mov ebx, ebx" didn't help.
// The following pragma works and performance is still good. x86-64 builds
// aren't affected by this problem.
//
// NOTE: Another pragma after lzma_crc64_clmul() restores the optimizations.
// If the #if condition here is updated, the other one must be updated too.
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \
&& defined(_M_IX86)
# pragma optimize("g", off)
#endif
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__)
__attribute__((__target__("ssse3,sse4.1,pclmul")))
#endif
@@ -314,6 +299,21 @@ calc_hi(uint64_t poly, uint64_t a)
#ifdef HAVE_CHECK_CRC64
// MSVC (VS2015 - VS2022) produces bad 32-bit x86 code from the CLMUL CRC
// code when optimizations are enabled (release build). According to the bug
// report, the ebx register is corrupted and the calculated result is wrong.
// Trying to work around the problem with "__asm mov ebx, ebx" didn't help.
// The following pragma works and performance is still good. x86-64 builds
// and CRC32 CLMUL aren't affected by this problem. The problem does not
// happen in crc_simd_body() either (which is shared with CRC32 CLMUL anyway).
//
// NOTE: Another pragma after lzma_crc64_clmul() restores the optimizations.
// If the #if condition here is updated, the other one must be updated too.
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) && !defined(__clang__) \
&& defined(_M_IX86)
# pragma optimize("g", off)
#endif
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__EDG__)
__attribute__((__target__("ssse3,sse4.1,pclmul")))
#endif