1
0
Fork 0
mirror of https://git.tukaani.org/xz.git synced 2024-04-04 12:36:23 +02:00

liblzma: Omit CRC tables when not needed with ARM64 optimizations.

This is similar to the existing x86-64 CLMUL conditions to omit the
tables. Those x86-64 conditions were slightly refactored to improve readability.
This commit is contained in:
Jia Tan 2024-01-22 21:36:09 +08:00
parent 761f5b69a4
commit 1940f0ec28
3 changed files with 25 additions and 5 deletions

View file

@ -13,11 +13,23 @@
#include "common.h" #include "common.h"
// FIXME: Compared to crc32_fast.c this has to check for __x86_64__ too // FIXME: Compared to crc_common.h this has to check for __x86_64__ too
// so that in 32-bit builds crc32_x86.S won't break due to a missing table. // so that in 32-bit builds crc32_x86.S won't break due to a missing table.
#if !defined(HAVE_ENCODERS) && ((defined(__x86_64__) && defined(__SSSE3__) \ #if (defined(__x86_64__) && defined(__SSSE3__) \
&& defined(__SSE4_1__) && defined(__PCLMUL__)) \ && defined(__SSE4_1__) && defined(__PCLMUL__)) \
|| (defined(__e2k__) && __iset__ >= 6)) || (defined(__e2k__) && __iset__ >= 6)
# define X86_CLMUL_NO_TABLE 1
#endif
#if defined(HAVE_ARM64_CRC32) \
&& !defined(WORDS_BIGENDIAN) \
&& defined(__ARM_FEATURE_CRC32)
# define ARM64_CRC32_NO_TABLE 1
#endif
#if !defined(HAVE_ENCODERS) && (defined(X86_CLMUL_NO_TABLE) \
|| defined(ARM64_CRC32_NO_TABLE))
// No table needed. Use a typedef to avoid an empty translation unit. // No table needed. Use a typedef to avoid an empty translation unit.
typedef void lzma_crc32_dummy; typedef void lzma_crc32_dummy;

View file

@ -13,11 +13,16 @@
#include "common.h" #include "common.h"
// FIXME: Compared to crc64_fast.c this has to check for __x86_64__ too // FIXME: Compared to crc_common.h this has to check for __x86_64__ too
// so that in 32-bit builds crc64_x86.S won't break due to a missing table. // so that in 32-bit builds crc64_x86.S won't break due to a missing table.
#if (defined(__x86_64__) && defined(__SSSE3__) \ #if (defined(__x86_64__) && defined(__SSSE3__) \
&& defined(__SSE4_1__) && defined(__PCLMUL__)) \ && defined(__SSE4_1__) && defined(__PCLMUL__)) \
|| (defined(__e2k__) && __iset__ >= 6) || (defined(__e2k__) && __iset__ >= 6)
# define X86_CLMUL_NO_TABLE 1
#endif
#ifdef X86_CLMUL_NO_TABLE
// No table needed. Use a typedef to avoid an empty translation unit. // No table needed. Use a typedef to avoid an empty translation unit.
typedef void lzma_crc64_dummy; typedef void lzma_crc64_dummy;

View file

@ -75,6 +75,9 @@
// ARM64 CRC32 instruction is only useful for CRC32. Currently, only // ARM64 CRC32 instruction is only useful for CRC32. Currently, only
// little endian is supported since we were unable to test on a big // little endian is supported since we were unable to test on a big
// endian machine. // endian machine.
//
// NOTE: Keep this and the next check in sync with the macro
// ARM64_CRC32_NO_TABLE in crc32_table.c
#if defined(HAVE_ARM64_CRC32) && !defined(WORDS_BIGENDIAN) #if defined(HAVE_ARM64_CRC32) && !defined(WORDS_BIGENDIAN)
// Allow ARM64 CRC32 instruction without a runtime check if // Allow ARM64 CRC32 instruction without a runtime check if
// __ARM_FEATURE_CRC32 is defined. GCC and Clang only define this if the // __ARM_FEATURE_CRC32 is defined. GCC and Clang only define this if the
@ -94,7 +97,7 @@
// generic version can be omitted. Note that this doesn't work with MSVC // generic version can be omitted. Note that this doesn't work with MSVC
// as I don't know how to detect the features here. // as I don't know how to detect the features here.
// //
// NOTE: Keep this this in sync with crc32_table.c. // NOTE: Keep this in sync with the X86_CLMUL_NO_TABLE macro in crc32_table.c.
# if (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \ # if (defined(__SSSE3__) && defined(__SSE4_1__) && defined(__PCLMUL__)) \
|| (defined(__e2k__) && __iset__ >= 6) || (defined(__e2k__) && __iset__ >= 6)
# define CRC32_ARCH_OPTIMIZED 1 # define CRC32_ARCH_OPTIMIZED 1