From 72c0978f88339cfe78c96fe42327692b3d74e603 Mon Sep 17 00:00:00 2001
From: Pavel P
Date: Thu, 18 Apr 2024 03:32:08 +0200
Subject: [PATCH] Implement CRC32_u64 and V128_Low64 for x86 builds

The `_mm_cvtsi128_si64` and `_mm_crc32_u64` intrinsics are available
only when building for x64. So that crc32 optimizations are not
disabled for 32-bit builds, equivalent code is implemented using
intrinsics that are also available when targeting 32-bit x86.
---
 .../internal/crc32_x86_arm_combined_simd.h | 26 ++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/absl/crc/internal/crc32_x86_arm_combined_simd.h b/absl/crc/internal/crc32_x86_arm_combined_simd.h
index aa6a65954f2..ffbed3ea4d3 100644
--- a/absl/crc/internal/crc32_x86_arm_combined_simd.h
+++ b/absl/crc/internal/crc32_x86_arm_combined_simd.h
@@ -25,16 +25,14 @@
 // We define a translation layer for both x86 and ARM for the ease of use and
 // most performance gains.
 
-// This implementation requires 64-bit CRC instructions (part of SSE 4.2) and
-// PCLMULQDQ instructions. 32-bit builds with SSE 4.2 do exist, so the
-// __x86_64__ condition is necessary.
-#if defined(__x86_64__) && defined(__SSE4_2__) && defined(__PCLMUL__)
+// This implementation requires CRC instructions (part of SSE 4.2) and
+// PCLMULQDQ instructions.
+#if defined(__SSE4_2__) && defined(__PCLMUL__)
 
 #include <x86intrin.h>
 #define ABSL_CRC_INTERNAL_HAVE_X86_SIMD
 
-#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__) && \
-    defined(_M_AMD64)
+#elif defined(_MSC_VER) && !defined(__clang__) && defined(__AVX__)
 
 // MSVC AVX (/arch:AVX) implies SSE 4.2 and PCLMULQDQ.
 #include <intrin.h>
@@ -143,7 +141,13 @@ inline uint32_t CRC32_u32(uint32_t crc, uint32_t v) {
 }
 
 inline uint32_t CRC32_u64(uint32_t crc, uint64_t v) {
+#if defined(__x86_64__) || defined(_M_X64)
   return static_cast<uint32_t>(_mm_crc32_u64(crc, v));
+#else
+  uint32_t v_lo = static_cast<uint32_t>(v);
+  uint32_t v_hi = static_cast<uint32_t>(v >> 32);
+  return _mm_crc32_u32(_mm_crc32_u32(crc, v_lo), v_hi);
+#endif
 }
 
 inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); }
@@ -191,7 +195,15 @@ inline uint64_t V128_Extract64(const V128 l) {
   return static_cast<uint64_t>(_mm_extract_epi64(l, imm));
 }
 
-inline int64_t V128_Low64(const V128 l) { return _mm_cvtsi128_si64(l); }
+inline int64_t V128_Low64(const V128 l) {
+#if defined(__x86_64__) || defined(_M_X64)
+  return _mm_cvtsi128_si64(l);
+#else
+  uint32_t r_lo = static_cast<uint32_t>(_mm_extract_epi32(l, 0));
+  uint32_t r_hi = static_cast<uint32_t>(_mm_extract_epi32(l, 1));
+  return static_cast<int64_t>((static_cast<uint64_t>(r_hi) << 32) | r_lo);
+#endif
+}
 
 inline V128 V128_ShiftLeft64(const V128 l, const V128 r) {
   return _mm_sll_epi64(l, r);
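
Below is a minimal standalone sanity check, not part of the patch, assuming
an x86-64 host with SSE 4.2 where both code paths compile: the CRC32-C
instruction consumes bytes in ascending address order, so folding in the low
32 bits and then the high 32 bits via `_mm_crc32_u32` should match a single
`_mm_crc32_u64` step. The helper name `Crc32U64Fallback` and the file name
are hypothetical.

```cpp
// Build with e.g. `g++ -O2 -msse4.2 crc_check.cc` (hypothetical file name).
#include <cstdint>
#include <cstdio>
#include <initializer_list>
#include <nmmintrin.h>  // SSE 4.2: _mm_crc32_u32, _mm_crc32_u64

// Mirrors the patch's 32-bit path: two 32-bit CRC steps over the two halves.
static uint32_t Crc32U64Fallback(uint32_t crc, uint64_t v) {
  uint32_t v_lo = static_cast<uint32_t>(v);
  uint32_t v_hi = static_cast<uint32_t>(v >> 32);
  return _mm_crc32_u32(_mm_crc32_u32(crc, v_lo), v_hi);
}

int main() {
  uint32_t crc = 0xffffffffu;
  for (uint64_t v : {0x0123456789abcdefULL, 0ULL, ~0ULL}) {
    uint32_t native = static_cast<uint32_t>(_mm_crc32_u64(crc, v));
    uint32_t split = Crc32U64Fallback(crc, v);
    std::printf("%016llx: native=%08x split=%08x %s\n",
                static_cast<unsigned long long>(v), native, split,
                native == split ? "ok" : "MISMATCH");
    crc = native;  // chain the CRC, as a real byte stream would
  }
  return 0;
}
```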
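
Similarly, a hedged sketch for the `V128_Low64` fallback, again assuming an
x86-64 host so the native `_mm_cvtsi128_si64` is available for comparison:
rebuilding the low 64 bits from two `_mm_extract_epi32` lane extracts
(SSE 4.1) should give the same value. `Low64Fallback` is a hypothetical name.

```cpp
// Build with e.g. `g++ -O2 -msse4.2 low64_check.cc` (hypothetical file name).
#include <cstdint>
#include <cstdio>
#include <emmintrin.h>  // SSE2: _mm_set_epi64x, _mm_cvtsi128_si64
#include <smmintrin.h>  // SSE 4.1: _mm_extract_epi32

// Mirrors the patch's 32-bit path: extract lanes 0 and 1, then recombine.
static int64_t Low64Fallback(__m128i l) {
  uint32_t r_lo = static_cast<uint32_t>(_mm_extract_epi32(l, 0));
  uint32_t r_hi = static_cast<uint32_t>(_mm_extract_epi32(l, 1));
  return static_cast<int64_t>((static_cast<uint64_t>(r_hi) << 32) | r_lo);
}

int main() {
  // The high half is arbitrary; only the low 64 bits matter for V128_Low64.
  __m128i l = _mm_set_epi64x(0x1122334455667788LL, 0x0123456789abcdefLL);
  int64_t native = _mm_cvtsi128_si64(l);
  int64_t split = Low64Fallback(l);
  std::printf("native=%016llx split=%016llx %s\n",
              static_cast<unsigned long long>(native),
              static_cast<unsigned long long>(split),
              native == split ? "ok" : "MISMATCH");
  return 0;
}
```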