Skip to content

Commit 9615a45

Browse files
committed
Workaround GCC lacking __rbit intrinsic
1 parent 6455817 commit 9615a45

File tree

1 file changed

+37
-11
lines changed

1 file changed

+37
-11
lines changed

src/crc_arm_pmull.cc

+37-11
Original file line numberDiff line numberDiff line change
@@ -17,31 +17,57 @@
1717

1818
// ARM's intrinsics guide seems to suggest that vmull_p64 is available on A32, but neither Clang/GCC seem to support it on AArch32
1919
#if (defined(__ARM_FEATURE_CRYPTO) && defined(__ARM_FEATURE_CRC32) && defined(__aarch64__)) || (defined(_M_ARM64) && !defined(__clang__))
20+
2021
#include <arm_neon.h>
2122
#if defined(_MSC_VER) && !defined(__clang__)
22-
#include <intrin.h>
23+
# include <intrin.h>
2324

24-
#ifdef _M_ARM64
25+
# ifdef _M_ARM64
2526
// MSVC may detect this pattern: https://devblogs.microsoft.com/cppblog/a-tour-of-4-msvc-backend-improvements/#byteswap-identification
26-
unsigned __int64 rbit64(unsigned __int64 x) {
27+
static HEDLEY_ALWAYS_INLINE uint64_t rbit64(uint64_t x) {
2728
x = _byteswap_uint64(x);
2829
x = (x & 0xaaaaaaaaaaaaaaaa) >> 1 | (x & 0x5555555555555555) << 1;
2930
x = (x & 0xcccccccccccccccc) >> 2 | (x & 0x3333333333333333) << 2;
3031
x = (x & 0xf0f0f0f0f0f0f0f0) >> 4 | (x & 0x0f0f0f0f0f0f0f0f) << 4;
3132
return x;
3233
}
3334
// ...whilst this seems to work best for 32-bit RBIT
34-
unsigned __int32 rbit32(unsigned __int32 x) {
35-
unsigned __int64 r = rbit64(x);
35+
static HEDLEY_ALWAYS_INLINE uint32_t rbit32(uint32_t x) {
36+
uint64_t r = rbit64(x);
3637
return r >> 32;
3738
}
39+
# else
40+
# define rbit32 _arm_rbit
41+
# endif
3842
#else
39-
#define rbit32 _arm_rbit
40-
#endif
41-
#else
42-
#include <arm_acle.h>
43-
#define rbit32 __rbit
44-
#define rbit64 __rbitll
43+
# include <arm_acle.h>
44+
// __rbit not present before GCC 11.4.0 or 12.2.0; for ARM32, requires GCC 14
45+
# if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(14,0,0) && (!defined(__aarch64__) || !HEDLEY_GCC_VERSION_CHECK(11,3,0) || (HEDLEY_GCC_VERSION_CHECK(12,0,0) && !HEDLEY_GCC_VERSION_CHECK(12,2,0)))
46+
# ifdef __aarch64__
47+
static HEDLEY_ALWAYS_INLINE uint64_t rbit64(uint64_t x) {
48+
uint64_t r;
49+
__asm__ ("rbit %0,%1\n"
50+
: "=r"(r) : "r"(x)
51+
: /* No clobbers */);
52+
return r;
53+
}
54+
# endif
55+
static HEDLEY_ALWAYS_INLINE uint32_t rbit32(uint32_t x) {
56+
uint32_t r;
57+
__asm__ (
58+
# ifdef __aarch64__
59+
"rbit %w0,%w1\n"
60+
# else
61+
"rbit %0,%1\n"
62+
# endif
63+
: "=r"(r) : "r"(x)
64+
: /* No clobbers */);
65+
return r;
66+
}
67+
# else
68+
# define rbit32 __rbit
69+
# define rbit64 __rbitll
70+
# endif
4571
#endif
4672

4773

0 commit comments

Comments
 (0)