From d89adac2422ea7c0ab5122af9cfc4debf5a371b2 Mon Sep 17 00:00:00 2001 From: Alfonso Gregory <83477269+gAlfonso-bit@users.noreply.github.com> Date: Mon, 19 Jul 2021 17:09:58 -0400 Subject: [PATCH 1/4] Update cxa_thread_atexit.cpp --- libcxxabi/src/cxa_thread_atexit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxxabi/src/cxa_thread_atexit.cpp b/libcxxabi/src/cxa_thread_atexit.cpp index a940eaf2f9cc3..4b9887028f33b 100644 --- a/libcxxabi/src/cxa_thread_atexit.cpp +++ b/libcxxabi/src/cxa_thread_atexit.cpp @@ -15,7 +15,7 @@ #endif #endif -#include +#include namespace __cxxabiv1 { From 11fc84d63e22b1cd385f63534699470b7623cda7 Mon Sep 17 00:00:00 2001 From: Alfonso Gregory <83477269+gAlfonso-bit@users.noreply.github.com> Date: Mon, 19 Jul 2021 17:11:58 -0400 Subject: [PATCH 2/4] Make more MathExtras constexpr Additionally, use more intrinsics that the compilers support! --- llvm/include/llvm/Support/MathExtras.h | 100 +++++++++++++++---------- 1 file changed, 61 insertions(+), 39 deletions(-) diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 753b1998c40c0..949304a353919 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -27,15 +27,7 @@ #endif #ifdef _MSC_VER -// Declare these intrinsics manually rather including intrin.h. It's very -// expensive, and MathExtras.h is popular. -// #include -extern "C" { -unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); -unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); -unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); -unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); -} +#include #endif namespace llvm { @@ -88,12 +80,12 @@ constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A namespace detail { template struct TrailingZerosCounter { - static unsigned count(T Val, ZeroBehavior) { + constexpr static unsigned count(T Val, ZeroBehavior) { if (!Val) - return std::numeric_limits::digits; + return std::numeric_limits::digits; if (Val & 0x1) - return 0; - + return 0; + // Bisection method. unsigned ZeroBits = 0; T Shift = std::numeric_limits::digits >> 1; @@ -112,32 +104,46 @@ template struct TrailingZerosCounter { #if defined(__GNUC__) || defined(_MSC_VER) template struct TrailingZerosCounter { - static unsigned count(T Val, ZeroBehavior ZB) { - if (ZB != ZB_Undefined && Val == 0) - return 32; + constexpr static unsigned count(T Val, ZeroBehavior ZB) { #if __has_builtin(__builtin_ctz) || defined(__GNUC__) return __builtin_ctz(Val); #elif defined(_MSC_VER) - unsigned long Index; + #if defined(_M_AMD64) + return __tzcnt(Val); + #elif defined(_M_ARM) || defined(_M_ARM64) + return _CountTrailingZeros(Val); + #else + constexpr unsigned long Index = 0; _BitScanForward(&Index, Val); return Index; + #endif +#else + #error unsupported architecture #endif } }; #if !defined(_MSC_VER) || defined(_M_X64) template struct TrailingZerosCounter { - static unsigned count(T Val, ZeroBehavior ZB) { + constexpr static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 64; #if __has_builtin(__builtin_ctzll) || defined(__GNUC__) return __builtin_ctzll(Val); #elif defined(_MSC_VER) - unsigned long Index; + #if defined(_M_AMD64) + return __tzcnt(Val); + #elif defined(_M_ARM) || defined(_M_ARM64) + return _CountTrailingZeros64(Val); + #else + constexpr unsigned long Index = 0; _BitScanForward64(&Index, Val); return Index; + #endif +#else + #error unsupported architecture #endif } }; @@ -153,7 +159,7 @@ template struct TrailingZerosCounter { /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are /// valid arguments. template -unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { +constexpr unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); @@ -162,9 +168,9 @@ unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { namespace detail { template struct LeadingZerosCounter { - static unsigned count(T Val, ZeroBehavior) { + constexpr static unsigned count(T Val, ZeroBehavior) { if (!Val) - return std::numeric_limits::digits; + return std::numeric_limits::digits; // Bisection method. unsigned ZeroBits = 0; @@ -181,32 +187,48 @@ template struct LeadingZerosCounter { #if defined(__GNUC__) || defined(_MSC_VER) template struct LeadingZerosCounter { - static unsigned count(T Val, ZeroBehavior ZB) { + constexpr static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 32; #if __has_builtin(__builtin_clz) || defined(__GNUC__) return __builtin_clz(Val); #elif defined(_MSC_VER) - unsigned long Index; + #if defined(_M_AMD64) + return __lzcnt(Val); + #elif defined(_M_ARM) || defined(_M_ARM64) + return _CountLeadingZeros(Val); + #else + constexpr unsigned long Index = 0; _BitScanReverse(&Index, Val); return Index ^ 31; + #endif + #else + #error unsupported architecture #endif } }; #if !defined(_MSC_VER) || defined(_M_X64) template struct LeadingZerosCounter { - static unsigned count(T Val, ZeroBehavior ZB) { + constexpr static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 64; #if __has_builtin(__builtin_clzll) || defined(__GNUC__) return __builtin_clzll(Val); #elif defined(_MSC_VER) - unsigned long Index; - _BitScanReverse64(&Index, Val); - return Index ^ 63; + #if defined(_M_AMD64) + return __lzcnt(Val); + #elif defined(_M_ARM) || defined(_M_ARM64) + return _CountLeadingZeros64(Val); + #else + constexpr unsigned long Index = 0 + _BitScanReverse(&Index, Val); + return Index ^ 31; + #endif + #else + #error unsupported architecture #endif } }; @@ -222,7 +244,7 @@ template struct LeadingZerosCounter { /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are /// valid arguments. template -unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { +constexpr unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); @@ -236,7 +258,7 @@ unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { /// /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are /// valid arguments. -template T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { +constexpr template T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { if (ZB == ZB_Max && Val == 0) return std::numeric_limits::max(); @@ -245,7 +267,7 @@ template T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { /// Create a bitmask with the N right-most bits set to 1, and all other /// bits set to 0. Only unsigned types are allowed. -template T maskTrailingOnes(unsigned N) { +constexpr template T maskTrailingOnes(unsigned N) { static_assert(std::is_unsigned::value, "Invalid type!"); const unsigned Bits = CHAR_BIT * sizeof(T); assert(N <= Bits && "Invalid bit index"); @@ -254,19 +276,19 @@ template T maskTrailingOnes(unsigned N) { /// Create a bitmask with the N left-most bits set to 1, and all other /// bits set to 0. Only unsigned types are allowed. -template T maskLeadingOnes(unsigned N) { +constexpr template T maskLeadingOnes(unsigned N) { return ~maskTrailingOnes(CHAR_BIT * sizeof(T) - N); } /// Create a bitmask with the N right-most bits set to 0, and all other /// bits set to 1. Only unsigned types are allowed. -template T maskTrailingZeros(unsigned N) { +constexpr template T maskTrailingZeros(unsigned N) { return maskLeadingOnes(CHAR_BIT * sizeof(T) - N); } /// Create a bitmask with the N left-most bits set to 0, and all other /// bits set to 1. Only unsigned types are allowed. -template T maskLeadingZeros(unsigned N) { +constexpr template T maskLeadingZeros(unsigned N) { return maskTrailingOnes(CHAR_BIT * sizeof(T) - N); } @@ -277,7 +299,7 @@ template T maskLeadingZeros(unsigned N) { /// /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are /// valid arguments. -template T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { +constexpr template T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { if (ZB == ZB_Max && Val == 0) return std::numeric_limits::max(); @@ -314,28 +336,28 @@ T reverseBits(T Val) { #if __has_builtin(__builtin_bitreverse8) template<> -inline uint8_t reverseBits(uint8_t Val) { +constexpr inline uint8_t reverseBits(uint8_t Val) { return __builtin_bitreverse8(Val); } #endif #if __has_builtin(__builtin_bitreverse16) template<> -inline uint16_t reverseBits(uint16_t Val) { +constexpr inline uint16_t reverseBits(uint16_t Val) { return __builtin_bitreverse16(Val); } #endif #if __has_builtin(__builtin_bitreverse32) template<> -inline uint32_t reverseBits(uint32_t Val) { +constexpr inline uint32_t reverseBits(uint32_t Val) { return __builtin_bitreverse32(Val); } #endif #if __has_builtin(__builtin_bitreverse64) template<> -inline uint64_t reverseBits(uint64_t Val) { +constexpr inline uint64_t reverseBits(uint64_t Val) { return __builtin_bitreverse64(Val); } #endif From 1aa26992dbff2969d58c4b6823d82f12fb6909a2 Mon Sep 17 00:00:00 2001 From: Alfonso Gregory <83477269+gAlfonso-bit@users.noreply.github.com> Date: Mon, 19 Jul 2021 17:16:38 -0400 Subject: [PATCH 3/4] Update MathExtras.h --- llvm/include/llvm/Support/MathExtras.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index 949304a353919..8b7abbd4a589b 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -84,8 +84,8 @@ template struct TrailingZerosCounter { if (!Val) return std::numeric_limits::digits; if (Val & 0x1) - return 0; - + return 0; + // Bisection method. unsigned ZeroBits = 0; T Shift = std::numeric_limits::digits >> 1; From c9f119f3e4f6d7695d9e943326c181cfc3f7d7f1 Mon Sep 17 00:00:00 2001 From: Alfonso Gregory <83477269+gAlfonso-bit@users.noreply.github.com> Date: Mon, 19 Jul 2021 17:17:12 -0400 Subject: [PATCH 4/4] Update cxa_thread_atexit.cpp --- libcxxabi/src/cxa_thread_atexit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxxabi/src/cxa_thread_atexit.cpp b/libcxxabi/src/cxa_thread_atexit.cpp index 4b9887028f33b..a940eaf2f9cc3 100644 --- a/libcxxabi/src/cxa_thread_atexit.cpp +++ b/libcxxabi/src/cxa_thread_atexit.cpp @@ -15,7 +15,7 @@ #endif #endif -#include +#include namespace __cxxabiv1 {