Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:06:42

0001 /* Bit and bytes utilities.
0002 
0003    Bytes swap functions, reverse order of bytes:
0004 
0005    - _Py_bswap16(uint16_t)
0006    - _Py_bswap32(uint32_t)
0007    - _Py_bswap64(uint64_t)
0008 */
0009 
0010 #ifndef Py_INTERNAL_BITUTILS_H
0011 #define Py_INTERNAL_BITUTILS_H
0012 #ifdef __cplusplus
0013 extern "C" {
0014 #endif
0015 
0016 #ifndef Py_BUILD_CORE
0017 #  error "this header requires Py_BUILD_CORE define"
0018 #endif
0019 
0020 #if defined(__GNUC__) \
0021       && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))
0022    /* __builtin_bswap16() is available since GCC 4.8,
0023       __builtin_bswap32() is available since GCC 4.3,
0024       __builtin_bswap64() is available since GCC 4.3. */
0025 #  define _PY_HAVE_BUILTIN_BSWAP
0026 #endif
0027 
0028 #ifdef _MSC_VER
0029    /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
0030 #  include <intrin.h>
0031 #endif
0032 
0033 static inline uint16_t
0034 _Py_bswap16(uint16_t word)
0035 {
0036 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
0037     return __builtin_bswap16(word);
0038 #elif defined(_MSC_VER)
0039     Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
0040     return _byteswap_ushort(word);
0041 #else
0042     // Portable implementation which doesn't rely on circular bit shift
0043     return ( ((word & UINT16_C(0x00FF)) << 8)
0044            | ((word & UINT16_C(0xFF00)) >> 8));
0045 #endif
0046 }
0047 
0048 static inline uint32_t
0049 _Py_bswap32(uint32_t word)
0050 {
0051 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
0052     return __builtin_bswap32(word);
0053 #elif defined(_MSC_VER)
0054     Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
0055     return _byteswap_ulong(word);
0056 #else
0057     // Portable implementation which doesn't rely on circular bit shift
0058     return ( ((word & UINT32_C(0x000000FF)) << 24)
0059            | ((word & UINT32_C(0x0000FF00)) <<  8)
0060            | ((word & UINT32_C(0x00FF0000)) >>  8)
0061            | ((word & UINT32_C(0xFF000000)) >> 24));
0062 #endif
0063 }
0064 
0065 static inline uint64_t
0066 _Py_bswap64(uint64_t word)
0067 {
0068 #if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
0069     return __builtin_bswap64(word);
0070 #elif defined(_MSC_VER)
0071     return _byteswap_uint64(word);
0072 #else
0073     // Portable implementation which doesn't rely on circular bit shift
0074     return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
0075            | ((word & UINT64_C(0x000000000000FF00)) << 40)
0076            | ((word & UINT64_C(0x0000000000FF0000)) << 24)
0077            | ((word & UINT64_C(0x00000000FF000000)) <<  8)
0078            | ((word & UINT64_C(0x000000FF00000000)) >>  8)
0079            | ((word & UINT64_C(0x0000FF0000000000)) >> 24)
0080            | ((word & UINT64_C(0x00FF000000000000)) >> 40)
0081            | ((word & UINT64_C(0xFF00000000000000)) >> 56));
0082 #endif
0083 }
0084 
0085 
0086 // Population count: count the number of 1's in 'x'
0087 // (number of bits set to 1), also known as the hamming weight.
0088 //
0089 // Implementation note. CPUID is not used, to test if x86 POPCNT instruction
0090 // can be used, to keep the implementation simple. For example, Visual Studio
0091 // __popcnt() is not used this reason. The clang and GCC builtin function can
0092 // use the x86 POPCNT instruction if the target architecture has SSE4a or
0093 // newer.
0094 static inline int
0095 _Py_popcount32(uint32_t x)
0096 {
0097 #if (defined(__clang__) || defined(__GNUC__))
0098 
0099 #if SIZEOF_INT >= 4
0100     Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
0101     return __builtin_popcount(x);
0102 #else
0103     // The C standard guarantees that unsigned long will always be big enough
0104     // to hold a uint32_t value without losing information.
0105     Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
0106     return __builtin_popcountl(x);
0107 #endif
0108 
0109 #else
0110     // 32-bit SWAR (SIMD Within A Register) popcount
0111 
0112     // Binary: 0 1 0 1 ...
0113     const uint32_t M1 = 0x55555555;
0114     // Binary: 00 11 00 11. ..
0115     const uint32_t M2 = 0x33333333;
0116     // Binary: 0000 1111 0000 1111 ...
0117     const uint32_t M4 = 0x0F0F0F0F;
0118 
0119     // Put count of each 2 bits into those 2 bits
0120     x = x - ((x >> 1) & M1);
0121     // Put count of each 4 bits into those 4 bits
0122     x = (x & M2) + ((x >> 2) & M2);
0123     // Put count of each 8 bits into those 8 bits
0124     x = (x + (x >> 4)) & M4;
0125     // Sum of the 4 byte counts.
0126     // Take care when considering changes to the next line. Portability and
0127     // correctness are delicate here, thanks to C's "integer promotions" (C99
0128     // §6.3.1.1p2). On machines where the `int` type has width greater than 32
0129     // bits, `x` will be promoted to an `int`, and following C's "usual
0130     // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be
0131     // performed as a multiplication of two `unsigned int` operands. In this
0132     // case it's critical that we cast back to `uint32_t` in order to keep only
0133     // the least significant 32 bits. On machines where the `int` type has
0134     // width no greater than 32, the multiplication is of two 32-bit unsigned
0135     // integer types, and the (uint32_t) cast is a no-op. In both cases, we
0136     // avoid the risk of undefined behaviour due to overflow of a
0137     // multiplication of signed integer types.
0138     return (uint32_t)(x * 0x01010101U) >> 24;
0139 #endif
0140 }
0141 
0142 
0143 // Return the index of the most significant 1 bit in 'x'. This is the smallest
0144 // integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
0145 static inline int
0146 _Py_bit_length(unsigned long x)
0147 {
0148 #if (defined(__clang__) || defined(__GNUC__))
0149     if (x != 0) {
0150         // __builtin_clzl() is available since GCC 3.4.
0151         // Undefined behavior for x == 0.
0152         return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
0153     }
0154     else {
0155         return 0;
0156     }
0157 #elif defined(_MSC_VER)
0158     // _BitScanReverse() is documented to search 32 bits.
0159     Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
0160     unsigned long msb;
0161     if (_BitScanReverse(&msb, x)) {
0162         return (int)msb + 1;
0163     }
0164     else {
0165         return 0;
0166     }
0167 #else
0168     const int BIT_LENGTH_TABLE[32] = {
0169         0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
0170         5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
0171     };
0172     int msb = 0;
0173     while (x >= 32) {
0174         msb += 6;
0175         x >>= 6;
0176     }
0177     msb += BIT_LENGTH_TABLE[x];
0178     return msb;
0179 #endif
0180 }
0181 
0182 
0183 #ifdef __cplusplus
0184 }
0185 #endif
0186 #endif /* !Py_INTERNAL_BITUTILS_H */