arrow/util/bit_util.h

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017
0018 #pragma once
0019
0020 #if defined(_MSC_VER)
0021 #  if defined(_M_AMD64) || defined(_M_X64)
0022 #    include <intrin.h>  // IWYU pragma: keep
0023 #  endif
0024
0025 #  pragma intrinsic(_BitScanReverse)
0026 #  pragma intrinsic(_BitScanForward)
0027 #  define ARROW_POPCOUNT64 __popcnt64
0028 #  define ARROW_POPCOUNT32 __popcnt
0029 #else
0030 #  define ARROW_POPCOUNT64 __builtin_popcountll
0031 #  define ARROW_POPCOUNT32 __builtin_popcount
0032 #endif
0033
0034 #include <cstdint>
0035 #include <type_traits>
0036
0037 #include "arrow/util/macros.h"
0038 #include "arrow/util/visibility.h"
0039
0040 namespace arrow {
0041 namespace detail {
0042
0043 template <typename Integer>
0044 typename std::make_unsigned<Integer>::type as_unsigned(Integer x) {
0045   return static_cast<typename std::make_unsigned<Integer>::type>(x);
0046 }
0047
0048 }  // namespace detail
0049
0050 namespace bit_util {
0051
0052 // The number of set bits in a given unsigned byte value, pre-computed
0053 //
0054 // Generated with the following Python code
0055 // output = 'static constexpr uint8_t kBytePopcount[] = {{{0}}};'
0056 // popcounts = [str(bin(i).count('1')) for i in range(0, 256)]
0057 // print(output.format(', '.join(popcounts)))
0058 static constexpr uint8_t kBytePopcount[] = {
0059     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3,
0060     4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4,
0061     4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4,
0062     5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,
0063     4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2,
0064     3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,
0065     5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4,
0066     5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6,
0067     4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
0068
0069 static inline uint64_t PopCount(uint64_t bitmap) { return ARROW_POPCOUNT64(bitmap); }
0070 static inline uint32_t PopCount(uint32_t bitmap) { return ARROW_POPCOUNT32(bitmap); }
0071
0072 //
0073 // Bit-related computations on integer values
0074 //
0075
0076 // Returns the ceil of value/divisor
0077 constexpr int64_t CeilDiv(int64_t value, int64_t divisor) {
0078   return (value == 0) ? 0 : 1 + (value - 1) / divisor;
0079 }
0080
0081 // Return the number of bytes needed to fit the given number of bits
0082 constexpr int64_t BytesForBits(int64_t bits) {
0083   // This formula avoids integer overflow on very large `bits`
0084   return (bits >> 3) + ((bits & 7) != 0);
0085 }
0086
0087 constexpr bool IsPowerOf2(int64_t value) {
0088   return value > 0 && (value & (value - 1)) == 0;
0089 }
0090
0091 constexpr bool IsPowerOf2(uint64_t value) {
0092   return value > 0 && (value & (value - 1)) == 0;
0093 }
0094
0095 // Returns the smallest power of two that contains v.  If v is already a
0096 // power of two, it is returned as is.
0097 static inline int64_t NextPower2(int64_t n) {
0098   // Taken from
0099   // http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
0100   n--;
0101   n |= n >> 1;
0102   n |= n >> 2;
0103   n |= n >> 4;
0104   n |= n >> 8;
0105   n |= n >> 16;
0106   n |= n >> 32;
0107   n++;
0108   return n;
0109 }
0110
0111 constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; }
0112
0113 constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; }
0114
0115 // Returns a mask for the bit_index lower order bits.
0116 // Only valid for bit_index in the range [0, 64).
0117 constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
0118   return (static_cast<uint64_t>(1) << bit_index) - 1;
0119 }
0120
0121 // Returns 'value' rounded up to the nearest multiple of 'factor'
0122 constexpr int64_t RoundUp(int64_t value, int64_t factor) {
0123   return CeilDiv(value, factor) * factor;
0124 }
0125
0126 // Returns 'value' rounded down to the nearest multiple of 'factor'
0127 constexpr int64_t RoundDown(int64_t value, int64_t factor) {
0128   return (value / factor) * factor;
0129 }
0130
0131 // Returns 'value' rounded up to the nearest multiple of 'factor' when factor
0132 // is a power of two.
0133 // The result is undefined on overflow, i.e. if `value > 2**64 - factor`,
0134 // since we cannot return the correct result which would be 2**64.
0135 constexpr int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
0136   // DCHECK(value >= 0);
0137   // DCHECK(IsPowerOf2(factor));
0138   return (value + (factor - 1)) & ~(factor - 1);
0139 }
0140
0141 constexpr uint64_t RoundUpToPowerOf2(uint64_t value, uint64_t factor) {
0142   // DCHECK(IsPowerOf2(factor));
0143   return (value + (factor - 1)) & ~(factor - 1);
0144 }
0145
0146 constexpr int64_t RoundUpToMultipleOf8(int64_t num) { return RoundUpToPowerOf2(num, 8); }
0147
0148 constexpr int64_t RoundUpToMultipleOf64(int64_t num) {
0149   return RoundUpToPowerOf2(num, 64);
0150 }
0151
0152 // Returns the number of bytes covering a sliced bitmap. Find the length
0153 // rounded to cover full bytes on both extremities.
0154 //
0155 // The following example represents a slice (offset=10, length=9)
0156 //
0157 // 0       8       16     24
0158 // |-------|-------|------|
0159 //           [       ]          (slice)
0160 //         [             ]      (same slice aligned to bytes bounds, length=16)
0161 //
0162 // The covering bytes is the length (in bytes) of this new aligned slice.
0163 constexpr int64_t CoveringBytes(int64_t offset, int64_t length) {
0164   return (bit_util::RoundUp(length + offset, 8) - bit_util::RoundDown(offset, 8)) / 8;
0165 }
0166
0167 // Returns the 'num_bits' least-significant bits of 'v'.
0168 static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
0169   if (ARROW_PREDICT_FALSE(num_bits == 0)) return 0;
0170   if (ARROW_PREDICT_FALSE(num_bits >= 64)) return v;
0171   int n = 64 - num_bits;
0172   return (v << n) >> n;
0173 }
0174
0175 /// \brief Count the number of leading zeros in an unsigned integer.
0176 static inline int CountLeadingZeros(uint32_t value) {
0177 #if defined(__clang__) || defined(__GNUC__)
0178   if (value == 0) return 32;
0179   return static_cast<int>(__builtin_clz(value));
0180 #elif defined(_MSC_VER)
0181   unsigned long index;                                               // NOLINT
0182   if (_BitScanReverse(&index, static_cast<unsigned long>(value))) {  // NOLINT
0183     return 31 - static_cast<int>(index);
0184   } else {
0185     return 32;
0186   }
0187 #else
0188   int bitpos = 0;
0189   while (value != 0) {
0190     value >>= 1;
0191     ++bitpos;
0192   }
0193   return 32 - bitpos;
0194 #endif
0195 }
0196
0197 static inline int CountLeadingZeros(uint64_t value) {
0198 #if defined(__clang__) || defined(__GNUC__)
0199   if (value == 0) return 64;
0200   return static_cast<int>(__builtin_clzll(value));
0201 #elif defined(_MSC_VER)
0202   unsigned long index;                     // NOLINT
0203   if (_BitScanReverse64(&index, value)) {  // NOLINT
0204     return 63 - static_cast<int>(index);
0205   } else {
0206     return 64;
0207   }
0208 #else
0209   int bitpos = 0;
0210   while (value != 0) {
0211     value >>= 1;
0212     ++bitpos;
0213   }
0214   return 64 - bitpos;
0215 #endif
0216 }
0217
0218 static inline int CountTrailingZeros(uint32_t value) {
0219 #if defined(__clang__) || defined(__GNUC__)
0220   if (value == 0) return 32;
0221   return static_cast<int>(__builtin_ctzl(value));
0222 #elif defined(_MSC_VER)
0223   unsigned long index;  // NOLINT
0224   if (_BitScanForward(&index, value)) {
0225     return static_cast<int>(index);
0226   } else {
0227     return 32;
0228   }
0229 #else
0230   int bitpos = 0;
0231   if (value) {
0232     while (value & 1 == 0) {
0233       value >>= 1;
0234       ++bitpos;
0235     }
0236   } else {
0237     bitpos = 32;
0238   }
0239   return bitpos;
0240 #endif
0241 }
0242
0243 static inline int CountTrailingZeros(uint64_t value) {
0244 #if defined(__clang__) || defined(__GNUC__)
0245   if (value == 0) return 64;
0246   return static_cast<int>(__builtin_ctzll(value));
0247 #elif defined(_MSC_VER)
0248   unsigned long index;  // NOLINT
0249   if (_BitScanForward64(&index, value)) {
0250     return static_cast<int>(index);
0251   } else {
0252     return 64;
0253   }
0254 #else
0255   int bitpos = 0;
0256   if (value) {
0257     while (value & 1 == 0) {
0258       value >>= 1;
0259       ++bitpos;
0260     }
0261   } else {
0262     bitpos = 64;
0263   }
0264   return bitpos;
0265 #endif
0266 }
0267
0268 // Returns the minimum number of bits needed to represent an unsigned value
0269 static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); }
0270
0271 // Returns ceil(log2(x)).
0272 static inline int Log2(uint64_t x) {
0273   // DCHECK_GT(x, 0);
0274   return NumRequiredBits(x - 1);
0275 }
0276
0277 //
0278 // Utilities for reading and writing individual bits by their index
0279 // in a memory area.
0280 //
0281
0282 // Bitmask selecting the k-th bit in a byte
0283 static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
0284
0285 // the bitwise complement version of kBitmask
0286 static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
0287
0288 // Bitmask selecting the (k - 1) preceding bits in a byte
0289 static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
0290 static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63, 127};
0291
0292 // the bitwise complement version of kPrecedingBitmask
0293 static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
0294
0295 static constexpr bool GetBit(const uint8_t* bits, uint64_t i) {
0296   return (bits[i >> 3] >> (i & 0x07)) & 1;
0297 }
0298
0299 // Gets the i-th bit from a byte. Should only be used with i <= 7.
0300 static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
0301   return byte & kBitmask[i];
0302 }
0303
0304 static inline void ClearBit(uint8_t* bits, int64_t i) {
0305   bits[i / 8] &= kFlippedBitmask[i % 8];
0306 }
0307
0308 static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |= kBitmask[i % 8]; }
0309
0310 static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
0311   // https://graphics.stanford.edu/~seander/bithacks.html
0312   // "Conditionally set or clear bits without branching"
0313   // NOTE: this seems to confuse Valgrind as it reads from potentially
0314   // uninitialized memory
0315   bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) &
0316                  kBitmask[i % 8];
0317 }
0318
0319 /// \brief set or clear a range of bits quickly
0320 ARROW_EXPORT
0321 void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);
0322
0323 /// \brief Sets all bits in the bitmap to true
0324 ARROW_EXPORT
0325 void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
0326
0327 /// \brief Clears all bits in the bitmap (set to false)
0328 ARROW_EXPORT
0329 void ClearBitmap(uint8_t* data, int64_t offset, int64_t length);
0330
0331 /// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8, all-ones will be
0332 /// returned
0333 /// ex:
0334 /// ref: https://stackoverflow.com/a/59523400
0335 template <typename Word>
0336 constexpr Word PrecedingWordBitmask(unsigned int const i) {
0337   return static_cast<Word>(static_cast<Word>(i < sizeof(Word) * 8)
0338                            << (i & (sizeof(Word) * 8 - 1))) -
0339          1;
0340 }
0341 static_assert(PrecedingWordBitmask<uint8_t>(0) == 0x00, "");
0342 static_assert(PrecedingWordBitmask<uint8_t>(4) == 0x0f, "");
0343 static_assert(PrecedingWordBitmask<uint8_t>(8) == 0xff, "");
0344 static_assert(PrecedingWordBitmask<uint16_t>(8) == 0x00ff, "");
0345
0346 /// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits
0347 /// from `high`.
0348 /// Word ret
0349 /// for (i = 0; i < sizeof(Word)*8; i++){
0350 ///     ret[i]= i < n ? low[i]: high[i];
0351 /// }
0352 template <typename Word>
0353 constexpr Word SpliceWord(int n, Word low, Word high) {
0354   return (high & ~PrecedingWordBitmask<Word>(n)) | (low & PrecedingWordBitmask<Word>(n));
0355 }
0356
0357 /// \brief Pack integers into a bitmap in batches of 8
0358 template <int batch_size>
0359 void PackBits(const uint32_t* values, uint8_t* out) {
0360   for (int i = 0; i < batch_size / 8; ++i) {
0361     *out++ = static_cast<uint8_t>(values[0] | values[1] << 1 | values[2] << 2 |
0362                                   values[3] << 3 | values[4] << 4 | values[5] << 5 |
0363                                   values[6] << 6 | values[7] << 7);
0364     values += 8;
0365   }
0366 }
0367
0368 }  // namespace bit_util
0369 }  // namespace arrow