Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:03

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cstdint>
0021 #include <cstring>
0022 
0023 #include "arrow/util/bit_util.h"
0024 #include "arrow/util/endian.h"
0025 #include "arrow/util/macros.h"
0026 
0027 namespace arrow {
0028 namespace internal {
0029 
0030 class BitmapWriter {
0031   // A sequential bitwise writer that preserves surrounding bit values.
0032 
0033  public:
0034   BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
0035       : bitmap_(bitmap), position_(0), length_(length) {
0036     byte_offset_ = start_offset / 8;
0037     bit_mask_ = bit_util::kBitmask[start_offset % 8];
0038     if (length > 0) {
0039       current_byte_ = bitmap[byte_offset_];
0040     } else {
0041       current_byte_ = 0;
0042     }
0043   }
0044 
0045   void Set() { current_byte_ |= bit_mask_; }
0046 
0047   void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
0048 
0049   void Next() {
0050     bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
0051     ++position_;
0052     if (bit_mask_ == 0) {
0053       // Finished this byte, need advancing
0054       bit_mask_ = 0x01;
0055       bitmap_[byte_offset_++] = current_byte_;
0056       if (ARROW_PREDICT_TRUE(position_ < length_)) {
0057         current_byte_ = bitmap_[byte_offset_];
0058       }
0059     }
0060   }
0061 
0062   void Finish() {
0063     // Store current byte if we didn't went past bitmap storage
0064     if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
0065       bitmap_[byte_offset_] = current_byte_;
0066     }
0067   }
0068 
0069   int64_t position() const { return position_; }
0070 
0071  private:
0072   uint8_t* bitmap_;
0073   int64_t position_;
0074   int64_t length_;
0075 
0076   uint8_t current_byte_;
0077   uint8_t bit_mask_;
0078   int64_t byte_offset_;
0079 };
0080 
0081 class FirstTimeBitmapWriter {
0082   // Like BitmapWriter, but any bit values *following* the bits written
0083   // might be clobbered.  It is hence faster than BitmapWriter, and can
0084   // also avoid false positives with Valgrind.
0085 
0086  public:
0087   FirstTimeBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
0088       : bitmap_(bitmap), position_(0), length_(length) {
0089     current_byte_ = 0;
0090     byte_offset_ = start_offset / 8;
0091     bit_mask_ = bit_util::kBitmask[start_offset % 8];
0092     if (length > 0) {
0093       current_byte_ =
0094           bitmap[byte_offset_] & bit_util::kPrecedingBitmask[start_offset % 8];
0095     } else {
0096       current_byte_ = 0;
0097     }
0098   }
0099 
0100   /// Appends number_of_bits from word to valid_bits and valid_bits_offset.
0101   ///
0102   /// \param[in] word The LSB bitmap to append. Any bits past number_of_bits are assumed
0103   ///            to be unset (i.e. 0).
0104   /// \param[in] number_of_bits The number of bits to append from word.
0105   void AppendWord(uint64_t word, int64_t number_of_bits) {
0106     if (ARROW_PREDICT_FALSE(number_of_bits == 0)) {
0107       return;
0108     }
0109 
0110     // Location that the first byte needs to be written to.
0111     uint8_t* append_position = bitmap_ + byte_offset_;
0112 
0113     // Update state variables except for current_byte_ here.
0114     position_ += number_of_bits;
0115     int64_t bit_offset = bit_util::CountTrailingZeros(static_cast<uint32_t>(bit_mask_));
0116     bit_mask_ = bit_util::kBitmask[(bit_offset + number_of_bits) % 8];
0117     byte_offset_ += (bit_offset + number_of_bits) / 8;
0118 
0119     if (bit_offset != 0) {
0120       // We are in the middle of the byte. This code updates the byte and shifts
0121       // bits appropriately within word so it can be memcpy'd below.
0122       int64_t bits_to_carry = 8 - bit_offset;
0123       // Carry over bits from word to current_byte_. We assume any extra bits in word
0124       // unset so no additional accounting is needed for when number_of_bits <
0125       // bits_to_carry.
0126       current_byte_ |= (word & bit_util::kPrecedingBitmask[bits_to_carry]) << bit_offset;
0127       // Check if everything is transferred into current_byte_.
0128       if (ARROW_PREDICT_FALSE(number_of_bits < bits_to_carry)) {
0129         return;
0130       }
0131       *append_position = current_byte_;
0132       append_position++;
0133       // Move the carry bits off of word.
0134       word = word >> bits_to_carry;
0135       number_of_bits -= bits_to_carry;
0136     }
0137     word = bit_util::ToLittleEndian(word);
0138     int64_t bytes_for_word = ::arrow::bit_util::BytesForBits(number_of_bits);
0139     std::memcpy(append_position, &word, bytes_for_word);
0140     // At this point, the previous current_byte_ has been written to bitmap_.
0141     // The new current_byte_ is either the last relevant byte in 'word'
0142     // or cleared if the new position is byte aligned (i.e. a fresh byte).
0143     if (bit_mask_ == 0x1) {
0144       current_byte_ = 0;
0145     } else {
0146       current_byte_ = *(append_position + bytes_for_word - 1);
0147     }
0148   }
0149 
0150   void Set() { current_byte_ |= bit_mask_; }
0151 
0152   void Clear() {}
0153 
0154   void Next() {
0155     bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
0156     ++position_;
0157     if (bit_mask_ == 0) {
0158       // Finished this byte, need advancing
0159       bit_mask_ = 0x01;
0160       bitmap_[byte_offset_++] = current_byte_;
0161       current_byte_ = 0;
0162     }
0163   }
0164 
0165   void Finish() {
0166     // Store current byte if we didn't went go bitmap storage
0167     if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
0168       bitmap_[byte_offset_] = current_byte_;
0169     }
0170   }
0171 
0172   int64_t position() const { return position_; }
0173 
0174  private:
0175   uint8_t* bitmap_;
0176   int64_t position_;
0177   int64_t length_;
0178 
0179   uint8_t current_byte_;
0180   uint8_t bit_mask_;
0181   int64_t byte_offset_;
0182 };
0183 
0184 template <typename Word, bool may_have_byte_offset = true>
0185 class BitmapWordWriter {
0186  public:
0187   BitmapWordWriter() = default;
0188   BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length)
0189       : offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
0190         bitmap_(bitmap + offset / 8),
0191         bitmap_end_(bitmap_ + bit_util::BytesForBits(offset_ + length)),
0192         mask_((1U << offset_) - 1) {
0193     if (offset_) {
0194       if (length >= static_cast<int>(sizeof(Word) * 8)) {
0195         current_data.word_ = load<Word>(bitmap_);
0196       } else if (length > 0) {
0197         current_data.epi.byte_ = load<uint8_t>(bitmap_);
0198       }
0199     }
0200   }
0201 
0202   void PutNextWord(Word word) {
0203     if (may_have_byte_offset && offset_) {
0204       // split one word into two adjacent words, don't touch unused bits
0205       //               |<------ word ----->|
0206       //               +-----+-------------+
0207       //               |  A  |      B      |
0208       //               +-----+-------------+
0209       //                  |         |
0210       //                  v         v       offset
0211       // +-------------+-----+-------------+-----+
0212       // |     ---     |  A  |      B      | --- |
0213       // +-------------+-----+-------------+-----+
0214       // |<------ next ----->|<---- current ---->|
0215       word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_));
0216       Word next_word = load<Word>(bitmap_ + sizeof(Word));
0217       current_data.word_ = (current_data.word_ & mask_) | (word & ~mask_);
0218       next_word = (next_word & ~mask_) | (word & mask_);
0219       store<Word>(bitmap_, current_data.word_);
0220       store<Word>(bitmap_ + sizeof(Word), next_word);
0221       current_data.word_ = next_word;
0222     } else {
0223       store<Word>(bitmap_, word);
0224     }
0225     bitmap_ += sizeof(Word);
0226   }
0227 
0228   void PutNextTrailingByte(uint8_t byte, int valid_bits) {
0229     if (valid_bits == 8) {
0230       if (may_have_byte_offset && offset_) {
0231         byte = (byte << offset_) | (byte >> (8 - offset_));
0232         uint8_t next_byte = load<uint8_t>(bitmap_ + 1);
0233         current_data.epi.byte_ = (current_data.epi.byte_ & mask_) | (byte & ~mask_);
0234         next_byte = (next_byte & ~mask_) | (byte & mask_);
0235         store<uint8_t>(bitmap_, current_data.epi.byte_);
0236         store<uint8_t>(bitmap_ + 1, next_byte);
0237         current_data.epi.byte_ = next_byte;
0238       } else {
0239         store<uint8_t>(bitmap_, byte);
0240       }
0241       ++bitmap_;
0242     } else {
0243       assert(valid_bits > 0);
0244       assert(valid_bits < 8);
0245       assert(bitmap_ + bit_util::BytesForBits(offset_ + valid_bits) <= bitmap_end_);
0246       internal::BitmapWriter writer(bitmap_, offset_, valid_bits);
0247       for (int i = 0; i < valid_bits; ++i) {
0248         (byte & 0x01) ? writer.Set() : writer.Clear();
0249         writer.Next();
0250         byte >>= 1;
0251       }
0252       writer.Finish();
0253     }
0254   }
0255 
0256  private:
0257   int64_t offset_;
0258   uint8_t* bitmap_;
0259 
0260   const uint8_t* bitmap_end_;
0261   uint64_t mask_;
0262   union {
0263     Word word_;
0264     struct {
0265 #if ARROW_LITTLE_ENDIAN == 0
0266       uint8_t padding_bytes_[sizeof(Word) - 1];
0267 #endif
0268       uint8_t byte_;
0269     } epi;
0270   } current_data;
0271 
0272   template <typename DType>
0273   DType load(const uint8_t* bitmap) {
0274     assert(bitmap + sizeof(DType) <= bitmap_end_);
0275     return bit_util::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
0276   }
0277 
0278   template <typename DType>
0279   void store(uint8_t* bitmap, DType data) {
0280     assert(bitmap + sizeof(DType) <= bitmap_end_);
0281     util::SafeStore(bitmap, bit_util::FromLittleEndian(data));
0282   }
0283 };
0284 
0285 }  // namespace internal
0286 }  // namespace arrow