File indexing completed on 2025-08-28 08:27:03
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cstdint>
0021 #include <cstring>
0022
0023 #include "arrow/util/bit_util.h"
0024 #include "arrow/util/endian.h"
0025 #include "arrow/util/macros.h"
0026
0027 namespace arrow {
0028 namespace internal {
0029
0030 class BitmapWriter {
0031
0032
0033 public:
0034 BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
0035 : bitmap_(bitmap), position_(0), length_(length) {
0036 byte_offset_ = start_offset / 8;
0037 bit_mask_ = bit_util::kBitmask[start_offset % 8];
0038 if (length > 0) {
0039 current_byte_ = bitmap[byte_offset_];
0040 } else {
0041 current_byte_ = 0;
0042 }
0043 }
0044
0045 void Set() { current_byte_ |= bit_mask_; }
0046
0047 void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
0048
0049 void Next() {
0050 bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
0051 ++position_;
0052 if (bit_mask_ == 0) {
0053
0054 bit_mask_ = 0x01;
0055 bitmap_[byte_offset_++] = current_byte_;
0056 if (ARROW_PREDICT_TRUE(position_ < length_)) {
0057 current_byte_ = bitmap_[byte_offset_];
0058 }
0059 }
0060 }
0061
0062 void Finish() {
0063
0064 if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
0065 bitmap_[byte_offset_] = current_byte_;
0066 }
0067 }
0068
0069 int64_t position() const { return position_; }
0070
0071 private:
0072 uint8_t* bitmap_;
0073 int64_t position_;
0074 int64_t length_;
0075
0076 uint8_t current_byte_;
0077 uint8_t bit_mask_;
0078 int64_t byte_offset_;
0079 };
0080
0081 class FirstTimeBitmapWriter {
0082
0083
0084
0085
0086 public:
0087 FirstTimeBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
0088 : bitmap_(bitmap), position_(0), length_(length) {
0089 current_byte_ = 0;
0090 byte_offset_ = start_offset / 8;
0091 bit_mask_ = bit_util::kBitmask[start_offset % 8];
0092 if (length > 0) {
0093 current_byte_ =
0094 bitmap[byte_offset_] & bit_util::kPrecedingBitmask[start_offset % 8];
0095 } else {
0096 current_byte_ = 0;
0097 }
0098 }
0099
0100
0101
0102
0103
0104
0105 void AppendWord(uint64_t word, int64_t number_of_bits) {
0106 if (ARROW_PREDICT_FALSE(number_of_bits == 0)) {
0107 return;
0108 }
0109
0110
0111 uint8_t* append_position = bitmap_ + byte_offset_;
0112
0113
0114 position_ += number_of_bits;
0115 int64_t bit_offset = bit_util::CountTrailingZeros(static_cast<uint32_t>(bit_mask_));
0116 bit_mask_ = bit_util::kBitmask[(bit_offset + number_of_bits) % 8];
0117 byte_offset_ += (bit_offset + number_of_bits) / 8;
0118
0119 if (bit_offset != 0) {
0120
0121
0122 int64_t bits_to_carry = 8 - bit_offset;
0123
0124
0125
0126 current_byte_ |= (word & bit_util::kPrecedingBitmask[bits_to_carry]) << bit_offset;
0127
0128 if (ARROW_PREDICT_FALSE(number_of_bits < bits_to_carry)) {
0129 return;
0130 }
0131 *append_position = current_byte_;
0132 append_position++;
0133
0134 word = word >> bits_to_carry;
0135 number_of_bits -= bits_to_carry;
0136 }
0137 word = bit_util::ToLittleEndian(word);
0138 int64_t bytes_for_word = ::arrow::bit_util::BytesForBits(number_of_bits);
0139 std::memcpy(append_position, &word, bytes_for_word);
0140
0141
0142
0143 if (bit_mask_ == 0x1) {
0144 current_byte_ = 0;
0145 } else {
0146 current_byte_ = *(append_position + bytes_for_word - 1);
0147 }
0148 }
0149
0150 void Set() { current_byte_ |= bit_mask_; }
0151
0152 void Clear() {}
0153
0154 void Next() {
0155 bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
0156 ++position_;
0157 if (bit_mask_ == 0) {
0158
0159 bit_mask_ = 0x01;
0160 bitmap_[byte_offset_++] = current_byte_;
0161 current_byte_ = 0;
0162 }
0163 }
0164
0165 void Finish() {
0166
0167 if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
0168 bitmap_[byte_offset_] = current_byte_;
0169 }
0170 }
0171
0172 int64_t position() const { return position_; }
0173
0174 private:
0175 uint8_t* bitmap_;
0176 int64_t position_;
0177 int64_t length_;
0178
0179 uint8_t current_byte_;
0180 uint8_t bit_mask_;
0181 int64_t byte_offset_;
0182 };
0183
0184 template <typename Word, bool may_have_byte_offset = true>
0185 class BitmapWordWriter {
0186 public:
0187 BitmapWordWriter() = default;
0188 BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length)
0189 : offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
0190 bitmap_(bitmap + offset / 8),
0191 bitmap_end_(bitmap_ + bit_util::BytesForBits(offset_ + length)),
0192 mask_((1U << offset_) - 1) {
0193 if (offset_) {
0194 if (length >= static_cast<int>(sizeof(Word) * 8)) {
0195 current_data.word_ = load<Word>(bitmap_);
0196 } else if (length > 0) {
0197 current_data.epi.byte_ = load<uint8_t>(bitmap_);
0198 }
0199 }
0200 }
0201
0202 void PutNextWord(Word word) {
0203 if (may_have_byte_offset && offset_) {
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215 word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_));
0216 Word next_word = load<Word>(bitmap_ + sizeof(Word));
0217 current_data.word_ = (current_data.word_ & mask_) | (word & ~mask_);
0218 next_word = (next_word & ~mask_) | (word & mask_);
0219 store<Word>(bitmap_, current_data.word_);
0220 store<Word>(bitmap_ + sizeof(Word), next_word);
0221 current_data.word_ = next_word;
0222 } else {
0223 store<Word>(bitmap_, word);
0224 }
0225 bitmap_ += sizeof(Word);
0226 }
0227
0228 void PutNextTrailingByte(uint8_t byte, int valid_bits) {
0229 if (valid_bits == 8) {
0230 if (may_have_byte_offset && offset_) {
0231 byte = (byte << offset_) | (byte >> (8 - offset_));
0232 uint8_t next_byte = load<uint8_t>(bitmap_ + 1);
0233 current_data.epi.byte_ = (current_data.epi.byte_ & mask_) | (byte & ~mask_);
0234 next_byte = (next_byte & ~mask_) | (byte & mask_);
0235 store<uint8_t>(bitmap_, current_data.epi.byte_);
0236 store<uint8_t>(bitmap_ + 1, next_byte);
0237 current_data.epi.byte_ = next_byte;
0238 } else {
0239 store<uint8_t>(bitmap_, byte);
0240 }
0241 ++bitmap_;
0242 } else {
0243 assert(valid_bits > 0);
0244 assert(valid_bits < 8);
0245 assert(bitmap_ + bit_util::BytesForBits(offset_ + valid_bits) <= bitmap_end_);
0246 internal::BitmapWriter writer(bitmap_, offset_, valid_bits);
0247 for (int i = 0; i < valid_bits; ++i) {
0248 (byte & 0x01) ? writer.Set() : writer.Clear();
0249 writer.Next();
0250 byte >>= 1;
0251 }
0252 writer.Finish();
0253 }
0254 }
0255
0256 private:
0257 int64_t offset_;
0258 uint8_t* bitmap_;
0259
0260 const uint8_t* bitmap_end_;
0261 uint64_t mask_;
0262 union {
0263 Word word_;
0264 struct {
0265 #if ARROW_LITTLE_ENDIAN == 0
0266 uint8_t padding_bytes_[sizeof(Word) - 1];
0267 #endif
0268 uint8_t byte_;
0269 } epi;
0270 } current_data;
0271
0272 template <typename DType>
0273 DType load(const uint8_t* bitmap) {
0274 assert(bitmap + sizeof(DType) <= bitmap_end_);
0275 return bit_util::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
0276 }
0277
0278 template <typename DType>
0279 void store(uint8_t* bitmap, DType data) {
0280 assert(bitmap + sizeof(DType) <= bitmap_end_);
0281 util::SafeStore(bitmap, bit_util::FromLittleEndian(data));
0282 }
0283 };
0284
0285 }
0286 }