Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-27 08:47:18

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <algorithm>
0021 #include <cstdint>
0022 #include <cstring>
0023 #include <memory>
0024 #include <string>
0025 #include <utility>
0026 
0027 #include "arrow/buffer.h"
0028 #include "arrow/status.h"
0029 #include "arrow/util/bit_util.h"
0030 #include "arrow/util/bitmap_generate.h"
0031 #include "arrow/util/bitmap_ops.h"
0032 #include "arrow/util/macros.h"
0033 #include "arrow/util/ubsan.h"
0034 #include "arrow/util/visibility.h"
0035 
0036 namespace arrow {
0037 
0038 // ----------------------------------------------------------------------
0039 // Buffer builder classes
0040 
0041 /// \class BufferBuilder
0042 /// \brief A class for incrementally building a contiguous chunk of in-memory
0043 /// data
0044 class ARROW_EXPORT BufferBuilder {
0045  public:
0046   explicit BufferBuilder(MemoryPool* pool = default_memory_pool(),
0047                          int64_t alignment = kDefaultBufferAlignment)
0048       : pool_(pool),
0049         data_(/*ensure never null to make ubsan happy and avoid check penalties below*/
0050               util::MakeNonNull<uint8_t>()),
0051         capacity_(0),
0052         size_(0),
0053         alignment_(alignment) {}
0054 
0055   /// \brief Constructs new Builder that will start using
0056   /// the provided buffer until Finish/Reset are called.
0057   /// The buffer is not resized.
0058   explicit BufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
0059                          MemoryPool* pool = default_memory_pool(),
0060                          int64_t alignment = kDefaultBufferAlignment)
0061       : buffer_(std::move(buffer)),
0062         pool_(pool),
0063         data_(buffer_->mutable_data()),
0064         capacity_(buffer_->capacity()),
0065         size_(buffer_->size()),
0066         alignment_(alignment) {}
0067 
0068   /// \brief Resize the buffer to the nearest multiple of 64 bytes
0069   ///
0070   /// \param new_capacity the new capacity of the of the builder. Will be
0071   /// rounded up to a multiple of 64 bytes for padding
0072   /// \param shrink_to_fit if new capacity is smaller than the existing,
0073   /// reallocate internal buffer. Set to false to avoid reallocations when
0074   /// shrinking the builder.
0075   /// \return Status
0076   Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
0077     if (buffer_ == NULLPTR) {
0078       ARROW_ASSIGN_OR_RAISE(buffer_,
0079                             AllocateResizableBuffer(new_capacity, alignment_, pool_));
0080     } else {
0081       ARROW_RETURN_NOT_OK(buffer_->Resize(new_capacity, shrink_to_fit));
0082     }
0083     capacity_ = buffer_->capacity();
0084     data_ = buffer_->mutable_data();
0085     return Status::OK();
0086   }
0087 
0088   /// \brief Ensure that builder can accommodate the additional number of bytes
0089   /// without the need to perform allocations
0090   ///
0091   /// \param[in] additional_bytes number of additional bytes to make space for
0092   /// \return Status
0093   Status Reserve(const int64_t additional_bytes) {
0094     auto min_capacity = size_ + additional_bytes;
0095     if (min_capacity <= capacity_) {
0096       return Status::OK();
0097     }
0098     return Resize(GrowByFactor(capacity_, min_capacity), false);
0099   }
0100 
0101   /// \brief Return a capacity expanded by the desired growth factor
0102   static int64_t GrowByFactor(int64_t current_capacity, int64_t new_capacity) {
0103     // Doubling capacity except for large Reserve requests. 2x growth strategy
0104     // (versus 1.5x) seems to have slightly better performance when using
0105     // jemalloc, but significantly better performance when using the system
0106     // allocator. See ARROW-6450 for further discussion
0107     return std::max(new_capacity, current_capacity * 2);
0108   }
0109 
0110   /// \brief Append the given data to the buffer
0111   ///
0112   /// The buffer is automatically expanded if necessary.
0113   Status Append(const void* data, const int64_t length) {
0114     if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) {
0115       ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false));
0116     }
0117     UnsafeAppend(data, length);
0118     return Status::OK();
0119   }
0120 
0121   /// \brief Append the given data to the buffer
0122   ///
0123   /// The buffer is automatically expanded if necessary.
0124   Status Append(std::string_view v) { return Append(v.data(), v.size()); }
0125 
0126   /// \brief Append copies of a value to the buffer
0127   ///
0128   /// The buffer is automatically expanded if necessary.
0129   Status Append(const int64_t num_copies, uint8_t value) {
0130     ARROW_RETURN_NOT_OK(Reserve(num_copies));
0131     UnsafeAppend(num_copies, value);
0132     return Status::OK();
0133   }
0134 
0135   // Advance pointer and zero out memory
0136   Status Advance(const int64_t length) { return Append(length, 0); }
0137 
0138   // Advance pointer, but don't allocate or zero memory
0139   void UnsafeAdvance(const int64_t length) { size_ += length; }
0140 
0141   // Unsafe methods don't check existing size
0142   void UnsafeAppend(const void* data, const int64_t length) {
0143     memcpy(data_ + size_, data, static_cast<size_t>(length));
0144     size_ += length;
0145   }
0146 
0147   void UnsafeAppend(std::string_view v) {
0148     UnsafeAppend(v.data(), static_cast<int64_t>(v.size()));
0149   }
0150 
0151   void UnsafeAppend(const int64_t num_copies, uint8_t value) {
0152     memset(data_ + size_, value, static_cast<size_t>(num_copies));
0153     size_ += num_copies;
0154   }
0155 
0156   /// \brief Return result of builder as a Buffer object.
0157   ///
0158   /// The builder is reset and can be reused afterwards.
0159   ///
0160   /// \param[out] out the finalized Buffer object
0161   /// \param shrink_to_fit if the buffer size is smaller than its capacity,
0162   /// reallocate to fit more tightly in memory. Set to false to avoid
0163   /// a reallocation, at the expense of potentially more memory consumption.
0164   /// \return Status
0165   Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
0166     ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit));
0167     if (size_ != 0) buffer_->ZeroPadding();
0168     *out = buffer_;
0169     if (*out == NULLPTR) {
0170       ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer(0, alignment_, pool_));
0171     }
0172     Reset();
0173     return Status::OK();
0174   }
0175 
0176   Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
0177     std::shared_ptr<Buffer> out;
0178     ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
0179     return out;
0180   }
0181 
0182   /// \brief Like Finish, but override the final buffer size
0183   ///
0184   /// This is useful after writing data directly into the builder memory
0185   /// without calling the Append methods (basically, when using BufferBuilder
0186   /// mostly for memory allocation).
0187   Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
0188                                                    bool shrink_to_fit = true) {
0189     size_ = final_length;
0190     return Finish(shrink_to_fit);
0191   }
0192 
0193   void Reset() {
0194     buffer_ = NULLPTR;
0195     capacity_ = size_ = 0;
0196   }
0197 
0198   /// \brief Set size to a smaller value without modifying builder
0199   /// contents. For reusable BufferBuilder classes
0200   /// \param[in] position must be non-negative and less than or equal
0201   /// to the current length()
0202   void Rewind(int64_t position) { size_ = position; }
0203 
0204   int64_t capacity() const { return capacity_; }
0205   int64_t length() const { return size_; }
0206   const uint8_t* data() const { return data_; }
0207   uint8_t* mutable_data() { return data_; }
0208   template <typename T>
0209   const T* data_as() const {
0210     return reinterpret_cast<const T*>(data_);
0211   }
0212   template <typename T>
0213   T* mutable_data_as() {
0214     return reinterpret_cast<T*>(data_);
0215   }
0216 
0217  private:
0218   std::shared_ptr<ResizableBuffer> buffer_;
0219   MemoryPool* pool_;
0220   uint8_t* data_;
0221   int64_t capacity_;
0222   int64_t size_;
0223   int64_t alignment_;
0224 };
0225 
0226 template <typename T, typename Enable = void>
0227 class TypedBufferBuilder;
0228 
0229 /// \brief A BufferBuilder for building a buffer of arithmetic elements
0230 template <typename T>
0231 class TypedBufferBuilder<
0232     T, typename std::enable_if<std::is_arithmetic<T>::value ||
0233                                std::is_standard_layout<T>::value>::type> {
0234  public:
0235   explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool(),
0236                               int64_t alignment = kDefaultBufferAlignment)
0237       : bytes_builder_(pool, alignment) {}
0238 
0239   explicit TypedBufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
0240                               MemoryPool* pool = default_memory_pool())
0241       : bytes_builder_(std::move(buffer), pool) {}
0242 
0243   explicit TypedBufferBuilder(BufferBuilder builder)
0244       : bytes_builder_(std::move(builder)) {}
0245 
0246   BufferBuilder* bytes_builder() { return &bytes_builder_; }
0247 
0248   Status Append(T value) {
0249     return bytes_builder_.Append(reinterpret_cast<uint8_t*>(&value), sizeof(T));
0250   }
0251 
0252   Status Append(const T* values, int64_t num_elements) {
0253     return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values),
0254                                  num_elements * sizeof(T));
0255   }
0256 
0257   Status Append(const int64_t num_copies, T value) {
0258     ARROW_RETURN_NOT_OK(Reserve(num_copies + length()));
0259     UnsafeAppend(num_copies, value);
0260     return Status::OK();
0261   }
0262 
0263   void UnsafeAppend(T value) {
0264     bytes_builder_.UnsafeAppend(reinterpret_cast<uint8_t*>(&value), sizeof(T));
0265   }
0266 
0267   void UnsafeAppend(const T* values, int64_t num_elements) {
0268     bytes_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values),
0269                                 num_elements * sizeof(T));
0270   }
0271 
0272   template <typename Iter>
0273   void UnsafeAppend(Iter values_begin, Iter values_end) {
0274     auto num_elements = static_cast<int64_t>(std::distance(values_begin, values_end));
0275     auto data = mutable_data() + length();
0276     bytes_builder_.UnsafeAdvance(num_elements * sizeof(T));
0277     std::copy(values_begin, values_end, data);
0278   }
0279 
0280   void UnsafeAppend(const int64_t num_copies, T value) {
0281     auto data = mutable_data() + length();
0282     bytes_builder_.UnsafeAdvance(num_copies * sizeof(T));
0283     std::fill(data, data + num_copies, value);
0284   }
0285 
0286   Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
0287     return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit);
0288   }
0289 
0290   Status Reserve(const int64_t additional_elements) {
0291     return bytes_builder_.Reserve(additional_elements * sizeof(T));
0292   }
0293 
0294   Status Advance(const int64_t length) {
0295     return bytes_builder_.Advance(length * sizeof(T));
0296   }
0297 
0298   Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
0299     return bytes_builder_.Finish(out, shrink_to_fit);
0300   }
0301 
0302   Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
0303     std::shared_ptr<Buffer> out;
0304     ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
0305     return out;
0306   }
0307 
0308   /// \brief Like Finish, but override the final buffer size
0309   ///
0310   /// This is useful after writing data directly into the builder memory
0311   /// without calling the Append methods (basically, when using TypedBufferBuilder
0312   /// only for memory allocation).
0313   Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
0314                                                    bool shrink_to_fit = true) {
0315     return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit);
0316   }
0317 
0318   void Reset() { bytes_builder_.Reset(); }
0319 
0320   int64_t length() const { return bytes_builder_.length() / sizeof(T); }
0321   int64_t capacity() const { return bytes_builder_.capacity() / sizeof(T); }
0322   const T* data() const { return reinterpret_cast<const T*>(bytes_builder_.data()); }
0323   T* mutable_data() { return reinterpret_cast<T*>(bytes_builder_.mutable_data()); }
0324 
0325  private:
0326   BufferBuilder bytes_builder_;
0327 };
0328 
0329 /// \brief A BufferBuilder for building a buffer containing a bitmap
0330 template <>
0331 class TypedBufferBuilder<bool> {
0332  public:
0333   explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool(),
0334                               int64_t alignment = kDefaultBufferAlignment)
0335       : bytes_builder_(pool, alignment) {}
0336 
0337   explicit TypedBufferBuilder(BufferBuilder builder)
0338       : bytes_builder_(std::move(builder)) {}
0339 
0340   BufferBuilder* bytes_builder() { return &bytes_builder_; }
0341 
0342   Status Append(bool value) {
0343     ARROW_RETURN_NOT_OK(Reserve(1));
0344     UnsafeAppend(value);
0345     return Status::OK();
0346   }
0347 
0348   Status Append(const uint8_t* valid_bytes, int64_t num_elements) {
0349     ARROW_RETURN_NOT_OK(Reserve(num_elements));
0350     UnsafeAppend(valid_bytes, num_elements);
0351     return Status::OK();
0352   }
0353 
0354   Status Append(const int64_t num_copies, bool value) {
0355     ARROW_RETURN_NOT_OK(Reserve(num_copies));
0356     UnsafeAppend(num_copies, value);
0357     return Status::OK();
0358   }
0359 
0360   void UnsafeAppend(bool value) {
0361     bit_util::SetBitTo(mutable_data(), bit_length_, value);
0362     if (!value) {
0363       ++false_count_;
0364     }
0365     ++bit_length_;
0366   }
0367 
0368   /// \brief Append bits from an array of bytes (one value per byte)
0369   void UnsafeAppend(const uint8_t* bytes, int64_t num_elements) {
0370     if (num_elements == 0) return;
0371     int64_t i = 0;
0372     internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
0373       bool value = bytes[i++];
0374       false_count_ += !value;
0375       return value;
0376     });
0377     bit_length_ += num_elements;
0378   }
0379 
0380   /// \brief Append bits from a packed bitmap
0381   void UnsafeAppend(const uint8_t* bitmap, int64_t offset, int64_t num_elements) {
0382     if (num_elements == 0) return;
0383     internal::CopyBitmap(bitmap, offset, num_elements, mutable_data(), bit_length_);
0384     false_count_ += num_elements - internal::CountSetBits(bitmap, offset, num_elements);
0385     bit_length_ += num_elements;
0386   }
0387 
0388   void UnsafeAppend(const int64_t num_copies, bool value) {
0389     bit_util::SetBitsTo(mutable_data(), bit_length_, num_copies, value);
0390     false_count_ += num_copies * !value;
0391     bit_length_ += num_copies;
0392   }
0393 
0394   template <bool count_falses, typename Generator>
0395   void UnsafeAppend(const int64_t num_elements, Generator&& gen) {
0396     if (num_elements == 0) return;
0397 
0398     if (count_falses) {
0399       internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
0400         bool value = gen();
0401         false_count_ += !value;
0402         return value;
0403       });
0404     } else {
0405       internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements,
0406                                      std::forward<Generator>(gen));
0407     }
0408     bit_length_ += num_elements;
0409   }
0410 
0411   Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
0412     const int64_t old_byte_capacity = bytes_builder_.capacity();
0413     ARROW_RETURN_NOT_OK(
0414         bytes_builder_.Resize(bit_util::BytesForBits(new_capacity), shrink_to_fit));
0415     // Resize() may have chosen a larger capacity (e.g. for padding),
0416     // so ask it again before calling memset().
0417     const int64_t new_byte_capacity = bytes_builder_.capacity();
0418     if (new_byte_capacity > old_byte_capacity) {
0419       // The additional buffer space is 0-initialized for convenience,
0420       // so that other methods can simply bump the length.
0421       memset(mutable_data() + old_byte_capacity, 0,
0422              static_cast<size_t>(new_byte_capacity - old_byte_capacity));
0423     }
0424     return Status::OK();
0425   }
0426 
0427   Status Reserve(const int64_t additional_elements) {
0428     return Resize(
0429         BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements),
0430         false);
0431   }
0432 
0433   Status Advance(const int64_t length) {
0434     ARROW_RETURN_NOT_OK(Reserve(length));
0435     bit_length_ += length;
0436     false_count_ += length;
0437     return Status::OK();
0438   }
0439 
0440   Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
0441     // set bytes_builder_.size_ == byte size of data
0442     bytes_builder_.UnsafeAdvance(bit_util::BytesForBits(bit_length_) -
0443                                  bytes_builder_.length());
0444     bit_length_ = false_count_ = 0;
0445     return bytes_builder_.Finish(out, shrink_to_fit);
0446   }
0447 
0448   Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
0449     std::shared_ptr<Buffer> out;
0450     ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
0451     return out;
0452   }
0453 
0454   /// \brief Like Finish, but override the final buffer size
0455   ///
0456   /// This is useful after writing data directly into the builder memory
0457   /// without calling the Append methods (basically, when using TypedBufferBuilder
0458   /// only for memory allocation).
0459   Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
0460                                                    bool shrink_to_fit = true) {
0461     const auto final_byte_length = bit_util::BytesForBits(final_length);
0462     bytes_builder_.UnsafeAdvance(final_byte_length - bytes_builder_.length());
0463     bit_length_ = false_count_ = 0;
0464     return bytes_builder_.FinishWithLength(final_byte_length, shrink_to_fit);
0465   }
0466 
0467   void Reset() {
0468     bytes_builder_.Reset();
0469     bit_length_ = false_count_ = 0;
0470   }
0471 
0472   int64_t length() const { return bit_length_; }
0473   int64_t capacity() const { return bytes_builder_.capacity() * 8; }
0474   const uint8_t* data() const { return bytes_builder_.data(); }
0475   uint8_t* mutable_data() { return bytes_builder_.mutable_data(); }
0476   int64_t false_count() const { return false_count_; }
0477 
0478  private:
0479   BufferBuilder bytes_builder_;
0480   int64_t bit_length_ = 0;
0481   int64_t false_count_ = 0;
0482 };
0483 
0484 }  // namespace arrow