File indexing completed on 2025-08-27 08:47:18
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <algorithm>
0021 #include <cstdint>
0022 #include <cstring>
0023 #include <memory>
0024 #include <string>
0025 #include <utility>
0026
0027 #include "arrow/buffer.h"
0028 #include "arrow/status.h"
0029 #include "arrow/util/bit_util.h"
0030 #include "arrow/util/bitmap_generate.h"
0031 #include "arrow/util/bitmap_ops.h"
0032 #include "arrow/util/macros.h"
0033 #include "arrow/util/ubsan.h"
0034 #include "arrow/util/visibility.h"
0035
0036 namespace arrow {
0037
0038
0039
0040
0041
0042
0043
0044 class ARROW_EXPORT BufferBuilder {
0045 public:
0046 explicit BufferBuilder(MemoryPool* pool = default_memory_pool(),
0047 int64_t alignment = kDefaultBufferAlignment)
0048 : pool_(pool),
0049 data_(
0050 util::MakeNonNull<uint8_t>()),
0051 capacity_(0),
0052 size_(0),
0053 alignment_(alignment) {}
0054
0055
0056
0057
0058 explicit BufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
0059 MemoryPool* pool = default_memory_pool(),
0060 int64_t alignment = kDefaultBufferAlignment)
0061 : buffer_(std::move(buffer)),
0062 pool_(pool),
0063 data_(buffer_->mutable_data()),
0064 capacity_(buffer_->capacity()),
0065 size_(buffer_->size()),
0066 alignment_(alignment) {}
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076 Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
0077 if (buffer_ == NULLPTR) {
0078 ARROW_ASSIGN_OR_RAISE(buffer_,
0079 AllocateResizableBuffer(new_capacity, alignment_, pool_));
0080 } else {
0081 ARROW_RETURN_NOT_OK(buffer_->Resize(new_capacity, shrink_to_fit));
0082 }
0083 capacity_ = buffer_->capacity();
0084 data_ = buffer_->mutable_data();
0085 return Status::OK();
0086 }
0087
0088
0089
0090
0091
0092
0093 Status Reserve(const int64_t additional_bytes) {
0094 auto min_capacity = size_ + additional_bytes;
0095 if (min_capacity <= capacity_) {
0096 return Status::OK();
0097 }
0098 return Resize(GrowByFactor(capacity_, min_capacity), false);
0099 }
0100
0101
0102 static int64_t GrowByFactor(int64_t current_capacity, int64_t new_capacity) {
0103
0104
0105
0106
0107 return std::max(new_capacity, current_capacity * 2);
0108 }
0109
0110
0111
0112
0113 Status Append(const void* data, const int64_t length) {
0114 if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) {
0115 ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false));
0116 }
0117 UnsafeAppend(data, length);
0118 return Status::OK();
0119 }
0120
0121
0122
0123
0124 Status Append(std::string_view v) { return Append(v.data(), v.size()); }
0125
0126
0127
0128
0129 Status Append(const int64_t num_copies, uint8_t value) {
0130 ARROW_RETURN_NOT_OK(Reserve(num_copies));
0131 UnsafeAppend(num_copies, value);
0132 return Status::OK();
0133 }
0134
0135
0136 Status Advance(const int64_t length) { return Append(length, 0); }
0137
0138
0139 void UnsafeAdvance(const int64_t length) { size_ += length; }
0140
0141
0142 void UnsafeAppend(const void* data, const int64_t length) {
0143 memcpy(data_ + size_, data, static_cast<size_t>(length));
0144 size_ += length;
0145 }
0146
0147 void UnsafeAppend(std::string_view v) {
0148 UnsafeAppend(v.data(), static_cast<int64_t>(v.size()));
0149 }
0150
0151 void UnsafeAppend(const int64_t num_copies, uint8_t value) {
0152 memset(data_ + size_, value, static_cast<size_t>(num_copies));
0153 size_ += num_copies;
0154 }
0155
0156
0157
0158
0159
0160
0161
0162
0163
0164
0165 Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
0166 ARROW_RETURN_NOT_OK(Resize(size_, shrink_to_fit));
0167 if (size_ != 0) buffer_->ZeroPadding();
0168 *out = buffer_;
0169 if (*out == NULLPTR) {
0170 ARROW_ASSIGN_OR_RAISE(*out, AllocateBuffer(0, alignment_, pool_));
0171 }
0172 Reset();
0173 return Status::OK();
0174 }
0175
0176 Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
0177 std::shared_ptr<Buffer> out;
0178 ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
0179 return out;
0180 }
0181
0182
0183
0184
0185
0186
0187 Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
0188 bool shrink_to_fit = true) {
0189 size_ = final_length;
0190 return Finish(shrink_to_fit);
0191 }
0192
0193 void Reset() {
0194 buffer_ = NULLPTR;
0195 capacity_ = size_ = 0;
0196 }
0197
0198
0199
0200
0201
0202 void Rewind(int64_t position) { size_ = position; }
0203
0204 int64_t capacity() const { return capacity_; }
0205 int64_t length() const { return size_; }
0206 const uint8_t* data() const { return data_; }
0207 uint8_t* mutable_data() { return data_; }
0208 template <typename T>
0209 const T* data_as() const {
0210 return reinterpret_cast<const T*>(data_);
0211 }
0212 template <typename T>
0213 T* mutable_data_as() {
0214 return reinterpret_cast<T*>(data_);
0215 }
0216
0217 private:
0218 std::shared_ptr<ResizableBuffer> buffer_;
0219 MemoryPool* pool_;
0220 uint8_t* data_;
0221 int64_t capacity_;
0222 int64_t size_;
0223 int64_t alignment_;
0224 };
0225
0226 template <typename T, typename Enable = void>
0227 class TypedBufferBuilder;
0228
0229
0230 template <typename T>
0231 class TypedBufferBuilder<
0232 T, typename std::enable_if<std::is_arithmetic<T>::value ||
0233 std::is_standard_layout<T>::value>::type> {
0234 public:
0235 explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool(),
0236 int64_t alignment = kDefaultBufferAlignment)
0237 : bytes_builder_(pool, alignment) {}
0238
0239 explicit TypedBufferBuilder(std::shared_ptr<ResizableBuffer> buffer,
0240 MemoryPool* pool = default_memory_pool())
0241 : bytes_builder_(std::move(buffer), pool) {}
0242
0243 explicit TypedBufferBuilder(BufferBuilder builder)
0244 : bytes_builder_(std::move(builder)) {}
0245
0246 BufferBuilder* bytes_builder() { return &bytes_builder_; }
0247
0248 Status Append(T value) {
0249 return bytes_builder_.Append(reinterpret_cast<uint8_t*>(&value), sizeof(T));
0250 }
0251
0252 Status Append(const T* values, int64_t num_elements) {
0253 return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values),
0254 num_elements * sizeof(T));
0255 }
0256
0257 Status Append(const int64_t num_copies, T value) {
0258 ARROW_RETURN_NOT_OK(Reserve(num_copies + length()));
0259 UnsafeAppend(num_copies, value);
0260 return Status::OK();
0261 }
0262
0263 void UnsafeAppend(T value) {
0264 bytes_builder_.UnsafeAppend(reinterpret_cast<uint8_t*>(&value), sizeof(T));
0265 }
0266
0267 void UnsafeAppend(const T* values, int64_t num_elements) {
0268 bytes_builder_.UnsafeAppend(reinterpret_cast<const uint8_t*>(values),
0269 num_elements * sizeof(T));
0270 }
0271
0272 template <typename Iter>
0273 void UnsafeAppend(Iter values_begin, Iter values_end) {
0274 auto num_elements = static_cast<int64_t>(std::distance(values_begin, values_end));
0275 auto data = mutable_data() + length();
0276 bytes_builder_.UnsafeAdvance(num_elements * sizeof(T));
0277 std::copy(values_begin, values_end, data);
0278 }
0279
0280 void UnsafeAppend(const int64_t num_copies, T value) {
0281 auto data = mutable_data() + length();
0282 bytes_builder_.UnsafeAdvance(num_copies * sizeof(T));
0283 std::fill(data, data + num_copies, value);
0284 }
0285
0286 Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
0287 return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit);
0288 }
0289
0290 Status Reserve(const int64_t additional_elements) {
0291 return bytes_builder_.Reserve(additional_elements * sizeof(T));
0292 }
0293
0294 Status Advance(const int64_t length) {
0295 return bytes_builder_.Advance(length * sizeof(T));
0296 }
0297
0298 Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
0299 return bytes_builder_.Finish(out, shrink_to_fit);
0300 }
0301
0302 Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
0303 std::shared_ptr<Buffer> out;
0304 ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
0305 return out;
0306 }
0307
0308
0309
0310
0311
0312
0313 Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
0314 bool shrink_to_fit = true) {
0315 return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit);
0316 }
0317
0318 void Reset() { bytes_builder_.Reset(); }
0319
0320 int64_t length() const { return bytes_builder_.length() / sizeof(T); }
0321 int64_t capacity() const { return bytes_builder_.capacity() / sizeof(T); }
0322 const T* data() const { return reinterpret_cast<const T*>(bytes_builder_.data()); }
0323 T* mutable_data() { return reinterpret_cast<T*>(bytes_builder_.mutable_data()); }
0324
0325 private:
0326 BufferBuilder bytes_builder_;
0327 };
0328
0329
0330 template <>
0331 class TypedBufferBuilder<bool> {
0332 public:
0333 explicit TypedBufferBuilder(MemoryPool* pool = default_memory_pool(),
0334 int64_t alignment = kDefaultBufferAlignment)
0335 : bytes_builder_(pool, alignment) {}
0336
0337 explicit TypedBufferBuilder(BufferBuilder builder)
0338 : bytes_builder_(std::move(builder)) {}
0339
0340 BufferBuilder* bytes_builder() { return &bytes_builder_; }
0341
0342 Status Append(bool value) {
0343 ARROW_RETURN_NOT_OK(Reserve(1));
0344 UnsafeAppend(value);
0345 return Status::OK();
0346 }
0347
0348 Status Append(const uint8_t* valid_bytes, int64_t num_elements) {
0349 ARROW_RETURN_NOT_OK(Reserve(num_elements));
0350 UnsafeAppend(valid_bytes, num_elements);
0351 return Status::OK();
0352 }
0353
0354 Status Append(const int64_t num_copies, bool value) {
0355 ARROW_RETURN_NOT_OK(Reserve(num_copies));
0356 UnsafeAppend(num_copies, value);
0357 return Status::OK();
0358 }
0359
0360 void UnsafeAppend(bool value) {
0361 bit_util::SetBitTo(mutable_data(), bit_length_, value);
0362 if (!value) {
0363 ++false_count_;
0364 }
0365 ++bit_length_;
0366 }
0367
0368
0369 void UnsafeAppend(const uint8_t* bytes, int64_t num_elements) {
0370 if (num_elements == 0) return;
0371 int64_t i = 0;
0372 internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
0373 bool value = bytes[i++];
0374 false_count_ += !value;
0375 return value;
0376 });
0377 bit_length_ += num_elements;
0378 }
0379
0380
0381 void UnsafeAppend(const uint8_t* bitmap, int64_t offset, int64_t num_elements) {
0382 if (num_elements == 0) return;
0383 internal::CopyBitmap(bitmap, offset, num_elements, mutable_data(), bit_length_);
0384 false_count_ += num_elements - internal::CountSetBits(bitmap, offset, num_elements);
0385 bit_length_ += num_elements;
0386 }
0387
0388 void UnsafeAppend(const int64_t num_copies, bool value) {
0389 bit_util::SetBitsTo(mutable_data(), bit_length_, num_copies, value);
0390 false_count_ += num_copies * !value;
0391 bit_length_ += num_copies;
0392 }
0393
0394 template <bool count_falses, typename Generator>
0395 void UnsafeAppend(const int64_t num_elements, Generator&& gen) {
0396 if (num_elements == 0) return;
0397
0398 if (count_falses) {
0399 internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements, [&] {
0400 bool value = gen();
0401 false_count_ += !value;
0402 return value;
0403 });
0404 } else {
0405 internal::GenerateBitsUnrolled(mutable_data(), bit_length_, num_elements,
0406 std::forward<Generator>(gen));
0407 }
0408 bit_length_ += num_elements;
0409 }
0410
0411 Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) {
0412 const int64_t old_byte_capacity = bytes_builder_.capacity();
0413 ARROW_RETURN_NOT_OK(
0414 bytes_builder_.Resize(bit_util::BytesForBits(new_capacity), shrink_to_fit));
0415
0416
0417 const int64_t new_byte_capacity = bytes_builder_.capacity();
0418 if (new_byte_capacity > old_byte_capacity) {
0419
0420
0421 memset(mutable_data() + old_byte_capacity, 0,
0422 static_cast<size_t>(new_byte_capacity - old_byte_capacity));
0423 }
0424 return Status::OK();
0425 }
0426
0427 Status Reserve(const int64_t additional_elements) {
0428 return Resize(
0429 BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements),
0430 false);
0431 }
0432
0433 Status Advance(const int64_t length) {
0434 ARROW_RETURN_NOT_OK(Reserve(length));
0435 bit_length_ += length;
0436 false_count_ += length;
0437 return Status::OK();
0438 }
0439
0440 Status Finish(std::shared_ptr<Buffer>* out, bool shrink_to_fit = true) {
0441
0442 bytes_builder_.UnsafeAdvance(bit_util::BytesForBits(bit_length_) -
0443 bytes_builder_.length());
0444 bit_length_ = false_count_ = 0;
0445 return bytes_builder_.Finish(out, shrink_to_fit);
0446 }
0447
0448 Result<std::shared_ptr<Buffer>> Finish(bool shrink_to_fit = true) {
0449 std::shared_ptr<Buffer> out;
0450 ARROW_RETURN_NOT_OK(Finish(&out, shrink_to_fit));
0451 return out;
0452 }
0453
0454
0455
0456
0457
0458
0459 Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length,
0460 bool shrink_to_fit = true) {
0461 const auto final_byte_length = bit_util::BytesForBits(final_length);
0462 bytes_builder_.UnsafeAdvance(final_byte_length - bytes_builder_.length());
0463 bit_length_ = false_count_ = 0;
0464 return bytes_builder_.FinishWithLength(final_byte_length, shrink_to_fit);
0465 }
0466
0467 void Reset() {
0468 bytes_builder_.Reset();
0469 bit_length_ = false_count_ = 0;
0470 }
0471
0472 int64_t length() const { return bit_length_; }
0473 int64_t capacity() const { return bytes_builder_.capacity() * 8; }
0474 const uint8_t* data() const { return bytes_builder_.data(); }
0475 uint8_t* mutable_data() { return bytes_builder_.mutable_data(); }
0476 int64_t false_count() const { return false_count_; }
0477
0478 private:
0479 BufferBuilder bytes_builder_;
0480 int64_t bit_length_ = 0;
0481 int64_t false_count_ = 0;
0482 };
0483
0484 }