File indexing completed on 2025-08-28 08:26:55
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <algorithm>
0021 #include <memory>
0022 #include <vector>
0023
0024 #include "arrow/array/builder_base.h"
0025 #include "arrow/array/data.h"
0026 #include "arrow/result.h"
0027 #include "arrow/type.h"
0028 #include "arrow/type_traits.h"
0029
0030 namespace arrow {
0031
0032 class ARROW_EXPORT NullBuilder : public ArrayBuilder {
0033 public:
0034 explicit NullBuilder(MemoryPool* pool = default_memory_pool(),
0035 int64_t ARROW_ARG_UNUSED(alignment) = kDefaultBufferAlignment)
0036 : ArrayBuilder(pool) {}
0037
0038 explicit NullBuilder(const std::shared_ptr<DataType>& ARROW_ARG_UNUSED(type),
0039 MemoryPool* pool = default_memory_pool(),
0040 int64_t alignment = kDefaultBufferAlignment)
0041 : NullBuilder(pool, alignment) {}
0042
0043
0044 Status AppendNulls(int64_t length) final {
0045 if (length < 0) return Status::Invalid("length must be positive");
0046 null_count_ += length;
0047 length_ += length;
0048 return Status::OK();
0049 }
0050
0051
0052 Status AppendNull() final { return AppendNulls(1); }
0053
0054 Status AppendEmptyValues(int64_t length) final { return AppendNulls(length); }
0055
0056 Status AppendEmptyValue() final { return AppendEmptyValues(1); }
0057
0058 Status Append(std::nullptr_t) { return AppendNull(); }
0059
0060 Status AppendArraySlice(const ArraySpan&, int64_t, int64_t length) override {
0061 return AppendNulls(length);
0062 }
0063
0064 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
0065
0066
0067 using ArrayBuilder::Finish;
0068
0069
0070 std::shared_ptr<DataType> type() const override { return null(); }
0071
0072 Status Finish(std::shared_ptr<NullArray>* out) { return FinishTyped(out); }
0073 };
0074
0075
0076
0077
0078
0079
0080 template <typename T>
0081 class NumericBuilder
0082 : public ArrayBuilder,
0083 public internal::ArrayBuilderExtraOps<NumericBuilder<T>, typename T::c_type> {
0084 public:
0085 using TypeClass = T;
0086 using value_type = typename T::c_type;
0087 using ArrayType = typename TypeTraits<T>::ArrayType;
0088
0089 template <typename T1 = T>
0090 explicit NumericBuilder(
0091 enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool(),
0092 int64_t alignment = kDefaultBufferAlignment)
0093 : ArrayBuilder(pool, alignment),
0094 type_(TypeTraits<T>::type_singleton()),
0095 data_builder_(pool, alignment) {}
0096
0097 NumericBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
0098 int64_t alignment = kDefaultBufferAlignment)
0099 : ArrayBuilder(pool, alignment), type_(type), data_builder_(pool, alignment) {}
0100
0101
0102 Status Append(const value_type val) {
0103 ARROW_RETURN_NOT_OK(ArrayBuilder::Reserve(1));
0104 UnsafeAppend(val);
0105 return Status::OK();
0106 }
0107
0108
0109
0110
0111 Status AppendNulls(int64_t length) final {
0112 ARROW_RETURN_NOT_OK(Reserve(length));
0113 data_builder_.UnsafeAppend(length, value_type{});
0114 UnsafeSetNull(length);
0115 return Status::OK();
0116 }
0117
0118
0119 Status AppendNull() final {
0120 ARROW_RETURN_NOT_OK(Reserve(1));
0121 data_builder_.UnsafeAppend(value_type{});
0122 UnsafeAppendToBitmap(false);
0123 return Status::OK();
0124 }
0125
0126
0127 Status AppendEmptyValue() final {
0128 ARROW_RETURN_NOT_OK(Reserve(1));
0129 data_builder_.UnsafeAppend(value_type{});
0130 UnsafeAppendToBitmap(true);
0131 return Status::OK();
0132 }
0133
0134
0135 Status AppendEmptyValues(int64_t length) final {
0136 ARROW_RETURN_NOT_OK(Reserve(length));
0137 data_builder_.UnsafeAppend(length, value_type{});
0138 UnsafeSetNotNull(length);
0139 return Status::OK();
0140 }
0141
0142 value_type GetValue(int64_t index) const { return data_builder_.data()[index]; }
0143
0144 void Reset() override {
0145 data_builder_.Reset();
0146 ArrayBuilder::Reset();
0147 }
0148
0149 Status Resize(int64_t capacity) override {
0150 ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
0151 capacity = std::max(capacity, kMinBuilderCapacity);
0152 ARROW_RETURN_NOT_OK(data_builder_.Resize(capacity));
0153 return ArrayBuilder::Resize(capacity);
0154 }
0155
0156 value_type operator[](int64_t index) const { return GetValue(index); }
0157
0158 value_type& operator[](int64_t index) {
0159 return reinterpret_cast<value_type*>(data_builder_.mutable_data())[index];
0160 }
0161
0162
0163
0164
0165
0166
0167
0168 Status AppendValues(const value_type* values, int64_t length,
0169 const uint8_t* valid_bytes = NULLPTR) {
0170 ARROW_RETURN_NOT_OK(Reserve(length));
0171 data_builder_.UnsafeAppend(values, length);
0172
0173 ArrayBuilder::UnsafeAppendToBitmap(valid_bytes, length);
0174 return Status::OK();
0175 }
0176
0177
0178
0179
0180
0181
0182
0183 Status AppendValues(const value_type* values, int64_t length, const uint8_t* bitmap,
0184 int64_t bitmap_offset) {
0185 ARROW_RETURN_NOT_OK(Reserve(length));
0186 data_builder_.UnsafeAppend(values, length);
0187
0188 ArrayBuilder::UnsafeAppendToBitmap(bitmap, bitmap_offset, length);
0189 return Status::OK();
0190 }
0191
0192
0193
0194
0195
0196
0197
0198 Status AppendValues(const value_type* values, int64_t length,
0199 const std::vector<bool>& is_valid) {
0200 ARROW_RETURN_NOT_OK(Reserve(length));
0201 data_builder_.UnsafeAppend(values, length);
0202
0203 ArrayBuilder::UnsafeAppendToBitmap(is_valid);
0204 return Status::OK();
0205 }
0206
0207
0208
0209
0210
0211
0212 Status AppendValues(const std::vector<value_type>& values,
0213 const std::vector<bool>& is_valid) {
0214 if (values.empty()) {
0215 return Status::OK();
0216 }
0217 return AppendValues(values.data(), static_cast<int64_t>(values.size()), is_valid);
0218 }
0219
0220
0221
0222
0223 Status AppendValues(const std::vector<value_type>& values) {
0224 if (values.empty()) {
0225 return Status::OK();
0226 }
0227 return AppendValues(values.data(), static_cast<int64_t>(values.size()));
0228 }
0229
0230 Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
0231 ARROW_ASSIGN_OR_RAISE(auto null_bitmap,
0232 null_bitmap_builder_.FinishWithLength(length_));
0233 ARROW_ASSIGN_OR_RAISE(auto data, data_builder_.FinishWithLength(length_));
0234 *out = ArrayData::Make(type(), length_, {null_bitmap, data}, null_count_);
0235 capacity_ = length_ = null_count_ = 0;
0236 return Status::OK();
0237 }
0238
0239
0240 using ArrayBuilder::Finish;
0241
0242
0243 Status Finish(std::shared_ptr<ArrayType>* out) { return FinishTyped(out); }
0244
0245
0246
0247
0248
0249 template <typename ValuesIter>
0250 Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
0251 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
0252 ARROW_RETURN_NOT_OK(Reserve(length));
0253 data_builder_.UnsafeAppend(values_begin, values_end);
0254
0255 UnsafeSetNotNull(length);
0256 return Status::OK();
0257 }
0258
0259
0260
0261
0262
0263
0264
0265 template <typename ValuesIter, typename ValidIter>
0266 enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
0267 ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
0268 static_assert(!internal::is_null_pointer<ValidIter>::value,
0269 "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
0270 "version instead");
0271 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
0272 ARROW_RETURN_NOT_OK(Reserve(length));
0273 data_builder_.UnsafeAppend(values_begin, values_end);
0274 null_bitmap_builder_.UnsafeAppend<true>(
0275 length, [&valid_begin]() -> bool { return *valid_begin++; });
0276 length_ = null_bitmap_builder_.length();
0277 null_count_ = null_bitmap_builder_.false_count();
0278 return Status::OK();
0279 }
0280
0281
0282 template <typename ValuesIter, typename ValidIter>
0283 enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
0284 ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
0285 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
0286 ARROW_RETURN_NOT_OK(Reserve(length));
0287 data_builder_.UnsafeAppend(values_begin, values_end);
0288
0289 if (valid_begin == NULLPTR) {
0290 UnsafeSetNotNull(length);
0291 } else {
0292 null_bitmap_builder_.UnsafeAppend<true>(
0293 length, [&valid_begin]() -> bool { return *valid_begin++; });
0294 length_ = null_bitmap_builder_.length();
0295 null_count_ = null_bitmap_builder_.false_count();
0296 }
0297
0298 return Status::OK();
0299 }
0300
0301 Status AppendArraySlice(const ArraySpan& array, int64_t offset,
0302 int64_t length) override {
0303 return AppendValues(array.GetValues<value_type>(1) + offset, length,
0304 array.GetValues<uint8_t>(0, 0), array.offset + offset);
0305 }
0306
0307
0308
0309
0310
0311
0312 void UnsafeAppend(const value_type val) {
0313 ArrayBuilder::UnsafeAppendToBitmap(true);
0314 data_builder_.UnsafeAppend(val);
0315 }
0316
0317 void UnsafeAppendNull() {
0318 ArrayBuilder::UnsafeAppendToBitmap(false);
0319 data_builder_.UnsafeAppend(value_type{});
0320 }
0321
0322 std::shared_ptr<DataType> type() const override { return type_; }
0323
0324 protected:
0325 std::shared_ptr<DataType> type_;
0326 TypedBufferBuilder<value_type> data_builder_;
0327 };
0328
0329
0330
0331 using UInt8Builder = NumericBuilder<UInt8Type>;
0332 using UInt16Builder = NumericBuilder<UInt16Type>;
0333 using UInt32Builder = NumericBuilder<UInt32Type>;
0334 using UInt64Builder = NumericBuilder<UInt64Type>;
0335
0336 using Int8Builder = NumericBuilder<Int8Type>;
0337 using Int16Builder = NumericBuilder<Int16Type>;
0338 using Int32Builder = NumericBuilder<Int32Type>;
0339 using Int64Builder = NumericBuilder<Int64Type>;
0340
0341 using HalfFloatBuilder = NumericBuilder<HalfFloatType>;
0342 using FloatBuilder = NumericBuilder<FloatType>;
0343 using DoubleBuilder = NumericBuilder<DoubleType>;
0344
0345
0346
0347
0348
0349
0350
0351 using Date32Builder = NumericBuilder<Date32Type>;
0352 using Date64Builder = NumericBuilder<Date64Type>;
0353 using Time32Builder = NumericBuilder<Time32Type>;
0354 using Time64Builder = NumericBuilder<Time64Type>;
0355 using TimestampBuilder = NumericBuilder<TimestampType>;
0356 using MonthIntervalBuilder = NumericBuilder<MonthIntervalType>;
0357 using DurationBuilder = NumericBuilder<DurationType>;
0358
0359
0360
0361 class ARROW_EXPORT BooleanBuilder
0362 : public ArrayBuilder,
0363 public internal::ArrayBuilderExtraOps<BooleanBuilder, bool> {
0364 public:
0365 using TypeClass = BooleanType;
0366 using value_type = bool;
0367
0368 explicit BooleanBuilder(MemoryPool* pool = default_memory_pool(),
0369 int64_t alignment = kDefaultBufferAlignment);
0370
0371 BooleanBuilder(const std::shared_ptr<DataType>& type,
0372 MemoryPool* pool = default_memory_pool(),
0373 int64_t alignment = kDefaultBufferAlignment);
0374
0375
0376 Status AppendNulls(int64_t length) final {
0377 ARROW_RETURN_NOT_OK(Reserve(length));
0378 data_builder_.UnsafeAppend(length, false);
0379 UnsafeSetNull(length);
0380 return Status::OK();
0381 }
0382
0383 Status AppendNull() final {
0384 ARROW_RETURN_NOT_OK(Reserve(1));
0385 UnsafeAppendNull();
0386 return Status::OK();
0387 }
0388
0389 Status AppendEmptyValue() final {
0390 ARROW_RETURN_NOT_OK(Reserve(1));
0391 data_builder_.UnsafeAppend(false);
0392 UnsafeSetNotNull(1);
0393 return Status::OK();
0394 }
0395
0396 Status AppendEmptyValues(int64_t length) final {
0397 ARROW_RETURN_NOT_OK(Reserve(length));
0398 data_builder_.UnsafeAppend(length, false);
0399 UnsafeSetNotNull(length);
0400 return Status::OK();
0401 }
0402
0403
0404 Status Append(const bool val) {
0405 ARROW_RETURN_NOT_OK(Reserve(1));
0406 UnsafeAppend(val);
0407 return Status::OK();
0408 }
0409
0410 Status Append(const uint8_t val) { return Append(val != 0); }
0411
0412
0413 void UnsafeAppend(const bool val) {
0414 data_builder_.UnsafeAppend(val);
0415 UnsafeAppendToBitmap(true);
0416 }
0417
0418 void UnsafeAppendNull() {
0419 data_builder_.UnsafeAppend(false);
0420 UnsafeAppendToBitmap(false);
0421 }
0422
0423 void UnsafeAppend(const uint8_t val) { UnsafeAppend(val != 0); }
0424
0425
0426
0427
0428
0429
0430
0431 Status AppendValues(const uint8_t* values, int64_t length,
0432 const uint8_t* valid_bytes = NULLPTR);
0433
0434
0435
0436
0437
0438
0439
0440 Status AppendValues(const uint8_t* values, int64_t length, const uint8_t* validity,
0441 int64_t offset);
0442
0443
0444
0445
0446
0447
0448
0449 Status AppendValues(const uint8_t* values, int64_t length,
0450 const std::vector<bool>& is_valid);
0451
0452
0453
0454
0455
0456
0457 Status AppendValues(const std::vector<uint8_t>& values,
0458 const std::vector<bool>& is_valid);
0459
0460
0461
0462
0463 Status AppendValues(const std::vector<uint8_t>& values);
0464
0465
0466
0467
0468
0469
0470 Status AppendValues(const std::vector<bool>& values, const std::vector<bool>& is_valid);
0471
0472
0473
0474
0475 Status AppendValues(const std::vector<bool>& values);
0476
0477
0478
0479
0480
0481
0482 template <typename ValuesIter>
0483 Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
0484 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
0485 ARROW_RETURN_NOT_OK(Reserve(length));
0486 data_builder_.UnsafeAppend<false>(
0487 length, [&values_begin]() -> bool { return *values_begin++; });
0488
0489 UnsafeSetNotNull(length);
0490 return Status::OK();
0491 }
0492
0493
0494
0495
0496
0497
0498
0499 template <typename ValuesIter, typename ValidIter>
0500 enable_if_t<!std::is_pointer<ValidIter>::value, Status> AppendValues(
0501 ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
0502 static_assert(!internal::is_null_pointer<ValidIter>::value,
0503 "Don't pass a NULLPTR directly as valid_begin, use the 2-argument "
0504 "version instead");
0505 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
0506 ARROW_RETURN_NOT_OK(Reserve(length));
0507
0508 data_builder_.UnsafeAppend<false>(
0509 length, [&values_begin]() -> bool { return *values_begin++; });
0510 null_bitmap_builder_.UnsafeAppend<true>(
0511 length, [&valid_begin]() -> bool { return *valid_begin++; });
0512 length_ = null_bitmap_builder_.length();
0513 null_count_ = null_bitmap_builder_.false_count();
0514 return Status::OK();
0515 }
0516
0517
0518 template <typename ValuesIter, typename ValidIter>
0519 enable_if_t<std::is_pointer<ValidIter>::value, Status> AppendValues(
0520 ValuesIter values_begin, ValuesIter values_end, ValidIter valid_begin) {
0521 int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
0522 ARROW_RETURN_NOT_OK(Reserve(length));
0523 data_builder_.UnsafeAppend<false>(
0524 length, [&values_begin]() -> bool { return *values_begin++; });
0525
0526 if (valid_begin == NULLPTR) {
0527 UnsafeSetNotNull(length);
0528 } else {
0529 null_bitmap_builder_.UnsafeAppend<true>(
0530 length, [&valid_begin]() -> bool { return *valid_begin++; });
0531 }
0532 length_ = null_bitmap_builder_.length();
0533 null_count_ = null_bitmap_builder_.false_count();
0534 return Status::OK();
0535 }
0536
0537 Status AppendValues(int64_t length, bool value);
0538
0539 Status AppendArraySlice(const ArraySpan& array, int64_t offset,
0540 int64_t length) override {
0541 return AppendValues(array.GetValues<uint8_t>(1, 0), length,
0542 array.GetValues<uint8_t>(0, 0), array.offset + offset);
0543 }
0544
0545 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
0546
0547
0548 using ArrayBuilder::Finish;
0549
0550
0551 Status Finish(std::shared_ptr<BooleanArray>* out) { return FinishTyped(out); }
0552
0553 void Reset() override;
0554 Status Resize(int64_t capacity) override;
0555
0556 std::shared_ptr<DataType> type() const override { return boolean(); }
0557
0558 protected:
0559 TypedBufferBuilder<bool> data_builder_;
0560 };
0561
0562 }