File indexing completed on 2025-08-28 08:26:54
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <algorithm> // IWYU pragma: keep
0021 #include <cstdint>
0022 #include <limits>
0023 #include <memory>
0024 #include <utility>
0025 #include <vector>
0026
0027 #include "arrow/array/array_base.h"
0028 #include "arrow/array/array_primitive.h"
0029 #include "arrow/buffer.h"
0030 #include "arrow/buffer_builder.h"
0031 #include "arrow/result.h"
0032 #include "arrow/status.h"
0033 #include "arrow/type_fwd.h"
0034 #include "arrow/util/macros.h"
0035 #include "arrow/util/visibility.h"
0036
0037 namespace arrow {
0038
0039 namespace internal {
0040
0041 template <class Builder, class V>
0042 class ArrayBuilderExtraOps {
0043 public:
0044
0045 Status AppendOrNull(const std::optional<V>& value) {
0046 auto* self = static_cast<Builder*>(this);
0047 return value.has_value() ? self->Append(*value) : self->AppendNull();
0048 }
0049
0050
0051
0052
0053 void UnsafeAppendOrNull(const std::optional<V>& value) {
0054 auto* self = static_cast<Builder*>(this);
0055 return value.has_value() ? self->UnsafeAppend(*value) : self->UnsafeAppendNull();
0056 }
0057 };
0058
0059 }
0060
0061
0062
0063
0064
0065
0066
0067
0068
0069
0070
0071
0072
0073
0074
0075
0076
0077
0078
0079
0080
0081
0082
0083
0084
0085
/// Builder capacity constant: 2^5 = 32 elements.
constexpr int64_t kMinBuilderCapacity = int64_t{1} << 5;
/// Element-count limit constant: one less than the largest int32_t value
/// (2^31 - 2).
constexpr int64_t kListMaximumElements =
    static_cast<int64_t>(std::numeric_limits<int32_t>::max()) - 1;
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097 class ARROW_EXPORT ArrayBuilder {
0098 public:
0099 explicit ArrayBuilder(MemoryPool* pool, int64_t alignment = kDefaultBufferAlignment)
0100 : pool_(pool), alignment_(alignment), null_bitmap_builder_(pool, alignment) {}
0101
0102 ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);
0103
0104 virtual ~ArrayBuilder() = default;
0105
0106
0107
0108 ArrayBuilder* child(int i) { return children_[i].get(); }
0109
0110 const std::shared_ptr<ArrayBuilder>& child_builder(int i) const { return children_[i]; }
0111
0112 int num_children() const { return static_cast<int>(children_.size()); }
0113
0114 virtual int64_t length() const { return length_; }
0115 int64_t null_count() const { return null_count_; }
0116 int64_t capacity() const { return capacity_; }
0117
0118
0119
0120
0121
0122
0123
0124
0125
0126
0127 virtual Status Resize(int64_t capacity);
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138 Status Reserve(int64_t additional_capacity) {
0139 auto current_capacity = capacity();
0140 auto min_capacity = length() + additional_capacity;
0141 if (min_capacity <= current_capacity) return Status::OK();
0142
0143
0144 auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity);
0145 return Resize(new_capacity);
0146 }
0147
0148
0149 virtual void Reset();
0150
0151
0152 virtual Status AppendNull() = 0;
0153
0154 virtual Status AppendNulls(int64_t length) = 0;
0155
0156
0157
0158
0159
0160
0161 virtual Status AppendEmptyValue() = 0;
0162
0163
0164
0165
0166
0167
0168 virtual Status AppendEmptyValues(int64_t length) = 0;
0169
0170
0171 Status AppendScalar(const Scalar& scalar) { return AppendScalar(scalar, 1); }
0172 virtual Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
0173 virtual Status AppendScalars(const ScalarVector& scalars);
0174
0175
0176
0177
0178 virtual Status AppendArraySlice(const ArraySpan& ARROW_ARG_UNUSED(array),
0179 int64_t ARROW_ARG_UNUSED(offset),
0180 int64_t ARROW_ARG_UNUSED(length)) {
0181 return Status::NotImplemented("AppendArraySlice for builder for ", *type());
0182 }
0183
0184
0185
0186
0187
0188
0189 virtual Status FinishInternal(std::shared_ptr<ArrayData>* out) = 0;
0190
0191
0192
0193
0194
0195
0196
0197 Status Finish(std::shared_ptr<Array>* out);
0198
0199
0200
0201
0202
0203
0204 Result<std::shared_ptr<Array>> Finish();
0205
0206
0207 virtual std::shared_ptr<DataType> type() const = 0;
0208
0209 protected:
0210
0211 Status AppendToBitmap(bool is_valid);
0212
0213
0214
0215 Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
0216
0217
0218 Status AppendToBitmap(int64_t num_bits, bool value);
0219
0220
0221 Status SetNotNull(int64_t length);
0222
0223
0224
0225 void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
0226
0227
0228 void UnsafeAppendToBitmap(bool is_valid) {
0229 null_bitmap_builder_.UnsafeAppend(is_valid);
0230 ++length_;
0231 if (!is_valid) ++null_count_;
0232 }
0233
0234
0235
0236 void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
0237 if (valid_bytes == NULLPTR) {
0238 return UnsafeSetNotNull(length);
0239 }
0240 null_bitmap_builder_.UnsafeAppend(valid_bytes, length);
0241 length_ += length;
0242 null_count_ = null_bitmap_builder_.false_count();
0243 }
0244
0245
0246
0247 void UnsafeAppendToBitmap(const uint8_t* bitmap, int64_t offset, int64_t length) {
0248 if (bitmap == NULLPTR) {
0249 return UnsafeSetNotNull(length);
0250 }
0251 null_bitmap_builder_.UnsafeAppend(bitmap, offset, length);
0252 length_ += length;
0253 null_count_ = null_bitmap_builder_.false_count();
0254 }
0255
0256
0257 void UnsafeAppendToBitmap(const int64_t num_bits, bool value) {
0258 if (value) {
0259 UnsafeSetNotNull(num_bits);
0260 } else {
0261 UnsafeSetNull(num_bits);
0262 }
0263 }
0264
0265 void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
0266
0267
0268 void UnsafeSetNotNull(int64_t length);
0269
0270
0271 void UnsafeSetNull(int64_t length);
0272
0273 static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
0274
0275
0276 template <typename ArrayType>
0277 Status FinishTyped(std::shared_ptr<ArrayType>* out) {
0278 std::shared_ptr<Array> out_untyped;
0279 ARROW_RETURN_NOT_OK(Finish(&out_untyped));
0280 *out = std::static_pointer_cast<ArrayType>(std::move(out_untyped));
0281 return Status::OK();
0282 }
0283
0284
0285 Status CheckCapacity(int64_t new_capacity) {
0286 if (ARROW_PREDICT_FALSE(new_capacity < 0)) {
0287 return Status::Invalid(
0288 "Resize capacity must be positive (requested: ", new_capacity, ")");
0289 }
0290
0291 if (ARROW_PREDICT_FALSE(new_capacity < length_)) {
0292 return Status::Invalid("Resize cannot downsize (requested: ", new_capacity,
0293 ", current length: ", length_, ")");
0294 }
0295
0296 return Status::OK();
0297 }
0298
0299
0300 Status CheckArrayType(const std::shared_ptr<DataType>& expected_type,
0301 const Array& array, const char* message);
0302 Status CheckArrayType(Type::type expected_type, const Array& array,
0303 const char* message);
0304
0305 MemoryPool* pool_;
0306 int64_t alignment_;
0307
0308 TypedBufferBuilder<bool> null_bitmap_builder_;
0309 int64_t null_count_ = 0;
0310
0311
0312 int64_t length_ = 0;
0313 int64_t capacity_ = 0;
0314
0315
0316 std::vector<std::shared_ptr<ArrayBuilder>> children_;
0317
0318 private:
0319 ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
0320 };
0321
0322
0323
0324
0325
0326
0327 ARROW_EXPORT
0328 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
0329 std::unique_ptr<ArrayBuilder>* out);
0330
0331 inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilder(
0332 const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
0333 std::unique_ptr<ArrayBuilder> out;
0334 ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &out));
0335 return out;
0336 }
0337
0338
0339
0340
0341 ARROW_EXPORT
0342 Status MakeBuilderExactIndex(MemoryPool* pool, const std::shared_ptr<DataType>& type,
0343 std::unique_ptr<ArrayBuilder>* out);
0344
0345 inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilderExactIndex(
0346 const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
0347 std::unique_ptr<ArrayBuilder> out;
0348 ARROW_RETURN_NOT_OK(MakeBuilderExactIndex(pool, type, &out));
0349 return out;
0350 }
0351
0352
0353
0354
0355
0356
0357
0358 ARROW_EXPORT
0359 Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
0360 const std::shared_ptr<Array>& dictionary,
0361 std::unique_ptr<ArrayBuilder>* out);
0362
0363 inline Result<std::unique_ptr<ArrayBuilder>> MakeDictionaryBuilder(
0364 const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
0365 MemoryPool* pool = default_memory_pool()) {
0366 std::unique_ptr<ArrayBuilder> out;
0367 ARROW_RETURN_NOT_OK(MakeDictionaryBuilder(pool, type, dictionary, &out));
0368 return out;
0369 }
0370
0371 }