Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:54

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
#include <algorithm>  // IWYU pragma: keep
#include <cstdint>
#include <limits>
#include <memory>
#include <optional>
#include <utility>
#include <vector>

#include "arrow/array/array_base.h"
#include "arrow/array/array_primitive.h"
#include "arrow/buffer.h"
#include "arrow/buffer_builder.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
0036 
0037 namespace arrow {
0038 
0039 namespace internal {
0040 
0041 template <class Builder, class V>
0042 class ArrayBuilderExtraOps {
0043  public:
0044   /// \brief Append a value from an optional or null if it has no value.
0045   Status AppendOrNull(const std::optional<V>& value) {
0046     auto* self = static_cast<Builder*>(this);
0047     return value.has_value() ? self->Append(*value) : self->AppendNull();
0048   }
0049 
0050   /// \brief Append a value from an optional or null if it has no value.
0051   ///
0052   /// Unsafe methods don't check existing size.
0053   void UnsafeAppendOrNull(const std::optional<V>& value) {
0054     auto* self = static_cast<Builder*>(this);
0055     return value.has_value() ? self->UnsafeAppend(*value) : self->UnsafeAppendNull();
0056   }
0057 };
0058 
0059 }  // namespace internal
0060 
0061 /// \defgroup numeric-builders Concrete builder subclasses for numeric types
0062 /// @{
0063 /// @}
0064 
0065 /// \defgroup temporal-builders Concrete builder subclasses for temporal types
0066 /// @{
0067 /// @}
0068 
0069 /// \defgroup binary-builders Concrete builder subclasses for binary types
0070 /// @{
0071 /// @}
0072 
0073 /// \defgroup nested-builders Concrete builder subclasses for nested types
0074 /// @{
0075 /// @}
0076 
0077 /// \defgroup dictionary-builders Concrete builder subclasses for dictionary types
0078 /// @{
0079 /// @}
0080 
0081 /// \defgroup run-end-encoded-builders Concrete builder subclasses for run-end encoded
0082 /// arrays
0083 /// @{
0084 /// @}
0085 
/// Builder capacity constant, equal to 32 (1 << 5).
/// NOTE(review): presumably the smallest capacity a builder allocates; its use
/// is not visible in this header -- confirm against the implementation.
///
/// Declared `inline` so that every translation unit including this header
/// refers to one single object instead of a per-TU internal-linkage copy.
inline constexpr int64_t kMinBuilderCapacity = 1 << 5;

/// Element-count limit constant, equal to the largest int32_t value minus one
/// (2^31 - 2).
/// NOTE(review): presumably the cap for containers using 32-bit offsets; its
/// use is not visible in this header -- confirm against the callers.
inline constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
0088 
0089 /// Base class for all data array builders.
0090 ///
0091 /// This class provides a facilities for incrementally building the null bitmap
0092 /// (see Append methods) and as a side effect the current number of slots and
0093 /// the null count.
0094 ///
0095 /// \note Users are expected to use builders as one of the concrete types below.
0096 /// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use.
0097 class ARROW_EXPORT ArrayBuilder {
0098  public:
0099   explicit ArrayBuilder(MemoryPool* pool, int64_t alignment = kDefaultBufferAlignment)
0100       : pool_(pool), alignment_(alignment), null_bitmap_builder_(pool, alignment) {}
0101 
0102   ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);
0103 
0104   virtual ~ArrayBuilder() = default;
0105 
0106   /// For nested types. Since the objects are owned by this class instance, we
0107   /// skip shared pointers and just return a raw pointer
0108   ArrayBuilder* child(int i) { return children_[i].get(); }
0109 
0110   const std::shared_ptr<ArrayBuilder>& child_builder(int i) const { return children_[i]; }
0111 
0112   int num_children() const { return static_cast<int>(children_.size()); }
0113 
0114   virtual int64_t length() const { return length_; }
0115   int64_t null_count() const { return null_count_; }
0116   int64_t capacity() const { return capacity_; }
0117 
0118   /// \brief Ensure that enough memory has been allocated to fit the indicated
0119   /// number of total elements in the builder, including any that have already
0120   /// been appended. Does not account for reallocations that may be due to
0121   /// variable size data, like binary values. To make space for incremental
0122   /// appends, use Reserve instead.
0123   ///
0124   /// \param[in] capacity the minimum number of total array values to
0125   ///            accommodate. Must be greater than the current capacity.
0126   /// \return Status
0127   virtual Status Resize(int64_t capacity);
0128 
0129   /// \brief Ensure that there is enough space allocated to append the indicated
0130   /// number of elements without any further reallocation. Overallocation is
0131   /// used in order to minimize the impact of incremental Reserve() calls.
0132   /// Note that additional_capacity is relative to the current number of elements
0133   /// rather than to the current capacity, so calls to Reserve() which are not
0134   /// interspersed with addition of new elements may not increase the capacity.
0135   ///
0136   /// \param[in] additional_capacity the number of additional array values
0137   /// \return Status
0138   Status Reserve(int64_t additional_capacity) {
0139     auto current_capacity = capacity();
0140     auto min_capacity = length() + additional_capacity;
0141     if (min_capacity <= current_capacity) return Status::OK();
0142 
0143     // leave growth factor up to BufferBuilder
0144     auto new_capacity = BufferBuilder::GrowByFactor(current_capacity, min_capacity);
0145     return Resize(new_capacity);
0146   }
0147 
0148   /// Reset the builder.
0149   virtual void Reset();
0150 
0151   /// \brief Append a null value to builder
0152   virtual Status AppendNull() = 0;
0153   /// \brief Append a number of null values to builder
0154   virtual Status AppendNulls(int64_t length) = 0;
0155 
0156   /// \brief Append a non-null value to builder
0157   ///
0158   /// The appended value is an implementation detail, but the corresponding
0159   /// memory slot is guaranteed to be initialized.
0160   /// This method is useful when appending a null value to a parent nested type.
0161   virtual Status AppendEmptyValue() = 0;
0162 
0163   /// \brief Append a number of non-null values to builder
0164   ///
0165   /// The appended values are an implementation detail, but the corresponding
0166   /// memory slot is guaranteed to be initialized.
0167   /// This method is useful when appending null values to a parent nested type.
0168   virtual Status AppendEmptyValues(int64_t length) = 0;
0169 
0170   /// \brief Append a value from a scalar
0171   Status AppendScalar(const Scalar& scalar) { return AppendScalar(scalar, 1); }
0172   virtual Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
0173   virtual Status AppendScalars(const ScalarVector& scalars);
0174 
0175   /// \brief Append a range of values from an array.
0176   ///
0177   /// The given array must be the same type as the builder.
0178   virtual Status AppendArraySlice(const ArraySpan& ARROW_ARG_UNUSED(array),
0179                                   int64_t ARROW_ARG_UNUSED(offset),
0180                                   int64_t ARROW_ARG_UNUSED(length)) {
0181     return Status::NotImplemented("AppendArraySlice for builder for ", *type());
0182   }
0183 
0184   /// \brief Return result of builder as an internal generic ArrayData
0185   /// object. Resets builder except for dictionary builder
0186   ///
0187   /// \param[out] out the finalized ArrayData object
0188   /// \return Status
0189   virtual Status FinishInternal(std::shared_ptr<ArrayData>* out) = 0;
0190 
0191   /// \brief Return result of builder as an Array object.
0192   ///
0193   /// The builder is reset except for DictionaryBuilder.
0194   ///
0195   /// \param[out] out the finalized Array object
0196   /// \return Status
0197   Status Finish(std::shared_ptr<Array>* out);
0198 
0199   /// \brief Return result of builder as an Array object.
0200   ///
0201   /// The builder is reset except for DictionaryBuilder.
0202   ///
0203   /// \return The finalized Array object
0204   Result<std::shared_ptr<Array>> Finish();
0205 
0206   /// \brief Return the type of the built Array
0207   virtual std::shared_ptr<DataType> type() const = 0;
0208 
0209  protected:
0210   /// Append to null bitmap
0211   Status AppendToBitmap(bool is_valid);
0212 
0213   /// Vector append. Treat each zero byte as a null.   If valid_bytes is null
0214   /// assume all of length bits are valid.
0215   Status AppendToBitmap(const uint8_t* valid_bytes, int64_t length);
0216 
0217   /// Uniform append.  Append N times the same validity bit.
0218   Status AppendToBitmap(int64_t num_bits, bool value);
0219 
0220   /// Set the next length bits to not null (i.e. valid).
0221   Status SetNotNull(int64_t length);
0222 
0223   // Unsafe operations (don't check capacity/don't resize)
0224 
0225   void UnsafeAppendNull() { UnsafeAppendToBitmap(false); }
0226 
0227   // Append to null bitmap, update the length
0228   void UnsafeAppendToBitmap(bool is_valid) {
0229     null_bitmap_builder_.UnsafeAppend(is_valid);
0230     ++length_;
0231     if (!is_valid) ++null_count_;
0232   }
0233 
0234   // Vector append. Treat each zero byte as a nullzero. If valid_bytes is null
0235   // assume all of length bits are valid.
0236   void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int64_t length) {
0237     if (valid_bytes == NULLPTR) {
0238       return UnsafeSetNotNull(length);
0239     }
0240     null_bitmap_builder_.UnsafeAppend(valid_bytes, length);
0241     length_ += length;
0242     null_count_ = null_bitmap_builder_.false_count();
0243   }
0244 
0245   // Vector append. Copy from a given bitmap. If bitmap is null assume
0246   // all of length bits are valid.
0247   void UnsafeAppendToBitmap(const uint8_t* bitmap, int64_t offset, int64_t length) {
0248     if (bitmap == NULLPTR) {
0249       return UnsafeSetNotNull(length);
0250     }
0251     null_bitmap_builder_.UnsafeAppend(bitmap, offset, length);
0252     length_ += length;
0253     null_count_ = null_bitmap_builder_.false_count();
0254   }
0255 
0256   // Append the same validity value a given number of times.
0257   void UnsafeAppendToBitmap(const int64_t num_bits, bool value) {
0258     if (value) {
0259       UnsafeSetNotNull(num_bits);
0260     } else {
0261       UnsafeSetNull(num_bits);
0262     }
0263   }
0264 
0265   void UnsafeAppendToBitmap(const std::vector<bool>& is_valid);
0266 
0267   // Set the next validity bits to not null (i.e. valid).
0268   void UnsafeSetNotNull(int64_t length);
0269 
0270   // Set the next validity bits to null (i.e. invalid).
0271   void UnsafeSetNull(int64_t length);
0272 
0273   static Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer);
0274 
0275   /// \brief Finish to an array of the specified ArrayType
0276   template <typename ArrayType>
0277   Status FinishTyped(std::shared_ptr<ArrayType>* out) {
0278     std::shared_ptr<Array> out_untyped;
0279     ARROW_RETURN_NOT_OK(Finish(&out_untyped));
0280     *out = std::static_pointer_cast<ArrayType>(std::move(out_untyped));
0281     return Status::OK();
0282   }
0283 
0284   // Check the requested capacity for validity
0285   Status CheckCapacity(int64_t new_capacity) {
0286     if (ARROW_PREDICT_FALSE(new_capacity < 0)) {
0287       return Status::Invalid(
0288           "Resize capacity must be positive (requested: ", new_capacity, ")");
0289     }
0290 
0291     if (ARROW_PREDICT_FALSE(new_capacity < length_)) {
0292       return Status::Invalid("Resize cannot downsize (requested: ", new_capacity,
0293                              ", current length: ", length_, ")");
0294     }
0295 
0296     return Status::OK();
0297   }
0298 
0299   // Check for array type
0300   Status CheckArrayType(const std::shared_ptr<DataType>& expected_type,
0301                         const Array& array, const char* message);
0302   Status CheckArrayType(Type::type expected_type, const Array& array,
0303                         const char* message);
0304 
0305   MemoryPool* pool_;
0306   int64_t alignment_;
0307 
0308   TypedBufferBuilder<bool> null_bitmap_builder_;
0309   int64_t null_count_ = 0;
0310 
0311   // Array length, so far. Also, the index of the next element to be added
0312   int64_t length_ = 0;
0313   int64_t capacity_ = 0;
0314 
0315   // Child value array builders. These are owned by this class
0316   std::vector<std::shared_ptr<ArrayBuilder>> children_;
0317 
0318  private:
0319   ARROW_DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
0320 };
0321 
0322 /// \brief Construct an empty ArrayBuilder corresponding to the data
0323 /// type
0324 /// \param[in] pool the MemoryPool to use for allocations
0325 /// \param[in] type the data type to create the builder for
0326 /// \param[out] out the created ArrayBuilder
0327 ARROW_EXPORT
0328 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
0329                    std::unique_ptr<ArrayBuilder>* out);
0330 
0331 inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilder(
0332     const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
0333   std::unique_ptr<ArrayBuilder> out;
0334   ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &out));
0335   return out;
0336 }
0337 
0338 /// \brief Construct an empty ArrayBuilder corresponding to the data
0339 /// type, where any top-level or nested dictionary builders return the
0340 /// exact index type specified by the type.
0341 ARROW_EXPORT
0342 Status MakeBuilderExactIndex(MemoryPool* pool, const std::shared_ptr<DataType>& type,
0343                              std::unique_ptr<ArrayBuilder>* out);
0344 
0345 inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilderExactIndex(
0346     const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
0347   std::unique_ptr<ArrayBuilder> out;
0348   ARROW_RETURN_NOT_OK(MakeBuilderExactIndex(pool, type, &out));
0349   return out;
0350 }
0351 
0352 /// \brief Construct an empty DictionaryBuilder initialized optionally
0353 /// with a preexisting dictionary
0354 /// \param[in] pool the MemoryPool to use for allocations
0355 /// \param[in] type the dictionary type to create the builder for
0356 /// \param[in] dictionary the initial dictionary, if any. May be nullptr
0357 /// \param[out] out the created ArrayBuilder
0358 ARROW_EXPORT
0359 Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
0360                              const std::shared_ptr<Array>& dictionary,
0361                              std::unique_ptr<ArrayBuilder>* out);
0362 
0363 inline Result<std::unique_ptr<ArrayBuilder>> MakeDictionaryBuilder(
0364     const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
0365     MemoryPool* pool = default_memory_pool()) {
0366   std::unique_ptr<ArrayBuilder> out;
0367   ARROW_RETURN_NOT_OK(MakeDictionaryBuilder(pool, type, dictionary, &out));
0368   return out;
0369 }
0370 
0371 }  // namespace arrow