Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:53

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cstdint>
0021 #include <iosfwd>
0022 #include <memory>
0023 #include <string>
0024 #include <vector>
0025 
0026 #include "arrow/array/data.h"
0027 #include "arrow/buffer.h"
0028 #include "arrow/compare.h"
0029 #include "arrow/result.h"
0030 #include "arrow/status.h"
0031 #include "arrow/type.h"
0032 #include "arrow/util/bit_util.h"
0033 #include "arrow/util/macros.h"
0034 #include "arrow/util/visibility.h"
0035 #include "arrow/visitor.h"
0036 
0037 namespace arrow {
0038 
0039 // ----------------------------------------------------------------------
0040 // User array accessor types
0041 
0042 /// \brief Array base type
0043 /// Immutable data array with some logical type and some length.
0044 ///
0045 /// Any memory is owned by the respective Buffer instance (or its parents).
0046 ///
0047 /// The base class is only required to have a null bitmap buffer if the null
0048 /// count is greater than 0
0049 ///
0050 /// If known, the null count can be provided in the base Array constructor. If
0051 /// the null count is not known, pass -1 to indicate that the null count is to
0052 /// be computed on the first call to null_count()
0053 class ARROW_EXPORT Array {
0054  public:
0055   virtual ~Array() = default;
0056 
0057   /// \brief Return true if value at index is null. Does not boundscheck
0058   bool IsNull(int64_t i) const { return !IsValid(i); }
0059 
0060   /// \brief Return true if value at index is valid (not null). Does not
0061   /// boundscheck
0062   bool IsValid(int64_t i) const {
         // Fast path: a cached validity bitmap pointer exists, so read the bit for
         // slot i. The cached pointer is not adjusted for slicing, hence the
         // explicit data_->offset.
0063     if (null_bitmap_data_ != NULLPTR) {
0064       return bit_util::GetBit(null_bitmap_data_, i + data_->offset);
0065     }
0066     // Dispatching with a few conditionals like this makes IsNull more
0067     // efficient for how it is used in practice. Making IsNull virtual
0068     // would add a vtable lookup to every call and prevent inlining +
0069     // a potential inner-branch removal.
0070     if (type_id() == Type::SPARSE_UNION) {
0071       return !internal::IsNullSparseUnion(*data_, i);
0072     }
0073     if (type_id() == Type::DENSE_UNION) {
0074       return !internal::IsNullDenseUnion(*data_, i);
0075     }
0076     if (type_id() == Type::RUN_END_ENCODED) {
0077       return !internal::IsNullRunEndEncoded(*data_, i);
0078     }
         // No bitmap and none of the special-cased types: the result does not
         // depend on i. The array is reported valid unless the stored null_count
         // field equals the length. This reads data_->null_count directly rather
         // than going through null_count().
0079     return data_->null_count != data_->length;
0080   }
0081 
0082   /// \brief Return a Scalar containing the value of this array at i
0083   Result<std::shared_ptr<Scalar>> GetScalar(int64_t i) const;
0084 
0085   /// Size in the number of elements this array contains.
0086   int64_t length() const { return data_->length; }
0087 
0088   /// A relative position into another array's data, to enable zero-copy
0089   /// slicing. This value defaults to zero
0090   int64_t offset() const { return data_->offset; }
0091 
0092   /// The number of null entries in the array. If the null count was not known
0093   /// at time of construction (and set to a negative value), then the null
0094   /// count will be computed and cached on the first invocation of this
0095   /// function
0096   int64_t null_count() const;
0097 
0098   /// \brief Computes the logical null count for arrays of all types including
0099   /// those that do not have a validity bitmap like union and run-end encoded
0100   /// arrays
0101   ///
0102   /// If the array has a validity bitmap, this function behaves the same as
0103   /// null_count(). For types that have no validity bitmap, this function will
0104   /// recompute the null count every time it is called.
0105   ///
0106   /// \see GetNullCount
0107   int64_t ComputeLogicalNullCount() const;
0108 
       /// \brief Return the logical type stored in the underlying ArrayData
0109   const std::shared_ptr<DataType>& type() const { return data_->type; }
       /// \brief Return the id of the logical type (shorthand for type()->id())
0110   Type::type type_id() const { return data_->type->id(); }
0111 
0112   /// Buffer for the validity (null) bitmap, if any. Note that Union types
0113   /// never have a null bitmap.
0114   ///
0115   /// Note that for `null_count == 0` or for null type, this will be null.
0116   /// This buffer does not account for any slice offset
0117   const std::shared_ptr<Buffer>& null_bitmap() const { return data_->buffers[0]; }
0118 
0119   /// Raw pointer to the null bitmap.
0120   ///
0121   /// Note that for `null_count == 0` or for null type, this will be null.
0122   /// This buffer does not account for any slice offset
0123   const uint8_t* null_bitmap_data() const { return null_bitmap_data_; }
0124 
0125   /// Equality comparison with another array
0126   bool Equals(const Array& arr, const EqualOptions& = EqualOptions::Defaults()) const;
0127   bool Equals(const std::shared_ptr<Array>& arr,
0128               const EqualOptions& = EqualOptions::Defaults()) const;
0129 
0130   /// \brief Return the formatted unified diff of arrow::Diff between this
0131   /// Array and another Array
0132   std::string Diff(const Array& other) const;
0133 
0134   /// Approximate equality comparison with another array
0135   ///
0136   /// epsilon is only used if this is FloatArray or DoubleArray
0137   bool ApproxEquals(const std::shared_ptr<Array>& arr,
0138                     const EqualOptions& = EqualOptions::Defaults()) const;
0139   bool ApproxEquals(const Array& arr,
0140                     const EqualOptions& = EqualOptions::Defaults()) const;
0141 
0142   /// Compare if the range of slots specified are equal for the given array and
0143   /// this array.  end_idx exclusive.  This method does not bounds check.
0144   bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
0145                    const Array& other,
0146                    const EqualOptions& = EqualOptions::Defaults()) const;
0147   bool RangeEquals(int64_t start_idx, int64_t end_idx, int64_t other_start_idx,
0148                    const std::shared_ptr<Array>& other,
0149                    const EqualOptions& = EqualOptions::Defaults()) const;
       /// The two overloads below take the same named parameters as the two above,
       /// but with `other` as the first argument.
0150   bool RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
0151                    int64_t other_start_idx,
0152                    const EqualOptions& = EqualOptions::Defaults()) const;
0153   bool RangeEquals(const std::shared_ptr<Array>& other, int64_t start_idx,
0154                    int64_t end_idx, int64_t other_start_idx,
0155                    const EqualOptions& = EqualOptions::Defaults()) const;
0156 
0157   /// \brief Apply the ArrayVisitor::Visit() method specialized to the array type
0158   Status Accept(ArrayVisitor* visitor) const;
0159 
0160   /// Construct a zero-copy view of this array with the given type.
0161   ///
0162   /// This method checks if the types are layout-compatible.
0163   /// Nested types are traversed in depth-first order. Data buffers must have
0164   /// the same item sizes, even though the logical types may be different.
0165   /// An error is returned if the types are not layout-compatible.
0166   Result<std::shared_ptr<Array>> View(const std::shared_ptr<DataType>& type) const;
0167 
0168   /// \brief Construct a copy of the array with all buffers on destination
0169   /// Memory Manager
0170   ///
0171   /// This method recursively copies the array's buffers and those of its children
0172   /// onto the destination MemoryManager device and returns the new Array.
0173   Result<std::shared_ptr<Array>> CopyTo(const std::shared_ptr<MemoryManager>& to) const;
0174 
0175   /// \brief Construct a new array attempting to zero-copy view if possible.
0176   ///
0177   /// Like CopyTo this method recursively goes through all of the array's buffers
0178   /// and those of its children and first attempts to create zero-copy
0179   /// views on the destination MemoryManager device. If it can't, it falls back
0180   /// to performing a copy. See Buffer::ViewOrCopy.
0181   Result<std::shared_ptr<Array>> ViewOrCopyTo(
0182       const std::shared_ptr<MemoryManager>& to) const;
0183 
0184   /// Construct a zero-copy slice of the array with the indicated offset and
0185   /// length
0186   ///
0187   /// \param[in] offset the position of the first element in the constructed
0188   /// slice
0189   /// \param[in] length the length of the slice. If there are not enough
0190   /// elements in the array, the length will be adjusted accordingly
0191   ///
0192   /// \return a new object wrapped in std::shared_ptr<Array>
0193   std::shared_ptr<Array> Slice(int64_t offset, int64_t length) const;
0194 
0195   /// Slice from offset until end of the array
0196   std::shared_ptr<Array> Slice(int64_t offset) const;
0197 
0198   /// Input-checking variant of Array::Slice
0199   Result<std::shared_ptr<Array>> SliceSafe(int64_t offset, int64_t length) const;
0200   /// Input-checking variant of Array::Slice
0201   Result<std::shared_ptr<Array>> SliceSafe(int64_t offset) const;
0202 
       /// \brief Return the ArrayData object that backs this array
0203   const std::shared_ptr<ArrayData>& data() const { return data_; }
0204 
       /// \brief Return the number of entries in the underlying child_data,
       /// converted to int
0205   int num_fields() const { return static_cast<int>(data_->child_data.size()); }
0206 
0207   /// \return PrettyPrint representation of array suitable for debugging
0208   std::string ToString() const;
0209 
0210   /// \brief Perform cheap validation checks to determine obvious inconsistencies
0211   /// within the array's internal data.
0212   ///
0213   /// This is O(k) where k is the number of descendants.
0214   ///
0215   /// \return Status
0216   Status Validate() const;
0217 
0218   /// \brief Perform extensive validation checks to determine inconsistencies
0219   /// within the array's internal data.
0220   ///
0221   /// This is potentially O(k*n) where k is the number of descendants and n
0222   /// is the array length.
0223   ///
0224   /// \return Status
0225   Status ValidateFull() const;
0226 
0227   /// \brief Return the device_type that this array's data is allocated on
0228   ///
0229   /// This just delegates to calling device_type on the underlying ArrayData
0230   /// object which backs this Array.
0231   ///
0232   /// \return DeviceAllocationType
0233   DeviceAllocationType device_type() const { return data_->device_type(); }
0234 
0235   /// \brief Return the statistics of this Array
0236   ///
0237   /// This just delegates to calling statistics on the underlying ArrayData
0238   /// object which backs this Array.
0239   ///
0240   /// \return const std::shared_ptr<ArrayStatistics>&
0241   const std::shared_ptr<ArrayStatistics>& statistics() const { return data_->statistics; }
0242 
0243  protected:
0244   Array() = default;
       // NOTE(review): this macro is defined outside this file (presumably in
       // arrow/util/macros.h). Going by its name it defaults the move constructor
       // and move assignment -- confirm against the macro definition.
0245   ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
0246 
       /// Backing data container; assigned in SetData()
0247   std::shared_ptr<ArrayData> data_;
       /// Raw validity bitmap pointer cached in SetData(). When it is NULLPTR,
       /// IsValid() falls back to its type-based checks.
0248   const uint8_t* null_bitmap_data_ = NULLPTR;
0249 
0250   /// Protected method for constructors
       ///
       /// Caches the raw pointer for buffer 0 (the buffer returned by
       /// null_bitmap()) requested with offset 0, or NULLPTR when `data` has no
       /// buffers at all, and then stores `data` as the backing ArrayData.
0251   void SetData(const std::shared_ptr<ArrayData>& data) {
0252     if (data->buffers.size() > 0) {
0253       null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0);
0254     } else {
0255       null_bitmap_data_ = NULLPTR;
0256     }
0257     data_ = data;
0258   }
0259 
0260  private:
       // NOTE(review): this macro is defined outside this file. Going by its name
       // it removes the copy constructor and copy assignment -- confirm against
       // the macro definition.
0261   ARROW_DISALLOW_COPY_AND_ASSIGN(Array);
0262 
       // PrintTo is only declared here as an exported friend; its definition is
       // not part of this header.
0263   ARROW_FRIEND_EXPORT friend void PrintTo(const Array& x, std::ostream* os);
0264 };
0265 
0266 static inline std::ostream& operator<<(std::ostream& os, const Array& x) {
       // Stream insertion for Array: writes x.ToString() to `os` and returns
       // `os` so that insertions can be chained.
0267   os << x.ToString();
0268   return os;
0269 }
0270 
0271 /// Base class for non-nested arrays
0272 class ARROW_EXPORT FlatArray : public Array {
0273  protected:
       // Inherit Array's constructors rather than declaring new ones; this class
       // adds no members of its own.
0274   using Array::Array;
0275 };
0276 
0277 /// Base class for arrays of fixed-size logical types
0278 class ARROW_EXPORT PrimitiveArray : public FlatArray {
0279  public:
       /// \brief Construct from a type, a length, a data buffer and an optional
       /// null bitmap
       ///
       /// null_bitmap defaults to NULLPTR, null_count to kUnknownNullCount and
       /// offset to 0.
       /// NOTE(review): only declared here; how the arguments are stored is in
       /// the out-of-line definition -- confirm there.
0280   PrimitiveArray(const std::shared_ptr<DataType>& type, int64_t length,
0281                  const std::shared_ptr<Buffer>& data,
0282                  const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
0283                  int64_t null_count = kUnknownNullCount, int64_t offset = 0);
0284 
0285   /// Does not account for any slice offset
0286   const std::shared_ptr<Buffer>& values() const { return data_->buffers[1]; }
0287 
0288  protected:
       /// Default construction leaves the cached values pointer null
0289   PrimitiveArray() : raw_values_(NULLPTR) {}
0290 
       /// Stores `data` through Array::SetData, then caches the raw pointer for
       /// buffer 1 (the buffer returned by values()) requested with offset 0.
       /// This non-virtual method hides Array::SetData.
0291   void SetData(const std::shared_ptr<ArrayData>& data) {
0292     this->Array::SetData(data);
0293     raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0);
0294   }
0295 
       /// Construct directly from ArrayData; raw_values_ is assigned by SetData()
0296   explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
0297 
       /// Raw pointer for buffer 1 as cached by SetData(). It is requested with
       /// offset 0, so presumably it is not adjusted for the slice offset.
0298   const uint8_t* raw_values_;
0299 };
0300 
0301 /// Degenerate null type Array
0302 class ARROW_EXPORT NullArray : public FlatArray {
0303  public:
0304   using TypeClass = NullType;
0305 
       /// \brief Wrap existing ArrayData. Note that SetData() overwrites
       /// `data->null_count` on the object passed in.
0306   explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
       /// \brief Presumably constructs a null array with `length` slots; only
       /// declared here, confirm against the out-of-line definition.
0307   explicit NullArray(int64_t length);
0308 
0309  private:
       /// Keeps no validity bitmap pointer and forces null_count to equal length
       /// before storing `data`. The write goes to the ArrayData object shared
       /// with the caller, not to a copy.
0310   void SetData(const std::shared_ptr<ArrayData>& data) {
0311     null_bitmap_data_ = NULLPTR;
0312     data->null_count = data->length;
0313     data_ = data;
0314   }
0315 };
0316 
0317 }  // namespace arrow