Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:55

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <atomic>  // IWYU pragma: export
0021 #include <cassert>
0022 #include <cstdint>
0023 #include <memory>
0024 #include <utility>
0025 #include <vector>
0026 
0027 #include "arrow/array/statistics.h"
0028 #include "arrow/buffer.h"
0029 #include "arrow/result.h"
0030 #include "arrow/type.h"
0031 #include "arrow/type_fwd.h"
0032 #include "arrow/util/bit_util.h"
0033 #include "arrow/util/macros.h"
0034 #include "arrow/util/span.h"
0035 #include "arrow/util/visibility.h"
0036 
0037 namespace arrow {
0038 
0039 namespace internal {
0040 // ----------------------------------------------------------------------
0041 // Null handling for types without a validity bitmap and the dictionary type
0042 
0043 ARROW_EXPORT bool IsNullSparseUnion(const ArrayData& data, int64_t i);
0044 ARROW_EXPORT bool IsNullDenseUnion(const ArrayData& data, int64_t i);
0045 ARROW_EXPORT bool IsNullRunEndEncoded(const ArrayData& data, int64_t i);
0046 
0047 ARROW_EXPORT bool UnionMayHaveLogicalNulls(const ArrayData& data);
0048 ARROW_EXPORT bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data);
0049 ARROW_EXPORT bool DictionaryMayHaveLogicalNulls(const ArrayData& data);
0050 
0051 }  // namespace internal
0052 
// Sentinel meaning "null count not computed yet".
//
// The null count of a sliced range cannot be known without some computation,
// so instead of computing it eagerly the null count is set to -1 (any negative
// number will do). It is then computed the first time Array::null_count is
// called. See ARROW-33
constexpr int64_t kUnknownNullCount{-1};
0058 
0059 // ----------------------------------------------------------------------
0060 // Generic array data container
0061 
0062 /// \class ArrayData
0063 /// \brief Mutable container for generic Arrow array data
0064 ///
0065 /// This data structure is a self-contained representation of the memory and
0066 /// metadata inside an Arrow array data structure (called vectors in Java). The
0067 /// classes arrow::Array and its subclasses provide strongly-typed accessors
0068 /// with support for the visitor pattern and other affordances.
0069 ///
0070 /// This class is designed for easy internal data manipulation, analytical data
0071 /// processing, and data transport to and from IPC messages. For example, we
0072 /// could cast from int64 to float64 like so:
0073 ///
0074 /// Int64Array arr = GetMyData();
0075 /// auto new_data = arr.data()->Copy();
0076 /// new_data->type = arrow::float64();
0077 /// DoubleArray double_arr(new_data);
0078 ///
0079 /// This object is also useful in an analytics setting where memory may be
0080 /// reused. For example, if we had a group of operations all returning doubles,
0081 /// say:
0082 ///
0083 /// Log(Sqrt(Expr(arr)))
0084 ///
0085 /// Then the low-level implementations of each of these functions could have
0086 /// the signatures
0087 ///
0088 /// void Log(const ArrayData& values, ArrayData* out);
0089 ///
0090 /// As another example a function may consume one or more memory buffers in an
0091 /// input array and replace them with newly-allocated data, changing the output
0092 /// data type as well.
0093 struct ARROW_EXPORT ArrayData {
0094   ArrayData() = default;
0095 
0096   ArrayData(std::shared_ptr<DataType> type, int64_t length,
0097             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
0098       : type(std::move(type)), length(length), null_count(null_count), offset(offset) {}
0099 
0100   ArrayData(std::shared_ptr<DataType> type, int64_t length,
0101             std::vector<std::shared_ptr<Buffer>> buffers,
0102             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
0103       : ArrayData(std::move(type), length, null_count, offset) {
0104     this->buffers = std::move(buffers);
0105 #ifndef NDEBUG
0106     // in debug mode, call the `device_type` function to trigger
0107     // the DCHECKs that validate all the buffers are on the same device
0108     ARROW_UNUSED(this->device_type());
0109 #endif
0110   }
0111 
0112   ArrayData(std::shared_ptr<DataType> type, int64_t length,
0113             std::vector<std::shared_ptr<Buffer>> buffers,
0114             std::vector<std::shared_ptr<ArrayData>> child_data,
0115             int64_t null_count = kUnknownNullCount, int64_t offset = 0)
0116       : ArrayData(std::move(type), length, null_count, offset) {
0117     this->buffers = std::move(buffers);
0118     this->child_data = std::move(child_data);
0119 #ifndef NDEBUG
0120     // in debug mode, call the `device_type` function to trigger
0121     // the DCHECKs that validate all the buffers (including children)
0122     // are on the same device
0123     ARROW_UNUSED(this->device_type());
0124 #endif
0125   }
0126 
0127   static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
0128                                          std::vector<std::shared_ptr<Buffer>> buffers,
0129                                          int64_t null_count = kUnknownNullCount,
0130                                          int64_t offset = 0);
0131 
0132   static std::shared_ptr<ArrayData> Make(
0133       std::shared_ptr<DataType> type, int64_t length,
0134       std::vector<std::shared_ptr<Buffer>> buffers,
0135       std::vector<std::shared_ptr<ArrayData>> child_data,
0136       int64_t null_count = kUnknownNullCount, int64_t offset = 0);
0137 
0138   static std::shared_ptr<ArrayData> Make(
0139       std::shared_ptr<DataType> type, int64_t length,
0140       std::vector<std::shared_ptr<Buffer>> buffers,
0141       std::vector<std::shared_ptr<ArrayData>> child_data,
0142       std::shared_ptr<ArrayData> dictionary, int64_t null_count = kUnknownNullCount,
0143       int64_t offset = 0);
0144 
0145   static std::shared_ptr<ArrayData> Make(std::shared_ptr<DataType> type, int64_t length,
0146                                          int64_t null_count = kUnknownNullCount,
0147                                          int64_t offset = 0);
0148 
0149   // Move constructor
0150   ArrayData(ArrayData&& other) noexcept
0151       : type(std::move(other.type)),
0152         length(other.length),
0153         null_count(other.null_count.load()),
0154         offset(other.offset),
0155         buffers(std::move(other.buffers)),
0156         child_data(std::move(other.child_data)),
0157         dictionary(std::move(other.dictionary)),
0158         statistics(std::move(other.statistics)) {}
0159 
0160   // Copy constructor
0161   ArrayData(const ArrayData& other) noexcept
0162       : type(other.type),
0163         length(other.length),
0164         null_count(other.null_count.load()),
0165         offset(other.offset),
0166         buffers(other.buffers),
0167         child_data(other.child_data),
0168         dictionary(other.dictionary),
0169         statistics(other.statistics) {}
0170 
0171   // Move assignment
0172   ArrayData& operator=(ArrayData&& other) {
0173     type = std::move(other.type);
0174     length = other.length;
0175     SetNullCount(other.null_count);
0176     offset = other.offset;
0177     buffers = std::move(other.buffers);
0178     child_data = std::move(other.child_data);
0179     dictionary = std::move(other.dictionary);
0180     statistics = std::move(other.statistics);
0181     return *this;
0182   }
0183 
0184   // Copy assignment
0185   ArrayData& operator=(const ArrayData& other) {
0186     type = other.type;
0187     length = other.length;
0188     SetNullCount(other.null_count);
0189     offset = other.offset;
0190     buffers = other.buffers;
0191     child_data = other.child_data;
0192     dictionary = other.dictionary;
0193     statistics = other.statistics;
0194     return *this;
0195   }
0196 
0197   std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); }
0198 
0199   /// \brief Copy all buffers and children recursively to destination MemoryManager
0200   ///
0201   /// This utilizes MemoryManager::CopyBuffer to create a new ArrayData object
0202   /// recursively copying the buffers and all child buffers to the destination
0203   /// memory manager. This includes dictionaries if applicable.
0204   Result<std::shared_ptr<ArrayData>> CopyTo(
0205       const std::shared_ptr<MemoryManager>& to) const;
0206   /// \brief View or Copy this ArrayData to destination memory manager.
0207   ///
0208   /// Tries to view the buffer contents on the given memory manager's device
0209   /// if possible (to avoid a copy) but falls back to copying if a no-copy view
0210   /// isn't supported.
0211   Result<std::shared_ptr<ArrayData>> ViewOrCopyTo(
0212       const std::shared_ptr<MemoryManager>& to) const;
0213 
0214   bool IsNull(int64_t i) const { return !IsValid(i); }
0215 
0216   bool IsValid(int64_t i) const {
0217     if (buffers[0] != NULLPTR) {
0218       return bit_util::GetBit(buffers[0]->data(), i + offset);
0219     }
0220     const auto type = this->type->id();
0221     if (type == Type::SPARSE_UNION) {
0222       return !internal::IsNullSparseUnion(*this, i);
0223     }
0224     if (type == Type::DENSE_UNION) {
0225       return !internal::IsNullDenseUnion(*this, i);
0226     }
0227     if (type == Type::RUN_END_ENCODED) {
0228       return !internal::IsNullRunEndEncoded(*this, i);
0229     }
0230     return null_count.load() != length;
0231   }
0232 
0233   // Access a buffer's data as a typed C pointer
0234   template <typename T>
0235   inline const T* GetValues(int i, int64_t absolute_offset) const {
0236     if (buffers[i]) {
0237       return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
0238     } else {
0239       return NULLPTR;
0240     }
0241   }
0242 
0243   template <typename T>
0244   inline const T* GetValues(int i) const {
0245     return GetValues<T>(i, offset);
0246   }
0247 
0248   // Like GetValues, but returns NULLPTR instead of aborting if the underlying
0249   // buffer is not a CPU buffer.
0250   template <typename T>
0251   inline const T* GetValuesSafe(int i, int64_t absolute_offset) const {
0252     if (buffers[i] && buffers[i]->is_cpu()) {
0253       return reinterpret_cast<const T*>(buffers[i]->data()) + absolute_offset;
0254     } else {
0255       return NULLPTR;
0256     }
0257   }
0258 
0259   template <typename T>
0260   inline const T* GetValuesSafe(int i) const {
0261     return GetValuesSafe<T>(i, offset);
0262   }
0263 
0264   // Access a buffer's data as a typed C pointer
0265   template <typename T>
0266   inline T* GetMutableValues(int i, int64_t absolute_offset) {
0267     if (buffers[i]) {
0268       return reinterpret_cast<T*>(buffers[i]->mutable_data()) + absolute_offset;
0269     } else {
0270       return NULLPTR;
0271     }
0272   }
0273 
0274   template <typename T>
0275   inline T* GetMutableValues(int i) {
0276     return GetMutableValues<T>(i, offset);
0277   }
0278 
0279   /// \brief Construct a zero-copy slice of the data with the given offset and length
0280   ///
0281   /// The associated `ArrayStatistics` is always discarded in a sliced
0282   /// `ArrayData`. Because `ArrayStatistics` in the original
0283   /// `ArrayData` may be invalid in a sliced `ArrayData`. If you want
0284   /// to reuse statistics in the original `ArrayData`, you need to do
0285   /// it by yourself.
0286   ///
0287   /// If the specified slice range has the same range as the original
0288   /// `ArrayData`, we can reuse statistics in the original
0289   /// `ArrayData`. Because it has the same data as the original
0290   /// `ArrayData`. But the associated `ArrayStatistics` is discarded
0291   /// in this case too. Use `Copy()` instead for the case.
0292   std::shared_ptr<ArrayData> Slice(int64_t offset, int64_t length) const;
0293 
0294   /// \brief Input-checking variant of Slice
0295   ///
0296   /// An Invalid Status is returned if the requested slice falls out of bounds.
0297   /// Note that unlike Slice, `length` isn't clamped to the available buffer size.
0298   Result<std::shared_ptr<ArrayData>> SliceSafe(int64_t offset, int64_t length) const;
0299 
0300   void SetNullCount(int64_t v) { null_count.store(v); }
0301 
0302   /// \brief Return physical null count, or compute and set it if it's not known
0303   int64_t GetNullCount() const;
0304 
0305   /// \brief Return true if the data has a validity bitmap and the physical null
0306   /// count is known to be non-zero or not yet known.
0307   ///
0308   /// Note that this is not the same as MayHaveLogicalNulls, which also checks
0309   /// for the presence of nulls in child data for types like unions and run-end
0310   /// encoded types.
0311   ///
0312   /// \see HasValidityBitmap
0313   /// \see MayHaveLogicalNulls
0314   bool MayHaveNulls() const {
0315     // If an ArrayData is slightly malformed it may have kUnknownNullCount set
0316     // but no buffer
0317     return null_count.load() != 0 && buffers[0] != NULLPTR;
0318   }
0319 
0320   /// \brief Return true if the data has a validity bitmap
0321   bool HasValidityBitmap() const { return buffers[0] != NULLPTR; }
0322 
0323   /// \brief Return true if the validity bitmap may have 0's in it, or if the
0324   /// child arrays (in the case of types without a validity bitmap) may have
0325   /// nulls, or if the dictionary of dictionary array may have nulls.
0326   ///
0327   /// This is not a drop-in replacement for MayHaveNulls, as historically
0328   /// MayHaveNulls() has been used to check for the presence of a validity
0329   /// bitmap that needs to be checked.
0330   ///
0331   /// Code that previously used MayHaveNulls() and then dealt with the validity
0332   /// bitmap directly can be fixed to handle all types correctly without
0333   /// performance degradation when handling most types by adopting
0334   /// HasValidityBitmap and MayHaveLogicalNulls.
0335   ///
0336   /// Before:
0337   ///
0338   ///     uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
0339   ///     for (int64_t i = 0; i < array.length; ++i) {
0340   ///       if (validity && !bit_util::GetBit(validity, i)) {
0341   ///         continue;  // skip a NULL
0342   ///       }
0343   ///       ...
0344   ///     }
0345   ///
0346   /// After:
0347   ///
0348   ///     bool all_valid = !array.MayHaveLogicalNulls();
0349   ///     uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR;
0350   ///     for (int64_t i = 0; i < array.length; ++i) {
0351   ///       bool is_valid = all_valid ||
0352   ///                       (validity && bit_util::GetBit(validity, i)) ||
0353   ///                       array.IsValid(i);
0354   ///       if (!is_valid) {
0355   ///         continue;  // skip a NULL
0356   ///       }
0357   ///       ...
0358   ///     }
0359   bool MayHaveLogicalNulls() const {
0360     if (buffers[0] != NULLPTR) {
0361       return null_count.load() != 0;
0362     }
0363     const auto t = type->id();
0364     if (t == Type::SPARSE_UNION || t == Type::DENSE_UNION) {
0365       return internal::UnionMayHaveLogicalNulls(*this);
0366     }
0367     if (t == Type::RUN_END_ENCODED) {
0368       return internal::RunEndEncodedMayHaveLogicalNulls(*this);
0369     }
0370     if (t == Type::DICTIONARY) {
0371       return internal::DictionaryMayHaveLogicalNulls(*this);
0372     }
0373     return null_count.load() != 0;
0374   }
0375 
0376   /// \brief Computes the logical null count for arrays of all types including
0377   /// those that do not have a validity bitmap like union and run-end encoded
0378   /// arrays
0379   ///
0380   /// If the array has a validity bitmap, this function behaves the same as
0381   /// GetNullCount. For types that have no validity bitmap, this function will
0382   /// recompute the null count every time it is called.
0383   ///
0384   /// \see GetNullCount
0385   int64_t ComputeLogicalNullCount() const;
0386 
0387   /// \brief Return the device_type of the underlying buffers and children
0388   ///
0389   /// If there are no buffers in this ArrayData object, it just returns
0390   /// DeviceAllocationType::kCPU as a default. We also assume that all buffers
0391   /// should be allocated on the same device type and perform DCHECKs to confirm
0392   /// this in debug mode.
0393   ///
0394   /// \return DeviceAllocationType
0395   DeviceAllocationType device_type() const;
0396 
0397   std::shared_ptr<DataType> type;
0398   int64_t length = 0;
0399   mutable std::atomic<int64_t> null_count{0};
0400   // The logical start point into the physical buffers (in values, not bytes).
0401   // Note that, for child data, this must be *added* to the child data's own offset.
0402   int64_t offset = 0;
0403   std::vector<std::shared_ptr<Buffer>> buffers;
0404   std::vector<std::shared_ptr<ArrayData>> child_data;
0405 
0406   // The dictionary for this Array, if any. Only used for dictionary type
0407   std::shared_ptr<ArrayData> dictionary;
0408 
0409   // The statistics for this Array.
0410   std::shared_ptr<ArrayStatistics> statistics;
0411 };
0412 
0413 /// \brief A non-owning Buffer reference
0414 struct ARROW_EXPORT BufferSpan {
0415   // It is the user of this class's responsibility to ensure that
0416   // buffers that were const originally are not written to
0417   // accidentally.
0418   uint8_t* data = NULLPTR;
0419   int64_t size = 0;
0420   // Pointer back to buffer that owns this memory
0421   const std::shared_ptr<Buffer>* owner = NULLPTR;
0422 
0423   template <typename T>
0424   const T* data_as() const {
0425     return reinterpret_cast<const T*>(data);
0426   }
0427   template <typename T>
0428   T* mutable_data_as() {
0429     return reinterpret_cast<T*>(data);
0430   }
0431 };
0432 
0433 /// \brief EXPERIMENTAL: A non-owning ArrayData reference that is cheaply
0434 /// copyable and does not contain any shared_ptr objects. Do not use in public
0435 /// APIs aside from compute kernels for now
0436 struct ARROW_EXPORT ArraySpan {
0437   const DataType* type = NULLPTR;
0438   int64_t length = 0;
0439   mutable int64_t null_count = kUnknownNullCount;
0440   int64_t offset = 0;
0441   BufferSpan buffers[3];
0442 
0443   ArraySpan() = default;
0444 
0445   explicit ArraySpan(const DataType* type, int64_t length) : type(type), length(length) {}
0446 
0447   ArraySpan(const ArrayData& data) {  // NOLINT implicit conversion
0448     SetMembers(data);
0449   }
0450   explicit ArraySpan(const Scalar& data) { FillFromScalar(data); }
0451 
0452   /// If dictionary-encoded, put dictionary in the first entry
0453   std::vector<ArraySpan> child_data;
0454 
0455   /// \brief Populate ArraySpan to look like an array of length 1 pointing at
0456   /// the data members of a Scalar value
0457   void FillFromScalar(const Scalar& value);
0458 
0459   void SetMembers(const ArrayData& data);
0460 
0461   void SetBuffer(int index, const std::shared_ptr<Buffer>& buffer) {
0462     this->buffers[index].data = const_cast<uint8_t*>(buffer->data());
0463     this->buffers[index].size = buffer->size();
0464     this->buffers[index].owner = &buffer;
0465   }
0466 
0467   const ArraySpan& dictionary() const { return child_data[0]; }
0468 
0469   /// \brief Return the number of buffers (out of 3) that are used to
0470   /// constitute this array
0471   int num_buffers() const;
0472 
0473   // Access a buffer's data as a typed C pointer
0474   template <typename T>
0475   inline T* GetValues(int i, int64_t absolute_offset) {
0476     return reinterpret_cast<T*>(buffers[i].data) + absolute_offset;
0477   }
0478 
0479   template <typename T>
0480   inline T* GetValues(int i) {
0481     return GetValues<T>(i, this->offset);
0482   }
0483 
0484   // Access a buffer's data as a typed C pointer
0485   template <typename T>
0486   inline const T* GetValues(int i, int64_t absolute_offset) const {
0487     return reinterpret_cast<const T*>(buffers[i].data) + absolute_offset;
0488   }
0489 
0490   template <typename T>
0491   inline const T* GetValues(int i) const {
0492     return GetValues<T>(i, this->offset);
0493   }
0494 
0495   /// \brief Access a buffer's data as a span
0496   ///
0497   /// \param i The buffer index
0498   /// \param length The required length (in number of typed values) of the requested span
0499   /// \pre i > 0
0500   /// \pre length <= the length of the buffer (in number of values) that's expected for
0501   /// this array type
0502   /// \return A span<const T> of the requested length
0503   template <typename T>
0504   util::span<const T> GetSpan(int i, int64_t length) const {
0505     const int64_t buffer_length = buffers[i].size / static_cast<int64_t>(sizeof(T));
0506     assert(i > 0 && length + offset <= buffer_length);
0507     ARROW_UNUSED(buffer_length);
0508     return util::span<const T>(buffers[i].data_as<T>() + this->offset, length);
0509   }
0510 
0511   /// \brief Access a buffer's data as a span
0512   ///
0513   /// \param i The buffer index
0514   /// \param length The required length (in number of typed values) of the requested span
0515   /// \pre i > 0
0516   /// \pre length <= the length of the buffer (in number of values) that's expected for
0517   /// this array type
0518   /// \return A span<T> of the requested length
0519   template <typename T>
0520   util::span<T> GetSpan(int i, int64_t length) {
0521     const int64_t buffer_length = buffers[i].size / static_cast<int64_t>(sizeof(T));
0522     assert(i > 0 && length + offset <= buffer_length);
0523     ARROW_UNUSED(buffer_length);
0524     return util::span<T>(buffers[i].mutable_data_as<T>() + this->offset, length);
0525   }
0526 
0527   inline bool IsNull(int64_t i) const { return !IsValid(i); }
0528 
0529   inline bool IsValid(int64_t i) const {
0530     if (this->buffers[0].data != NULLPTR) {
0531       return bit_util::GetBit(this->buffers[0].data, i + this->offset);
0532     } else {
0533       const auto type = this->type->id();
0534       if (type == Type::SPARSE_UNION) {
0535         return !IsNullSparseUnion(i);
0536       }
0537       if (type == Type::DENSE_UNION) {
0538         return !IsNullDenseUnion(i);
0539       }
0540       if (type == Type::RUN_END_ENCODED) {
0541         return !IsNullRunEndEncoded(i);
0542       }
0543       return this->null_count != this->length;
0544     }
0545   }
0546 
0547   std::shared_ptr<ArrayData> ToArrayData() const;
0548 
0549   std::shared_ptr<Array> ToArray() const;
0550 
0551   std::shared_ptr<Buffer> GetBuffer(int index) const {
0552     const BufferSpan& buf = this->buffers[index];
0553     if (buf.owner) {
0554       return *buf.owner;
0555     } else if (buf.data != NULLPTR) {
0556       // Buffer points to some memory without an owning buffer
0557       return std::make_shared<Buffer>(buf.data, buf.size);
0558     } else {
0559       return NULLPTR;
0560     }
0561   }
0562 
0563   void SetSlice(int64_t offset, int64_t length) {
0564     this->offset = offset;
0565     this->length = length;
0566     if (this->type->id() == Type::NA) {
0567       this->null_count = this->length;
0568     } else if (this->MayHaveNulls()) {
0569       this->null_count = kUnknownNullCount;
0570     } else {
0571       this->null_count = 0;
0572     }
0573   }
0574 
0575   /// \brief Return physical null count, or compute and set it if it's not known
0576   int64_t GetNullCount() const;
0577 
0578   /// \brief Return true if the array has a validity bitmap and the physical null
0579   /// count is known to be non-zero or not yet known
0580   ///
0581   /// Note that this is not the same as MayHaveLogicalNulls, which also checks
0582   /// for the presence of nulls in child data for types like unions and run-end
0583   /// encoded types.
0584   ///
0585   /// \see HasValidityBitmap
0586   /// \see MayHaveLogicalNulls
0587   bool MayHaveNulls() const {
0588     // If an ArrayData is slightly malformed it may have kUnknownNullCount set
0589     // but no buffer
0590     return null_count != 0 && buffers[0].data != NULLPTR;
0591   }
0592 
0593   /// \brief Return true if the array has a validity bitmap
0594   bool HasValidityBitmap() const { return buffers[0].data != NULLPTR; }
0595 
0596   /// \brief Return true if the validity bitmap may have 0's in it, or if the
0597   /// child arrays (in the case of types without a validity bitmap) may have
0598   /// nulls, or if the dictionary of dictionay array may have nulls.
0599   ///
0600   /// \see ArrayData::MayHaveLogicalNulls
0601   bool MayHaveLogicalNulls() const {
0602     if (buffers[0].data != NULLPTR) {
0603       return null_count != 0;
0604     }
0605     const auto t = type->id();
0606     if (t == Type::SPARSE_UNION || t == Type::DENSE_UNION) {
0607       return UnionMayHaveLogicalNulls();
0608     }
0609     if (t == Type::RUN_END_ENCODED) {
0610       return RunEndEncodedMayHaveLogicalNulls();
0611     }
0612     if (t == Type::DICTIONARY) {
0613       return DictionaryMayHaveLogicalNulls();
0614     }
0615     return null_count != 0;
0616   }
0617 
0618   /// \brief Compute the logical null count for arrays of all types including
0619   /// those that do not have a validity bitmap like union and run-end encoded
0620   /// arrays
0621   ///
0622   /// If the array has a validity bitmap, this function behaves the same as
0623   /// GetNullCount. For types that have no validity bitmap, this function will
0624   /// recompute the logical null count every time it is called.
0625   ///
0626   /// \see GetNullCount
0627   int64_t ComputeLogicalNullCount() const;
0628 
0629   /// Some DataTypes (StringView, BinaryView) may have an arbitrary number of variadic
0630   /// buffers. Since ArraySpan only has 3 buffers, we pack the variadic buffers into
0631   /// buffers[2]; IE buffers[2].data points to the first shared_ptr<Buffer> of the
0632   /// variadic set and buffers[2].size is the number of variadic buffers times
0633   /// sizeof(shared_ptr<Buffer>).
0634   ///
0635   /// \see HasVariadicBuffers
0636   util::span<const std::shared_ptr<Buffer>> GetVariadicBuffers() const;
0637   bool HasVariadicBuffers() const;
0638 
0639  private:
0640   ARROW_FRIEND_EXPORT friend bool internal::IsNullRunEndEncoded(const ArrayData& data,
0641                                                                 int64_t i);
0642 
0643   bool IsNullSparseUnion(int64_t i) const;
0644   bool IsNullDenseUnion(int64_t i) const;
0645 
0646   /// \brief Return true if the value at logical index i is null
0647   ///
0648   /// This function uses binary-search, so it has a O(log N) cost.
0649   /// Iterating over the whole array and calling IsNull is O(N log N), so
0650   /// for better performance it is recommended to use a
0651   /// ree_util::RunEndEncodedArraySpan to iterate run by run instead.
0652   bool IsNullRunEndEncoded(int64_t i) const;
0653 
0654   bool UnionMayHaveLogicalNulls() const;
0655   bool RunEndEncodedMayHaveLogicalNulls() const;
0656   bool DictionaryMayHaveLogicalNulls() const;
0657 };
0658 
0659 namespace internal {
0660 
0661 void FillZeroLengthArray(const DataType* type, ArraySpan* span);
0662 
0663 /// Construct a zero-copy view of this ArrayData with the given type.
0664 ///
0665 /// This method checks if the types are layout-compatible.
0666 /// Nested types are traversed in depth-first order. Data buffers must have
0667 /// the same item sizes, even though the logical types may be different.
0668 /// An error is returned if the types are not layout-compatible.
0669 ARROW_EXPORT
0670 Result<std::shared_ptr<ArrayData>> GetArrayView(const std::shared_ptr<ArrayData>& data,
0671                                                 const std::shared_ptr<DataType>& type);
0672 
0673 }  // namespace internal
0674 }  // namespace arrow