Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-27 08:47:18

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cstdint>
0021 #include <memory>
0022 #include <string>
0023 #include <type_traits>
0024 #include <utility>
0025 #include <variant>
0026 #include <vector>
0027 
0028 #include "arrow/array/data.h"
0029 #include "arrow/device_allocation_type_set.h"
0030 #include "arrow/scalar.h"
0031 #include "arrow/type.h"
0032 #include "arrow/type_traits.h"
0033 #include "arrow/util/checked_cast.h"
0034 #include "arrow/util/macros.h"
0035 #include "arrow/util/visibility.h"
0036 
0037 namespace arrow {
0038 
// Forward declarations of the Arrow containers that Datum wraps or can be
// constructed from; this header only names them through references,
// std::shared_ptr and type traits.
0039 class Array;
0040 class ChunkedArray;
0041 class RecordBatch;
0042 class Table;
0043 
0044 /// \class Datum
0045 /// \brief Variant type for various Arrow C++ data structures
0046 struct ARROW_EXPORT Datum {
0047   /// \brief The kind of datum stored
0048   enum Kind { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE };
0049 
0050   /// \brief A placeholder type to represent empty datum
0051   struct Empty {};
0052 
0053   /// \brief Datums variants may have a length. This special value indicate that the
0054   /// current variant does not have a length.
0055   static constexpr int64_t kUnknownLength = -1;
0056 
0057   /// \brief Storage of the actual datum.
0058   ///
0059   /// Note: For arrays, ArrayData is stored instead of Array for easier processing
0060   std::variant<Empty, std::shared_ptr<Scalar>, std::shared_ptr<ArrayData>,
0061                std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>,
0062                std::shared_ptr<Table>>
0063       value;
0064 
0065   /// \brief Empty datum, to be populated elsewhere
0066   Datum() = default;
0067 
0068   Datum(const Datum& other) = default;
0069   Datum& operator=(const Datum& other) = default;
0070   Datum(Datum&& other) = default;
0071   Datum& operator=(Datum&& other) = default;
0072 
0073   /// \brief Construct from a Scalar
0074   Datum(std::shared_ptr<Scalar> value)  // NOLINT implicit conversion
0075       : value(std::move(value)) {}
0076 
0077   /// \brief Construct from an ArrayData
0078   Datum(std::shared_ptr<ArrayData> value)  // NOLINT implicit conversion
0079       : value(std::move(value)) {}
0080 
0081   /// \brief Construct from an ArrayData
0082   Datum(ArrayData arg)  // NOLINT implicit conversion
0083       : value(std::make_shared<ArrayData>(std::move(arg))) {}
0084 
0085   /// \brief Construct from an Array
0086   Datum(const Array& value);  // NOLINT implicit conversion
0087 
0088   /// \brief Construct from an Array
0089   Datum(const std::shared_ptr<Array>& value);  // NOLINT implicit conversion
0090 
0091   /// \brief Construct from a ChunkedArray
0092   Datum(std::shared_ptr<ChunkedArray> value);  // NOLINT implicit conversion
0093 
0094   /// \brief Construct from a RecordBatch
0095   Datum(std::shared_ptr<RecordBatch> value);  // NOLINT implicit conversion
0096 
0097   /// \brief Construct from a Table
0098   Datum(std::shared_ptr<Table> value);  // NOLINT implicit conversion
0099 
0100   /// \brief Construct from a ChunkedArray.
0101   ///
0102   /// This can be expensive, prefer the shared_ptr<ChunkedArray> constructor
0103   explicit Datum(const ChunkedArray& value);
0104 
0105   /// \brief Construct from a RecordBatch.
0106   ///
0107   /// This can be expensive, prefer the shared_ptr<RecordBatch> constructor
0108   explicit Datum(const RecordBatch& value);
0109 
0110   /// \brief Construct from a Table.
0111   ///
0112   /// This can be expensive, prefer the shared_ptr<Table> constructor
0113   explicit Datum(const Table& value);
0114 
0115   /// \brief Cast from concrete subtypes of Array or Scalar to Datum
0116   template <typename T, bool IsArray = std::is_base_of_v<Array, T>,
0117             bool IsScalar = std::is_base_of_v<Scalar, T>,
0118             typename = enable_if_t<IsArray || IsScalar>>
0119   Datum(std::shared_ptr<T> value)  // NOLINT implicit conversion
0120       : Datum(std::shared_ptr<typename std::conditional<IsArray, Array, Scalar>::type>(
0121             std::move(value))) {}
0122 
0123   /// \brief Cast from concrete subtypes of Array or Scalar to Datum
0124   template <typename T, typename TV = typename std::remove_reference_t<T>,
0125             bool IsArray = std::is_base_of_v<Array, T>,
0126             bool IsScalar = std::is_base_of_v<Scalar, T>,
0127             typename = enable_if_t<IsArray || IsScalar>>
0128   Datum(T&& value)  // NOLINT implicit conversion
0129       : Datum(std::make_shared<TV>(std::forward<T>(value))) {}
0130 
0131   /// \brief Copy from concrete subtypes of Scalar.
0132   ///
0133   /// The concrete scalar type must be copyable (not all of them are).
0134   template <typename T, typename = enable_if_t<std::is_base_of_v<Scalar, T>>>
0135   Datum(const T& value)  // NOLINT implicit conversion
0136       : Datum(std::make_shared<T>(value)) {}
0137 
0138   // Convenience constructors
0139   /// \brief Convenience constructor storing a bool scalar.
0140   explicit Datum(bool value);
0141   /// \brief Convenience constructor storing an int8 scalar.
0142   explicit Datum(int8_t value);
0143   /// \brief Convenience constructor storing a uint8 scalar.
0144   explicit Datum(uint8_t value);
0145   /// \brief Convenience constructor storing an int16 scalar.
0146   explicit Datum(int16_t value);
0147   /// \brief Convenience constructor storing a uint16 scalar.
0148   explicit Datum(uint16_t value);
0149   /// \brief Convenience constructor storing an int32 scalar.
0150   explicit Datum(int32_t value);
0151   /// \brief Convenience constructor storing a uint32 scalar.
0152   explicit Datum(uint32_t value);
0153   /// \brief Convenience constructor storing an int64 scalar.
0154   explicit Datum(int64_t value);
0155   /// \brief Convenience constructor storing a uint64 scalar.
0156   explicit Datum(uint64_t value);
0157   /// \brief Convenience constructor storing a float scalar.
0158   explicit Datum(float value);
0159   /// \brief Convenience constructor storing a double scalar.
0160   explicit Datum(double value);
0161   /// \brief Convenience constructor storing a string scalar.
0162   explicit Datum(std::string value);
0163   /// \brief Convenience constructor storing a string scalar.
0164   explicit Datum(const char* value);
0165 
0166   /// \brief Convenience constructor for a DurationScalar from std::chrono::duration
0167   template <template <typename, typename> class StdDuration, typename Rep,
0168             typename Period,
0169             typename = decltype(DurationScalar{StdDuration<Rep, Period>{}})>
0170   explicit Datum(StdDuration<Rep, Period> d) : Datum{DurationScalar(d)} {}
0171 
0172   /// \brief The kind of data stored in Datum
0173   Datum::Kind kind() const {
0174     switch (this->value.index()) {
0175       case 0:
0176         return Datum::NONE;
0177       case 1:
0178         return Datum::SCALAR;
0179       case 2:
0180         return Datum::ARRAY;
0181       case 3:
0182         return Datum::CHUNKED_ARRAY;
0183       case 4:
0184         return Datum::RECORD_BATCH;
0185       case 5:
0186         return Datum::TABLE;
0187       default:
0188         return Datum::NONE;
0189     }
0190   }
0191 
0192   /// \brief Retrieve the stored array as ArrayData
0193   ///
0194   /// Use make_array() if an Array is desired (which is more expensive).
0195   /// \throws std::bad_variant_access if the datum is not an array
0196   const std::shared_ptr<ArrayData>& array() const {
0197     return std::get<std::shared_ptr<ArrayData>>(this->value);
0198   }
0199 
0200   /// \brief The sum of bytes in each buffer referenced by the datum
0201   /// Note: Scalars report a size of 0
0202   /// \see arrow::util::TotalBufferSize for caveats
0203   int64_t TotalBufferSize() const;
0204 
0205   /// \brief Get the stored ArrayData in mutable form
0206   ///
0207   /// For internal use primarily. Keep in mind a shared_ptr<Datum> may have multiple
0208   /// owners.
0209   ArrayData* mutable_array() const { return this->array().get(); }
0210 
0211   /// \brief Retrieve the stored array as Array
0212   /// \throws std::bad_variant_access if the datum is not an array
0213   std::shared_ptr<Array> make_array() const;
0214 
0215   /// \brief Retrieve the chunked array stored
0216   /// \throws std::bad_variant_access if the datum is not a chunked array
0217   const std::shared_ptr<ChunkedArray>& chunked_array() const {
0218     return std::get<std::shared_ptr<ChunkedArray>>(this->value);
0219   }
0220 
0221   /// \brief Retrieve the record batch stored
0222   /// \throws std::bad_variant_access if the datum is not a record batch
0223   const std::shared_ptr<RecordBatch>& record_batch() const {
0224     return std::get<std::shared_ptr<RecordBatch>>(this->value);
0225   }
0226 
0227   /// \brief Retrieve the table stored
0228   /// \throws std::bad_variant_access if the datum is not a table
0229   const std::shared_ptr<Table>& table() const {
0230     return std::get<std::shared_ptr<Table>>(this->value);
0231   }
0232 
0233   /// \brief Retrieve the scalar stored
0234   /// \throws std::bad_variant_access if the datum is not a scalar
0235   const std::shared_ptr<Scalar>& scalar() const {
0236     return std::get<std::shared_ptr<Scalar>>(this->value);
0237   }
0238 
0239   /// \brief Retrieve the datum as its concrete array type
0240   /// \throws std::bad_variant_access if the datum is not an array
0241   /// \tparam ExactType the expected array type, may cause undefined behavior if it is not
0242   /// the type of the stored array
0243   template <typename ExactType>
0244   std::shared_ptr<ExactType> array_as() const {
0245     return internal::checked_pointer_cast<ExactType>(this->make_array());
0246   }
0247 
0248   /// \brief Retrieve the datum as its concrete scalar type
0249   /// \throws std::bad_variant_access if the datum is not a scalar
0250   /// \tparam ExactType the expected scalar type, may cause undefined behavior if it is
0251   /// not the type of the stored scalar
0252   template <typename ExactType>
0253   const ExactType& scalar_as() const {
0254     return internal::checked_cast<const ExactType&>(*this->scalar());
0255   }
0256 
0257   /// \brief True if Datum contains an array
0258   bool is_array() const { return this->kind() == Datum::ARRAY; }
0259 
0260   /// \brief True if Datum contains a chunked array
0261   bool is_chunked_array() const { return this->kind() == Datum::CHUNKED_ARRAY; }
0262 
0263   /// \brief True if Datum contains an array or a chunked array
0264   bool is_arraylike() const {
0265     return this->kind() == Datum::ARRAY || this->kind() == Datum::CHUNKED_ARRAY;
0266   }
0267 
0268   /// \brief True if Datum contains a scalar
0269   bool is_scalar() const { return this->kind() == Datum::SCALAR; }
0270 
0271   /// \brief True if Datum contains a scalar or array-like data
0272   bool is_value() const { return this->is_arraylike() || this->is_scalar(); }
0273 
0274   /// \brief Return the null count.
0275   ///
0276   /// Only valid for scalar and array-like data.
0277   int64_t null_count() const;
0278 
0279   /// \brief The value type of the variant, if any
0280   ///
0281   /// \return nullptr if no type
0282   const std::shared_ptr<DataType>& type() const;
0283 
0284   /// \brief The schema of the variant, if any
0285   ///
0286   /// \return nullptr if no schema
0287   const std::shared_ptr<Schema>& schema() const;
0288 
0289   /// \brief The value length of the variant, if any
0290   ///
0291   /// \return kUnknownLength if no type
0292   int64_t length() const;
0293 
0294   /// \brief The array chunks of the variant, if any
0295   ///
0296   /// \return empty if not arraylike
0297   ArrayVector chunks() const;
0298 
0299   DeviceAllocationTypeSet device_types() const;
0300 
0301   /// \brief True if the two data are equal
0302   bool Equals(const Datum& other) const;
0303 
0304   bool operator==(const Datum& other) const { return Equals(other); }
0305   bool operator!=(const Datum& other) const { return !Equals(other); }
0306 
0307   std::string ToString() const;
0308 };
0309 
/// \brief Write a Datum to the given output stream.
///
/// NOTE(review): the name and signature match GoogleTest's PrintTo
/// customization point -- presumably this is the test pretty-printer; confirm.
/// NOTE(review): std::ostream is not declared by any header this file includes
/// directly; it presumably arrives transitively -- consider including <iosfwd>.
0310 ARROW_EXPORT void PrintTo(const Datum&, std::ostream*);
0311 
/// \brief Return a string for the given Datum::Kind.
///
/// NOTE(review): the exact strings are defined out of line -- confirm before
/// relying on their spelling.
0312 ARROW_EXPORT std::string ToString(Datum::Kind kind);
0313 
0314 }  // namespace arrow