Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-27 08:47:22

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <atomic>
0021 #include <climits>
0022 #include <cstdint>
0023 #include <iosfwd>
0024 #include <limits>
0025 #include <memory>
0026 #include <optional>
0027 #include <string>
0028 #include <utility>
0029 #include <variant>
0030 #include <vector>
0031 
0032 #include "arrow/result.h"
0033 #include "arrow/type_fwd.h"  // IWYU pragma: export
0034 #include "arrow/util/checked_cast.h"
0035 #include "arrow/util/endian.h"
0036 #include "arrow/util/macros.h"
0037 #include "arrow/util/visibility.h"
0038 #include "arrow/visitor.h"  // IWYU pragma: keep
0039 
0040 namespace arrow {
0041 namespace detail {
0042 
0043 /// \defgroup numeric-datatypes Datatypes for numeric data
0044 /// @{
0045 /// @}
0046 
0047 /// \defgroup binary-datatypes Datatypes for binary/string data
0048 /// @{
0049 /// @}
0050 
0051 /// \defgroup temporal-datatypes Datatypes for temporal data
0052 /// @{
0053 /// @}
0054 
0055 /// \defgroup nested-datatypes Datatypes for nested data
0056 /// @{
0057 /// @}
0058 
0059 class ARROW_EXPORT Fingerprintable {
0060  public:
0061   virtual ~Fingerprintable();
0062   /// Fast path: hand back the string behind the atomic pointer once it is set.
0063   const std::string& fingerprint() const {
0064     const std::string* cached = fingerprint_.load();
0065     if (!ARROW_PREDICT_TRUE(cached != NULLPTR)) {
0066       return LoadFingerprintSlow();  // pointer still null: take the slow path
0067     }
0068     return *cached;
0069   }
0070   /// Same scheme as fingerprint(), backed by metadata_fingerprint_.
0071   const std::string& metadata_fingerprint() const {
0072     const std::string* cached = metadata_fingerprint_.load();
0073     if (!ARROW_PREDICT_TRUE(cached != NULLPTR)) {
0074       return LoadMetadataFingerprintSlow();  // pointer still null: take the slow path
0075     }
0076     return *cached;
0077   }
0078 
0079  protected:
0080   // Out-of-line fallbacks the accessors above call while their pointer is null.
0081   const std::string& LoadFingerprintSlow() const;
0082   const std::string& LoadMetadataFingerprintSlow() const;
0083   virtual std::string ComputeFingerprint() const = 0;
0084   virtual std::string ComputeMetadataFingerprint() const = 0;
0085   // Both pointers start out null and are mutable so const accessors can use them.
0086   mutable std::atomic<std::string*> fingerprint_{NULLPTR};
0087   mutable std::atomic<std::string*> metadata_fingerprint_{NULLPTR};
0088 };
0089 
0090 }  // namespace detail
0091 
0092 /// EXPERIMENTAL: Describes the buffers that a data type expects
0093 struct ARROW_EXPORT DataTypeLayout {
0094   enum BufferKind { FIXED_WIDTH, VARIABLE_WIDTH, BITMAP, ALWAYS_NULL };
0095 
0096   /// Description of one buffer: its kind plus, for FIXED_WIDTH, its byte width
0097   struct BufferSpec {
0098     BufferKind kind;
0099     int64_t byte_width;  // Only compared when kind == FIXED_WIDTH
0100     /// Specs match when the kinds match; byte_width matters for FIXED_WIDTH only.
0101     bool operator==(const BufferSpec& other) const {
0102       if (kind != other.kind) return false;
0103       return kind != FIXED_WIDTH || byte_width == other.byte_width;
0104     }
0105     bool operator!=(const BufferSpec& other) const { return !operator==(other); }
0106   };
0107   /// Factory helpers; every kind other than FIXED_WIDTH stores -1 as byte_width.
0108   static BufferSpec FixedWidth(int64_t w) { return {FIXED_WIDTH, w}; }
0109   static BufferSpec VariableWidth() { return {VARIABLE_WIDTH, -1}; }
0110   static BufferSpec Bitmap() { return {BITMAP, -1}; }
0111   static BufferSpec AlwaysNull() { return {ALWAYS_NULL, -1}; }
0112 
0113   /// One layout specification per expected buffer
0114   std::vector<BufferSpec> buffers;
0115   /// True when the type expects an associated dictionary array.
0116   bool has_dictionary = false;
0117   /// When set, buffers.size() is only a lower bound on the number of buffers;
0118   /// every buffer beyond that bound is expected to conform to
0119   /// variadic_spec.
0120   std::optional<BufferSpec> variadic_spec;
0121   /// Stores both arguments; has_dictionary keeps its default of false.
0122   explicit DataTypeLayout(std::vector<BufferSpec> buffers,
0123                           std::optional<BufferSpec> variadic_spec = std::nullopt)
0124       : buffers(std::move(buffers)), variadic_spec(variadic_spec) {}
0125 };
0126 
0127 /// \brief Base class for all data types
0128 ///
0129 /// Data types in this library are all *logical*. They can be expressed as
0130 /// either a primitive physical type (bytes or bits of some fixed size), a
0131 /// nested type consisting of other data types, or another data type (e.g. a
0132 /// timestamp encoded as an int64).
0133 ///
0134 /// Simple datatypes may be entirely described by their Type::type id, but
0135 /// complex datatypes are usually parametric.
0136 class ARROW_EXPORT DataType : public std::enable_shared_from_this<DataType>,
0137                               public detail::Fingerprintable,
0138                               public util::EqualityComparable<DataType> {
0139  public:
0140   explicit DataType(Type::type id) : detail::Fingerprintable(), id_(id) {}
0141   ~DataType() override;
0142 
0143   /// \brief Return whether the types are equal
0144   ///
0145   /// Types that are logically convertible from one to another (e.g. List<UInt8>
0146   /// and Binary) are NOT equal.
0147   bool Equals(const DataType& other, bool check_metadata = false) const;
0148 
0149   /// \brief Return whether the types are equal
0150   bool Equals(const std::shared_ptr<DataType>& other, bool check_metadata = false) const;
0151 
0152   /// \brief Return the child field at index i (children_ is indexed without a range check).
0153   const std::shared_ptr<Field>& field(int i) const { return children_[i]; }
0154 
0155   /// \brief Return the children fields associated with this type.
0156   const FieldVector& fields() const { return children_; }
0157 
0158   /// \brief Return the number of children fields associated with this type.
0159   int num_fields() const { return static_cast<int>(children_.size()); }
0160 
0161   /// \brief Apply the TypeVisitor::Visit() method specialized to the data type
0162   Status Accept(TypeVisitor* visitor) const;
0163 
0164   /// \brief A string representation of the type, including any children
0165   virtual std::string ToString(bool show_metadata = false) const = 0;
0166 
0167   /// \brief Return hash value (excluding metadata in child fields)
0168   size_t Hash() const;
0169 
0170   /// \brief A string name of the type, omitting any child fields
0171   ///
0172   /// \since 0.7.0
0173   virtual std::string name() const = 0;
0174 
0175   /// \brief Return the data type layout.  Children are not included.
0176   ///
0177   /// \note Experimental API
0178   virtual DataTypeLayout layout() const = 0;
0179 
0180   /// \brief Return the type category
0181   Type::type id() const { return id_; }
0182 
0183   /// \brief Return the type category of the storage type (defaults to id())
0184   virtual Type::type storage_id() const { return id_; }
0185 
0186   /// \brief Returns the type's fixed byte width, if any. Returns -1
0187   /// when bit_width() is not positive; a positive bit width below 8
0188   /// yields 0 (integer division). Meant for subclasses of FixedWidthType
0189   virtual int32_t byte_width() const {
0190     int32_t num_bits = this->bit_width();
0191     return num_bits > 0 ? num_bits / 8 : -1;
0192   }
0193 
0194   /// \brief Returns the type's fixed bit width, if any. Returns -1
0195   /// for non-fixed-width types, and should only be used for
0196   /// subclasses of FixedWidthType
0197   virtual int bit_width() const { return -1; }
0198 
0199   /// \brief EXPERIMENTAL: Enable retrieving shared_ptr<DataType> from a const
0200   /// context (uses shared_from_this(), so `this` must be owned by a shared_ptr).
0201   std::shared_ptr<DataType> GetSharedPtr() const {
0202     return const_cast<DataType*>(this)->shared_from_this();
0203   }
0204 
0205  protected:
0206   // Dummy version that returns a null string (indicating not implemented).
0207   // Subclasses should override for fast equality checks.
0208   std::string ComputeFingerprint() const override;
0209 
0210   // Generic version that works for all regular types, nested or not.
0211   std::string ComputeMetadataFingerprint() const override;
0212 
0213   Type::type id_;         // Type category, fixed at construction
0214   FieldVector children_;  // Child fields exposed through field()/fields()
0215 
0216  private:
0217   ARROW_DISALLOW_COPY_AND_ASSIGN(DataType);
0218 };
0219 
0220 /// \brief EXPERIMENTAL: Container for a type pointer which can hold a
0221 /// dynamically created shared_ptr<DataType> if it needs to.
0222 struct ARROW_EXPORT TypeHolder {
0223   const DataType* type = NULLPTR;        // Non-owning pointer; NULLPTR means "empty"
0224   std::shared_ptr<DataType> owned_type;  // Owning reference, set by the shared_ptr ctor
0225 
0226   TypeHolder() = default;
0227   TypeHolder(const TypeHolder& other) = default;
0228   TypeHolder& operator=(const TypeHolder& other) = default;
0229   TypeHolder(TypeHolder&& other) = default;
0230   TypeHolder& operator=(TypeHolder&& other) = default;
0231   /// `type` is declared first, so it reads the parameter before the move below.
0232   TypeHolder(std::shared_ptr<DataType> owned_type)  // NOLINT implicit construction
0233       : type(owned_type.get()), owned_type(std::move(owned_type)) {}
0234   /// Wrap a raw pointer without taking ownership (`owned_type` stays empty).
0235   TypeHolder(const DataType* type)  // NOLINT implicit construction
0236       : type(type) {}
0237   /// Type id of the held type. Precondition: `type` is not NULLPTR.
0238   Type::type id() const { return this->type->id(); }
0239   /// Shared pointer to the held type via DataType::GetSharedPtr(), or NULLPTR if empty.
0240   std::shared_ptr<DataType> GetSharedPtr() const {
0241     return this->type != NULLPTR ? this->type->GetSharedPtr() : NULLPTR;
0242   }
0243   /// Dereference the held type. Precondition: `type` is not NULLPTR.
0244   const DataType& operator*() const { return *this->type; }
0245   /// True when a type is held.
0246   operator bool() const { return this->type != NULLPTR; }
0247   /// Identical pointers (including both empty) are equal; otherwise defer to Equals().
0248   bool operator==(const TypeHolder& other) const {
0249     if (type == other.type) return true;
0250     if (type == NULLPTR || other.type == NULLPTR) return false;
0251     return type->Equals(*other.type);
0252   }
0253   /// True when no type is held.
0254   bool operator==(decltype(NULLPTR)) const { return this->type == NULLPTR; }
0255   /// An empty holder never equals a concrete DataType.
0256   bool operator==(const DataType& other) const {
0257     if (this->type == NULLPTR) return false;
0258     return other.Equals(*this->type);
0259   }
0260 
0261   bool operator!=(const DataType& other) const { return !(*this == other); }
0262   /// A null `other` is never dereferenced: it equals only an empty holder.
0263   bool operator==(const std::shared_ptr<DataType>& other) const {
0264     return other ? (*this == *other) : (this->type == NULLPTR);
0265   }
0266 
0267   bool operator!=(const TypeHolder& other) const { return !(*this == other); }
0268   /// String form of the held type, or "<NULLPTR>" when empty.
0269   std::string ToString(bool show_metadata = false) const {
0270     return this->type ? this->type->ToString(show_metadata) : "<NULLPTR>";
0271   }
0272 
0273   static std::string ToString(const std::vector<TypeHolder>&, bool show_metadata = false);
0274 
0275   static std::vector<TypeHolder> FromTypes(
0276       const std::vector<std::shared_ptr<DataType>>& types);
0277 };
0278 /// Stream-insertion overload for DataType (declared here, defined out of line).
0279 ARROW_EXPORT
0280 std::ostream& operator<<(std::ostream& os, const DataType& type);
0281 /// Stream-insertion overload for TypeHolder (declared here, defined out of line).
0282 ARROW_EXPORT
0283 std::ostream& operator<<(std::ostream& os, const TypeHolder& type);
0284 
0285 /// \brief Return the compatible physical data type for `type`
0286 ///
0287 /// Some types may have distinct logical meanings but the exact same physical
0288 /// representation.  For example, TimestampType has Int64Type as a physical
0289 /// type (defined as TimestampType::PhysicalType).
0290 ///
0291 /// The return value is as follows:
0292 /// - if a `PhysicalType` alias exists in the concrete type class, return
0293 ///   an instance of `PhysicalType`.
0294 /// - otherwise, return the input type itself.
0295 std::shared_ptr<DataType> GetPhysicalType(const std::shared_ptr<DataType>& type);
0296 
0297 /// \brief Base class for all fixed-width data types
0298 class ARROW_EXPORT FixedWidthType : public DataType {
0299  public:
0300   using DataType::DataType;
0301   // The destructor is declared here and defined out of line so that this
0302   // class is not defined in every translation unit (avoids ODR violations).
0303   ~FixedWidthType() override;
0304 };
0305 
0306 /// \brief Base class for all data types representing primitive values
0307 class ARROW_EXPORT PrimitiveCType : public FixedWidthType {
0308  public:
0309   using FixedWidthType::FixedWidthType;
0310   // The destructor is declared here and defined out of line so that this
0311   // class is not defined in every translation unit (avoids ODR violations).
0312   ~PrimitiveCType() override;
0313 };
0314 
0315 /// \brief Base class for all numeric data types
0316 class ARROW_EXPORT NumberType : public PrimitiveCType {
0317  public:
0318   using PrimitiveCType::PrimitiveCType;
0319   // The destructor is declared here and defined out of line so that this
0320   // class is not defined in every translation unit (avoids ODR violations).
0321   ~NumberType() override;
0322 };
0323 
0324 /// \brief Base class for all integral data types
0325 class ARROW_EXPORT IntegerType : public NumberType {
0326  public:
0327   using NumberType::NumberType;
0328   // The destructor is declared here and defined out of line so that this
0329   // class is not defined in every translation unit (avoids ODR violations).
0330   ~IntegerType() override;
0331   virtual bool is_signed() const = 0;  // overridden in detail::IntegerTypeImpl
0332 };
0333 
0334 /// \brief Base class for all floating-point data types
0335 class ARROW_EXPORT FloatingPointType : public NumberType {
0336  public:
0337   using NumberType::NumberType;
0338   // The destructor is declared here and defined out of line so that this
0339   // class is not defined in every translation unit (avoids ODR violations).
0340   ~FloatingPointType() override;
0341   enum Precision { HALF, SINGLE, DOUBLE };
0342   virtual Precision precision() const = 0;  // each concrete type reports its Precision
0343 };
0344 
0345 /// \brief Empty marker base class for all parametric data types
0346 class ParametricType {};
0347 /// \brief Base class for nested data types; a DataType that is also a ParametricType
0348 class ARROW_EXPORT NestedType : public DataType, public ParametricType {
0349  public:
0350   using DataType::DataType;
0351   // The destructor is declared here and defined out of line so that this
0352   // class is not defined in every translation unit (avoids ODR violations).
0353   ~NestedType() override;
0354 };
0355 
0356 /// \brief The combination of a field name and data type, with optional metadata
0357 ///
0358 /// Fields are used to describe the individual constituents of a
0359 /// nested DataType or a Schema.
0360 ///
0361 /// A field's metadata is represented by a KeyValueMetadata instance,
0362 /// which holds arbitrary key-value pairs.
0363 class ARROW_EXPORT Field : public detail::Fingerprintable,
0364                            public util::EqualityComparable<Field> {
0365  public:
0366   Field(std::string name, std::shared_ptr<DataType> type, bool nullable = true,
0367         std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR)
0368       : detail::Fingerprintable(),
0369         name_(std::move(name)),
0370         type_(std::move(type)),
0371         nullable_(nullable),
0372         metadata_(std::move(metadata)) {}
0373 
0374   ~Field() override;
0375 
0376   /// \brief Return the field's attached metadata (a copy of the shared_ptr)
0377   std::shared_ptr<const KeyValueMetadata> metadata() const { return metadata_; }
0378 
0379   /// \brief Return whether the field has non-empty metadata
0380   bool HasMetadata() const;
0381 
0382   /// \brief Return a copy of this field with the given metadata attached to it
0383   std::shared_ptr<Field> WithMetadata(
0384       const std::shared_ptr<const KeyValueMetadata>& metadata) const;
0385 
0386   /// \brief EXPERIMENTAL: Return a copy of this field with the given metadata
0387   /// merged with existing metadata (any colliding keys will be overridden by
0388   /// the passed metadata)
0389   std::shared_ptr<Field> WithMergedMetadata(
0390       const std::shared_ptr<const KeyValueMetadata>& metadata) const;
0391 
0392   /// \brief Return a copy of this field without any metadata attached to it
0393   std::shared_ptr<Field> RemoveMetadata() const;
0394 
0395   /// \brief Return a copy of this field with the replaced type.
0396   std::shared_ptr<Field> WithType(const std::shared_ptr<DataType>& type) const;
0397 
0398   /// \brief Return a copy of this field with the replaced name.
0399   std::shared_ptr<Field> WithName(const std::string& name) const;
0400 
0401   /// \brief Return a copy of this field with the replaced nullability.
0402   std::shared_ptr<Field> WithNullable(bool nullable) const;
0403 
0404   /// \brief Options that control the behavior of `MergeWith`.
0405   /// Options are to be added to allow type conversions, including integer
0406   /// widening, promotion from integer to float, or conversion to or from boolean.
0407   struct ARROW_EXPORT MergeOptions : public util::ToStringOstreamable<MergeOptions> {
0408     /// If true, a Field of NullType can be unified with a Field of another type.
0409     /// The unified field will be of the other type and become nullable.
0410     /// Nullability will be promoted to the looser option (the result is
0411     /// nullable if either input is nullable).
0412     bool promote_nullability = true;
0413 
0414     /// Allow a decimal to be unified with another decimal of the same
0415     /// width, adjusting scale and precision as appropriate. May fail
0416     /// if the adjustment is not possible.
0417     bool promote_decimal = false;
0418 
0419     /// Allow a decimal to be promoted to a float. The float type will
0420     /// not itself be promoted (e.g. Decimal128 + Float32 = Float32).
0421     bool promote_decimal_to_float = false;
0422 
0423     /// Allow an integer to be promoted to a decimal.
0424     ///
0425     /// May fail if the decimal has insufficient precision to
0426     /// accommodate the integer (see promote_numeric_width).
0427     bool promote_integer_to_decimal = false;
0428 
0429     /// Allow an integer of a given bit width to be promoted to a
0430     /// float; the result will be a float of an equal or greater bit
0431     /// width to both of the inputs. Examples:
0432     ///  - int8 + float32 = float32
0433     ///  - int32 + float32 = float64
0434     ///  - int32 + float64 = float64
0435     /// (int32 + float32 widens to float64 because an int32 cannot always be
0436     /// represented exactly in the 24 bits of a float32 mantissa.)
0437     bool promote_integer_to_float = false;
0438 
0439     /// Allow an unsigned integer of a given bit width to be promoted
0440     /// to a signed integer that fits into the signed type:
0441     /// uint8 + int16 = int16
0442     /// When widening is needed, set promote_numeric_width to true:
0443     /// uint16 + int16 = int32
0444     bool promote_integer_sign = false;
0445 
0446     /// Allow an integer, float, or decimal of a given bit width to be
0447     /// promoted to an equivalent type of a greater bit width.
0448     bool promote_numeric_width = false;
0449 
0450     /// Allow strings to be promoted to binary types. Also allows promotion of
0451     /// fixed size binary types to variable sized formats, binary to large
0452     /// binary, and string to large string.
0453     bool promote_binary = false;
0454 
0455     /// Second to millisecond, Time32 to Time64, Time32(SECOND) to Time32(MILLI), etc
0456     bool promote_temporal_unit = false;
0457 
0458     /// Allow promotion from a list to a large-list and from a fixed-size list to a
0459     /// variable sized list
0460     bool promote_list = false;
0461 
0462     /// Unify dictionary index types and dictionary value types.
0463     bool promote_dictionary = false;
0464 
0465     /// Allow merging ordered and non-ordered dictionaries.
0466     /// The result will be ordered if and only if both inputs
0467     /// are ordered.
0468     bool promote_dictionary_ordered = false;
0469 
0470     /// Get default options. Only NullType will be merged with other types.
0471     static MergeOptions Defaults() { return MergeOptions(); }
0472     /// Get permissive options. All options are enabled, except
0473     /// promote_dictionary_ordered.
0474     static MergeOptions Permissive();
0475     /// Get a human-readable representation of the options.
0476     std::string ToString() const;
0477   };
0478 
0479   /// \brief Merge the current field with a field of the same name.
0480   ///
0481   /// The two fields must be compatible, i.e:
0482   ///   - have the same name
0483   ///   - have the same type, or of compatible types according to `options`.
0484   ///
0485   /// The metadata of the current field is preserved; the metadata of the other
0486   /// field is discarded.
0487   Result<std::shared_ptr<Field>> MergeWith(
0488       const Field& other, MergeOptions options = MergeOptions::Defaults()) const;
0489   Result<std::shared_ptr<Field>> MergeWith(
0490       const std::shared_ptr<Field>& other,
0491       MergeOptions options = MergeOptions::Defaults()) const;
0492   // NOTE(review): undocumented; presumably expands a nested field into its children -- confirm.
0493   FieldVector Flatten() const;
0494 
0495   /// \brief Indicate if fields are equal.
0496   ///
0497   /// \param[in] other field to check equality with.
0498   /// \param[in] check_metadata controls if it should check for metadata
0499   ///            equality.
0500   ///
0501   /// \return true if fields are equal, false otherwise.
0502   bool Equals(const Field& other, bool check_metadata = false) const;
0503   bool Equals(const std::shared_ptr<Field>& other, bool check_metadata = false) const;
0504 
0505   /// \brief Indicate if fields are compatible.
0506   ///
0507   /// See the criteria of MergeWith.
0508   ///
0509   /// \return true if fields are compatible, false otherwise.
0510   bool IsCompatibleWith(const Field& other) const;
0511   bool IsCompatibleWith(const std::shared_ptr<Field>& other) const;
0512 
0513   /// \brief Return a string representation of the field
0514   /// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
0515   /// print keys and values in the output
0516   std::string ToString(bool show_metadata = false) const;
0517 
0518   /// \brief Return the field name
0519   const std::string& name() const { return name_; }
0520   /// \brief Return the field data type
0521   const std::shared_ptr<DataType>& type() const { return type_; }
0522   /// \brief Return whether the field is nullable
0523   bool nullable() const { return nullable_; }
0524   // NOTE(review): undocumented; presumably returns a new Field with the same contents -- confirm.
0525   std::shared_ptr<Field> Copy() const;
0526 
0527  private:
0528   std::string ComputeFingerprint() const override;
0529   std::string ComputeMetadataFingerprint() const override;
0530 
0531   // Field name
0532   std::string name_;
0533 
0534   // The field's data type
0535   std::shared_ptr<DataType> type_;
0536 
0537   // Fields can be nullable
0538   bool nullable_;
0539 
0540   // The field's metadata, if any
0541   std::shared_ptr<const KeyValueMetadata> metadata_;
0542 
0543   ARROW_DISALLOW_COPY_AND_ASSIGN(Field);
0544 };
0545 // Writes `field` to `*os`; NOTE(review): presumably a test-framework printing hook -- confirm.
0546 ARROW_EXPORT void PrintTo(const Field& field, std::ostream* os);
0547 
0548 namespace detail {
0549 
0550 template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
0551 class CTypeImpl : public BASE {
0552  public:
0553   static constexpr Type::type type_id = TYPE_ID;
0554   using c_type = C_TYPE;
0555   using PhysicalType = DERIVED;
0556 
0557   CTypeImpl() : BASE(TYPE_ID) {}
0558   // Width in bits of the C type (bytes times CHAR_BIT).
0559   int bit_width() const override { return static_cast<int>(CHAR_BIT * sizeof(c_type)); }
0560   // Two buffers: a bitmap followed by a fixed-width buffer of sizeof(c_type) bytes.
0561   DataTypeLayout layout() const override {
0562     const auto values_spec = DataTypeLayout::FixedWidth(sizeof(c_type));
0563     return DataTypeLayout({DataTypeLayout::Bitmap(), values_spec});
0564   }
0565   // The name comes from the derived class's static type_name().
0566   std::string name() const override { return std::string(DERIVED::type_name()); }
0567   // show_metadata is ignored: the string form is just the name.
0568   std::string ToString(bool show_metadata = false) const override { return name(); }
0569 };
0570 // Out-of-class definition of the static constexpr data member declared above.
0571 template <typename DERIVED, typename BASE, Type::type TYPE_ID, typename C_TYPE>
0572 constexpr Type::type CTypeImpl<DERIVED, BASE, TYPE_ID, C_TYPE>::type_id;
0573 
0574 template <typename DERIVED, Type::type TYPE_ID, typename C_TYPE>  // CTypeImpl + is_signed()
0575 class IntegerTypeImpl : public detail::CTypeImpl<DERIVED, IntegerType, TYPE_ID, C_TYPE> {
0576  public:  // keep the override public, like IntegerType::is_signed(), for concrete types
0577   bool is_signed() const override { return std::is_signed<C_TYPE>::value; }
0578 };
0579 }  // namespace detail
0580 
0581 /// Concrete data type whose values are always null
0582 class ARROW_EXPORT NullType : public DataType {
0583  public:
0584   static constexpr Type::type type_id = Type::NA;
0585 
0586   static constexpr const char* type_name() { return "null"; }
0587 
0588   NullType() : DataType(type_id) {}
0589 
0590   std::string ToString(bool show_metadata = false) const override;
0591   // A single buffer spec of kind ALWAYS_NULL.
0592   DataTypeLayout layout() const override {
0593     std::vector<DataTypeLayout::BufferSpec> specs(1, DataTypeLayout::AlwaysNull());
0594     return DataTypeLayout(std::move(specs));
0595   }
0596   std::string name() const override { return type_name(); }
0597 
0598  protected:
0599   std::string ComputeFingerprint() const override;
0600 };
0601 
0602 /// Concrete type class for boolean data (c_type is bool, stored one bit per value)
0603 class ARROW_EXPORT BooleanType
0604     : public detail::CTypeImpl<BooleanType, PrimitiveCType, Type::BOOL, bool> {
0605  public:
0606   static constexpr const char* type_name() { return "bool"; }  // returned by name()
0607 
0608   // BooleanType within arrow use a single bit instead of the C 8-bits layout.
0609   int bit_width() const final { return 1; }
0610   // Both buffers are BITMAP here, unlike the FixedWidth second buffer of CTypeImpl.
0611   DataTypeLayout layout() const override {
0612     return DataTypeLayout({DataTypeLayout::Bitmap(), DataTypeLayout::Bitmap()});
0613   }
0614 
0615  protected:
0616   std::string ComputeFingerprint() const override;
0617 };
0618 
0619 /// \addtogroup numeric-datatypes
0620 ///
0621 /// @{
0622 
0623 /// Concrete type class for unsigned 8-bit integer data (c_type is uint8_t)
0624 class ARROW_EXPORT UInt8Type
0625     : public detail::IntegerTypeImpl<UInt8Type, Type::UINT8, uint8_t> {
0626  public:
0627   static constexpr const char* type_name() { return "uint8"; }  // returned by name()
0628 
0629  protected:
0630   std::string ComputeFingerprint() const override;  // defined out of line
0631 };
0632 
0633 /// Concrete type class for signed 8-bit integer data (c_type is int8_t)
0634 class ARROW_EXPORT Int8Type
0635     : public detail::IntegerTypeImpl<Int8Type, Type::INT8, int8_t> {
0636  public:
0637   static constexpr const char* type_name() { return "int8"; }  // returned by name()
0638 
0639  protected:
0640   std::string ComputeFingerprint() const override;  // defined out of line
0641 };
0642 
0643 /// Concrete type class for unsigned 16-bit integer data (c_type is uint16_t)
0644 class ARROW_EXPORT UInt16Type
0645     : public detail::IntegerTypeImpl<UInt16Type, Type::UINT16, uint16_t> {
0646  public:
0647   static constexpr const char* type_name() { return "uint16"; }  // returned by name()
0648 
0649  protected:
0650   std::string ComputeFingerprint() const override;  // defined out of line
0651 };
0652 
0653 /// Concrete type class for signed 16-bit integer data (c_type is int16_t)
0654 class ARROW_EXPORT Int16Type
0655     : public detail::IntegerTypeImpl<Int16Type, Type::INT16, int16_t> {
0656  public:
0657   static constexpr const char* type_name() { return "int16"; }  // returned by name()
0658 
0659  protected:
0660   std::string ComputeFingerprint() const override;  // defined out of line
0661 };
0662 
0663 /// Concrete type class for unsigned 32-bit integer data (c_type is uint32_t)
0664 class ARROW_EXPORT UInt32Type
0665     : public detail::IntegerTypeImpl<UInt32Type, Type::UINT32, uint32_t> {
0666  public:
0667   static constexpr const char* type_name() { return "uint32"; }  // returned by name()
0668 
0669  protected:
0670   std::string ComputeFingerprint() const override;  // defined out of line
0671 };
0672 
0673 /// Concrete type class for signed 32-bit integer data (c_type is int32_t)
0674 class ARROW_EXPORT Int32Type
0675     : public detail::IntegerTypeImpl<Int32Type, Type::INT32, int32_t> {
0676  public:
0677   static constexpr const char* type_name() { return "int32"; }  // returned by name()
0678 
0679  protected:
0680   std::string ComputeFingerprint() const override;  // defined out of line
0681 };
0682 
0683 /// Concrete type class for unsigned 64-bit integer data (c_type is uint64_t)
0684 class ARROW_EXPORT UInt64Type
0685     : public detail::IntegerTypeImpl<UInt64Type, Type::UINT64, uint64_t> {
0686  public:
0687   static constexpr const char* type_name() { return "uint64"; }  // returned by name()
0688 
0689  protected:
0690   std::string ComputeFingerprint() const override;  // defined out of line
0691 };
0692 
0693 /// Concrete type class for signed 64-bit integer data (c_type is int64_t)
0694 class ARROW_EXPORT Int64Type
0695     : public detail::IntegerTypeImpl<Int64Type, Type::INT64, int64_t> {
0696  public:
0697   static constexpr const char* type_name() { return "int64"; }  // returned by name()
0698 
0699  protected:
0700   std::string ComputeFingerprint() const override;  // defined out of line
0701 };
0702 
0703 /// Concrete type class for 16-bit floating-point data (c_type is uint16_t)
0704 class ARROW_EXPORT HalfFloatType
0705     : public detail::CTypeImpl<HalfFloatType, FloatingPointType, Type::HALF_FLOAT,
0706                                uint16_t> {
0707  public:
0708   Precision precision() const override;  // defined out of line
0709   static constexpr const char* type_name() { return "halffloat"; }  // returned by name()
0710 
0711  protected:
0712   std::string ComputeFingerprint() const override;  // defined out of line
0713 };
0714 
0715 /// Concrete type class for 32-bit floating-point data (C "float")
0716 class ARROW_EXPORT FloatType
0717     : public detail::CTypeImpl<FloatType, FloatingPointType, Type::FLOAT, float> {
0718  public:
0719   Precision precision() const override;  // defined out of line
0720   static constexpr const char* type_name() { return "float"; }  // returned by name()
0721 
0722  protected:
0723   std::string ComputeFingerprint() const override;  // defined out of line
0724 };
0725 
0726 /// Concrete type class for 64-bit floating-point data (C "double")
0727 class ARROW_EXPORT DoubleType
0728     : public detail::CTypeImpl<DoubleType, FloatingPointType, Type::DOUBLE, double> {
0729  public:
0730   Precision precision() const override;  // defined out of line
0731   static constexpr const char* type_name() { return "double"; }  // returned by name()
0732 
0733  protected:
0734   std::string ComputeFingerprint() const override;  // defined out of line
0735 };
0736 
0737 /// @}
0738 
0739 /// \brief Base class for all variable-size binary data types
0740 class ARROW_EXPORT BaseBinaryType : public DataType {
0741  public:
0742   using DataType::DataType;
0743   // The destructor is declared here and defined out of line so that this
0744   // class is not defined in every translation unit (avoids ODR violations).
0745   ~BaseBinaryType() override;
0746 };
0747 
0748 constexpr int64_t kBinaryMemoryLimit = std::numeric_limits<int32_t>::max() - 1;  // INT32_MAX - 1
0749 
0750 /// \addtogroup binary-datatypes
0751 ///
0752 /// @{
0753 
0754 /// \brief Concrete data type for variable-size binary values with int32_t offsets
0755 class ARROW_EXPORT BinaryType : public BaseBinaryType {
0756  public:
0757   static constexpr Type::type type_id = Type::BINARY;
0758   static constexpr bool is_utf8 = false;
0759   using offset_type = int32_t;
0760   using PhysicalType = BinaryType;
0761 
0762   static constexpr const char* type_name() { return "binary"; }
0763 
0764   BinaryType() : BinaryType(type_id) {}
0765   // Three buffers: a bitmap, fixed-width offsets, then one variable-width buffer.
0766   DataTypeLayout layout() const override {
0767     const auto offsets_spec = DataTypeLayout::FixedWidth(sizeof(offset_type));
0768     return DataTypeLayout(
0769         {DataTypeLayout::Bitmap(), offsets_spec, DataTypeLayout::VariableWidth()});
0770   }
0771 
0772   std::string ToString(bool show_metadata = false) const override;
0773   std::string name() const override { return type_name(); }
0774 
0775  protected:
0776   std::string ComputeFingerprint() const override;
0777 
0778   // Lets subclasses such as StringType pass a different logical type id.
0779   explicit BinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
0780 };
0781 
0782 /// \brief Concrete type class for variable-size binary view data
0783 class ARROW_EXPORT BinaryViewType : public DataType {
0784  public:
0785   static constexpr Type::type type_id = Type::BINARY_VIEW;
0786   static constexpr bool is_utf8 = false;
0787   using PhysicalType = BinaryViewType;
0788 
0789   static constexpr int kSize = 16;
0790   static constexpr int kInlineSize = 12;
0791   static constexpr int kPrefixSize = 4;
0792 
0793   /// Variable length string or binary with inline optimization for small values (12 bytes
0794   /// or fewer). This is similar to std::string_view except limited in size to INT32_MAX
0795   /// and at least the first four bytes of the string are copied inline (accessible
0796   /// without pointer dereference). This inline prefix allows failing comparisons early.
0797   /// Furthermore when dealing with short strings the CPU cache working set is reduced
0798   /// since many can be inline.
0799   ///
0800   /// This union supports two states:
0801   ///
0802   /// - Entirely inlined string data
0803   ///                |----|--------------|
0804   ///                 ^    ^
0805   ///                 |    |
0806   ///              size    in-line string data, zero padded
0807   ///
0808   /// - Reference into a buffer
0809   ///                |----|----|----|----|
0810   ///                 ^    ^    ^    ^
0811   ///                 |    |    |    |
0812   ///              size    |    |    `------.
0813   ///                  prefix   |           |
0814   ///                        buffer index   |
0815   ///                                  offset in buffer
0816   ///
0817   /// Adapted from TU Munich's UmbraDB [1], Velox, DuckDB.
0818   ///
0819   /// [1]: https://db.in.tum.de/~freitag/papers/p29-neumann-cidr20.pdf
0820   ///
0821   /// Alignment to 64 bits enables an aligned load of the size and prefix into
0822   /// a single 64 bit integer, which is useful to the comparison fast path.
0823   union alignas(int64_t) c_type {
0824     struct {
0825       int32_t size;
0826       std::array<uint8_t, kInlineSize> data;
0827     } inlined;
0828 
0829     struct {
0830       int32_t size;
0831       std::array<uint8_t, kPrefixSize> prefix;
0832       int32_t buffer_index;
0833       int32_t offset;
0834     } ref;
0835 
0836     /// The number of bytes viewed.
0837     int32_t size() const {
0838       // Size is in the common initial subsequence of each member of the union,
0839       // so accessing `inlined.size` is legal even if another member is active.
0840       return inlined.size;
0841     }
0842 
0843     /// True if the view's data is entirely stored inline.
0844     bool is_inline() const { return size() <= kInlineSize; }
0845 
0846     /// Return a pointer to the inline data of a view.
0847     ///
0848     /// For inline views, this points to the entire data of the view.
0849     /// For other views, this points to the 4 byte prefix.
0850     const uint8_t* inline_data() const& {
0851       // Since `ref.prefix` has the same address as `inlined.data`,
0852       // the branch will be trivially optimized out.
0853       return is_inline() ? inlined.data.data() : ref.prefix.data();
0854     }
0855     const uint8_t* inline_data() && = delete;
0856   };
0857   static_assert(sizeof(c_type) == kSize);
0858   static_assert(std::is_trivial_v<c_type>);
0859 
0860   static constexpr const char* type_name() { return "binary_view"; }
0861 
0862   BinaryViewType() : BinaryViewType(Type::BINARY_VIEW) {}
0863 
0864   DataTypeLayout layout() const override {
0865     return DataTypeLayout({DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(kSize)},
0866                           DataTypeLayout::VariableWidth());
0867   }
0868 
0869   std::string ToString(bool show_metadata = false) const override;
0870   std::string name() const override { return "binary_view"; }
0871 
0872  protected:
0873   std::string ComputeFingerprint() const override;
0874 
0875   // Allow subclasses like StringType to change the logical type.
0876   explicit BinaryViewType(Type::type logical_type) : DataType(logical_type) {}
0877 };
0878 
0879 /// \brief Concrete type class for large variable-size binary data
0880 class ARROW_EXPORT LargeBinaryType : public BaseBinaryType {
0881  public:
0882   static constexpr Type::type type_id = Type::LARGE_BINARY;
0883   static constexpr bool is_utf8 = false;
0884   using offset_type = int64_t;
0885   using PhysicalType = LargeBinaryType;
0886 
0887   static constexpr const char* type_name() { return "large_binary"; }
0888 
0889   LargeBinaryType() : LargeBinaryType(Type::LARGE_BINARY) {}
0890 
0891   DataTypeLayout layout() const override {
0892     return DataTypeLayout({DataTypeLayout::Bitmap(),
0893                            DataTypeLayout::FixedWidth(sizeof(offset_type)),
0894                            DataTypeLayout::VariableWidth()});
0895   }
0896 
0897   std::string ToString(bool show_metadata = false) const override;
0898   std::string name() const override { return "large_binary"; }
0899 
0900  protected:
0901   std::string ComputeFingerprint() const override;
0902 
0903   // Allow subclasses like LargeStringType to change the logical type.
0904   explicit LargeBinaryType(Type::type logical_type) : BaseBinaryType(logical_type) {}
0905 };
0906 
0907 /// \brief Concrete type class for variable-size string data, utf8-encoded
0908 class ARROW_EXPORT StringType : public BinaryType {
0909  public:
0910   static constexpr Type::type type_id = Type::STRING;
0911   static constexpr bool is_utf8 = true;
0912   using PhysicalType = BinaryType;
0913 
0914   static constexpr const char* type_name() { return "utf8"; }
0915 
0916   StringType() : BinaryType(Type::STRING) {}
0917 
0918   std::string ToString(bool show_metadata = false) const override;
0919   std::string name() const override { return "utf8"; }
0920 
0921  protected:
0922   std::string ComputeFingerprint() const override;
0923 };
0924 
0925 /// \brief Concrete type class for variable-size string data, utf8-encoded
0926 class ARROW_EXPORT StringViewType : public BinaryViewType {
0927  public:
0928   static constexpr Type::type type_id = Type::STRING_VIEW;
0929   static constexpr bool is_utf8 = true;
0930   using PhysicalType = BinaryViewType;
0931 
0932   static constexpr const char* type_name() { return "utf8_view"; }
0933 
0934   StringViewType() : BinaryViewType(Type::STRING_VIEW) {}
0935 
0936   std::string ToString(bool show_metadata = false) const override;
0937   std::string name() const override { return "utf8_view"; }
0938 
0939  protected:
0940   std::string ComputeFingerprint() const override;
0941 };
0942 
0943 /// \brief Concrete type class for large variable-size string data, utf8-encoded
0944 class ARROW_EXPORT LargeStringType : public LargeBinaryType {
0945  public:
0946   static constexpr Type::type type_id = Type::LARGE_STRING;
0947   static constexpr bool is_utf8 = true;
0948   using PhysicalType = LargeBinaryType;
0949 
0950   static constexpr const char* type_name() { return "large_utf8"; }
0951 
0952   LargeStringType() : LargeBinaryType(Type::LARGE_STRING) {}
0953 
0954   std::string ToString(bool show_metadata = false) const override;
0955   std::string name() const override { return "large_utf8"; }
0956 
0957  protected:
0958   std::string ComputeFingerprint() const override;
0959 };
0960 
0961 /// \brief Concrete type class for fixed-size binary data
0962 class ARROW_EXPORT FixedSizeBinaryType : public FixedWidthType, public ParametricType {
0963  public:
0964   static constexpr Type::type type_id = Type::FIXED_SIZE_BINARY;
0965   static constexpr bool is_utf8 = false;
0966 
0967   static constexpr const char* type_name() { return "fixed_size_binary"; }
0968 
0969   explicit FixedSizeBinaryType(int32_t byte_width)
0970       : FixedWidthType(Type::FIXED_SIZE_BINARY), byte_width_(byte_width) {}
0971   explicit FixedSizeBinaryType(int32_t byte_width, Type::type override_type_id)
0972       : FixedWidthType(override_type_id), byte_width_(byte_width) {}
0973 
0974   std::string ToString(bool show_metadata = false) const override;
0975   std::string name() const override { return "fixed_size_binary"; }
0976 
0977   DataTypeLayout layout() const override {
0978     return DataTypeLayout(
0979         {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(byte_width())});
0980   }
0981 
0982   int byte_width() const override { return byte_width_; }
0983 
0984   int bit_width() const override;
0985 
0986   // Validating constructor
0987   static Result<std::shared_ptr<DataType>> Make(int32_t byte_width);
0988 
0989  protected:
0990   std::string ComputeFingerprint() const override;
0991 
0992   int32_t byte_width_;
0993 };
0994 
0995 /// @}
0996 
0997 /// \addtogroup numeric-datatypes
0998 ///
0999 /// @{
1000 
1001 /// \brief Base type class for (fixed-size) decimal data
1002 class ARROW_EXPORT DecimalType : public FixedSizeBinaryType {
1003  public:
1004   explicit DecimalType(Type::type type_id, int32_t byte_width, int32_t precision,
1005                        int32_t scale)
1006       : FixedSizeBinaryType(byte_width, type_id), precision_(precision), scale_(scale) {}
1007 
1008   /// Constructs concrete decimal types
1009   static Result<std::shared_ptr<DataType>> Make(Type::type type_id, int32_t precision,
1010                                                 int32_t scale);
1011 
1012   int32_t precision() const { return precision_; }
1013   int32_t scale() const { return scale_; }
1014 
1015   /// \brief Returns the number of bytes needed for precision.
1016   ///
1017   /// precision must be >= 1
1018   static int32_t DecimalSize(int32_t precision);
1019 
1020  protected:
1021   std::string ComputeFingerprint() const override;
1022 
1023   int32_t precision_;
1024   int32_t scale_;
1025 };
1026 
1027 /// \brief Concrete type class for 32-bit decimal data
1028 ///
1029 /// Arrow decimals are fixed-point decimal numbers encoded as a scaled
1030 /// integer.  The precision is the number of significant digits that the
1031 /// decimal type can represent; the scale is the number of digits after
1032 /// the decimal point (note the scale can be negative).
1033 ///
1034 /// As an example, `Decimal32Type(7, 3)` can exactly represent the numbers
1035 /// 1234.567 and -1234.567 (encoded internally as the 32-bit integers
1036 /// 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567.
1037 ///
1038 /// Decimal32Type has a maximum precision of 9 significant digits
1039 /// (also available as Decimal32Type::kMaxPrecision).
1040 /// If higher precision is needed, consider using Decimal64Type,
1041 /// Decimal128Type or Decimal256Type.
1042 class ARROW_EXPORT Decimal32Type : public DecimalType {
1043  public:
1044   static constexpr Type::type type_id = Type::DECIMAL32;
1045 
1046   static constexpr const char* type_name() { return "decimal32"; }
1047 
1048   /// Decimal32Type constructor that aborts on invalid input.
1049   explicit Decimal32Type(int32_t precision, int32_t scale);
1050 
1051   /// Decimal32Type constructor that returns an error on invalid input
1052   static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);
1053 
1054   std::string ToString(bool show_metadata = false) const override;
1055   std::string name() const override { return "decimal32"; }
1056 
1057   static constexpr int32_t kMinPrecision = 1;
1058   static constexpr int32_t kMaxPrecision = 9;
1059   static constexpr int32_t kByteWidth = 4;
1060 };
1061 
1062 /// \brief Concrete type class for 64-bit decimal data
1063 ///
1064 /// Arrow decimals are fixed-point decimal numbers encoded as a scaled
1065 /// integer.  The precision is the number of significant digits that the
1066 /// decimal type can represent; the scale is the number of digits after
1067 /// the decimal point (note the scale can be negative).
1068 ///
1069 /// As an example, `Decimal64Type(7, 3)` can exactly represent the numbers
1070 /// 1234.567 and -1234.567 (encoded internally as the 64-bit integers
1071 /// 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567.
1072 ///
1073 /// Decimal64Type has a maximum precision of 18 significant digits
1074 /// (also available as Decimal64Type::kMaxPrecision).
1075 /// If higher precision is needed, consider using Decimal128Type or
1076 /// Decimal256Type.
1077 class ARROW_EXPORT Decimal64Type : public DecimalType {
1078  public:
1079   static constexpr Type::type type_id = Type::DECIMAL64;
1080 
1081   static constexpr const char* type_name() { return "decimal64"; }
1082 
1083   /// Decimal32Type constructor that aborts on invalid input.
1084   explicit Decimal64Type(int32_t precision, int32_t scale);
1085 
1086   /// Decimal32Type constructor that returns an error on invalid input
1087   static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);
1088 
1089   std::string ToString(bool show_metadata = false) const override;
1090   std::string name() const override { return "decimal64"; }
1091 
1092   static constexpr int32_t kMinPrecision = 1;
1093   static constexpr int32_t kMaxPrecision = 18;
1094   static constexpr int32_t kByteWidth = 8;
1095 };
1096 
1097 /// \brief Concrete type class for 128-bit decimal data
1098 ///
1099 /// Arrow decimals are fixed-point decimal numbers encoded as a scaled
1100 /// integer.  The precision is the number of significant digits that the
1101 /// decimal type can represent; the scale is the number of digits after
1102 /// the decimal point (note the scale can be negative).
1103 ///
1104 /// As an example, `Decimal128Type(7, 3)` can exactly represent the numbers
1105 /// 1234.567 and -1234.567 (encoded internally as the 128-bit integers
1106 /// 1234567 and -1234567, respectively), but neither 12345.67 nor 123.4567.
1107 ///
1108 /// Decimal128Type has a maximum precision of 38 significant digits
1109 /// (also available as Decimal128Type::kMaxPrecision).
1110 /// If higher precision is needed, consider using Decimal256Type.
1111 class ARROW_EXPORT Decimal128Type : public DecimalType {
1112  public:
1113   static constexpr Type::type type_id = Type::DECIMAL128;
1114 
1115   static constexpr const char* type_name() { return "decimal128"; }
1116 
1117   /// Decimal128Type constructor that aborts on invalid input.
1118   explicit Decimal128Type(int32_t precision, int32_t scale);
1119 
1120   /// Decimal128Type constructor that returns an error on invalid input.
1121   static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);
1122 
1123   std::string ToString(bool show_metadata = false) const override;
1124   std::string name() const override { return "decimal128"; }
1125 
1126   static constexpr int32_t kMinPrecision = 1;
1127   static constexpr int32_t kMaxPrecision = 38;
1128   static constexpr int32_t kByteWidth = 16;
1129 };
1130 
1131 /// \brief Concrete type class for 256-bit decimal data
1132 ///
1133 /// Arrow decimals are fixed-point decimal numbers encoded as a scaled
1134 /// integer.  The precision is the number of significant digits that the
1135 /// decimal type can represent; the scale is the number of digits after
1136 /// the decimal point (note the scale can be negative).
1137 ///
1138 /// Decimal256Type has a maximum precision of 76 significant digits.
1139 /// (also available as Decimal256Type::kMaxPrecision).
1140 ///
1141 /// For most use cases, the maximum precision offered by Decimal128Type
1142 /// is sufficient, and it will result in a more compact and more efficient
1143 /// encoding.
1144 class ARROW_EXPORT Decimal256Type : public DecimalType {
1145  public:
1146   static constexpr Type::type type_id = Type::DECIMAL256;
1147 
1148   static constexpr const char* type_name() { return "decimal256"; }
1149 
1150   /// Decimal256Type constructor that aborts on invalid input.
1151   explicit Decimal256Type(int32_t precision, int32_t scale);
1152 
1153   /// Decimal256Type constructor that returns an error on invalid input.
1154   static Result<std::shared_ptr<DataType>> Make(int32_t precision, int32_t scale);
1155 
1156   std::string ToString(bool show_metadata = false) const override;
1157   std::string name() const override { return "decimal256"; }
1158 
1159   static constexpr int32_t kMinPrecision = 1;
1160   static constexpr int32_t kMaxPrecision = 76;
1161   static constexpr int32_t kByteWidth = 32;
1162 };
1163 
1164 /// @}
1165 
1166 /// \addtogroup nested-datatypes
1167 ///
1168 /// @{
1169 
1170 /// \brief Base class for all variable-size list data types
1171 class ARROW_EXPORT BaseListType : public NestedType {
1172  public:
1173   using NestedType::NestedType;
1174   // This is only for preventing defining this class in each
1175   // translation unit to avoid one-definition-rule violation.
1176   ~BaseListType() override;
1177   const std::shared_ptr<Field>& value_field() const { return children_[0]; }
1178 
1179   const std::shared_ptr<DataType>& value_type() const { return children_[0]->type(); }
1180 };
1181 
1182 /// \brief Concrete type class for list data
1183 ///
1184 /// List data is nested data where each value is a variable number of
1185 /// child items.  Lists can be recursively nested, for example
1186 /// list(list(int32)).
1187 class ARROW_EXPORT ListType : public BaseListType {
1188  public:
1189   static constexpr Type::type type_id = Type::LIST;
1190   using offset_type = int32_t;
1191 
1192   static constexpr const char* type_name() { return "list"; }
1193 
1194   // List can contain any other logical value type
1195   explicit ListType(std::shared_ptr<DataType> value_type)
1196       : ListType(std::make_shared<Field>("item", std::move(value_type))) {}
1197 
1198   explicit ListType(std::shared_ptr<Field> value_field) : BaseListType(type_id) {
1199     children_ = {std::move(value_field)};
1200   }
1201 
1202   DataTypeLayout layout() const override {
1203     return DataTypeLayout(
1204         {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))});
1205   }
1206 
1207   std::string ToString(bool show_metadata = false) const override;
1208 
1209   std::string name() const override { return "list"; }
1210 
1211  protected:
1212   std::string ComputeFingerprint() const override;
1213 };
1214 
1215 /// \brief Concrete type class for large list data
1216 ///
1217 /// LargeListType is like ListType but with 64-bit rather than 32-bit offsets.
1218 class ARROW_EXPORT LargeListType : public BaseListType {
1219  public:
1220   static constexpr Type::type type_id = Type::LARGE_LIST;
1221   using offset_type = int64_t;
1222 
1223   static constexpr const char* type_name() { return "large_list"; }
1224 
1225   // List can contain any other logical value type
1226   explicit LargeListType(std::shared_ptr<DataType> value_type)
1227       : LargeListType(std::make_shared<Field>("item", std::move(value_type))) {}
1228 
1229   explicit LargeListType(std::shared_ptr<Field> value_field) : BaseListType(type_id) {
1230     children_ = {std::move(value_field)};
1231   }
1232 
1233   DataTypeLayout layout() const override {
1234     return DataTypeLayout(
1235         {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(sizeof(offset_type))});
1236   }
1237 
1238   std::string ToString(bool show_metadata = false) const override;
1239 
1240   std::string name() const override { return "large_list"; }
1241 
1242  protected:
1243   std::string ComputeFingerprint() const override;
1244 };
1245 
1246 /// \brief Type class for array of list views
1247 class ARROW_EXPORT ListViewType : public BaseListType {
1248  public:
1249   static constexpr Type::type type_id = Type::LIST_VIEW;
1250   using offset_type = int32_t;
1251 
1252   static constexpr const char* type_name() { return "list_view"; }
1253 
1254   // ListView can contain any other logical value type
1255   explicit ListViewType(const std::shared_ptr<DataType>& value_type)
1256       : ListViewType(std::make_shared<Field>("item", value_type)) {}
1257 
1258   explicit ListViewType(const std::shared_ptr<Field>& value_field)
1259       : BaseListType(type_id) {
1260     children_ = {value_field};
1261   }
1262 
1263   DataTypeLayout layout() const override {
1264     return DataTypeLayout({DataTypeLayout::Bitmap(),
1265                            DataTypeLayout::FixedWidth(sizeof(offset_type)),
1266                            DataTypeLayout::FixedWidth(sizeof(offset_type))});
1267   }
1268 
1269   std::string ToString(bool show_metadata = false) const override;
1270 
1271   std::string name() const override { return "list_view"; }
1272 
1273  protected:
1274   std::string ComputeFingerprint() const override;
1275 };
1276 
1277 /// \brief Concrete type class for large list-view data
1278 ///
1279 /// LargeListViewType is like ListViewType but with 64-bit rather than 32-bit offsets and
1280 /// sizes.
1281 class ARROW_EXPORT LargeListViewType : public BaseListType {
1282  public:
1283   static constexpr Type::type type_id = Type::LARGE_LIST_VIEW;
1284   using offset_type = int64_t;
1285 
1286   static constexpr const char* type_name() { return "large_list_view"; }
1287 
1288   // LargeListView can contain any other logical value type
1289   explicit LargeListViewType(const std::shared_ptr<DataType>& value_type)
1290       : LargeListViewType(std::make_shared<Field>("item", value_type)) {}
1291 
1292   explicit LargeListViewType(const std::shared_ptr<Field>& value_field)
1293       : BaseListType(type_id) {
1294     children_ = {value_field};
1295   }
1296 
1297   DataTypeLayout layout() const override {
1298     return DataTypeLayout({DataTypeLayout::Bitmap(),
1299                            DataTypeLayout::FixedWidth(sizeof(offset_type)),
1300                            DataTypeLayout::FixedWidth(sizeof(offset_type))});
1301   }
1302 
1303   std::string ToString(bool show_metadata = false) const override;
1304 
1305   std::string name() const override { return "large_list_view"; }
1306 
1307  protected:
1308   std::string ComputeFingerprint() const override;
1309 };
1310 
1311 /// \brief Concrete type class for map data
1312 ///
1313 /// Map data is nested data where each value is a variable number of
1314 /// key-item pairs.  Its physical representation is the same as
1315 /// a list of `{key, item}` structs.
1316 ///
1317 /// Maps can be recursively nested, for example map(utf8, map(utf8, int32)).
1318 class ARROW_EXPORT MapType : public ListType {
1319  public:
1320   static constexpr Type::type type_id = Type::MAP;
1321 
1322   static constexpr const char* type_name() { return "map"; }
1323 
1324   MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<DataType> item_type,
1325           bool keys_sorted = false);
1326 
1327   MapType(std::shared_ptr<DataType> key_type, std::shared_ptr<Field> item_field,
1328           bool keys_sorted = false);
1329 
1330   MapType(std::shared_ptr<Field> key_field, std::shared_ptr<Field> item_field,
1331           bool keys_sorted = false);
1332 
1333   explicit MapType(std::shared_ptr<Field> value_field, bool keys_sorted = false);
1334 
1335   // Validating constructor
1336   static Result<std::shared_ptr<DataType>> Make(std::shared_ptr<Field> value_field,
1337                                                 bool keys_sorted = false);
1338 
1339   std::shared_ptr<Field> key_field() const { return value_type()->field(0); }
1340   std::shared_ptr<DataType> key_type() const { return key_field()->type(); }
1341 
1342   std::shared_ptr<Field> item_field() const { return value_type()->field(1); }
1343   std::shared_ptr<DataType> item_type() const { return item_field()->type(); }
1344 
1345   std::string ToString(bool show_metadata = false) const override;
1346 
1347   std::string name() const override { return "map"; }
1348 
1349   bool keys_sorted() const { return keys_sorted_; }
1350 
1351  private:
1352   std::string ComputeFingerprint() const override;
1353 
1354   bool keys_sorted_;
1355 };
1356 
1357 /// \brief Concrete type class for fixed size list data
1358 class ARROW_EXPORT FixedSizeListType : public BaseListType {
1359  public:
1360   static constexpr Type::type type_id = Type::FIXED_SIZE_LIST;
1361   // While the individual item size is 32-bit, the overall data size
1362   // (item size * list length) may not fit in a 32-bit int.
1363   using offset_type = int64_t;
1364 
1365   static constexpr const char* type_name() { return "fixed_size_list"; }
1366 
1367   // List can contain any other logical value type
1368   FixedSizeListType(std::shared_ptr<DataType> value_type, int32_t list_size)
1369       : FixedSizeListType(std::make_shared<Field>("item", std::move(value_type)),
1370                           list_size) {}
1371 
1372   FixedSizeListType(std::shared_ptr<Field> value_field, int32_t list_size)
1373       : BaseListType(type_id), list_size_(list_size) {
1374     children_ = {std::move(value_field)};
1375   }
1376 
1377   DataTypeLayout layout() const override {
1378     return DataTypeLayout({DataTypeLayout::Bitmap()});
1379   }
1380 
1381   std::string ToString(bool show_metadata = false) const override;
1382 
1383   std::string name() const override { return "fixed_size_list"; }
1384 
1385   int32_t list_size() const { return list_size_; }
1386 
1387  protected:
1388   std::string ComputeFingerprint() const override;
1389 
1390   int32_t list_size_;
1391 };
1392 
1393 /// \brief Concrete type class for struct data
1394 class ARROW_EXPORT StructType : public NestedType {
1395  public:
1396   static constexpr Type::type type_id = Type::STRUCT;
1397 
1398   static constexpr const char* type_name() { return "struct"; }
1399 
1400   explicit StructType(const FieldVector& fields);
1401 
1402   ~StructType() override;
1403 
1404   DataTypeLayout layout() const override {
1405     return DataTypeLayout({DataTypeLayout::Bitmap()});
1406   }
1407 
1408   std::string ToString(bool show_metadata = false) const override;
1409   std::string name() const override { return "struct"; }
1410 
1411   /// Returns null if name not found
1412   std::shared_ptr<Field> GetFieldByName(const std::string& name) const;
1413 
1414   /// Return all fields having this name
1415   FieldVector GetAllFieldsByName(const std::string& name) const;
1416 
1417   /// Returns -1 if name not found or if there are multiple fields having the
1418   /// same name
1419   int GetFieldIndex(const std::string& name) const;
1420 
1421   /// \brief Return the indices of all fields having this name in sorted order
1422   std::vector<int> GetAllFieldIndices(const std::string& name) const;
1423 
1424   /// \brief Create a new StructType with field added at given index
1425   Result<std::shared_ptr<StructType>> AddField(int i,
1426                                                const std::shared_ptr<Field>& field) const;
1427   /// \brief Create a new StructType by removing the field at given index
1428   Result<std::shared_ptr<StructType>> RemoveField(int i) const;
1429   /// \brief Create a new StructType by changing the field at given index
1430   Result<std::shared_ptr<StructType>> SetField(int i,
1431                                                const std::shared_ptr<Field>& field) const;
1432 
1433  private:
1434   std::string ComputeFingerprint() const override;
1435 
1436   class Impl;
1437   std::unique_ptr<Impl> impl_;
1438 };
1439 
1440 /// \brief Base type class for union data
1441 class ARROW_EXPORT UnionType : public NestedType {
1442  public:
1443   static constexpr int8_t kMaxTypeCode = 127;
1444   static constexpr int kInvalidChildId = -1;
1445 
1446   static Result<std::shared_ptr<DataType>> Make(
1447       const FieldVector& fields, const std::vector<int8_t>& type_codes,
1448       UnionMode::type mode = UnionMode::SPARSE) {
1449     if (mode == UnionMode::SPARSE) {
1450       return sparse_union(fields, type_codes);
1451     } else {
1452       return dense_union(fields, type_codes);
1453     }
1454   }
1455 
1456   DataTypeLayout layout() const override;
1457 
1458   std::string ToString(bool show_metadata = false) const override;
1459 
1460   /// The array of logical type ids.
1461   ///
1462   /// For example, the first type in the union might be denoted by the id 5
1463   /// (instead of 0).
1464   const std::vector<int8_t>& type_codes() const { return type_codes_; }
1465 
1466   /// An array mapping logical type ids to physical child ids.
1467   const std::vector<int>& child_ids() const { return child_ids_; }
1468 
1469   uint8_t max_type_code() const;
1470 
1471   UnionMode::type mode() const;
1472 
1473  protected:
1474   UnionType(FieldVector fields, std::vector<int8_t> type_codes, Type::type id);
1475 
1476   static Status ValidateParameters(const FieldVector& fields,
1477                                    const std::vector<int8_t>& type_codes,
1478                                    UnionMode::type mode);
1479 
1480  private:
1481   std::string ComputeFingerprint() const override;
1482 
1483   std::vector<int8_t> type_codes_;
1484   std::vector<int> child_ids_;
1485 };
1486 
1487 /// \brief Concrete type class for sparse union data
1488 ///
1489 /// A sparse union is a nested type where each logical value is taken from
1490 /// a single child.  A buffer of 8-bit type ids indicates which child
1491 /// a given logical value is to be taken from.
1492 ///
1493 /// In a sparse union, each child array should have the same length as the
1494 /// union array, regardless of the actual number of union values that
1495 /// refer to it.
1496 ///
1497 /// Note that, unlike most other types, unions don't have a top-level validity bitmap.
1498 class ARROW_EXPORT SparseUnionType : public UnionType {
1499  public:
1500   static constexpr Type::type type_id = Type::SPARSE_UNION;
1501 
1502   static constexpr const char* type_name() { return "sparse_union"; }
1503 
1504   SparseUnionType(FieldVector fields, std::vector<int8_t> type_codes);
1505 
1506   // A constructor variant that validates input parameters
1507   static Result<std::shared_ptr<DataType>> Make(FieldVector fields,
1508                                                 std::vector<int8_t> type_codes);
1509 
1510   std::string name() const override { return "sparse_union"; }
1511 };
1512 
1513 /// \brief Concrete type class for dense union data
1514 ///
1515 /// A dense union is a nested type where each logical value is taken from
1516 /// a single child, at a specific offset.  A buffer of 8-bit type ids
1517 /// indicates which child a given logical value is to be taken from,
1518 /// and a buffer of 32-bit offsets indicates at which physical position
1519 /// in the given child array the logical value is to be taken from.
1520 ///
1521 /// Unlike a sparse union, a dense union allows encoding only the child array
1522 /// values which are actually referred to by the union array.  This is
1523 /// counterbalanced by the additional footprint of the offsets buffer, and
1524 /// the additional indirection cost when looking up values.
1525 ///
1526 /// Note that, unlike most other types, unions don't have a top-level validity bitmap.
1527 class ARROW_EXPORT DenseUnionType : public UnionType {
1528  public:
1529   static constexpr Type::type type_id = Type::DENSE_UNION;
1530 
1531   static constexpr const char* type_name() { return "dense_union"; }
1532 
1533   DenseUnionType(FieldVector fields, std::vector<int8_t> type_codes);
1534 
1535   // A constructor variant that validates input parameters
1536   static Result<std::shared_ptr<DataType>> Make(FieldVector fields,
1537                                                 std::vector<int8_t> type_codes);
1538 
1539   std::string name() const override { return "dense_union"; }
1540 };
1541 
1542 /// \brief Type class for run-end encoded data
1543 class ARROW_EXPORT RunEndEncodedType : public NestedType {
1544  public:
1545   static constexpr Type::type type_id = Type::RUN_END_ENCODED;
1546 
1547   static constexpr const char* type_name() { return "run_end_encoded"; }
1548 
1549   explicit RunEndEncodedType(std::shared_ptr<DataType> run_end_type,
1550                              std::shared_ptr<DataType> value_type);
1551   ~RunEndEncodedType() override;
1552 
1553   DataTypeLayout layout() const override {
1554     // A lot of existing code expects at least one buffer
1555     return DataTypeLayout({DataTypeLayout::AlwaysNull()});
1556   }
1557 
1558   const std::shared_ptr<DataType>& run_end_type() const { return fields()[0]->type(); }
1559   const std::shared_ptr<DataType>& value_type() const { return fields()[1]->type(); }
1560 
1561   std::string ToString(bool show_metadata = false) const override;
1562 
1563   std::string name() const override { return "run_end_encoded"; }
1564 
1565   static bool RunEndTypeValid(const DataType& run_end_type);
1566 
1567  private:
1568   std::string ComputeFingerprint() const override;
1569 };
1570 
1571 /// @}
1572 
1573 // ----------------------------------------------------------------------
1574 // Date and time types
1575 
1576 /// \addtogroup temporal-datatypes
1577 ///
1578 /// @{
1579 
1580 /// \brief Base type for all date and time types
1581 class ARROW_EXPORT TemporalType : public FixedWidthType {
1582  public:
1583   using FixedWidthType::FixedWidthType;
1584   // This is only for preventing defining this class in each
1585   // translation unit to avoid one-definition-rule violation.
1586   ~TemporalType() override;
1587 
1588   DataTypeLayout layout() const override {
1589     return DataTypeLayout(
1590         {DataTypeLayout::Bitmap(), DataTypeLayout::FixedWidth(bit_width() / 8)});
1591   }
1592 };
1593 
1594 /// \brief Base type class for date data
1595 class ARROW_EXPORT DateType : public TemporalType {
1596  public:
1597   virtual DateUnit unit() const = 0;
1598 
1599  protected:
1600   explicit DateType(Type::type type_id);
1601 };
1602 
1603 /// Concrete type class for 32-bit date data (as number of days since UNIX epoch)
1604 class ARROW_EXPORT Date32Type : public DateType {
1605  public:
1606   static constexpr Type::type type_id = Type::DATE32;
1607   static constexpr DateUnit UNIT = DateUnit::DAY;
1608   using c_type = int32_t;
1609   using PhysicalType = Int32Type;
1610 
1611   static constexpr const char* type_name() { return "date32"; }
1612 
1613   Date32Type();
1614 
1615   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
1616 
1617   std::string ToString(bool show_metadata = false) const override;
1618 
1619   std::string name() const override { return "date32"; }
1620   DateUnit unit() const override { return UNIT; }
1621 
1622  protected:
1623   std::string ComputeFingerprint() const override;
1624 };
1625 
1626 /// Concrete type class for 64-bit date data (as number of milliseconds since UNIX epoch)
1627 class ARROW_EXPORT Date64Type : public DateType {
1628  public:
1629   static constexpr Type::type type_id = Type::DATE64;
1630   static constexpr DateUnit UNIT = DateUnit::MILLI;
1631   using c_type = int64_t;
1632   using PhysicalType = Int64Type;
1633 
1634   static constexpr const char* type_name() { return "date64"; }
1635 
1636   Date64Type();
1637 
1638   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
1639 
1640   std::string ToString(bool show_metadata = false) const override;
1641 
1642   std::string name() const override { return "date64"; }
1643   DateUnit unit() const override { return UNIT; }
1644 
1645  protected:
1646   std::string ComputeFingerprint() const override;
1647 };
1648 
1649 ARROW_EXPORT
1650 std::ostream& operator<<(std::ostream& os, TimeUnit::type unit);
1651 
1652 /// Base type class for time data
1653 class ARROW_EXPORT TimeType : public TemporalType, public ParametricType {
1654  public:
1655   TimeUnit::type unit() const { return unit_; }
1656 
1657  protected:
1658   TimeType(Type::type type_id, TimeUnit::type unit);
1659   std::string ComputeFingerprint() const override;
1660 
1661   TimeUnit::type unit_;
1662 };
1663 
1664 /// Concrete type class for 32-bit time data (as number of seconds or milliseconds
1665 /// since midnight)
1666 class ARROW_EXPORT Time32Type : public TimeType {
1667  public:
1668   static constexpr Type::type type_id = Type::TIME32;
1669   using c_type = int32_t;
1670   using PhysicalType = Int32Type;
1671 
1672   static constexpr const char* type_name() { return "time32"; }
1673 
1674   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
1675 
1676   explicit Time32Type(TimeUnit::type unit = TimeUnit::MILLI);
1677 
1678   std::string ToString(bool show_metadata = false) const override;
1679 
1680   std::string name() const override { return "time32"; }
1681 };
1682 
1683 /// Concrete type class for 64-bit time data (as number of microseconds or nanoseconds
1684 /// since midnight)
1685 class ARROW_EXPORT Time64Type : public TimeType {
1686  public:
1687   static constexpr Type::type type_id = Type::TIME64;
1688   using c_type = int64_t;
1689   using PhysicalType = Int64Type;
1690 
1691   static constexpr const char* type_name() { return "time64"; }
1692 
1693   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
1694 
1695   explicit Time64Type(TimeUnit::type unit = TimeUnit::NANO);
1696 
1697   std::string ToString(bool show_metadata = false) const override;
1698 
1699   std::string name() const override { return "time64"; }
1700 };
1701 
1702 /// \brief Concrete type class for datetime data (as number of seconds, milliseconds,
1703 /// microseconds or nanoseconds since UNIX epoch)
1704 ///
1705 /// If supplied, the timezone string should take either the form (i) "Area/Location",
1706 /// with values drawn from the names in the IANA Time Zone Database (such as
1707 /// "Europe/Zurich"); or (ii) "(+|-)HH:MM" indicating an absolute offset from GMT
1708 /// (such as "-08:00").  To indicate a native UTC timestamp, one of the strings "UTC",
1709 /// "Etc/UTC" or "+00:00" should be used.
1710 ///
1711 /// If any non-empty string is supplied as the timezone for a TimestampType, then the
1712 /// Arrow field containing that timestamp type (and by extension the column associated
1713 /// with such a field) is considered "timezone-aware".  The integer arrays that comprise
1714 /// a timezone-aware column must contain UTC normalized datetime values, regardless of
1715 /// the contents of their timezone string.  More precisely, (i) the producer of a
1716 /// timezone-aware column must populate its constituent arrays with valid UTC values
1717 /// (performing offset conversions from non-UTC values if necessary); and (ii) the
1718 /// consumer of a timezone-aware column may assume that the column's values are directly
1719 /// comparable (that is, with no offset adjustment required) to the values of any other
1720 /// timezone-aware column or to any other valid UTC datetime value (provided all values
1721 /// are expressed in the same units).
1722 ///
1723 /// If a TimestampType is constructed without a timezone (or, equivalently, if the
1724 /// timezone supplied is an empty string) then the resulting Arrow field (column) is
1725 /// considered "timezone-naive".  The producer of a timezone-naive column may populate
1726 /// its constituent integer arrays with datetime values from any timezone; the consumer
1727 /// of a timezone-naive column should make no assumptions about the interoperability or
1728 /// comparability of the values of such a column with those of any other timestamp
1729 /// column or datetime value.
1730 ///
1731 /// If a timezone-aware field contains a recognized timezone, its values may be
1732 /// localized to that locale upon display; the values of timezone-naive fields must
1733 /// always be displayed "as is", with no localization performed on them.
1734 class ARROW_EXPORT TimestampType : public TemporalType, public ParametricType {
1735  public:
1736   using Unit = TimeUnit;
1737 
1738   static constexpr Type::type type_id = Type::TIMESTAMP;
1739   using c_type = int64_t;
1740   using PhysicalType = Int64Type;
1741 
1742   static constexpr const char* type_name() { return "timestamp"; }
1743 
1744   int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
1745 
1746   explicit TimestampType(TimeUnit::type unit = TimeUnit::MILLI)
1747       : TemporalType(Type::TIMESTAMP), unit_(unit) {}
1748 
1749   explicit TimestampType(TimeUnit::type unit, const std::string& timezone)
1750       : TemporalType(Type::TIMESTAMP), unit_(unit), timezone_(timezone) {}
1751 
1752   std::string ToString(bool show_metadata = false) const override;
1753   std::string name() const override { return "timestamp"; }
1754 
1755   TimeUnit::type unit() const { return unit_; }
1756   const std::string& timezone() const { return timezone_; }
1757 
1758  protected:
1759   std::string ComputeFingerprint() const override;
1760 
1761  private:
1762   TimeUnit::type unit_;
1763   std::string timezone_;
1764 };
1765 
1766 // Base class for the different kinds of calendar intervals.
1767 class ARROW_EXPORT IntervalType : public TemporalType, public ParametricType {
1768  public:
1769   enum type { MONTHS, DAY_TIME, MONTH_DAY_NANO };
1770 
1771   virtual type interval_type() const = 0;
1772 
1773  protected:
1774   explicit IntervalType(Type::type subtype) : TemporalType(subtype) {}
1775   std::string ComputeFingerprint() const override;
1776 };
1777 
1778 /// \brief Represents a number of months.
1779 ///
1780 /// Type representing a number of months.  Corresponds to YearMonth type
1781 /// in Schema.fbs (years are defined as 12 months).
1782 class ARROW_EXPORT MonthIntervalType : public IntervalType {
1783  public:
1784   static constexpr Type::type type_id = Type::INTERVAL_MONTHS;
1785   using c_type = int32_t;
1786   using PhysicalType = Int32Type;
1787 
1788   static constexpr const char* type_name() { return "month_interval"; }
1789 
1790   IntervalType::type interval_type() const override { return IntervalType::MONTHS; }
1791 
1792   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
1793 
1794   MonthIntervalType() : IntervalType(type_id) {}
1795 
1796   std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override {
1797     return name();
1798   }
1799   std::string name() const override { return "month_interval"; }
1800 };
1801 
1802 /// \brief Represents a number of days and milliseconds (fraction of day).
1803 class ARROW_EXPORT DayTimeIntervalType : public IntervalType {
1804  public:
1805   struct DayMilliseconds {
1806     int32_t days = 0;
1807     int32_t milliseconds = 0;
1808     constexpr DayMilliseconds() = default;
1809     constexpr DayMilliseconds(int32_t days, int32_t milliseconds)
1810         : days(days), milliseconds(milliseconds) {}
1811     bool operator==(DayMilliseconds other) const {
1812       return this->days == other.days && this->milliseconds == other.milliseconds;
1813     }
1814     bool operator!=(DayMilliseconds other) const { return !(*this == other); }
1815     bool operator<(DayMilliseconds other) const {
1816       return this->days < other.days || this->milliseconds < other.milliseconds;
1817     }
1818   };
1819   using c_type = DayMilliseconds;
1820   using PhysicalType = DayTimeIntervalType;
1821 
1822   static_assert(sizeof(DayMilliseconds) == 8,
1823                 "DayMilliseconds struct assumed to be of size 8 bytes");
1824   static constexpr Type::type type_id = Type::INTERVAL_DAY_TIME;
1825 
1826   static constexpr const char* type_name() { return "day_time_interval"; }
1827 
1828   IntervalType::type interval_type() const override { return IntervalType::DAY_TIME; }
1829 
1830   DayTimeIntervalType() : IntervalType(type_id) {}
1831 
1832   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
1833 
1834   std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override {
1835     return name();
1836   }
1837   std::string name() const override { return "day_time_interval"; }
1838 };
1839 
1840 ARROW_EXPORT
1841 std::ostream& operator<<(std::ostream& os, DayTimeIntervalType::DayMilliseconds interval);
1842 
1843 /// \brief Represents a number of months, days and nanoseconds between
1844 /// two dates.
1845 ///
1846 /// All fields are independent from one another.
1847 class ARROW_EXPORT MonthDayNanoIntervalType : public IntervalType {
1848  public:
1849   struct MonthDayNanos {
1850     int32_t months;
1851     int32_t days;
1852     int64_t nanoseconds;
1853     bool operator==(MonthDayNanos other) const {
1854       return this->months == other.months && this->days == other.days &&
1855              this->nanoseconds == other.nanoseconds;
1856     }
1857     bool operator!=(MonthDayNanos other) const { return !(*this == other); }
1858   };
1859   using c_type = MonthDayNanos;
1860   using PhysicalType = MonthDayNanoIntervalType;
1861 
1862   static_assert(sizeof(MonthDayNanos) == 16,
1863                 "MonthDayNanos struct assumed to be of size 16 bytes");
1864   static constexpr Type::type type_id = Type::INTERVAL_MONTH_DAY_NANO;
1865 
1866   static constexpr const char* type_name() { return "month_day_nano_interval"; }
1867 
1868   IntervalType::type interval_type() const override {
1869     return IntervalType::MONTH_DAY_NANO;
1870   }
1871 
1872   MonthDayNanoIntervalType() : IntervalType(type_id) {}
1873 
1874   int bit_width() const override { return static_cast<int>(sizeof(c_type) * CHAR_BIT); }
1875 
1876   std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override {
1877     return name();
1878   }
1879   std::string name() const override { return "month_day_nano_interval"; }
1880 };
1881 
1882 ARROW_EXPORT
1883 std::ostream& operator<<(std::ostream& os,
1884                          MonthDayNanoIntervalType::MonthDayNanos interval);
1885 
1886 /// \brief Represents an elapsed time without any relation to a calendar artifact.
1887 class ARROW_EXPORT DurationType : public TemporalType, public ParametricType {
1888  public:
1889   using Unit = TimeUnit;
1890 
1891   static constexpr Type::type type_id = Type::DURATION;
1892   using c_type = int64_t;
1893   using PhysicalType = Int64Type;
1894 
1895   static constexpr const char* type_name() { return "duration"; }
1896 
1897   int bit_width() const override { return static_cast<int>(sizeof(int64_t) * CHAR_BIT); }
1898 
1899   explicit DurationType(TimeUnit::type unit = TimeUnit::MILLI)
1900       : TemporalType(Type::DURATION), unit_(unit) {}
1901 
1902   std::string ToString(bool show_metadata = false) const override;
1903   std::string name() const override { return "duration"; }
1904 
1905   TimeUnit::type unit() const { return unit_; }
1906 
1907  protected:
1908   std::string ComputeFingerprint() const override;
1909 
1910  private:
1911   TimeUnit::type unit_;
1912 };
1913 
1914 /// @}
1915 
1916 // ----------------------------------------------------------------------
// Dictionary type (for representing categorical or dictionary-encoded
// data in memory)
1919 
1920 /// \brief Dictionary-encoded value type with data-dependent
1921 /// dictionary. Indices are represented by any integer types.
1922 class ARROW_EXPORT DictionaryType : public FixedWidthType {
1923  public:
1924   static constexpr Type::type type_id = Type::DICTIONARY;
1925 
1926   static constexpr const char* type_name() { return "dictionary"; }
1927 
1928   DictionaryType(const std::shared_ptr<DataType>& index_type,
1929                  const std::shared_ptr<DataType>& value_type, bool ordered = false);
1930 
1931   // A constructor variant that validates its input parameters
1932   static Result<std::shared_ptr<DataType>> Make(
1933       const std::shared_ptr<DataType>& index_type,
1934       const std::shared_ptr<DataType>& value_type, bool ordered = false);
1935 
1936   std::string ToString(bool show_metadata = false) const override;
1937   std::string name() const override { return "dictionary"; }
1938 
1939   int bit_width() const override;
1940 
1941   DataTypeLayout layout() const override;
1942 
1943   const std::shared_ptr<DataType>& index_type() const { return index_type_; }
1944   const std::shared_ptr<DataType>& value_type() const { return value_type_; }
1945 
1946   bool ordered() const { return ordered_; }
1947 
1948  protected:
1949   static Status ValidateParameters(const DataType& index_type,
1950                                    const DataType& value_type);
1951 
1952   std::string ComputeFingerprint() const override;
1953 
1954   // Must be an integer type (not currently checked)
1955   std::shared_ptr<DataType> index_type_;
1956   std::shared_ptr<DataType> value_type_;
1957   bool ordered_;
1958 };
1959 
1960 // ----------------------------------------------------------------------
1961 // FieldRef
1962 
1963 /// \class FieldPath
1964 ///
1965 /// Represents a path to a nested field using indices of child fields.
1966 /// For example, given indices {5, 9, 3} the field would be retrieved with
1967 /// schema->field(5)->type()->field(9)->type()->field(3)
1968 ///
1969 /// Attempting to retrieve a child field using a FieldPath which is not valid for
1970 /// a given schema will raise an error. Invalid FieldPaths include:
1971 /// - an index is out of range
1972 /// - the path is empty (note: a default constructed FieldPath will be empty)
1973 ///
1974 /// FieldPaths provide a number of accessors for drilling down to potentially nested
1975 /// children. They are overloaded for convenience to support Schema (returns a field),
/// DataType (returns a child field), Field (returns a child field of this field's type),
/// Array (returns a child array), RecordBatch (returns a column).
1978 class ARROW_EXPORT FieldPath {
1979  public:
1980   FieldPath() = default;
1981 
1982   FieldPath(std::vector<int> indices)  // NOLINT runtime/explicit
1983       : indices_(std::move(indices)) {}
1984 
1985   FieldPath(std::initializer_list<int> indices)  // NOLINT runtime/explicit
1986       : indices_(std::move(indices)) {}
1987 
1988   std::string ToString() const;
1989 
1990   size_t hash() const;
1991   struct Hash {
1992     size_t operator()(const FieldPath& path) const { return path.hash(); }
1993   };
1994 
1995   bool empty() const { return indices_.empty(); }
1996   bool operator==(const FieldPath& other) const { return indices() == other.indices(); }
1997   bool operator!=(const FieldPath& other) const { return indices() != other.indices(); }
1998 
1999   const std::vector<int>& indices() const { return indices_; }
2000   int operator[](size_t i) const { return indices_[i]; }
2001   std::vector<int>::const_iterator begin() const { return indices_.begin(); }
2002   std::vector<int>::const_iterator end() const { return indices_.end(); }
2003 
2004   /// \brief Retrieve the referenced child Field from a Schema, Field, or DataType
2005   Result<std::shared_ptr<Field>> Get(const Schema& schema) const;
2006   Result<std::shared_ptr<Field>> Get(const Field& field) const;
2007   Result<std::shared_ptr<Field>> Get(const DataType& type) const;
2008   Result<std::shared_ptr<Field>> Get(const FieldVector& fields) const;
2009 
2010   static Result<std::shared_ptr<Schema>> GetAll(const Schema& schema,
2011                                                 const std::vector<FieldPath>& paths);
2012 
2013   /// \brief Retrieve the referenced column from a RecordBatch or Table
2014   Result<std::shared_ptr<Array>> Get(const RecordBatch& batch) const;
2015   Result<std::shared_ptr<ChunkedArray>> Get(const Table& table) const;
2016 
2017   /// \brief Retrieve the referenced child from an Array or ArrayData
2018   Result<std::shared_ptr<Array>> Get(const Array& array) const;
2019   Result<std::shared_ptr<ArrayData>> Get(const ArrayData& data) const;
2020 
2021   /// \brief Retrieve the referenced child from a ChunkedArray
2022   Result<std::shared_ptr<ChunkedArray>> Get(const ChunkedArray& chunked_array) const;
2023 
2024   /// \brief Retrieve the referenced child/column from an Array, ArrayData, ChunkedArray,
2025   /// RecordBatch, or Table
2026   ///
2027   /// Unlike `FieldPath::Get`, these variants are not zero-copy and the retrieved child's
2028   /// null bitmap is ANDed with its ancestors'
2029   Result<std::shared_ptr<Array>> GetFlattened(const Array& array,
2030                                               MemoryPool* pool = NULLPTR) const;
2031   Result<std::shared_ptr<ArrayData>> GetFlattened(const ArrayData& data,
2032                                                   MemoryPool* pool = NULLPTR) const;
2033   Result<std::shared_ptr<ChunkedArray>> GetFlattened(const ChunkedArray& chunked_array,
2034                                                      MemoryPool* pool = NULLPTR) const;
2035   Result<std::shared_ptr<Array>> GetFlattened(const RecordBatch& batch,
2036                                               MemoryPool* pool = NULLPTR) const;
2037   Result<std::shared_ptr<ChunkedArray>> GetFlattened(const Table& table,
2038                                                      MemoryPool* pool = NULLPTR) const;
2039 
2040  private:
2041   std::vector<int> indices_;
2042 };
2043 
2044 /// \class FieldRef
2045 /// \brief Descriptor of a (potentially nested) field within a schema.
2046 ///
2047 /// Unlike FieldPath (which exclusively uses indices of child fields), FieldRef may
2048 /// reference a field by name. It is intended to replace parameters like `int field_index`
2049 /// and `const std::string& field_name`; it can be implicitly constructed from either a
2050 /// field index or a name.
2051 ///
2052 /// Nested fields can be referenced as well. Given
2053 ///     schema({field("a", struct_({field("n", null())})), field("b", int32())})
2054 ///
2055 /// the following all indicate the nested field named "n":
2056 ///     FieldRef ref1(0, 0);
2057 ///     FieldRef ref2("a", 0);
2058 ///     FieldRef ref3("a", "n");
2059 ///     FieldRef ref4(0, "n");
2060 ///     ARROW_ASSIGN_OR_RAISE(FieldRef ref5,
2061 ///                           FieldRef::FromDotPath(".a[0]"));
2062 ///
2063 /// FieldPaths matching a FieldRef are retrieved using the member function FindAll.
2064 /// Multiple matches are possible because field names may be duplicated within a schema.
2065 /// For example:
2066 ///     Schema a_is_ambiguous({field("a", int32()), field("a", float32())});
2067 ///     auto matches = FieldRef("a").FindAll(a_is_ambiguous);
2068 ///     assert(matches.size() == 2);
2069 ///     assert(matches[0].Get(a_is_ambiguous)->Equals(a_is_ambiguous.field(0)));
2070 ///     assert(matches[1].Get(a_is_ambiguous)->Equals(a_is_ambiguous.field(1)));
2071 ///
2072 /// Convenience accessors are available which raise a helpful error if the field is not
2073 /// found or ambiguous, and for immediately calling FieldPath::Get to retrieve any
2074 /// matching children:
2075 ///     auto maybe_match = FieldRef("struct", "field_i32").FindOneOrNone(schema);
2076 ///     auto maybe_column = FieldRef("struct", "field_i32").GetOne(some_table);
2077 class ARROW_EXPORT FieldRef : public util::EqualityComparable<FieldRef> {
2078  public:
2079   FieldRef() = default;
2080 
2081   /// Construct a FieldRef using a string of indices. The reference will be retrieved as:
2082   /// schema.fields[self.indices[0]].type.fields[self.indices[1]] ...
2083   ///
2084   /// Empty indices are not valid.
2085   FieldRef(FieldPath indices);  // NOLINT runtime/explicit
2086 
2087   /// Construct a by-name FieldRef. Multiple fields may match a by-name FieldRef:
2088   /// [f for f in schema.fields where f.name == self.name]
2089   FieldRef(std::string name) : impl_(std::move(name)) {}    // NOLINT runtime/explicit
2090   FieldRef(const char* name) : impl_(std::string(name)) {}  // NOLINT runtime/explicit
2091 
2092   /// Equivalent to a single index string of indices.
2093   FieldRef(int index) : impl_(FieldPath({index})) {}  // NOLINT runtime/explicit
2094 
2095   /// Construct a nested FieldRef.
2096   explicit FieldRef(std::vector<FieldRef> refs) { Flatten(std::move(refs)); }
2097 
2098   /// Convenience constructor for nested FieldRefs: each argument will be used to
2099   /// construct a FieldRef
2100   template <typename A0, typename A1, typename... A>
2101   FieldRef(A0&& a0, A1&& a1, A&&... a) {
2102     Flatten({// cpplint thinks the following are constructor decls
2103              FieldRef(std::forward<A0>(a0)),     // NOLINT runtime/explicit
2104              FieldRef(std::forward<A1>(a1)),     // NOLINT runtime/explicit
2105              FieldRef(std::forward<A>(a))...});  // NOLINT runtime/explicit
2106   }
2107 
2108   /// Parse a dot path into a FieldRef.
2109   ///
2110   /// dot_path = '.' name
2111   ///          | '[' digit+ ']'
2112   ///          | dot_path+
2113   ///
2114   /// Examples:
2115   ///   ".alpha" => FieldRef("alpha")
2116   ///   "[2]" => FieldRef(2)
2117   ///   ".beta[3]" => FieldRef("beta", 3)
2118   ///   "[5].gamma.delta[7]" => FieldRef(5, "gamma", "delta", 7)
2119   ///   ".hello world" => FieldRef("hello world")
2120   ///   R"(.\[y\]\\tho\.\)" => FieldRef(R"([y]\tho.\)")
2121   ///
2122   /// Note: When parsing a name, a '\' preceding any other character will be dropped from
2123   /// the resulting name. Therefore if a name must contain the characters '.', '\', or '['
2124   /// those must be escaped with a preceding '\'.
2125   static Result<FieldRef> FromDotPath(const std::string& dot_path);
2126   std::string ToDotPath() const;
2127 
2128   bool Equals(const FieldRef& other) const { return impl_ == other.impl_; }
2129 
2130   std::string ToString() const;
2131 
2132   size_t hash() const;
2133   struct Hash {
2134     size_t operator()(const FieldRef& ref) const { return ref.hash(); }
2135   };
2136 
2137   explicit operator bool() const { return Equals(FieldPath{}); }
2138   bool operator!() const { return !Equals(FieldPath{}); }
2139 
2140   bool IsFieldPath() const { return std::holds_alternative<FieldPath>(impl_); }
2141   bool IsName() const { return std::holds_alternative<std::string>(impl_); }
2142   bool IsNested() const {
2143     if (IsName()) return false;
2144     if (IsFieldPath()) return std::get<FieldPath>(impl_).indices().size() > 1;
2145     return true;
2146   }
2147 
2148   /// \brief Return true if this ref is a name or a nested sequence of only names
2149   ///
2150   /// Useful for determining if iteration is possible without recursion or inner loops
2151   bool IsNameSequence() const {
2152     if (IsName()) return true;
2153     if (const auto* nested = nested_refs()) {
2154       for (const auto& ref : *nested) {
2155         if (!ref.IsName()) return false;
2156       }
2157       return !nested->empty();
2158     }
2159     return false;
2160   }
2161 
2162   const FieldPath* field_path() const {
2163     return IsFieldPath() ? &std::get<FieldPath>(impl_) : NULLPTR;
2164   }
2165   const std::string* name() const {
2166     return IsName() ? &std::get<std::string>(impl_) : NULLPTR;
2167   }
2168   const std::vector<FieldRef>* nested_refs() const {
2169     return std::holds_alternative<std::vector<FieldRef>>(impl_)
2170                ? &std::get<std::vector<FieldRef>>(impl_)
2171                : NULLPTR;
2172   }
2173 
2174   /// \brief Retrieve FieldPath of every child field which matches this FieldRef.
2175   std::vector<FieldPath> FindAll(const Schema& schema) const;
2176   std::vector<FieldPath> FindAll(const Field& field) const;
2177   std::vector<FieldPath> FindAll(const DataType& type) const;
2178   std::vector<FieldPath> FindAll(const FieldVector& fields) const;
2179 
2180   /// \brief Convenience function which applies FindAll to arg's type or schema.
2181   std::vector<FieldPath> FindAll(const ArrayData& array) const;
2182   std::vector<FieldPath> FindAll(const Array& array) const;
2183   std::vector<FieldPath> FindAll(const ChunkedArray& chunked_array) const;
2184   std::vector<FieldPath> FindAll(const RecordBatch& batch) const;
2185   std::vector<FieldPath> FindAll(const Table& table) const;
2186 
2187   /// \brief Convenience function: raise an error if matches is empty.
2188   template <typename T>
2189   Status CheckNonEmpty(const std::vector<FieldPath>& matches, const T& root) const {
2190     if (matches.empty()) {
2191       return Status::Invalid("No match for ", ToString(), " in ", root.ToString());
2192     }
2193     return Status::OK();
2194   }
2195 
2196   /// \brief Convenience function: raise an error if matches contains multiple FieldPaths.
2197   template <typename T>
2198   Status CheckNonMultiple(const std::vector<FieldPath>& matches, const T& root) const {
2199     if (matches.size() > 1) {
2200       return Status::Invalid("Multiple matches for ", ToString(), " in ",
2201                              root.ToString());
2202     }
2203     return Status::OK();
2204   }
2205 
2206   /// \brief Retrieve FieldPath of a single child field which matches this
2207   /// FieldRef. Emit an error if none or multiple match.
2208   template <typename T>
2209   Result<FieldPath> FindOne(const T& root) const {
2210     auto matches = FindAll(root);
2211     ARROW_RETURN_NOT_OK(CheckNonEmpty(matches, root));
2212     ARROW_RETURN_NOT_OK(CheckNonMultiple(matches, root));
2213     return std::move(matches[0]);
2214   }
2215 
2216   /// \brief Retrieve FieldPath of a single child field which matches this
2217   /// FieldRef. Emit an error if multiple match. An empty (invalid) FieldPath
2218   /// will be returned if none match.
2219   template <typename T>
2220   Result<FieldPath> FindOneOrNone(const T& root) const {
2221     auto matches = FindAll(root);
2222     ARROW_RETURN_NOT_OK(CheckNonMultiple(matches, root));
2223     if (matches.empty()) {
2224       return FieldPath();
2225     }
2226     return std::move(matches[0]);
2227   }
2228 
2229   template <typename T>
2230   using GetType = decltype(std::declval<FieldPath>().Get(std::declval<T>()).ValueOrDie());
2231 
2232   /// \brief Get all children matching this FieldRef.
2233   template <typename T>
2234   std::vector<GetType<T>> GetAll(const T& root) const {
2235     std::vector<GetType<T>> out;
2236     for (const auto& match : FindAll(root)) {
2237       out.push_back(match.Get(root).ValueOrDie());
2238     }
2239     return out;
2240   }
2241   /// \brief Get all children matching this FieldRef.
2242   ///
2243   /// Unlike `FieldRef::GetAll`, this variant is not zero-copy and the retrieved
2244   /// children's null bitmaps are ANDed with their ancestors'
2245   template <typename T>
2246   Result<std::vector<GetType<T>>> GetAllFlattened(const T& root,
2247                                                   MemoryPool* pool = NULLPTR) const {
2248     std::vector<GetType<T>> out;
2249     for (const auto& match : FindAll(root)) {
2250       ARROW_ASSIGN_OR_RAISE(auto child, match.GetFlattened(root, pool));
2251       out.push_back(std::move(child));
2252     }
2253     return out;
2254   }
2255 
2256   /// \brief Get the single child matching this FieldRef.
2257   /// Emit an error if none or multiple match.
2258   template <typename T>
2259   Result<GetType<T>> GetOne(const T& root) const {
2260     ARROW_ASSIGN_OR_RAISE(auto match, FindOne(root));
2261     return match.Get(root).ValueOrDie();
2262   }
2263   /// \brief Get the single child matching this FieldRef.
2264   ///
2265   /// Unlike `FieldRef::GetOne`, this variant is not zero-copy and the retrieved
2266   /// child's null bitmap is ANDed with its ancestors'
2267   template <typename T>
2268   Result<GetType<T>> GetOneFlattened(const T& root, MemoryPool* pool = NULLPTR) const {
2269     ARROW_ASSIGN_OR_RAISE(auto match, FindOne(root));
2270     return match.GetFlattened(root, pool);
2271   }
2272 
2273   /// \brief Get the single child matching this FieldRef.
2274   /// Return nullptr if none match, emit an error if multiple match.
2275   template <typename T>
2276   Result<GetType<T>> GetOneOrNone(const T& root) const {
2277     ARROW_ASSIGN_OR_RAISE(auto match, FindOneOrNone(root));
2278     if (match.empty()) {
2279       return static_cast<GetType<T>>(NULLPTR);
2280     }
2281     return match.Get(root).ValueOrDie();
2282   }
2283   /// \brief Get the single child matching this FieldRef.
2284   ///
2285   /// Return nullptr if none match, emit an error if multiple match.
2286   /// Unlike `FieldRef::GetOneOrNone`, this variant is not zero-copy and the
2287   /// retrieved child's null bitmap is ANDed with its ancestors'
2288   template <typename T>
2289   Result<GetType<T>> GetOneOrNoneFlattened(const T& root,
2290                                            MemoryPool* pool = NULLPTR) const {
2291     ARROW_ASSIGN_OR_RAISE(auto match, FindOneOrNone(root));
2292     if (match.empty()) {
2293       return static_cast<GetType<T>>(NULLPTR);
2294     }
2295     return match.GetFlattened(root, pool);
2296   }
2297 
2298  private:
  // NOTE(review): defined out of line. Presumably collapses the nested
  // references in `children` into impl_ -- confirm against the definition.
  void Flatten(std::vector<FieldRef> children);

  // Underlying representation; holds exactly one of: a FieldPath, a
  // std::string (presumably a field name -- confirm), or a list of nested
  // FieldRefs.
  std::variant<FieldPath, std::string, std::vector<FieldRef>> impl_;
2302 };
2303 
/// \brief Print `ref` to the stream pointed to by `os`.
///
/// NOTE(review): name and signature match GoogleTest's `PrintTo`
/// customization point, so this is presumably meant for test diagnostics --
/// confirm.
ARROW_EXPORT void PrintTo(const FieldRef& ref, std::ostream* os);
2305 
/// \brief Stream insertion operator for FieldRef.
///
/// \return a reference to an output stream (presumably `os`, to allow
/// chaining -- confirm against the definition)
ARROW_EXPORT
std::ostream& operator<<(std::ostream& os, const FieldRef&);
2308 
2309 // ----------------------------------------------------------------------
2310 // Schema
2311 
/// \brief Byte order tag.
///
/// `Native` is an alias for `Little` when the preprocessor condition
/// `ARROW_LITTLE_ENDIAN` is true, and for `Big` otherwise.
enum class Endianness {
  Little = 0,
  Big = 1,
  // Only the initializer depends on the build configuration.
  Native =
#if ARROW_LITTLE_ENDIAN
      Little
#else
      Big
#endif
};
2321 
/// \class Schema
/// \brief Sequence of arrow::Field objects describing the columns of a record
/// batch or table data structure
///
/// The modifying operations declared here (WithEndianness, AddField,
/// RemoveField, SetField, WithNames, WithMetadata, RemoveMetadata) are const
/// and return a Schema pointer rather than changing this object.
class ARROW_EXPORT Schema : public detail::Fingerprintable,
                            public util::EqualityComparable<Schema>,
                            public util::ToStringOstreamable<Schema> {
 public:
  /// \brief Construct a schema from fields, an explicit endianness and
  /// optional key-value metadata
  explicit Schema(FieldVector fields, Endianness endianness,
                  std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);

  /// \brief Construct a schema from fields and optional key-value metadata
  ///
  /// NOTE(review): the endianness used by this overload is not visible from
  /// the declaration (presumably Endianness::Native) -- confirm.
  explicit Schema(FieldVector fields,
                  std::shared_ptr<const KeyValueMetadata> metadata = NULLPTR);

  /// \brief Copy constructor (user-declared; defined out of line)
  Schema(const Schema&);

  ~Schema() override;

  /// Returns true if all of the schema fields are equal
  ///
  /// \param[in] other the schema to compare against
  /// \param[in] check_metadata false by default; presumably also compares
  /// key-value metadata when true -- confirm
  bool Equals(const Schema& other, bool check_metadata = false) const;
  /// Overload of Equals taking the other schema as a shared_ptr
  bool Equals(const std::shared_ptr<Schema>& other, bool check_metadata = false) const;

  /// \brief Set endianness in the schema
  ///
  /// \return new Schema
  std::shared_ptr<Schema> WithEndianness(Endianness endianness) const;

  /// \brief Return endianness in the schema
  Endianness endianness() const;

  /// \brief Indicate if endianness is equal to platform-native endianness
  bool is_native_endian() const;

  /// \brief Return the number of fields (columns) in the schema
  int num_fields() const;

  /// Return the ith schema element. Does not boundscheck
  const std::shared_ptr<Field>& field(int i) const;

  /// \brief Return the fields of the schema
  const FieldVector& fields() const;

  /// \brief Return the field names as a newly built vector of strings
  std::vector<std::string> field_names() const;

  /// Returns null if name not found
  std::shared_ptr<Field> GetFieldByName(const std::string& name) const;

  /// \brief Return all fields having this name
  FieldVector GetAllFieldsByName(const std::string& name) const;

  /// Returns -1 if name not found
  int GetFieldIndex(const std::string& name) const;

  /// Return the indices of all fields having this name
  ///
  /// NOTE(review): the ordering of the returned indices is not visible from
  /// this declaration (presumably sorted) -- confirm.
  std::vector<int> GetAllFieldIndices(const std::string& name) const;

  /// Indicate if field named `name` can be found unambiguously in the schema.
  Status CanReferenceFieldByName(const std::string& name) const;

  /// Indicate if fields named `names` can be found unambiguously in the schema.
  Status CanReferenceFieldsByNames(const std::vector<std::string>& names) const;

  /// \brief The custom key-value metadata, if any
  ///
  /// \return metadata may be null
  const std::shared_ptr<const KeyValueMetadata>& metadata() const;

  /// \brief Render a string representation of the schema suitable for debugging
  /// \param[in] show_metadata when true, if KeyValueMetadata is non-empty,
  /// print keys and values in the output
  std::string ToString(bool show_metadata = false) const;

  /// \brief Field editing operations returning a Schema, or an error
  ///
  /// NOTE(review): presumably `i` is the position at which the field is
  /// inserted (AddField), removed (RemoveField) or replaced (SetField), and
  /// an out-of-range `i` is reported as an error -- confirm.
  Result<std::shared_ptr<Schema>> AddField(int i,
                                           const std::shared_ptr<Field>& field) const;
  Result<std::shared_ptr<Schema>> RemoveField(int i) const;
  Result<std::shared_ptr<Schema>> SetField(int i,
                                           const std::shared_ptr<Field>& field) const;

  /// \brief Replace field names with new names
  ///
  /// \param[in] names new names
  /// \return new Schema
  Result<std::shared_ptr<Schema>> WithNames(const std::vector<std::string>& names) const;

  /// \brief Replace key-value metadata with new metadata
  ///
  /// \param[in] metadata new KeyValueMetadata
  /// \return new Schema
  std::shared_ptr<Schema> WithMetadata(
      const std::shared_ptr<const KeyValueMetadata>& metadata) const;

  /// \brief Return copy of Schema without the KeyValueMetadata
  std::shared_ptr<Schema> RemoveMetadata() const;

  /// \brief Indicate that the Schema has non-empty KeyValueMetadata
  bool HasMetadata() const;

  /// \brief Indicate that the Schema has distinct field names.
  bool HasDistinctFieldNames() const;

 protected:
  // Overrides of the detail::Fingerprintable hooks.
  std::string ComputeFingerprint() const override;
  std::string ComputeMetadataFingerprint() const override;

 private:
  // Implementation details are kept in a type defined outside this header.
  class Impl;
  std::unique_ptr<Impl> impl_;
};
2428 
/// \brief Print `s` to the stream pointed to by `os`.
///
/// NOTE(review): name and signature match GoogleTest's `PrintTo`
/// customization point, so this is presumably meant for test diagnostics --
/// confirm.
ARROW_EXPORT void PrintTo(const Schema& s, std::ostream* os);
2430 
/// \brief Return a string describing the given Endianness value
///
/// NOTE(review): the exact strings produced are not visible from this
/// declaration -- check the definition before relying on them.
ARROW_EXPORT
std::string EndiannessToString(Endianness endianness);
2433 
2434 // ----------------------------------------------------------------------
2435 
/// \brief Convenience class to incrementally construct/merge schemas.
///
/// This class amortizes the cost of validating field name conflicts by
/// maintaining the mapping. The caller also controls the conflict resolution
/// scheme.
class ARROW_EXPORT SchemaBuilder {
 public:
  // Indicate how field conflict(s) should be resolved when building a schema. A
  // conflict arises when a field is added to the builder and one or more field(s)
  // with the same name already exist.
  enum ConflictPolicy {
    // Ignore the conflict and append the field. This is the default behavior of the
    // Schema constructor and the `arrow::schema` factory function.
    CONFLICT_APPEND = 0,
    // Keep the existing field and ignore the newer one.
    CONFLICT_IGNORE,
    // Replace the existing field with the newer one.
    CONFLICT_REPLACE,
    // Merge the fields. The merging behavior can be controlled by `Field::MergeOptions`
    // specified at construction time. Also see documentation of `Field::MergeWith`.
    CONFLICT_MERGE,
    // Refuse the new field and error out.
    CONFLICT_ERROR
  };

  /// \brief Construct an empty SchemaBuilder
  /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
  SchemaBuilder(
      ConflictPolicy conflict_policy = CONFLICT_APPEND,
      Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
  /// \brief Construct a SchemaBuilder from a list of fields
  /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
  SchemaBuilder(
      FieldVector fields, ConflictPolicy conflict_policy = CONFLICT_APPEND,
      Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
  /// \brief Construct a SchemaBuilder from a schema, preserving the metadata
  /// `field_merge_options` is only effective when `conflict_policy` == `CONFLICT_MERGE`.
  SchemaBuilder(
      const std::shared_ptr<Schema>& schema,
      ConflictPolicy conflict_policy = CONFLICT_APPEND,
      Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());

  /// \brief Return the conflict resolution method.
  ConflictPolicy policy() const;

  /// \brief Set the conflict resolution method.
  void SetPolicy(ConflictPolicy resolution);

  /// \brief Add a field to the constructed schema.
  ///
  /// \param[in] field to add to the constructed Schema.
  /// \return A failure if encountered.
  Status AddField(const std::shared_ptr<Field>& field);

  /// \brief Add multiple fields to the constructed schema.
  ///
  /// \param[in] fields to add to the constructed Schema.
  /// \return The first failure encountered, if any.
  Status AddFields(const FieldVector& fields);

  /// \brief Add fields of a Schema to the constructed Schema.
  ///
  /// \param[in] schema to take fields to add to the constructed Schema.
  /// \return The first failure encountered, if any.
  Status AddSchema(const std::shared_ptr<Schema>& schema);

  /// \brief Add fields of multiple Schemas to the constructed Schema.
  ///
  /// \param[in] schemas to take fields to add to the constructed Schema.
  /// \return The first failure encountered, if any.
  Status AddSchemas(const std::vector<std::shared_ptr<Schema>>& schemas);

  /// \brief Add key-value metadata to the builder.
  ///
  /// NOTE(review): how entries are combined with metadata already held by the
  /// builder is not visible from this declaration -- confirm.
  ///
  /// \param[in] metadata the key-value metadata to add
  /// \return A failure if encountered.
  Status AddMetadata(const KeyValueMetadata& metadata);

  /// \brief Return the constructed Schema.
  ///
  /// The builder internal state is not affected by invoking this method, i.e.
  /// a single builder can yield multiple incrementally constructed schemas.
  ///
  /// \return the constructed schema.
  Result<std::shared_ptr<Schema>> Finish() const;

  /// \brief Merge schemas in a unified schema according to policy.
  static Result<std::shared_ptr<Schema>> Merge(
      const std::vector<std::shared_ptr<Schema>>& schemas,
      ConflictPolicy policy = CONFLICT_MERGE);

  /// \brief Indicate if schemas are compatible to merge according to policy.
  static Status AreCompatible(const std::vector<std::shared_ptr<Schema>>& schemas,
                              ConflictPolicy policy = CONFLICT_MERGE);

  /// \brief Reset internal state with an empty schema (and metadata).
  void Reset();

  // Declared here and defined out of line; Impl is only forward-declared in
  // this header.
  ~SchemaBuilder();

 private:
  // Implementation details are kept in a type defined outside this header.
  class Impl;
  std::unique_ptr<Impl> impl_;

  // NOTE(review): private helper; presumably appends `field` without applying
  // the conflict policy -- confirm against the definition.
  Status AppendField(const std::shared_ptr<Field>& field);
};
2538 
/// \brief Unifies schemas by merging fields by name.
///
/// The behavior of field merging can be controlled via `Field::MergeOptions`.
///
/// The resulting schema will contain the union of fields from all schemas.
/// Fields with the same name will be merged. See `Field::MergeOptions`.
/// - They are expected to be mergeable under provided `field_merge_options`.
/// - The unified field will inherit the metadata from the schema where
///   that field is first defined.
/// - The first N fields in the schema will be ordered the same as the
///   N fields in the first schema.
///
/// The resulting schema will inherit its metadata from the first input schema.
///
/// Returns an error if:
/// - Any input schema contains fields with duplicate names.
/// - Fields of the same name are not mergeable.
///
/// \param[in] schemas the schemas to unify
/// \param[in] field_merge_options options controlling how fields with the
/// same name are merged; `Field::MergeOptions::Defaults()` when omitted
/// \return the unified schema, or an error in the cases listed above
ARROW_EXPORT
Result<std::shared_ptr<Schema>> UnifySchemas(
    const std::vector<std::shared_ptr<Schema>>& schemas,
    Field::MergeOptions field_merge_options = Field::MergeOptions::Defaults());
2558 
2559 namespace internal {
2560 
2561 constexpr bool may_have_validity_bitmap(Type::type id) {
2562   switch (id) {
2563     case Type::NA:
2564     case Type::DENSE_UNION:
2565     case Type::SPARSE_UNION:
2566     case Type::RUN_END_ENCODED:
2567       return false;
2568     default:
2569       return true;
2570   }
2571 }
2572 
2573 ARROW_DEPRECATED("Deprecated in 17.0.0. Use may_have_validity_bitmap() instead.")
2574 constexpr bool HasValidityBitmap(Type::type id) { return may_have_validity_bitmap(id); }
2575 
/// \brief Return a string for the given type id
///
/// NOTE(review): how this differs from ToTypeName() is not visible from the
/// declarations -- check the definitions.
ARROW_EXPORT
std::string ToString(Type::type id);
2578 
/// \brief Return a type name string for the given type id
///
/// NOTE(review): how this differs from ToString(Type::type) is not visible
/// from the declarations -- check the definitions.
ARROW_EXPORT
std::string ToTypeName(Type::type id);
2581 
/// \brief Return a string for the given time unit
ARROW_EXPORT
std::string ToString(TimeUnit::type unit);
2584 
2585 }  // namespace internal
2586 
2587 // Helpers to get instances of data types based on general categories
2588 
/// \brief Signed integer types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& SignedIntTypes();
/// \brief Unsigned integer types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& UnsignedIntTypes();
/// \brief Signed and unsigned integer types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& IntTypes();
/// \brief Floating point types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& FloatingPointTypes();
/// \brief Number types without boolean, i.e. integer and floating point types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& NumericTypes();
/// \brief Binary and string-like types (except fixed-size binary)
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& BaseBinaryTypes();
/// \brief Binary and large-binary types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& BinaryTypes();
/// \brief String and large-string types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& StringTypes();
/// \brief String-view and binary-view types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& BinaryViewTypes();
/// \brief Temporal types including date, time and timestamps for each unit
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& TemporalTypes();
/// \brief Interval types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& IntervalTypes();
/// \brief Duration types for each unit
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& DurationTypes();
/// \brief Numeric, base binary, date, boolean and null types
ARROW_EXPORT
const std::vector<std::shared_ptr<DataType>>& PrimitiveTypes();
2628 
2629 }  // namespace arrow