Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:26:55

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cstdint>
0021 #include <optional>
0022 #include <string>
0023 #include <variant>
0024 
0025 #include "arrow/type.h"
0026 #include "arrow/util/visibility.h"
0027 
0028 namespace arrow {
0029 
0030 /// \class ArrayStatistics
0031 /// \brief Statistics for an Array
0032 ///
0033 /// Apache Arrow format doesn't have statistics but data source such
0034 /// as Apache Parquet may have statistics. Statistics associated with
0035 /// data source can be read unified API via this class.
0036 struct ARROW_EXPORT ArrayStatistics {
0037   /// \brief The type for maximum and minimum values. If the target
0038   /// value exists, one of them is used. `std::nullopt` is used
0039   /// otherwise.
0040   using ValueType = std::variant<bool, int64_t, uint64_t, double, std::string>;
0041 
0042   static const std::shared_ptr<DataType>& ValueToArrowType(
0043       const std::optional<ValueType>& value,
0044       const std::shared_ptr<DataType>& array_type) {
0045     if (!value.has_value()) {
0046       return null();
0047     }
0048 
0049     struct Visitor {
0050       const std::shared_ptr<DataType>& array_type;
0051 
0052       const std::shared_ptr<DataType>& operator()(const bool&) { return boolean(); }
0053       const std::shared_ptr<DataType>& operator()(const int64_t&) { return int64(); }
0054       const std::shared_ptr<DataType>& operator()(const uint64_t&) { return uint64(); }
0055       const std::shared_ptr<DataType>& operator()(const double&) { return float64(); }
0056       const std::shared_ptr<DataType>& operator()(const std::string&) {
0057         switch (array_type->id()) {
0058           case Type::STRING:
0059           case Type::BINARY:
0060           case Type::FIXED_SIZE_BINARY:
0061           case Type::LARGE_STRING:
0062           case Type::LARGE_BINARY:
0063             return array_type;
0064           default:
0065             return utf8();
0066         }
0067       }
0068     } visitor{array_type};
0069     return std::visit(visitor, value.value());
0070   }
0071 
0072   /// \brief The number of null values, may not be set
0073   std::optional<int64_t> null_count = std::nullopt;
0074 
0075   /// \brief The number of distinct values, may not be set
0076   std::optional<int64_t> distinct_count = std::nullopt;
0077 
0078   /// \brief The minimum value, may not be set
0079   std::optional<ValueType> min = std::nullopt;
0080 
0081   /// \brief Compute Arrow type of the minimum value.
0082   ///
0083   /// If \ref ValueType is `std::string`, `array_type` may be
0084   /// used. If `array_type` is a binary-like type such as \ref
0085   /// arrow::binary and \ref arrow::large_utf8, `array_type` is
0086   /// returned. \ref arrow::utf8 is returned otherwise.
0087   ///
0088   /// If \ref ValueType isn't `std::string`, `array_type` isn't used.
0089   ///
0090   /// \param array_type The Arrow type of the associated array.
0091   ///
0092   /// \return \ref arrow::null if the minimum value is `std::nullopt`,
0093   ///         Arrow type based on \ref ValueType of the \ref min
0094   ///         otherwise.
0095   const std::shared_ptr<DataType>& MinArrowType(
0096       const std::shared_ptr<DataType>& array_type) {
0097     return ValueToArrowType(min, array_type);
0098   }
0099 
0100   /// \brief Whether the minimum value is exact or not
0101   bool is_min_exact = false;
0102 
0103   /// \brief The maximum value, may not be set
0104   std::optional<ValueType> max = std::nullopt;
0105 
0106   /// \brief Compute Arrow type of the maximum value.
0107   ///
0108   /// If \ref ValueType is `std::string`, `array_type` may be
0109   /// used. If `array_type` is a binary-like type such as \ref
0110   /// arrow::binary and \ref arrow::large_utf8, `array_type` is
0111   /// returned. \ref arrow::utf8 is returned otherwise.
0112   ///
0113   /// If \ref ValueType isn't `std::string`, `array_type` isn't used.
0114   ///
0115   /// \param array_type The Arrow type of the associated array.
0116   ///
0117   /// \return \ref arrow::null if the maximum value is `std::nullopt`,
0118   ///         Arrow type based on \ref ValueType of the \ref max
0119   ///         otherwise.
0120   const std::shared_ptr<DataType>& MaxArrowType(
0121       const std::shared_ptr<DataType>& array_type) {
0122     return ValueToArrowType(max, array_type);
0123   }
0124 
0125   /// \brief Whether the maximum value is exact or not
0126   bool is_max_exact = false;
0127 
0128   /// \brief Check two statistics for equality
0129   bool Equals(const ArrayStatistics& other) const {
0130     return null_count == other.null_count && distinct_count == other.distinct_count &&
0131            min == other.min && is_min_exact == other.is_min_exact && max == other.max &&
0132            is_max_exact == other.is_max_exact;
0133   }
0134 
0135   /// \brief Check two statistics for equality
0136   bool operator==(const ArrayStatistics& other) const { return Equals(other); }
0137 
0138   /// \brief Check two statistics for not equality
0139   bool operator!=(const ArrayStatistics& other) const { return !Equals(other); }
0140 };
0141 
0142 }  // namespace arrow