![]() |
|
|||
File indexing completed on 2025-08-28 08:26:55
0001 // Licensed to the Apache Software Foundation (ASF) under one 0002 // or more contributor license agreements. See the NOTICE file 0003 // distributed with this work for additional information 0004 // regarding copyright ownership. The ASF licenses this file 0005 // to you under the Apache License, Version 2.0 (the 0006 // "License"); you may not use this file except in compliance 0007 // with the License. You may obtain a copy of the License at 0008 // 0009 // http://www.apache.org/licenses/LICENSE-2.0 0010 // 0011 // Unless required by applicable law or agreed to in writing, 0012 // software distributed under the License is distributed on an 0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 0014 // KIND, either express or implied. See the License for the 0015 // specific language governing permissions and limitations 0016 // under the License. 0017 0018 #pragma once 0019 0020 #include <cstdint> 0021 #include <optional> 0022 #include <string> 0023 #include <variant> 0024 0025 #include "arrow/type.h" 0026 #include "arrow/util/visibility.h" 0027 0028 namespace arrow { 0029 0030 /// \class ArrayStatistics 0031 /// \brief Statistics for an Array 0032 /// 0033 /// Apache Arrow format doesn't have statistics but data source such 0034 /// as Apache Parquet may have statistics. Statistics associated with 0035 /// data source can be read unified API via this class. 0036 struct ARROW_EXPORT ArrayStatistics { 0037 /// \brief The type for maximum and minimum values. If the target 0038 /// value exists, one of them is used. `std::nullopt` is used 0039 /// otherwise. 0040 using ValueType = std::variant<bool, int64_t, uint64_t, double, std::string>; 0041 0042 static const std::shared_ptr<DataType>& ValueToArrowType( 0043 const std::optional<ValueType>& value, 0044 const std::shared_ptr<DataType>& array_type) { 0045 if (!value.has_value()) { 0046 return null(); 0047 } 0048 0049 struct Visitor { 0050 const std::shared_ptr<DataType>& array_type; 0051 0052 const std::shared_ptr<DataType>& operator()(const bool&) { return boolean(); } 0053 const std::shared_ptr<DataType>& operator()(const int64_t&) { return int64(); } 0054 const std::shared_ptr<DataType>& operator()(const uint64_t&) { return uint64(); } 0055 const std::shared_ptr<DataType>& operator()(const double&) { return float64(); } 0056 const std::shared_ptr<DataType>& operator()(const std::string&) { 0057 switch (array_type->id()) { 0058 case Type::STRING: 0059 case Type::BINARY: 0060 case Type::FIXED_SIZE_BINARY: 0061 case Type::LARGE_STRING: 0062 case Type::LARGE_BINARY: 0063 return array_type; 0064 default: 0065 return utf8(); 0066 } 0067 } 0068 } visitor{array_type}; 0069 return std::visit(visitor, value.value()); 0070 } 0071 0072 /// \brief The number of null values, may not be set 0073 std::optional<int64_t> null_count = std::nullopt; 0074 0075 /// \brief The number of distinct values, may not be set 0076 std::optional<int64_t> distinct_count = std::nullopt; 0077 0078 /// \brief The minimum value, may not be set 0079 std::optional<ValueType> min = std::nullopt; 0080 0081 /// \brief Compute Arrow type of the minimum value. 0082 /// 0083 /// If \ref ValueType is `std::string`, `array_type` may be 0084 /// used. If `array_type` is a binary-like type such as \ref 0085 /// arrow::binary and \ref arrow::large_utf8, `array_type` is 0086 /// returned. \ref arrow::utf8 is returned otherwise. 0087 /// 0088 /// If \ref ValueType isn't `std::string`, `array_type` isn't used. 0089 /// 0090 /// \param array_type The Arrow type of the associated array. 0091 /// 0092 /// \return \ref arrow::null if the minimum value is `std::nullopt`, 0093 /// Arrow type based on \ref ValueType of the \ref min 0094 /// otherwise. 0095 const std::shared_ptr<DataType>& MinArrowType( 0096 const std::shared_ptr<DataType>& array_type) { 0097 return ValueToArrowType(min, array_type); 0098 } 0099 0100 /// \brief Whether the minimum value is exact or not 0101 bool is_min_exact = false; 0102 0103 /// \brief The maximum value, may not be set 0104 std::optional<ValueType> max = std::nullopt; 0105 0106 /// \brief Compute Arrow type of the maximum value. 0107 /// 0108 /// If \ref ValueType is `std::string`, `array_type` may be 0109 /// used. If `array_type` is a binary-like type such as \ref 0110 /// arrow::binary and \ref arrow::large_utf8, `array_type` is 0111 /// returned. \ref arrow::utf8 is returned otherwise. 0112 /// 0113 /// If \ref ValueType isn't `std::string`, `array_type` isn't used. 0114 /// 0115 /// \param array_type The Arrow type of the associated array. 0116 /// 0117 /// \return \ref arrow::null if the maximum value is `std::nullopt`, 0118 /// Arrow type based on \ref ValueType of the \ref max 0119 /// otherwise. 0120 const std::shared_ptr<DataType>& MaxArrowType( 0121 const std::shared_ptr<DataType>& array_type) { 0122 return ValueToArrowType(max, array_type); 0123 } 0124 0125 /// \brief Whether the maximum value is exact or not 0126 bool is_max_exact = false; 0127 0128 /// \brief Check two statistics for equality 0129 bool Equals(const ArrayStatistics& other) const { 0130 return null_count == other.null_count && distinct_count == other.distinct_count && 0131 min == other.min && is_min_exact == other.is_min_exact && max == other.max && 0132 is_max_exact == other.is_max_exact; 0133 } 0134 0135 /// \brief Check two statistics for equality 0136 bool operator==(const ArrayStatistics& other) const { return Equals(other); } 0137 0138 /// \brief Check two statistics for not equality 0139 bool operator!=(const ArrayStatistics& other) const { return !Equals(other); } 0140 }; 0141 0142 } // namespace arrow
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |