File indexing completed on 2025-08-27 08:47:18
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <cstdint>
0021 #include <memory>
0022 #include <string>
0023 #include <type_traits>
0024 #include <utility>
0025 #include <variant>
0026 #include <vector>
0027
0028 #include "arrow/array/data.h"
0029 #include "arrow/device_allocation_type_set.h"
0030 #include "arrow/scalar.h"
0031 #include "arrow/type.h"
0032 #include "arrow/type_traits.h"
0033 #include "arrow/util/checked_cast.h"
0034 #include "arrow/util/macros.h"
0035 #include "arrow/util/visibility.h"
0036
0037 namespace arrow {
0038
// Forward declarations of the container classes that Datum refers to below.
// Their full definitions are not pulled in by this header's own includes
// listed above, so users of the concrete types must include them separately.
class Array;
class ChunkedArray;
class RecordBatch;
class Table;
0043
0044
0045
0046 struct ARROW_EXPORT Datum {
0047
0048 enum Kind { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE };
0049
0050
0051 struct Empty {};
0052
0053
0054
0055 static constexpr int64_t kUnknownLength = -1;
0056
0057
0058
0059
0060 std::variant<Empty, std::shared_ptr<Scalar>, std::shared_ptr<ArrayData>,
0061 std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>,
0062 std::shared_ptr<Table>>
0063 value;
0064
0065
0066 Datum() = default;
0067
0068 Datum(const Datum& other) = default;
0069 Datum& operator=(const Datum& other) = default;
0070 Datum(Datum&& other) = default;
0071 Datum& operator=(Datum&& other) = default;
0072
0073
0074 Datum(std::shared_ptr<Scalar> value)
0075 : value(std::move(value)) {}
0076
0077
0078 Datum(std::shared_ptr<ArrayData> value)
0079 : value(std::move(value)) {}
0080
0081
0082 Datum(ArrayData arg)
0083 : value(std::make_shared<ArrayData>(std::move(arg))) {}
0084
0085
0086 Datum(const Array& value);
0087
0088
0089 Datum(const std::shared_ptr<Array>& value);
0090
0091
0092 Datum(std::shared_ptr<ChunkedArray> value);
0093
0094
0095 Datum(std::shared_ptr<RecordBatch> value);
0096
0097
0098 Datum(std::shared_ptr<Table> value);
0099
0100
0101
0102
0103 explicit Datum(const ChunkedArray& value);
0104
0105
0106
0107
0108 explicit Datum(const RecordBatch& value);
0109
0110
0111
0112
0113 explicit Datum(const Table& value);
0114
0115
0116 template <typename T, bool IsArray = std::is_base_of_v<Array, T>,
0117 bool IsScalar = std::is_base_of_v<Scalar, T>,
0118 typename = enable_if_t<IsArray || IsScalar>>
0119 Datum(std::shared_ptr<T> value)
0120 : Datum(std::shared_ptr<typename std::conditional<IsArray, Array, Scalar>::type>(
0121 std::move(value))) {}
0122
0123
0124 template <typename T, typename TV = typename std::remove_reference_t<T>,
0125 bool IsArray = std::is_base_of_v<Array, T>,
0126 bool IsScalar = std::is_base_of_v<Scalar, T>,
0127 typename = enable_if_t<IsArray || IsScalar>>
0128 Datum(T&& value)
0129 : Datum(std::make_shared<TV>(std::forward<T>(value))) {}
0130
0131
0132
0133
0134 template <typename T, typename = enable_if_t<std::is_base_of_v<Scalar, T>>>
0135 Datum(const T& value)
0136 : Datum(std::make_shared<T>(value)) {}
0137
0138
0139
0140 explicit Datum(bool value);
0141
0142 explicit Datum(int8_t value);
0143
0144 explicit Datum(uint8_t value);
0145
0146 explicit Datum(int16_t value);
0147
0148 explicit Datum(uint16_t value);
0149
0150 explicit Datum(int32_t value);
0151
0152 explicit Datum(uint32_t value);
0153
0154 explicit Datum(int64_t value);
0155
0156 explicit Datum(uint64_t value);
0157
0158 explicit Datum(float value);
0159
0160 explicit Datum(double value);
0161
0162 explicit Datum(std::string value);
0163
0164 explicit Datum(const char* value);
0165
0166
0167 template <template <typename, typename> class StdDuration, typename Rep,
0168 typename Period,
0169 typename = decltype(DurationScalar{StdDuration<Rep, Period>{}})>
0170 explicit Datum(StdDuration<Rep, Period> d) : Datum{DurationScalar(d)} {}
0171
0172
0173 Datum::Kind kind() const {
0174 switch (this->value.index()) {
0175 case 0:
0176 return Datum::NONE;
0177 case 1:
0178 return Datum::SCALAR;
0179 case 2:
0180 return Datum::ARRAY;
0181 case 3:
0182 return Datum::CHUNKED_ARRAY;
0183 case 4:
0184 return Datum::RECORD_BATCH;
0185 case 5:
0186 return Datum::TABLE;
0187 default:
0188 return Datum::NONE;
0189 }
0190 }
0191
0192
0193
0194
0195
0196 const std::shared_ptr<ArrayData>& array() const {
0197 return std::get<std::shared_ptr<ArrayData>>(this->value);
0198 }
0199
0200
0201
0202
0203 int64_t TotalBufferSize() const;
0204
0205
0206
0207
0208
0209 ArrayData* mutable_array() const { return this->array().get(); }
0210
0211
0212
0213 std::shared_ptr<Array> make_array() const;
0214
0215
0216
0217 const std::shared_ptr<ChunkedArray>& chunked_array() const {
0218 return std::get<std::shared_ptr<ChunkedArray>>(this->value);
0219 }
0220
0221
0222
0223 const std::shared_ptr<RecordBatch>& record_batch() const {
0224 return std::get<std::shared_ptr<RecordBatch>>(this->value);
0225 }
0226
0227
0228
0229 const std::shared_ptr<Table>& table() const {
0230 return std::get<std::shared_ptr<Table>>(this->value);
0231 }
0232
0233
0234
0235 const std::shared_ptr<Scalar>& scalar() const {
0236 return std::get<std::shared_ptr<Scalar>>(this->value);
0237 }
0238
0239
0240
0241
0242
0243 template <typename ExactType>
0244 std::shared_ptr<ExactType> array_as() const {
0245 return internal::checked_pointer_cast<ExactType>(this->make_array());
0246 }
0247
0248
0249
0250
0251
0252 template <typename ExactType>
0253 const ExactType& scalar_as() const {
0254 return internal::checked_cast<const ExactType&>(*this->scalar());
0255 }
0256
0257
0258 bool is_array() const { return this->kind() == Datum::ARRAY; }
0259
0260
0261 bool is_chunked_array() const { return this->kind() == Datum::CHUNKED_ARRAY; }
0262
0263
0264 bool is_arraylike() const {
0265 return this->kind() == Datum::ARRAY || this->kind() == Datum::CHUNKED_ARRAY;
0266 }
0267
0268
0269 bool is_scalar() const { return this->kind() == Datum::SCALAR; }
0270
0271
0272 bool is_value() const { return this->is_arraylike() || this->is_scalar(); }
0273
0274
0275
0276
0277 int64_t null_count() const;
0278
0279
0280
0281
0282 const std::shared_ptr<DataType>& type() const;
0283
0284
0285
0286
0287 const std::shared_ptr<Schema>& schema() const;
0288
0289
0290
0291
0292 int64_t length() const;
0293
0294
0295
0296
0297 ArrayVector chunks() const;
0298
0299 DeviceAllocationTypeSet device_types() const;
0300
0301
0302 bool Equals(const Datum& other) const;
0303
0304 bool operator==(const Datum& other) const { return Equals(other); }
0305 bool operator!=(const Datum& other) const { return !Equals(other); }
0306
0307 std::string ToString() const;
0308 };
0309
0310 ARROW_EXPORT void PrintTo(const Datum&, std::ostream*);
0311
0312 ARROW_EXPORT std::string ToString(Datum::Kind kind);
0313
0314 }