File indexing completed on 2025-08-27 08:47:21
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018 #pragma once
0019
0020 #include <algorithm>
0021 #include <cstddef>
0022 #include <memory>
0023 #include <string>
0024 #include <tuple>
0025 #include <type_traits>
0026 #include <utility>
0027 #include <vector>
0028
0029 #include "arrow/array.h"
0030 #include "arrow/array/builder_base.h"
0031 #include "arrow/array/builder_binary.h"
0032 #include "arrow/array/builder_nested.h"
0033 #include "arrow/array/builder_primitive.h"
0034 #include "arrow/chunked_array.h"
0035 #include "arrow/compute/api.h"
0036 #include "arrow/status.h"
0037 #include "arrow/table.h"
0038 #include "arrow/type_fwd.h"
0039 #include "arrow/type_traits.h"
0040 #include "arrow/util/checked_cast.h"
0041 #include "arrow/util/macros.h"
0042
0043 namespace arrow {
0044
0045 class Schema;
0046
0047 namespace stl {
0048
namespace internal {

/// Trait reporting whether `*std::declval<T>()` is a well-formed expression.
///
/// Primary template: selected when the dereference expression does not compile.
template <typename T, typename = void>
struct is_dereferencable : public std::false_type {};

/// Specialization selected when `T` can be dereferenced with unary `*`.
///
/// Uses the standard `std::void_t` detection idiom, so no project-specific
/// helper is required.
template <typename T>
struct is_dereferencable<T, std::void_t<decltype(*std::declval<T>())>>
    : public std::true_type {};

/// Trait reporting whether `T` behaves like an optional value.
///
/// Primary template: `T` is not optional-like.
template <typename T, typename = void>
struct is_optional_like : public std::false_type {};

/// `T` is optional-like when a `bool` can be constructed from it, it can be
/// dereferenced, and (after removing references) it is not an array type.
/// Note that raw and smart pointers satisfy these conditions too.
template <typename T>
struct is_optional_like<
    T, typename std::enable_if<
           std::is_constructible<bool, T>::value && is_dereferencable<T>::value &&
           !std::is_array<typename std::remove_reference<T>::type>::value>::type>
    : public std::true_type {};

/// The N-th element type of `Tuple` with references and cv-qualifiers removed
/// (via `std::decay`).
template <std::size_t N, typename Tuple>
using BareTupleElement =
    typename std::decay<typename std::tuple_element<N, Tuple>::type>::type;

}  // namespace internal
0073
0074 template <typename T, typename R = void>
0075 using enable_if_optional_like =
0076 typename std::enable_if<internal::is_optional_like<T>::value, R>::type;
0077
0078
/// Customization point describing how a C++ type `T` is converted to and from
/// its Arrow representation.
///
/// The primary template is intentionally empty. The specializations in this
/// file provide `AppendRow` (write one cell into a builder) and, for most of
/// them, `GetEntry` (read one cell out of an array). `Enable` exists only so
/// that partial specializations can be constrained via SFINAE.
template <typename T, typename Enable = void>
struct ConversionTraits {};
0081
0082
0083 template <typename CType>
0084 using CBuilderType =
0085 typename TypeTraits<typename ConversionTraits<CType>::ArrowType>::BuilderType;
0086
0087
0088
0089
0090
0091
0092 template <typename ValueCType, typename Range>
0093 inline Status AppendListValues(CBuilderType<ValueCType>& value_builder,
0094 Range&& cell_range) {
0095 for (auto const& value : cell_range) {
0096 ARROW_RETURN_NOT_OK(ConversionTraits<ValueCType>::AppendRow(value_builder, value));
0097 }
0098 return Status::OK();
0099 }
0100
0101 #define ARROW_STL_CONVERSION(CType_, ArrowType_) \
0102 template <> \
0103 struct ConversionTraits<CType_> : public CTypeTraits<CType_> { \
0104 static Status AppendRow(typename TypeTraits<ArrowType_>::BuilderType& builder, \
0105 CType_ cell) { \
0106 return builder.Append(cell); \
0107 } \
0108 static CType_ GetEntry(const typename TypeTraits<ArrowType_>::ArrayType& array, \
0109 size_t j) { \
0110 return array.Value(j); \
0111 } \
0112 }; \
0113 \
0114 template <> \
0115 inline Status AppendListValues<CType_, const std::vector<CType_>&>( \
0116 typename TypeTraits<ArrowType_>::BuilderType & value_builder, \
0117 const std::vector<CType_>& cell_range) { \
0118 return value_builder.AppendValues(cell_range); \
0119 }
0120
0121 ARROW_STL_CONVERSION(bool, BooleanType)
0122 ARROW_STL_CONVERSION(int8_t, Int8Type)
0123 ARROW_STL_CONVERSION(int16_t, Int16Type)
0124 ARROW_STL_CONVERSION(int32_t, Int32Type)
0125 ARROW_STL_CONVERSION(int64_t, Int64Type)
0126 ARROW_STL_CONVERSION(uint8_t, UInt8Type)
0127 ARROW_STL_CONVERSION(uint16_t, UInt16Type)
0128 ARROW_STL_CONVERSION(uint32_t, UInt32Type)
0129 ARROW_STL_CONVERSION(uint64_t, UInt64Type)
0130 ARROW_STL_CONVERSION(float, FloatType)
0131 ARROW_STL_CONVERSION(double, DoubleType)
0132
0133 template <>
0134 struct ConversionTraits<std::string> : public CTypeTraits<std::string> {
0135 static Status AppendRow(StringBuilder& builder, const std::string& cell) {
0136 return builder.Append(cell);
0137 }
0138 static std::string GetEntry(const StringArray& array, size_t j) {
0139 return array.GetString(j);
0140 }
0141 };
0142
0143
0144
0145
0146
0147
0148 template <typename ValueCType, typename ListBuilderType, typename Range>
0149 Status AppendCellRange(ListBuilderType& builder, Range&& cell_range) {
0150 constexpr bool is_list_builder = std::is_same<ListBuilderType, ListBuilder>::value;
0151 constexpr bool is_large_list_builder =
0152 std::is_same<ListBuilderType, LargeListBuilder>::value;
0153 static_assert(
0154 is_list_builder || is_large_list_builder,
0155 "Builder type must be either ListBuilder or LargeListBuilder for appending "
0156 "multiple rows.");
0157
0158 using ChildBuilderType = CBuilderType<ValueCType>;
0159 ARROW_RETURN_NOT_OK(builder.Append());
0160 auto& value_builder =
0161 ::arrow::internal::checked_cast<ChildBuilderType&>(*builder.value_builder());
0162
0163
0164 return AppendListValues<ValueCType>(value_builder, std::forward<Range>(cell_range));
0165 }
0166
0167 template <typename ValueCType>
0168 struct ConversionTraits<std::vector<ValueCType>>
0169 : public CTypeTraits<std::vector<ValueCType>> {
0170 static Status AppendRow(ListBuilder& builder, const std::vector<ValueCType>& cell) {
0171 return AppendCellRange<ValueCType>(builder, cell);
0172 }
0173
0174 static std::vector<ValueCType> GetEntry(const ListArray& array, size_t j) {
0175 using ElementArrayType =
0176 typename TypeTraits<typename ConversionTraits<ValueCType>::ArrowType>::ArrayType;
0177
0178 const ElementArrayType& value_array =
0179 ::arrow::internal::checked_cast<const ElementArrayType&>(*array.values());
0180
0181 std::vector<ValueCType> vec(array.value_length(j));
0182 for (int64_t i = 0; i < array.value_length(j); i++) {
0183 vec[i] =
0184 ConversionTraits<ValueCType>::GetEntry(value_array, array.value_offset(j) + i);
0185 }
0186 return vec;
0187 }
0188 };
0189
0190 template <class ValueCType, std::size_t N>
0191 struct ConversionTraits<std::array<ValueCType, N>>
0192 : public CTypeTraits<std::array<ValueCType, N>> {
0193 static arrow::Status AppendRow(FixedSizeListBuilder& builder,
0194 const std::array<ValueCType, N>& values) {
0195 auto vb =
0196 ::arrow::internal::checked_cast<typename CTypeTraits<ValueCType>::BuilderType*>(
0197 builder.value_builder());
0198 ARROW_RETURN_NOT_OK(builder.Append());
0199 return vb->AppendValues(values.data(), N);
0200 }
0201
0202 static std::array<ValueCType, N> GetEntry(const ::arrow::FixedSizeListArray& array,
0203 size_t j) {
0204 using ElementArrayType = typename TypeTraits<
0205 typename stl::ConversionTraits<ValueCType>::ArrowType>::ArrayType;
0206
0207 const ElementArrayType& value_array =
0208 ::arrow::internal::checked_cast<const ElementArrayType&>(*array.values());
0209
0210 std::array<ValueCType, N> arr;
0211 for (size_t i = 0; i < N; i++) {
0212 arr[i] = stl::ConversionTraits<ValueCType>::GetEntry(value_array,
0213 array.value_offset(j) + i);
0214 }
0215 return arr;
0216 }
0217 };
0218
0219 template <typename Optional>
0220 struct ConversionTraits<Optional, enable_if_optional_like<Optional>>
0221 : public CTypeTraits<typename std::decay<decltype(*std::declval<Optional>())>::type> {
0222 using OptionalInnerType =
0223 typename std::decay<decltype(*std::declval<Optional>())>::type;
0224 using typename CTypeTraits<OptionalInnerType>::ArrowType;
0225 using CTypeTraits<OptionalInnerType>::type_singleton;
0226
0227 static Status AppendRow(typename TypeTraits<ArrowType>::BuilderType& builder,
0228 const Optional& cell) {
0229 if (cell) {
0230 return ConversionTraits<OptionalInnerType>::AppendRow(builder, *cell);
0231 } else {
0232 return builder.AppendNull();
0233 }
0234 }
0235 };
0236
0237
0238
0239
0240
0241 template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0242 struct SchemaFromTuple {
0243 using Element = internal::BareTupleElement<N - 1, Tuple>;
0244
0245
0246
0247
0248
0249
0250 static std::vector<std::shared_ptr<Field>> MakeSchemaRecursion(
0251 const std::vector<std::string>& names) {
0252 std::vector<std::shared_ptr<Field>> ret =
0253 SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursion(names);
0254 auto type = ConversionTraits<Element>::type_singleton();
0255 ret.push_back(field(names[N - 1], type, internal::is_optional_like<Element>::value));
0256 return ret;
0257 }
0258
0259
0260
0261
0262
0263
0264
0265
0266
0267
0268
0269 static std::shared_ptr<Schema> MakeSchema(const std::vector<std::string>& names) {
0270 return std::make_shared<Schema>(MakeSchemaRecursion(names));
0271 }
0272
0273
0274
0275
0276
0277
0278 template <typename NamesTuple>
0279 static std::vector<std::shared_ptr<Field>> MakeSchemaRecursionT(
0280 const NamesTuple& names) {
0281 using std::get;
0282
0283 std::vector<std::shared_ptr<Field>> ret =
0284 SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursionT(names);
0285 std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
0286 ret.push_back(
0287 field(get<N - 1>(names), type, internal::is_optional_like<Element>::value));
0288 return ret;
0289 }
0290
0291
0292
0293
0294
0295
0296
0297
0298
0299
0300
0301 template <typename NamesTuple>
0302 static std::shared_ptr<Schema> MakeSchema(const NamesTuple& names) {
0303 return std::make_shared<Schema>(MakeSchemaRecursionT<NamesTuple>(names));
0304 }
0305 };
0306
0307 template <typename Tuple>
0308 struct SchemaFromTuple<Tuple, 0> {
0309 static std::vector<std::shared_ptr<Field>> MakeSchemaRecursion(
0310 const std::vector<std::string>& names) {
0311 std::vector<std::shared_ptr<Field>> ret;
0312 ret.reserve(names.size());
0313 return ret;
0314 }
0315
0316 template <typename NamesTuple>
0317 static std::vector<std::shared_ptr<Field>> MakeSchemaRecursionT(
0318 const NamesTuple& names) {
0319 std::vector<std::shared_ptr<Field>> ret;
0320 ret.reserve(std::tuple_size<NamesTuple>::value);
0321 return ret;
0322 }
0323 };
0324
0325 namespace internal {
0326
0327 template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0328 struct CreateBuildersRecursive {
0329 static Status Make(MemoryPool* pool,
0330 std::vector<std::unique_ptr<ArrayBuilder>>* builders) {
0331 using Element = BareTupleElement<N - 1, Tuple>;
0332 std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
0333 ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &builders->at(N - 1)));
0334
0335 return CreateBuildersRecursive<Tuple, N - 1>::Make(pool, builders);
0336 }
0337 };
0338
0339 template <typename Tuple>
0340 struct CreateBuildersRecursive<Tuple, 0> {
0341 static Status Make(MemoryPool*, std::vector<std::unique_ptr<ArrayBuilder>>*) {
0342 return Status::OK();
0343 }
0344 };
0345
0346 template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0347 struct RowIterator {
0348 static Status Append(const std::vector<std::unique_ptr<ArrayBuilder>>& builders,
0349 const Tuple& row) {
0350 using std::get;
0351 using Element = BareTupleElement<N - 1, Tuple>;
0352 using BuilderType =
0353 typename TypeTraits<typename ConversionTraits<Element>::ArrowType>::BuilderType;
0354
0355 BuilderType& builder =
0356 ::arrow::internal::checked_cast<BuilderType&>(*builders[N - 1]);
0357 ARROW_RETURN_NOT_OK(ConversionTraits<Element>::AppendRow(builder, get<N - 1>(row)));
0358
0359 return RowIterator<Tuple, N - 1>::Append(builders, row);
0360 }
0361 };
0362
0363 template <typename Tuple>
0364 struct RowIterator<Tuple, 0> {
0365 static Status Append(const std::vector<std::unique_ptr<ArrayBuilder>>& builders,
0366 const Tuple& row) {
0367 return Status::OK();
0368 }
0369 };
0370
0371 template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0372 struct EnsureColumnTypes {
0373 static Status Cast(const Table& table, std::shared_ptr<Table>* table_owner,
0374 const compute::CastOptions& cast_options, compute::ExecContext* ctx,
0375 std::reference_wrapper<const ::arrow::Table>* result) {
0376 using Element = BareTupleElement<N - 1, Tuple>;
0377 std::shared_ptr<DataType> expected_type = ConversionTraits<Element>::type_singleton();
0378
0379 if (!table.schema()->field(N - 1)->type()->Equals(*expected_type)) {
0380 ARROW_ASSIGN_OR_RAISE(
0381 Datum casted,
0382 compute::Cast(table.column(N - 1), expected_type, cast_options, ctx));
0383 auto new_field = table.schema()->field(N - 1)->WithType(expected_type);
0384 ARROW_ASSIGN_OR_RAISE(*table_owner,
0385 table.SetColumn(N - 1, new_field, casted.chunked_array()));
0386 *result = **table_owner;
0387 }
0388
0389 return EnsureColumnTypes<Tuple, N - 1>::Cast(result->get(), table_owner, cast_options,
0390 ctx, result);
0391 }
0392 };
0393
0394 template <typename Tuple>
0395 struct EnsureColumnTypes<Tuple, 0> {
0396 static Status Cast(const Table& table, std::shared_ptr<Table>* table_owner,
0397 const compute::CastOptions& cast_options, compute::ExecContext* ctx,
0398 std::reference_wrapper<const ::arrow::Table>* result) {
0399 return Status::OK();
0400 }
0401 };
0402
0403 template <typename Range, typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0404 struct TupleSetter {
0405 static void Fill(const Table& table, Range* rows) {
0406 using std::get;
0407 using Element = typename std::tuple_element<N - 1, Tuple>::type;
0408 using ArrayType =
0409 typename TypeTraits<typename ConversionTraits<Element>::ArrowType>::ArrayType;
0410
0411 auto iter = rows->begin();
0412 const ChunkedArray& chunked_array = *table.column(N - 1);
0413 for (int i = 0; i < chunked_array.num_chunks(); i++) {
0414 const ArrayType& array =
0415 ::arrow::internal::checked_cast<const ArrayType&>(*chunked_array.chunk(i));
0416 for (int64_t j = 0; j < array.length(); j++) {
0417 get<N - 1>(*iter++) = ConversionTraits<Element>::GetEntry(array, j);
0418 }
0419 }
0420
0421 return TupleSetter<Range, Tuple, N - 1>::Fill(table, rows);
0422 }
0423 };
0424
0425 template <typename Range, typename Tuple>
0426 struct TupleSetter<Range, Tuple, 0> {
0427 static void Fill(const Table& table, Range* rows) {}
0428 };
0429
0430 }
0431
0432 template <typename Range>
0433 Status TableFromTupleRange(MemoryPool* pool, Range&& rows,
0434 const std::vector<std::string>& names,
0435 std::shared_ptr<Table>* table) {
0436 using row_type = typename std::iterator_traits<decltype(std::begin(rows))>::value_type;
0437 constexpr std::size_t n_columns = std::tuple_size<row_type>::value;
0438
0439 std::shared_ptr<Schema> schema = SchemaFromTuple<row_type>::MakeSchema(names);
0440
0441 std::vector<std::unique_ptr<ArrayBuilder>> builders(n_columns);
0442 ARROW_RETURN_NOT_OK(internal::CreateBuildersRecursive<row_type>::Make(pool, &builders));
0443
0444 for (auto const& row : rows) {
0445 ARROW_RETURN_NOT_OK(internal::RowIterator<row_type>::Append(builders, row));
0446 }
0447
0448 std::vector<std::shared_ptr<Array>> arrays;
0449 for (auto const& builder : builders) {
0450 std::shared_ptr<Array> array;
0451 ARROW_RETURN_NOT_OK(builder->Finish(&array));
0452 arrays.emplace_back(array);
0453 }
0454
0455 *table = Table::Make(std::move(schema), std::move(arrays));
0456
0457 return Status::OK();
0458 }
0459
0460 template <typename Range>
0461 Status TupleRangeFromTable(const Table& table, const compute::CastOptions& cast_options,
0462 compute::ExecContext* ctx, Range* rows) {
0463 using row_type = typename std::decay<decltype(*std::begin(*rows))>::type;
0464 constexpr std::size_t n_columns = std::tuple_size<row_type>::value;
0465
0466 if (table.schema()->num_fields() != n_columns) {
0467 return Status::Invalid(
0468 "Number of columns in the table does not match the width of the target: ",
0469 table.schema()->num_fields(), " != ", n_columns);
0470 }
0471
0472 if (std::size(*rows) != static_cast<size_t>(table.num_rows())) {
0473 return Status::Invalid(
0474 "Number of rows in the table does not match the size of the target: ",
0475 table.num_rows(), " != ", std::size(*rows));
0476 }
0477
0478
0479 std::shared_ptr<Table> table_owner;
0480 std::reference_wrapper<const ::arrow::Table> current_table(table);
0481
0482 ARROW_RETURN_NOT_OK(internal::EnsureColumnTypes<row_type>::Cast(
0483 table, &table_owner, cast_options, ctx, ¤t_table));
0484
0485 internal::TupleSetter<Range, row_type>::Fill(current_table.get(), rows);
0486
0487 return Status::OK();
0488 }
0489
0490 }
0491 }