Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-27 08:47:21

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <algorithm>
0021 #include <cstddef>
0022 #include <memory>
0023 #include <string>
0024 #include <tuple>
0025 #include <type_traits>
0026 #include <utility>
0027 #include <vector>
0028 
0029 #include "arrow/array.h"
0030 #include "arrow/array/builder_base.h"
0031 #include "arrow/array/builder_binary.h"
0032 #include "arrow/array/builder_nested.h"
0033 #include "arrow/array/builder_primitive.h"
0034 #include "arrow/chunked_array.h"
0035 #include "arrow/compute/api.h"
0036 #include "arrow/status.h"
0037 #include "arrow/table.h"
0038 #include "arrow/type_fwd.h"
0039 #include "arrow/type_traits.h"
0040 #include "arrow/util/checked_cast.h"
0041 #include "arrow/util/macros.h"
0042 
0043 namespace arrow {
0044 
0045 class Schema;
0046 
0047 namespace stl {
0048 
0049 namespace internal {
0050 
/// Detects whether the expression `*std::declval<T>()` is well-formed.
///
/// The primary template is the negative answer; the partial specialization below
/// is selected through SFINAE whenever the dereference expression compiles.
template <typename T, typename = void>
struct is_dereferencable : public std::false_type {};

template <typename T>
struct is_dereferencable<T, std::void_t<decltype(*std::declval<T>())>>
    : public std::true_type {};

/// Detects "optional-like" types.
///
/// A type qualifies when all of the following hold:
///  - `bool` can be constructed from it,
///  - it can be dereferenced with unary `*`,
///  - it is not (a reference to) a built-in array.
template <typename T, typename = void>
struct is_optional_like : public std::false_type {};

template <typename T>
struct is_optional_like<
    T, std::enable_if_t<std::is_constructible<bool, T>::value &&
                        is_dereferencable<T>::value &&
                        !std::is_array<std::remove_reference_t<T>>::value>>
    : public std::true_type {};
0067 
/// Type of the N-th element of `Tuple` after applying std::decay, i.e. with
/// references and top-level cv-qualifiers stripped.
template <std::size_t N, typename Tuple>
using BareTupleElement = std::decay_t<std::tuple_element_t<N, Tuple>>;
0071 
0072 }  // namespace internal
0073 
0074 template <typename T, typename R = void>
0075 using enable_if_optional_like =
0076     typename std::enable_if<internal::is_optional_like<T>::value, R>::type;
0077 
/// Traits meta class mapping a standard C/C++ type to its Arrow equivalent.
///
/// The primary template is deliberately empty: only the specializations defined
/// for supported types carry members. The second parameter exists so that
/// specializations can be enabled conditionally (SFINAE).
template <typename T, typename Enable = void>
struct ConversionTraits {
};
0081 
0082 /// Returns builder type for given standard C/C++ type.
0083 template <typename CType>
0084 using CBuilderType =
0085     typename TypeTraits<typename ConversionTraits<CType>::ArrowType>::BuilderType;
0086 
0087 /// Default implementation of AppendListValues.
0088 ///
0089 /// This function can be specialized by user to take advantage of appending
0090 /// contiguous ranges while appending. This default implementation will call
0091 /// ConversionTraits<ValueCType>::AppendRow() for each value in the range.
0092 template <typename ValueCType, typename Range>
0093 inline Status AppendListValues(CBuilderType<ValueCType>& value_builder,
0094                                Range&& cell_range) {
0095   for (auto const& value : cell_range) {
0096     ARROW_RETURN_NOT_OK(ConversionTraits<ValueCType>::AppendRow(value_builder, value));
0097   }
0098   return Status::OK();
0099 }
0100 
0101 #define ARROW_STL_CONVERSION(CType_, ArrowType_)                                    \
0102   template <>                                                                       \
0103   struct ConversionTraits<CType_> : public CTypeTraits<CType_> {                    \
0104     static Status AppendRow(typename TypeTraits<ArrowType_>::BuilderType& builder,  \
0105                             CType_ cell) {                                          \
0106       return builder.Append(cell);                                                  \
0107     }                                                                               \
0108     static CType_ GetEntry(const typename TypeTraits<ArrowType_>::ArrayType& array, \
0109                            size_t j) {                                              \
0110       return array.Value(j);                                                        \
0111     }                                                                               \
0112   };                                                                                \
0113                                                                                     \
0114   template <>                                                                       \
0115   inline Status AppendListValues<CType_, const std::vector<CType_>&>(               \
0116       typename TypeTraits<ArrowType_>::BuilderType & value_builder,                 \
0117       const std::vector<CType_>& cell_range) {                                      \
0118     return value_builder.AppendValues(cell_range);                                  \
0119   }
0120 
0121 ARROW_STL_CONVERSION(bool, BooleanType)
0122 ARROW_STL_CONVERSION(int8_t, Int8Type)
0123 ARROW_STL_CONVERSION(int16_t, Int16Type)
0124 ARROW_STL_CONVERSION(int32_t, Int32Type)
0125 ARROW_STL_CONVERSION(int64_t, Int64Type)
0126 ARROW_STL_CONVERSION(uint8_t, UInt8Type)
0127 ARROW_STL_CONVERSION(uint16_t, UInt16Type)
0128 ARROW_STL_CONVERSION(uint32_t, UInt32Type)
0129 ARROW_STL_CONVERSION(uint64_t, UInt64Type)
0130 ARROW_STL_CONVERSION(float, FloatType)
0131 ARROW_STL_CONVERSION(double, DoubleType)
0132 
0133 template <>
0134 struct ConversionTraits<std::string> : public CTypeTraits<std::string> {
0135   static Status AppendRow(StringBuilder& builder, const std::string& cell) {
0136     return builder.Append(cell);
0137   }
0138   static std::string GetEntry(const StringArray& array, size_t j) {
0139     return array.GetString(j);
0140   }
0141 };
0142 
0143 /// Append cell range elements as a single value to the list builder.
0144 ///
0145 /// Cell range will be added to child builder using AppendListValues<ValueCType>()
0146 /// if provided. AppendListValues<ValueCType>() has a default implementation, but
0147 /// it can be specialized by users.
0148 template <typename ValueCType, typename ListBuilderType, typename Range>
0149 Status AppendCellRange(ListBuilderType& builder, Range&& cell_range) {
0150   constexpr bool is_list_builder = std::is_same<ListBuilderType, ListBuilder>::value;
0151   constexpr bool is_large_list_builder =
0152       std::is_same<ListBuilderType, LargeListBuilder>::value;
0153   static_assert(
0154       is_list_builder || is_large_list_builder,
0155       "Builder type must be either ListBuilder or LargeListBuilder for appending "
0156       "multiple rows.");
0157 
0158   using ChildBuilderType = CBuilderType<ValueCType>;
0159   ARROW_RETURN_NOT_OK(builder.Append());
0160   auto& value_builder =
0161       ::arrow::internal::checked_cast<ChildBuilderType&>(*builder.value_builder());
0162 
0163   // XXX: Remove appended value before returning if status isn't OK?
0164   return AppendListValues<ValueCType>(value_builder, std::forward<Range>(cell_range));
0165 }
0166 
0167 template <typename ValueCType>
0168 struct ConversionTraits<std::vector<ValueCType>>
0169     : public CTypeTraits<std::vector<ValueCType>> {
0170   static Status AppendRow(ListBuilder& builder, const std::vector<ValueCType>& cell) {
0171     return AppendCellRange<ValueCType>(builder, cell);
0172   }
0173 
0174   static std::vector<ValueCType> GetEntry(const ListArray& array, size_t j) {
0175     using ElementArrayType =
0176         typename TypeTraits<typename ConversionTraits<ValueCType>::ArrowType>::ArrayType;
0177 
0178     const ElementArrayType& value_array =
0179         ::arrow::internal::checked_cast<const ElementArrayType&>(*array.values());
0180 
0181     std::vector<ValueCType> vec(array.value_length(j));
0182     for (int64_t i = 0; i < array.value_length(j); i++) {
0183       vec[i] =
0184           ConversionTraits<ValueCType>::GetEntry(value_array, array.value_offset(j) + i);
0185     }
0186     return vec;
0187   }
0188 };
0189 
0190 template <class ValueCType, std::size_t N>
0191 struct ConversionTraits<std::array<ValueCType, N>>
0192     : public CTypeTraits<std::array<ValueCType, N>> {
0193   static arrow::Status AppendRow(FixedSizeListBuilder& builder,
0194                                  const std::array<ValueCType, N>& values) {
0195     auto vb =
0196         ::arrow::internal::checked_cast<typename CTypeTraits<ValueCType>::BuilderType*>(
0197             builder.value_builder());
0198     ARROW_RETURN_NOT_OK(builder.Append());
0199     return vb->AppendValues(values.data(), N);
0200   }
0201 
0202   static std::array<ValueCType, N> GetEntry(const ::arrow::FixedSizeListArray& array,
0203                                             size_t j) {
0204     using ElementArrayType = typename TypeTraits<
0205         typename stl::ConversionTraits<ValueCType>::ArrowType>::ArrayType;
0206 
0207     const ElementArrayType& value_array =
0208         ::arrow::internal::checked_cast<const ElementArrayType&>(*array.values());
0209 
0210     std::array<ValueCType, N> arr;
0211     for (size_t i = 0; i < N; i++) {
0212       arr[i] = stl::ConversionTraits<ValueCType>::GetEntry(value_array,
0213                                                            array.value_offset(j) + i);
0214     }
0215     return arr;
0216   }
0217 };
0218 
0219 template <typename Optional>
0220 struct ConversionTraits<Optional, enable_if_optional_like<Optional>>
0221     : public CTypeTraits<typename std::decay<decltype(*std::declval<Optional>())>::type> {
0222   using OptionalInnerType =
0223       typename std::decay<decltype(*std::declval<Optional>())>::type;
0224   using typename CTypeTraits<OptionalInnerType>::ArrowType;
0225   using CTypeTraits<OptionalInnerType>::type_singleton;
0226 
0227   static Status AppendRow(typename TypeTraits<ArrowType>::BuilderType& builder,
0228                           const Optional& cell) {
0229     if (cell) {
0230       return ConversionTraits<OptionalInnerType>::AppendRow(builder, *cell);
0231     } else {
0232       return builder.AppendNull();
0233     }
0234   }
0235 };
0236 
0237 /// Build an arrow::Schema based upon the types defined in a std::tuple-like structure.
0238 ///
0239 /// While the type information is available at compile-time, we still need to add the
0240 /// column names at runtime, thus these methods are not constexpr.
0241 template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0242 struct SchemaFromTuple {
0243   using Element = internal::BareTupleElement<N - 1, Tuple>;
0244 
0245   // Implementations that take a vector-like object for the column names.
0246 
0247   /// Recursively build a vector of arrow::Field from the defined types.
0248   ///
0249   /// In most cases MakeSchema is the better entrypoint for the Schema creation.
0250   static std::vector<std::shared_ptr<Field>> MakeSchemaRecursion(
0251       const std::vector<std::string>& names) {
0252     std::vector<std::shared_ptr<Field>> ret =
0253         SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursion(names);
0254     auto type = ConversionTraits<Element>::type_singleton();
0255     ret.push_back(field(names[N - 1], type, internal::is_optional_like<Element>::value));
0256     return ret;
0257   }
0258 
0259   /// Build a Schema from the types of the tuple-like structure passed in as template
0260   /// parameter assign the column names at runtime.
0261   ///
0262   /// An example usage of this API can look like the following:
0263   ///
0264   /// \code{.cpp}
0265   /// using TupleType = std::tuple<int, std::vector<std::string>>;
0266   /// std::shared_ptr<Schema> schema =
0267   ///   SchemaFromTuple<TupleType>::MakeSchema({"int_column", "list_of_strings_column"});
0268   /// \endcode
0269   static std::shared_ptr<Schema> MakeSchema(const std::vector<std::string>& names) {
0270     return std::make_shared<Schema>(MakeSchemaRecursion(names));
0271   }
0272 
0273   // Implementations that take a tuple-like object for the column names.
0274 
0275   /// Recursively build a vector of arrow::Field from the defined types.
0276   ///
0277   /// In most cases MakeSchema is the better entrypoint for the Schema creation.
0278   template <typename NamesTuple>
0279   static std::vector<std::shared_ptr<Field>> MakeSchemaRecursionT(
0280       const NamesTuple& names) {
0281     using std::get;
0282 
0283     std::vector<std::shared_ptr<Field>> ret =
0284         SchemaFromTuple<Tuple, N - 1>::MakeSchemaRecursionT(names);
0285     std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
0286     ret.push_back(
0287         field(get<N - 1>(names), type, internal::is_optional_like<Element>::value));
0288     return ret;
0289   }
0290 
0291   /// Build a Schema from the types of the tuple-like structure passed in as template
0292   /// parameter assign the column names at runtime.
0293   ///
0294   /// An example usage of this API can look like the following:
0295   ///
0296   /// \code{.cpp}
0297   /// using TupleType = std::tuple<int, std::vector<std::string>>;
0298   /// std::shared_ptr<Schema> schema =
0299   ///   SchemaFromTuple<TupleType>::MakeSchema({"int_column", "list_of_strings_column"});
0300   /// \endcode
0301   template <typename NamesTuple>
0302   static std::shared_ptr<Schema> MakeSchema(const NamesTuple& names) {
0303     return std::make_shared<Schema>(MakeSchemaRecursionT<NamesTuple>(names));
0304   }
0305 };
0306 
0307 template <typename Tuple>
0308 struct SchemaFromTuple<Tuple, 0> {
0309   static std::vector<std::shared_ptr<Field>> MakeSchemaRecursion(
0310       const std::vector<std::string>& names) {
0311     std::vector<std::shared_ptr<Field>> ret;
0312     ret.reserve(names.size());
0313     return ret;
0314   }
0315 
0316   template <typename NamesTuple>
0317   static std::vector<std::shared_ptr<Field>> MakeSchemaRecursionT(
0318       const NamesTuple& names) {
0319     std::vector<std::shared_ptr<Field>> ret;
0320     ret.reserve(std::tuple_size<NamesTuple>::value);
0321     return ret;
0322   }
0323 };
0324 
0325 namespace internal {
0326 
0327 template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0328 struct CreateBuildersRecursive {
0329   static Status Make(MemoryPool* pool,
0330                      std::vector<std::unique_ptr<ArrayBuilder>>* builders) {
0331     using Element = BareTupleElement<N - 1, Tuple>;
0332     std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
0333     ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &builders->at(N - 1)));
0334 
0335     return CreateBuildersRecursive<Tuple, N - 1>::Make(pool, builders);
0336   }
0337 };
0338 
0339 template <typename Tuple>
0340 struct CreateBuildersRecursive<Tuple, 0> {
0341   static Status Make(MemoryPool*, std::vector<std::unique_ptr<ArrayBuilder>>*) {
0342     return Status::OK();
0343   }
0344 };
0345 
0346 template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0347 struct RowIterator {
0348   static Status Append(const std::vector<std::unique_ptr<ArrayBuilder>>& builders,
0349                        const Tuple& row) {
0350     using std::get;
0351     using Element = BareTupleElement<N - 1, Tuple>;
0352     using BuilderType =
0353         typename TypeTraits<typename ConversionTraits<Element>::ArrowType>::BuilderType;
0354 
0355     BuilderType& builder =
0356         ::arrow::internal::checked_cast<BuilderType&>(*builders[N - 1]);
0357     ARROW_RETURN_NOT_OK(ConversionTraits<Element>::AppendRow(builder, get<N - 1>(row)));
0358 
0359     return RowIterator<Tuple, N - 1>::Append(builders, row);
0360   }
0361 };
0362 
0363 template <typename Tuple>
0364 struct RowIterator<Tuple, 0> {
0365   static Status Append(const std::vector<std::unique_ptr<ArrayBuilder>>& builders,
0366                        const Tuple& row) {
0367     return Status::OK();
0368   }
0369 };
0370 
0371 template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0372 struct EnsureColumnTypes {
0373   static Status Cast(const Table& table, std::shared_ptr<Table>* table_owner,
0374                      const compute::CastOptions& cast_options, compute::ExecContext* ctx,
0375                      std::reference_wrapper<const ::arrow::Table>* result) {
0376     using Element = BareTupleElement<N - 1, Tuple>;
0377     std::shared_ptr<DataType> expected_type = ConversionTraits<Element>::type_singleton();
0378 
0379     if (!table.schema()->field(N - 1)->type()->Equals(*expected_type)) {
0380       ARROW_ASSIGN_OR_RAISE(
0381           Datum casted,
0382           compute::Cast(table.column(N - 1), expected_type, cast_options, ctx));
0383       auto new_field = table.schema()->field(N - 1)->WithType(expected_type);
0384       ARROW_ASSIGN_OR_RAISE(*table_owner,
0385                             table.SetColumn(N - 1, new_field, casted.chunked_array()));
0386       *result = **table_owner;
0387     }
0388 
0389     return EnsureColumnTypes<Tuple, N - 1>::Cast(result->get(), table_owner, cast_options,
0390                                                  ctx, result);
0391   }
0392 };
0393 
0394 template <typename Tuple>
0395 struct EnsureColumnTypes<Tuple, 0> {
0396   static Status Cast(const Table& table, std::shared_ptr<Table>* table_owner,
0397                      const compute::CastOptions& cast_options, compute::ExecContext* ctx,
0398                      std::reference_wrapper<const ::arrow::Table>* result) {
0399     return Status::OK();
0400   }
0401 };
0402 
0403 template <typename Range, typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
0404 struct TupleSetter {
0405   static void Fill(const Table& table, Range* rows) {
0406     using std::get;
0407     using Element = typename std::tuple_element<N - 1, Tuple>::type;
0408     using ArrayType =
0409         typename TypeTraits<typename ConversionTraits<Element>::ArrowType>::ArrayType;
0410 
0411     auto iter = rows->begin();
0412     const ChunkedArray& chunked_array = *table.column(N - 1);
0413     for (int i = 0; i < chunked_array.num_chunks(); i++) {
0414       const ArrayType& array =
0415           ::arrow::internal::checked_cast<const ArrayType&>(*chunked_array.chunk(i));
0416       for (int64_t j = 0; j < array.length(); j++) {
0417         get<N - 1>(*iter++) = ConversionTraits<Element>::GetEntry(array, j);
0418       }
0419     }
0420 
0421     return TupleSetter<Range, Tuple, N - 1>::Fill(table, rows);
0422   }
0423 };
0424 
0425 template <typename Range, typename Tuple>
0426 struct TupleSetter<Range, Tuple, 0> {
0427   static void Fill(const Table& table, Range* rows) {}
0428 };
0429 
0430 }  // namespace internal
0431 
0432 template <typename Range>
0433 Status TableFromTupleRange(MemoryPool* pool, Range&& rows,
0434                            const std::vector<std::string>& names,
0435                            std::shared_ptr<Table>* table) {
0436   using row_type = typename std::iterator_traits<decltype(std::begin(rows))>::value_type;
0437   constexpr std::size_t n_columns = std::tuple_size<row_type>::value;
0438 
0439   std::shared_ptr<Schema> schema = SchemaFromTuple<row_type>::MakeSchema(names);
0440 
0441   std::vector<std::unique_ptr<ArrayBuilder>> builders(n_columns);
0442   ARROW_RETURN_NOT_OK(internal::CreateBuildersRecursive<row_type>::Make(pool, &builders));
0443 
0444   for (auto const& row : rows) {
0445     ARROW_RETURN_NOT_OK(internal::RowIterator<row_type>::Append(builders, row));
0446   }
0447 
0448   std::vector<std::shared_ptr<Array>> arrays;
0449   for (auto const& builder : builders) {
0450     std::shared_ptr<Array> array;
0451     ARROW_RETURN_NOT_OK(builder->Finish(&array));
0452     arrays.emplace_back(array);
0453   }
0454 
0455   *table = Table::Make(std::move(schema), std::move(arrays));
0456 
0457   return Status::OK();
0458 }
0459 
0460 template <typename Range>
0461 Status TupleRangeFromTable(const Table& table, const compute::CastOptions& cast_options,
0462                            compute::ExecContext* ctx, Range* rows) {
0463   using row_type = typename std::decay<decltype(*std::begin(*rows))>::type;
0464   constexpr std::size_t n_columns = std::tuple_size<row_type>::value;
0465 
0466   if (table.schema()->num_fields() != n_columns) {
0467     return Status::Invalid(
0468         "Number of columns in the table does not match the width of the target: ",
0469         table.schema()->num_fields(), " != ", n_columns);
0470   }
0471 
0472   if (std::size(*rows) != static_cast<size_t>(table.num_rows())) {
0473     return Status::Invalid(
0474         "Number of rows in the table does not match the size of the target: ",
0475         table.num_rows(), " != ", std::size(*rows));
0476   }
0477 
0478   // Check that all columns have the correct type, otherwise cast them.
0479   std::shared_ptr<Table> table_owner;
0480   std::reference_wrapper<const ::arrow::Table> current_table(table);
0481 
0482   ARROW_RETURN_NOT_OK(internal::EnsureColumnTypes<row_type>::Cast(
0483       table, &table_owner, cast_options, ctx, &current_table));
0484 
0485   internal::TupleSetter<Range, row_type>::Fill(current_table.get(), rows);
0486 
0487   return Status::OK();
0488 }
0489 
0490 }  // namespace stl
0491 }  // namespace arrow