Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-08-28 08:27:09

0001 // Licensed to the Apache Software Foundation (ASF) under one
0002 // or more contributor license agreements.  See the NOTICE file
0003 // distributed with this work for additional information
0004 // regarding copyright ownership.  The ASF licenses this file
0005 // to you under the Apache License, Version 2.0 (the
0006 // "License"); you may not use this file except in compliance
0007 // with the License.  You may obtain a copy of the License at
0008 //
0009 //   http://www.apache.org/licenses/LICENSE-2.0
0010 //
0011 // Unless required by applicable law or agreed to in writing,
0012 // software distributed under the License is distributed on an
0013 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
0014 // KIND, either express or implied.  See the License for the
0015 // specific language governing permissions and limitations
0016 // under the License.
0017 
0018 #pragma once
0019 
0020 #include <cassert>
0021 #include <functional>
0022 #include <memory>
0023 #include <optional>
0024 #include <tuple>
0025 #include <type_traits>
0026 #include <utility>
0027 #include <vector>
0028 
0029 #include "arrow/result.h"
0030 #include "arrow/status.h"
0031 #include "arrow/util/compare.h"
0032 #include "arrow/util/functional.h"
0033 #include "arrow/util/macros.h"
0034 #include "arrow/util/visibility.h"
0035 
0036 namespace arrow {
0037 
0038 template <typename T>
0039 class Iterator;
0040 
0041 template <typename T>
0042 struct IterationTraits {
0043   /// \brief a reserved value which indicates the end of iteration. By
0044   /// default this is NULLPTR since most iterators yield pointer types.
0045   /// Specialize IterationTraits if different end semantics are required.
0046   ///
0047   /// Note: This should not be used to determine if a given value is a
0048   /// terminal value.  Use IsIterationEnd (which uses IsEnd) instead.  This
0049   /// is only for returning terminal values.
0050   static T End() { return T(NULLPTR); }
0051 
0052   /// \brief Checks to see if the value is a terminal value.
0053   /// A method is used here since T is not necessarily comparable in many
0054   /// cases even though it has a distinct final value
0055   static bool IsEnd(const T& val) { return val == End(); }
0056 };
0057 
0058 template <typename T>
0059 T IterationEnd() {
0060   return IterationTraits<T>::End();
0061 }
0062 
0063 template <typename T>
0064 bool IsIterationEnd(const T& val) {
0065   return IterationTraits<T>::IsEnd(val);
0066 }
0067 
0068 template <typename T>
0069 struct IterationTraits<std::optional<T>> {
0070   /// \brief by default when iterating through a sequence of optional,
0071   /// nullopt indicates the end of iteration.
0072   /// Specialize IterationTraits if different end semantics are required.
0073   static std::optional<T> End() { return std::nullopt; }
0074 
0075   /// \brief by default when iterating through a sequence of optional,
0076   /// nullopt (!has_value()) indicates the end of iteration.
0077   /// Specialize IterationTraits if different end semantics are required.
0078   static bool IsEnd(const std::optional<T>& val) { return !val.has_value(); }
0079 
0080   // TODO(bkietz) The range-for loop over Iterator<optional<T>> yields
0081   // Result<optional<T>> which is unnecessary (since only the unyielded end optional
0082   // is nullopt. Add IterationTraits::GetRangeElement() to handle this case
0083 };
0084 
0085 /// \brief A generic Iterator that can return errors
0086 template <typename T>
0087 class Iterator : public util::EqualityComparable<Iterator<T>> {
0088  public:
0089   /// \brief Iterator may be constructed from any type which has a member function
0090   /// with signature Result<T> Next();
0091   /// End of iterator is signalled by returning IteratorTraits<T>::End();
0092   ///
0093   /// The argument is moved or copied to the heap and kept in a unique_ptr<void>. Only
0094   /// its destructor and its Next method (which are stored in function pointers) are
0095   /// referenced after construction.
0096   ///
0097   /// This approach is used to dodge MSVC linkage hell (ARROW-6244, ARROW-6558) when using
0098   /// an abstract template base class: instead of being inlined as usual for a template
0099   /// function the base's virtual destructor will be exported, leading to multiple
0100   /// definition errors when linking to any other TU where the base is instantiated.
0101   template <typename Wrapped>
0102   explicit Iterator(Wrapped has_next)
0103       : ptr_(new Wrapped(std::move(has_next)), Delete<Wrapped>), next_(Next<Wrapped>) {}
0104 
0105   Iterator() : ptr_(NULLPTR, [](void*) {}) {}
0106 
0107   /// \brief Return the next element of the sequence, IterationTraits<T>::End() when the
0108   /// iteration is completed.
0109   Result<T> Next() {
0110     if (ptr_) {
0111       auto next_result = next_(ptr_.get());
0112       if (next_result.ok() && IsIterationEnd(next_result.ValueUnsafe())) {
0113         ptr_.reset(NULLPTR);
0114       }
0115       return next_result;
0116     } else {
0117       return IterationTraits<T>::End();
0118     }
0119   }
0120 
0121   /// Pass each element of the sequence to a visitor. Will return any error status
0122   /// returned by the visitor, terminating iteration.
0123   template <typename Visitor>
0124   Status Visit(Visitor&& visitor) {
0125     for (;;) {
0126       ARROW_ASSIGN_OR_RAISE(auto value, Next());
0127 
0128       if (IsIterationEnd(value)) break;
0129 
0130       ARROW_RETURN_NOT_OK(visitor(std::move(value)));
0131     }
0132 
0133     return Status::OK();
0134   }
0135 
0136   /// Iterators will only compare equal if they are both null.
0137   /// Equality comparability is required to make an Iterator of Iterators
0138   /// (to check for the end condition).
0139   bool Equals(const Iterator& other) const { return ptr_ == other.ptr_; }
0140 
0141   explicit operator bool() const { return ptr_ != NULLPTR; }
0142 
0143   class RangeIterator {
0144    public:
0145     RangeIterator() : value_(IterationTraits<T>::End()) {}
0146 
0147     explicit RangeIterator(Iterator i)
0148         : value_(IterationTraits<T>::End()),
0149           iterator_(std::make_shared<Iterator>(std::move(i))) {
0150       Next();
0151     }
0152 
0153     bool operator!=(const RangeIterator& other) const { return value_ != other.value_; }
0154 
0155     RangeIterator& operator++() {
0156       Next();
0157       return *this;
0158     }
0159 
0160     Result<T> operator*() {
0161       ARROW_RETURN_NOT_OK(value_.status());
0162 
0163       auto value = std::move(value_);
0164       value_ = IterationTraits<T>::End();
0165       return value;
0166     }
0167 
0168    private:
0169     void Next() {
0170       if (!value_.ok()) {
0171         value_ = IterationTraits<T>::End();
0172         return;
0173       }
0174       value_ = iterator_->Next();
0175     }
0176 
0177     Result<T> value_;
0178     std::shared_ptr<Iterator> iterator_;
0179   };
0180 
0181   RangeIterator begin() { return RangeIterator(std::move(*this)); }
0182 
0183   RangeIterator end() { return RangeIterator(); }
0184 
0185   /// \brief Move every element of this iterator into a vector.
0186   Result<std::vector<T>> ToVector() {
0187     std::vector<T> out;
0188     for (auto maybe_element : *this) {
0189       ARROW_ASSIGN_OR_RAISE(auto element, maybe_element);
0190       out.push_back(std::move(element));
0191     }
0192     return out;
0193   }
0194 
0195  private:
0196   /// Implementation of deleter for ptr_: Casts from void* to the wrapped type and
0197   /// deletes that.
0198   template <typename HasNext>
0199   static void Delete(void* ptr) {
0200     delete static_cast<HasNext*>(ptr);
0201   }
0202 
0203   /// Implementation of Next: Casts from void* to the wrapped type and invokes that
0204   /// type's Next member function.
0205   template <typename HasNext>
0206   static Result<T> Next(void* ptr) {
0207     return static_cast<HasNext*>(ptr)->Next();
0208   }
0209 
0210   /// ptr_ is a unique_ptr to void with a custom deleter: a function pointer which first
0211   /// casts from void* to a pointer to the wrapped type then deletes that.
0212   std::unique_ptr<void, void (*)(void*)> ptr_;
0213 
0214   /// next_ is a function pointer which first casts from void* to a pointer to the wrapped
0215   /// type then invokes its Next member function.
0216   Result<T> (*next_)(void*) = NULLPTR;
0217 };
0218 
0219 template <typename T>
0220 struct TransformFlow {
0221   using YieldValueType = T;
0222 
0223   TransformFlow(YieldValueType value, bool ready_for_next)
0224       : finished_(false),
0225         ready_for_next_(ready_for_next),
0226         yield_value_(std::move(value)) {}
0227   TransformFlow(bool finished, bool ready_for_next)
0228       : finished_(finished), ready_for_next_(ready_for_next), yield_value_() {}
0229 
0230   bool HasValue() const { return yield_value_.has_value(); }
0231   bool Finished() const { return finished_; }
0232   bool ReadyForNext() const { return ready_for_next_; }
0233   T Value() const { return *yield_value_; }
0234 
0235   bool finished_ = false;
0236   bool ready_for_next_ = false;
0237   std::optional<YieldValueType> yield_value_;
0238 };
0239 
0240 struct TransformFinish {
0241   template <typename T>
0242   operator TransformFlow<T>() && {  // NOLINT explicit
0243     return TransformFlow<T>(true, true);
0244   }
0245 };
0246 
0247 struct TransformSkip {
0248   template <typename T>
0249   operator TransformFlow<T>() && {  // NOLINT explicit
0250     return TransformFlow<T>(false, true);
0251   }
0252 };
0253 
0254 template <typename T>
0255 TransformFlow<T> TransformYield(T value = {}, bool ready_for_next = true) {
0256   return TransformFlow<T>(std::move(value), ready_for_next);
0257 }
0258 
0259 template <typename T, typename V>
0260 using Transformer = std::function<Result<TransformFlow<V>>(T)>;
0261 
0262 template <typename T, typename V>
0263 class TransformIterator {
0264  public:
0265   explicit TransformIterator(Iterator<T> it, Transformer<T, V> transformer)
0266       : it_(std::move(it)),
0267         transformer_(std::move(transformer)),
0268         last_value_(),
0269         finished_() {}
0270 
0271   Result<V> Next() {
0272     while (!finished_) {
0273       ARROW_ASSIGN_OR_RAISE(std::optional<V> next, Pump());
0274       if (next.has_value()) {
0275         return std::move(*next);
0276       }
0277       ARROW_ASSIGN_OR_RAISE(last_value_, it_.Next());
0278     }
0279     return IterationTraits<V>::End();
0280   }
0281 
0282  private:
0283   // Calls the transform function on the current value.  Can return in several ways
0284   // * If the next value is requested (e.g. skip) it will return an empty optional
0285   // * If an invalid status is encountered that will be returned
0286   // * If finished it will return IterationTraits<V>::End()
0287   // * If a value is returned by the transformer that will be returned
0288   Result<std::optional<V>> Pump() {
0289     if (!finished_ && last_value_.has_value()) {
0290       auto next_res = transformer_(*last_value_);
0291       if (!next_res.ok()) {
0292         finished_ = true;
0293         return next_res.status();
0294       }
0295       auto next = std::move(*next_res);
0296       if (next.ReadyForNext()) {
0297         if (IsIterationEnd(*last_value_)) {
0298           finished_ = true;
0299         }
0300         last_value_.reset();
0301       }
0302       if (next.Finished()) {
0303         finished_ = true;
0304       }
0305       if (next.HasValue()) {
0306         return next.Value();
0307       }
0308     }
0309     if (finished_) {
0310       return IterationTraits<V>::End();
0311     }
0312     return std::nullopt;
0313   }
0314 
0315   Iterator<T> it_;
0316   Transformer<T, V> transformer_;
0317   std::optional<T> last_value_;
0318   bool finished_ = false;
0319 };
0320 
0321 /// \brief Transforms an iterator according to a transformer, returning a new Iterator.
0322 ///
0323 /// The transformer will be called on each element of the source iterator and for each
0324 /// call it can yield a value, skip, or finish the iteration.  When yielding a value the
0325 /// transformer can choose to consume the source item (the default, ready_for_next = true)
0326 /// or to keep it and it will be called again on the same value.
0327 ///
0328 /// This is essentially a more generic form of the map operation that can return 0, 1, or
0329 /// many values for each of the source items.
0330 ///
0331 /// The transformer will be exposed to the end of the source sequence
0332 /// (IterationTraits::End) in case it needs to return some penultimate item(s).
0333 ///
0334 /// Any invalid status returned by the transformer will be returned immediately.
0335 template <typename T, typename V>
0336 Iterator<V> MakeTransformedIterator(Iterator<T> it, Transformer<T, V> op) {
0337   return Iterator<V>(TransformIterator<T, V>(std::move(it), std::move(op)));
0338 }
0339 
0340 template <typename T>
0341 struct IterationTraits<Iterator<T>> {
0342   // The end condition for an Iterator of Iterators is a default constructed (null)
0343   // Iterator.
0344   static Iterator<T> End() { return Iterator<T>(); }
0345   static bool IsEnd(const Iterator<T>& val) { return !val; }
0346 };
0347 
0348 template <typename Fn, typename T>
0349 class FunctionIterator {
0350  public:
0351   explicit FunctionIterator(Fn fn) : fn_(std::move(fn)) {}
0352 
0353   Result<T> Next() { return fn_(); }
0354 
0355  private:
0356   Fn fn_;
0357 };
0358 
0359 /// \brief Construct an Iterator which invokes a callable on Next()
0360 template <typename Fn,
0361           typename Ret = typename internal::call_traits::return_type<Fn>::ValueType>
0362 Iterator<Ret> MakeFunctionIterator(Fn fn) {
0363   return Iterator<Ret>(FunctionIterator<Fn, Ret>(std::move(fn)));
0364 }
0365 
0366 template <typename T>
0367 Iterator<T> MakeEmptyIterator() {
0368   return MakeFunctionIterator([]() -> Result<T> { return IterationTraits<T>::End(); });
0369 }
0370 
0371 template <typename T>
0372 Iterator<T> MakeErrorIterator(Status s) {
0373   return MakeFunctionIterator([s]() -> Result<T> {
0374     ARROW_RETURN_NOT_OK(s);
0375     return IterationTraits<T>::End();
0376   });
0377 }
0378 
0379 /// \brief Simple iterator which yields the elements of a std::vector
0380 template <typename T>
0381 class VectorIterator {
0382  public:
0383   explicit VectorIterator(std::vector<T> v) : elements_(std::move(v)) {}
0384 
0385   Result<T> Next() {
0386     if (i_ == elements_.size()) {
0387       return IterationTraits<T>::End();
0388     }
0389     return std::move(elements_[i_++]);
0390   }
0391 
0392  private:
0393   std::vector<T> elements_;
0394   size_t i_ = 0;
0395 };
0396 
0397 template <typename T>
0398 Iterator<T> MakeVectorIterator(std::vector<T> v) {
0399   return Iterator<T>(VectorIterator<T>(std::move(v)));
0400 }
0401 
0402 /// \brief Simple iterator which yields *pointers* to the elements of a std::vector<T>.
0403 /// This is provided to support T where IterationTraits<T>::End is not specialized
0404 template <typename T>
0405 class VectorPointingIterator {
0406  public:
0407   explicit VectorPointingIterator(std::vector<T> v) : elements_(std::move(v)) {}
0408 
0409   Result<T*> Next() {
0410     if (i_ == elements_.size()) {
0411       return NULLPTR;
0412     }
0413     return &elements_[i_++];
0414   }
0415 
0416  private:
0417   std::vector<T> elements_;
0418   size_t i_ = 0;
0419 };
0420 
0421 template <typename T>
0422 Iterator<T*> MakeVectorPointingIterator(std::vector<T> v) {
0423   return Iterator<T*>(VectorPointingIterator<T>(std::move(v)));
0424 }
0425 
0426 /// \brief MapIterator takes ownership of an iterator and a function to apply
0427 /// on every element. The mapped function is not allowed to fail.
0428 template <typename Fn, typename I, typename O>
0429 class MapIterator {
0430  public:
0431   explicit MapIterator(Fn map, Iterator<I> it)
0432       : map_(std::move(map)), it_(std::move(it)) {}
0433 
0434   Result<O> Next() {
0435     ARROW_ASSIGN_OR_RAISE(I i, it_.Next());
0436 
0437     if (IsIterationEnd(i)) {
0438       return IterationTraits<O>::End();
0439     }
0440 
0441     return map_(std::move(i));
0442   }
0443 
0444  private:
0445   Fn map_;
0446   Iterator<I> it_;
0447 };
0448 
0449 /// \brief MapIterator takes ownership of an iterator and a function to apply
0450 /// on every element. The mapped function is not allowed to fail.
0451 template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
0452           typename To = internal::call_traits::return_type<Fn>>
0453 Iterator<To> MakeMapIterator(Fn map, Iterator<From> it) {
0454   return Iterator<To>(MapIterator<Fn, From, To>(std::move(map), std::move(it)));
0455 }
0456 
0457 /// \brief Like MapIterator, but where the function can fail.
0458 template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
0459           typename To = typename internal::call_traits::return_type<Fn>::ValueType>
0460 Iterator<To> MakeMaybeMapIterator(Fn map, Iterator<From> it) {
0461   return Iterator<To>(MapIterator<Fn, From, To>(std::move(map), std::move(it)));
0462 }
0463 
0464 struct FilterIterator {
0465   enum Action { ACCEPT, REJECT };
0466 
0467   template <typename To>
0468   static Result<std::pair<To, Action>> Reject() {
0469     return std::make_pair(IterationTraits<To>::End(), REJECT);
0470   }
0471 
0472   template <typename To>
0473   static Result<std::pair<To, Action>> Accept(To out) {
0474     return std::make_pair(std::move(out), ACCEPT);
0475   }
0476 
0477   template <typename To>
0478   static Result<std::pair<To, Action>> MaybeAccept(Result<To> maybe_out) {
0479     return std::move(maybe_out).Map(Accept<To>);
0480   }
0481 
0482   template <typename To>
0483   static Result<std::pair<To, Action>> Error(Status s) {
0484     return s;
0485   }
0486 
0487   template <typename Fn, typename From, typename To>
0488   class Impl {
0489    public:
0490     explicit Impl(Fn filter, Iterator<From> it) : filter_(filter), it_(std::move(it)) {}
0491 
0492     Result<To> Next() {
0493       To out = IterationTraits<To>::End();
0494       Action action;
0495 
0496       for (;;) {
0497         ARROW_ASSIGN_OR_RAISE(From i, it_.Next());
0498 
0499         if (IsIterationEnd(i)) {
0500           return IterationTraits<To>::End();
0501         }
0502 
0503         ARROW_ASSIGN_OR_RAISE(std::tie(out, action), filter_(std::move(i)));
0504 
0505         if (action == ACCEPT) return out;
0506       }
0507     }
0508 
0509    private:
0510     Fn filter_;
0511     Iterator<From> it_;
0512   };
0513 };
0514 
0515 /// \brief Like MapIterator, but where the function can fail or reject elements.
0516 template <
0517     typename Fn, typename From = typename internal::call_traits::argument_type<0, Fn>,
0518     typename Ret = typename internal::call_traits::return_type<Fn>::ValueType,
0519     typename To = typename std::tuple_element<0, Ret>::type,
0520     typename Enable = typename std::enable_if<std::is_same<
0521         typename std::tuple_element<1, Ret>::type, FilterIterator::Action>::value>::type>
0522 Iterator<To> MakeFilterIterator(Fn filter, Iterator<From> it) {
0523   return Iterator<To>(
0524       FilterIterator::Impl<Fn, From, To>(std::move(filter), std::move(it)));
0525 }
0526 
0527 /// \brief FlattenIterator takes an iterator generating iterators and yields a
0528 /// unified iterator that flattens/concatenates in a single stream.
0529 template <typename T>
0530 class FlattenIterator {
0531  public:
0532   explicit FlattenIterator(Iterator<Iterator<T>> it) : parent_(std::move(it)) {}
0533 
0534   Result<T> Next() {
0535     if (IsIterationEnd(child_)) {
0536       // Pop from parent's iterator.
0537       ARROW_ASSIGN_OR_RAISE(child_, parent_.Next());
0538 
0539       // Check if final iteration reached.
0540       if (IsIterationEnd(child_)) {
0541         return IterationTraits<T>::End();
0542       }
0543 
0544       return Next();
0545     }
0546 
0547     // Pop from child_ and check for depletion.
0548     ARROW_ASSIGN_OR_RAISE(T out, child_.Next());
0549     if (IsIterationEnd(out)) {
0550       // Reset state such that we pop from parent on the recursive call
0551       child_ = IterationTraits<Iterator<T>>::End();
0552 
0553       return Next();
0554     }
0555 
0556     return out;
0557   }
0558 
0559  private:
0560   Iterator<Iterator<T>> parent_;
0561   Iterator<T> child_ = IterationTraits<Iterator<T>>::End();
0562 };
0563 
0564 template <typename T>
0565 Iterator<T> MakeFlattenIterator(Iterator<Iterator<T>> it) {
0566   return Iterator<T>(FlattenIterator<T>(std::move(it)));
0567 }
0568 
0569 template <typename Reader>
0570 Iterator<typename Reader::ValueType> MakeIteratorFromReader(
0571     const std::shared_ptr<Reader>& reader) {
0572   return MakeFunctionIterator([reader] { return reader->Next(); });
0573 }
0574 
0575 }  // namespace arrow