Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/root/ROOT/RResultPtr.hxx was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 // Author: Enrico Guiraud, Danilo Piparo CERN  03/2017
0002 
0003 /*************************************************************************
0004  * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010 
0011 #ifndef ROOT_RRESULTPTR
0012 #define ROOT_RRESULTPTR
0013 
0014 #include "ROOT/RDF/RActionBase.hxx"
0015 #include "RtypesCore.h"
0016 #include "ROOT/RDF/RLoopManager.hxx"
0017 #include "ROOT/TypeTraits.hxx"
0018 #include "TError.h" // Warning
0019 
0020 #include <memory>
0021 #include <functional>
0022 #include <type_traits> // std::is_constructible
0023 
0024 namespace ROOT {
0025 namespace RDF {
0026 template <typename T>
0027 class RResultPtr;
0028 
0029 namespace Experimental {
0030 template <typename T>
0031 class RResultMap;
0032 
0033 template <typename T>
0034 RResultMap<T> VariationsFor(RResultPtr<T> resPtr);
0035 } // namespace Experimental
0036 
0037 template <typename Proxied, typename DataSource>
0038 class RInterface;
0039 } // namespace RDF
0040 
0041 namespace Internal {
0042 namespace RDF {
0043 class GraphCreatorHelper;
0044 /**
0045  * \brief Creates a new RResultPtr with a cloned action.
0046  *
0047  * \tparam T The type of the result held by the RResultPtr.
0048  * \param inptr The pointer.
0049  * \return A new pointer with a cloned action.
0050  */
0051 template <typename T>
0052 ROOT::RDF::RResultPtr<T> CloneResultAndAction(const ROOT::RDF::RResultPtr<T> &inptr)
0053 {
0054    // We call the copy constructor, to copy also the metadata of certain
0055    // result types, e.g. a for a TH1D we have to create a new histogram with
0056    // the same binning and axis limits.
0057    std::shared_ptr<T> copiedResult{new T{*inptr.fObjPtr}};
0058    return ROOT::RDF::RResultPtr<T>(copiedResult, inptr.fLoopManager,
0059                                    inptr.fActionPtr->CloneAction(reinterpret_cast<void *>(&copiedResult)));
0060 }
0061 
0062 using SnapshotPtr_t = ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager, void>>;
0063 /**
0064  * \brief Creates a new RResultPtr with a cloned Snapshot action.
0065  *
0066  * \param inptr The pointer.
0067  * \param outputFileName A new name for the output file of the cloned action.
0068  * \return A new pointer with a cloned action.
0069  *
0070  * This overload is needed since cloning a Snapshot node usually also involves
0071  * changing the name of the output file, otherwise the cloned Snapshot would
0072  * overwrite the same file.
0073  */
0074 SnapshotPtr_t CloneResultAndAction(const SnapshotPtr_t &inptr, const std::string &outputFileName);
0075 } // namespace RDF
0076 } // namespace Internal
0077 
0078 namespace Detail {
0079 namespace RDF {
0080 using ROOT::RDF::RResultPtr;
0081 // Fwd decl for RResultPtr
0082 template <typename T>
0083 RResultPtr<T> MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &df,
0084                             std::shared_ptr<ROOT::Internal::RDF::RActionBase> actionPtr);
0085 
0086 // Fwd decl for GetMergeableValue
0087 template <typename T>
0088 class RMergeableValue;
0089 
0090 template <typename T>
0091 std::unique_ptr<RMergeableValue<T>> GetMergeableValue(RResultPtr<T> &rptr);
0092 } // namespace RDF
0093 } // namespace Detail
0094 namespace RDF {
0095 namespace RDFInternal = ROOT::Internal::RDF;
0096 namespace RDFDetail = ROOT::Detail::RDF;
0097 namespace TTraits = ROOT::TypeTraits;
0098 
0099 /// Smart pointer for the return type of actions.
0100 /**
0101 \class ROOT::RDF::RResultPtr
0102 \ingroup dataframe
0103 \brief A wrapper around the result of RDataFrame actions able to trigger calculations lazily.
0104 \tparam T Type of the action result
0105 
0106 A wrapper around a shared_ptr which allows to access the result of RDataFrame actions.
0107 The underlying object can be accessed by dereferencing the RResultPtr:
0108 ~~~{.cpp}
0109 ROOT::RDF::RResultPtr<TH1D> histo = rdf.Histo1D(...);
0110 histo->Draw(); // Starts running the event loop
0111 ~~~
0112 Upon invocation of the arrow operator or dereferencing (`operator*`), the
0113 loop on the events and calculations of all scheduled actions are executed
0114 if needed.
0115 It is possible to iterate on the result proxy if the proxied object is a collection.
0116 ~~~{.cpp}
0117 for (auto& myItem : myResultProxy) { ... };
0118 ~~~
0119 If iteration is not supported by the type of the proxied object, a compilation error is thrown.
0120 
0121 When shared ownership to the result is desired, a copy of the underlying shared_ptr can be obtained:
0122 ~~~{.cpp}
0123 std::shared_ptr<TH1D> ProduceResult(const char *columnname) {
0124    auto ht = rdf.Histo1D(*h, columnname);
0125    return ht.GetSharedPtr();
0126 }
0127 ~~~
0128 Note that this will run the event loop. If this is not desired, the RResultPtr can be copied.
0129 */
0130 template <typename T>
0131 class RResultPtr {
0132    // friend declarations
0133    template <typename T1>
0134    friend class RResultPtr;
0135 
0136    template <typename T1>
0137    friend RResultPtr<T1> RDFDetail::MakeResultPtr(const std::shared_ptr<T1> &, ::ROOT::Detail::RDF::RLoopManager &,
0138                                                   std::shared_ptr<RDFInternal::RActionBase>);
0139 
0140    template <typename T1>
0141    friend ROOT::RDF::Experimental::RResultMap<T1> ROOT::RDF::Experimental::VariationsFor(RResultPtr<T1> resPtr);
0142 
0143    template <class T1, class T2>
0144    friend bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs);
0145    template <class T1, class T2>
0146    friend bool operator!=(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs);
0147    template <class T1>
0148    friend bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs);
0149    template <class T1>
0150    friend bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs);
0151    template <class T1>
0152    friend bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs);
0153    template <class T1>
0154    friend bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs);
0155    friend std::unique_ptr<RDFDetail::RMergeableValue<T>> RDFDetail::GetMergeableValue<T>(RResultPtr<T> &rptr);
0156 
0157    friend class ROOT::Internal::RDF::GraphDrawing::GraphCreatorHelper;
0158 
0159    friend class RResultHandle;
0160 
0161    friend RResultPtr<T> ROOT::Internal::RDF::CloneResultAndAction<T>(const RResultPtr<T> &inptr);
0162    friend ROOT::Internal::RDF::SnapshotPtr_t
0163    ROOT::Internal::RDF::CloneResultAndAction(const ROOT::Internal::RDF::SnapshotPtr_t &inptr,
0164                                              const std::string &outputFileName);
0165    /// \cond HIDDEN_SYMBOLS
0166    template <typename V, bool hasBeginEnd = TTraits::HasBeginAndEnd<V>::value>
0167    struct RIterationHelper {
0168       using Iterator_t = void;
0169       void GetBegin(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask begin for this class."); }
0170       void GetEnd(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask end for this class."); }
0171    };
0172 
0173    template <typename V>
0174    struct RIterationHelper<V, true> {
0175       using Iterator_t = decltype(std::begin(std::declval<V>()));
0176       static Iterator_t GetBegin(const V &v) { return std::begin(v); };
0177       static Iterator_t GetEnd(const V &v) { return std::end(v); };
0178    };
0179    /// \endcond
0180 
0181    /// Non-owning pointer to the RLoopManager at the root of this computation graph.
0182    /// The RLoopManager is guaranteed to be always in scope if fLoopManager is not a nullptr.
0183    RDFDetail::RLoopManager *fLoopManager = nullptr;
0184    std::shared_ptr<T> fObjPtr; ///< Shared pointer encapsulating the wrapped result
0185    /// Owning pointer to the action that will produce this result.
0186    /// Ownership is shared with other copies of this ResultPtr.
0187    std::shared_ptr<RDFInternal::RActionBase> fActionPtr;
0188 
0189    /// Triggers the event loop in the RLoopManager
0190    void TriggerRun();
0191 
0192    void ThrowIfNull()
0193    {
0194       if (fObjPtr == nullptr)
0195          throw std::runtime_error("Trying to access the contents of a null RResultPtr.");
0196    }
0197 
0198    RResultPtr(std::shared_ptr<T> objPtr, RDFDetail::RLoopManager *lm,
0199               std::shared_ptr<RDFInternal::RActionBase> actionPtr)
0200       : fLoopManager(lm), fObjPtr(std::move(objPtr)), fActionPtr(std::move(actionPtr))
0201    {
0202    }
0203 
0204 public:
0205    using Value_t = T;                       ///< Convenience alias to simplify access to proxied type
0206    static constexpr ULong64_t kOnce = 0ull; ///< Convenience definition to express a callback must be executed once
0207 
0208    RResultPtr() = default;
0209    RResultPtr(const RResultPtr &) = default;
0210    RResultPtr(RResultPtr &&) = default;
0211    RResultPtr &operator=(const RResultPtr &) = default;
0212    RResultPtr &operator=(RResultPtr &&) = default;
0213    explicit operator bool() const { return bool(fObjPtr); }
0214 
0215    /// Convert a RResultPtr<T2> to a RResultPtr<T>.
0216    ///
0217    /// Useful e.g. to store a number of RResultPtr<TH1D> and RResultPtr<TH2D> in a std::vector<RResultPtr<TH1>>.
0218    /// The requirements on T2 and T are the same as for conversion between std::shared_ptr<T2> and std::shared_ptr<T>.
0219    template <typename T2,
0220              std::enable_if_t<std::is_constructible<std::shared_ptr<T>, std::shared_ptr<T2>>::value, int> = 0>
0221    RResultPtr(const RResultPtr<T2> &r) : fLoopManager(r.fLoopManager), fObjPtr(r.fObjPtr), fActionPtr(r.fActionPtr)
0222    {
0223    }
0224 
0225    /// Produce the encapsulated result, and return a shared pointer to it.
0226    /// If RDataFrame hasn't produced the result yet, triggers the event loop and execution
0227    /// of all actions booked in the associated RLoopManager.
0228    /// \note To share a "lazy" handle to the result without running the event loop, copy the RResultPtr.
0229    std::shared_ptr<T> GetSharedPtr()
0230    {
0231       if (fActionPtr != nullptr && !fActionPtr->HasRun())
0232          TriggerRun();
0233       return fObjPtr;
0234    }
0235 
0236    /// Get a const reference to the encapsulated object.
0237    /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
0238    const T &GetValue()
0239    {
0240       ThrowIfNull();
0241       return *GetSharedPtr();
0242    }
0243 
0244    /// Get the pointer to the encapsulated object.
0245    /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
0246    /// \note Ownership is not transferred to the caller.
0247    T *GetPtr() { return GetSharedPtr().get(); }
0248 
0249    /// Get a reference to the encapsulated object.
0250    /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
0251    T &operator*()
0252    {
0253       ThrowIfNull();
0254       return *GetSharedPtr();
0255    }
0256 
0257    /// Get a pointer to the encapsulated object.
0258    /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
0259    /// \note Ownership is not transferred to the caller.
0260    T *operator->()
0261    {
0262       ThrowIfNull();
0263       return GetSharedPtr().get();
0264    }
0265 
0266    /// Return an iterator to the beginning of the contained object if this makes
0267    /// sense, throw a compilation error otherwise
0268    typename RIterationHelper<T>::Iterator_t begin()
0269    {
0270       ThrowIfNull();
0271       if (!fActionPtr->HasRun())
0272          TriggerRun();
0273       return RIterationHelper<T>::GetBegin(*fObjPtr);
0274    }
0275 
0276    /// Return an iterator to the end of the contained object if this makes
0277    /// sense, throw a compilation error otherwise
0278    typename RIterationHelper<T>::Iterator_t end()
0279    {
0280       ThrowIfNull();
0281       if (!fActionPtr->HasRun())
0282          TriggerRun();
0283       return RIterationHelper<T>::GetEnd(*fObjPtr);
0284    }
0285 
0286    // clang-format off
0287    /// Register a callback that RDataFrame will execute "everyNEvents" on a partial result.
0288    ///
0289    /// \param[in] everyNEvents Frequency at which the callback will be called, as a number of events processed
0290    /// \param[in] callback a callable with signature `void(Value_t&)` where Value_t is the type of the value contained in this RResultPtr
0291    /// \return this RResultPtr, to allow chaining of OnPartialResultSlot with other calls
0292    ///
0293    /// The callback must be a callable (lambda, function, functor class...) that takes a reference to the result type as
0294    /// argument and returns nothing. RDataFrame will invoke registered callbacks passing partial action results as
0295    /// arguments to them (e.g. a histogram filled with a part of the selected events, a counter incremented only up to a
0296    /// certain point, a mean over a subset of the events and so forth).
0297    ///
0298    /// Callbacks can be used e.g. to inspect partial results of the analysis while the event loop is running. For
0299    /// example one can draw an up-to-date version of a result histogram every 100 entries like this:
0300    /// \code{.cpp}
0301    /// auto h = tdf.Histo1D("x");
0302    /// TCanvas c("c","x hist");
0303    /// h.OnPartialResult(100, [&c](TH1D &h_) { c.cd(); h_.Draw(); c.Update(); });
0304    /// h->Draw(); // event loop runs here, this `Draw` is executed after the event loop is finished
0305    /// \endcode
0306    ///
0307    /// A value of 0 for everyNEvents indicates the callback must be executed only once, before running the event loop.
0308    /// A conveniece definition `kOnce` is provided to make this fact more expressive in user code (see snippet below).
0309    /// Multiple callbacks can be registered with the same RResultPtr (i.e. results of RDataFrame actions) and will
0310    /// be executed sequentially. Callbacks are executed in the order they were registered.
0311    /// The type of the value contained in a RResultPtr is also available as RResultPtr<T>::Value_t, e.g.
0312    /// \code{.cpp}
0313    /// auto h = tdf.Histo1D("x");
0314    /// // h.kOnce is 0
0315    /// // decltype(h)::Value_t is TH1D
0316    /// \endcode
0317    ///
0318    /// When implicit multi-threading is enabled, the callback:
0319    /// - will never be executed by multiple threads concurrently: it needs not be thread-safe. For example the snippet
0320    ///   above that draws the partial histogram on a canvas works seamlessly in multi-thread event loops.
0321    /// - will be executed by the first worker that arrives at the callback, and then only be executed when the same result
0322    ///   is updated. For example, if dataframe uses N internal copies of a result, it will always be the `i`th < N object
0323    ///   that is passed into the callback.
0324    /// - will always be executed "everyNEvents": the partial result passed into the callback will have accumulated N more
0325    ///   events, irrespective of the progress that other worker threads make.
0326    /// - might be executed by a different worker thread at different times: the value of `std::this_thread::get_id()`
0327    ///   might change between calls.
0328    ///
0329    /// To register a callback that is called by _each_ worker thread (concurrently) every N events one can use
0330    /// OnPartialResultSlot().
0331    // clang-format on
0332    RResultPtr<T> &OnPartialResult(ULong64_t everyNEvents, std::function<void(T &)> callback)
0333    {
0334       ThrowIfNull();
0335       auto actionPtr = fActionPtr;
0336       constexpr auto kUninit = std::numeric_limits<unsigned int>::max();
0337       auto activeSlot = std::make_shared<std::atomic_uint>(kUninit);
0338       auto c = [=](unsigned int slot) {
0339          if (activeSlot->load() == kUninit) {
0340             // Try to grab the right to run the callback for our slot:
0341             unsigned int expected = kUninit;
0342             activeSlot->compare_exchange_strong(expected, slot);
0343          }
0344          if (activeSlot->load() != slot)
0345             return;
0346 
0347          auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot));
0348          callback(*partialResult);
0349       };
0350       fLoopManager->RegisterCallback(everyNEvents, std::move(c));
0351       return *this;
0352    }
0353 
0354    // clang-format off
0355    /// Register a callback that RDataFrame will execute in each worker thread concurrently on that thread's partial result.
0356    ///
0357    /// \param[in] everyNEvents Frequency at which the callback will be called by each thread, as a number of events processed
0358    /// \param[in] callback A callable with signature `void(unsigned int, Value_t&)` where Value_t is the type of the value contained in this RResultPtr
0359    /// \return this RResultPtr, to allow chaining of OnPartialResultSlot with other calls
0360    ///
0361    /// See `OnPartialResult` for a generic explanation of the callback mechanism.
0362    /// Compared to `OnPartialResult`, this method has two major differences:
0363    /// - all worker threads invoke the callback once every specified number of events. The event count is per-thread,
0364    ///   and callback invocation might happen concurrently (i.e. the callback must be thread-safe)
0365    /// - the callable must take an extra `unsigned int` parameter corresponding to a multi-thread "processing slot":
0366    ///   this is a "helper value" to simplify writing thread-safe callbacks: different worker threads might invoke the
0367    ///   callback concurrently but always with different `slot` numbers.
0368    /// - a value of 0 for everyNEvents indicates the callback must be executed once _per slot_.
0369    ///
0370    /// For example, the following snippet prints out a thread-safe progress bar of the events processed by RDataFrame
0371    /// \code
0372    /// auto c = tdf.Count(); // any action would do, but `Count` is the most lightweight
0373    /// std::string progress;
0374    /// std::mutex bar_mutex;
0375    /// c.OnPartialResultSlot(nEvents / 100, [&progress, &bar_mutex](unsigned int, ULong64_t &) {
0376    ///    std::lock_guard<std::mutex> lg(bar_mutex);
0377    ///    progress.push_back('#');
0378    ///    std::cout << "\r[" << std::left << std::setw(100) << progress << ']' << std::flush;
0379    /// });
0380    /// std::cout << "Analysis running..." << std::endl;
0381    /// *c; // trigger the event loop by accessing an action's result
0382    /// std::cout << "\nDone!" << std::endl;
0383    /// \endcode
0384    // clang-format on
0385    RResultPtr<T> &OnPartialResultSlot(ULong64_t everyNEvents, std::function<void(unsigned int, T &)> callback)
0386    {
0387       ThrowIfNull();
0388       auto actionPtr = fActionPtr;
0389       auto c = [actionPtr, callback](unsigned int slot) {
0390          auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot));
0391          callback(slot, *partialResult);
0392       };
0393       fLoopManager->RegisterCallback(everyNEvents, std::move(c));
0394       return *this;
0395    }
0396 
0397    // clang-format off
0398    /// Check whether the result has already been computed
0399    ///
0400    /// ~~~{.cpp}
0401    /// auto res = df.Count();
0402    /// res.IsReady(); // false, access will trigger event loop
0403    /// std::cout << *res << std::endl; // triggers event loop
0404    /// res.IsReady(); // true
0405    /// ~~~
0406    // clang-format on
0407    bool IsReady() const
0408    {
0409       if (fActionPtr == nullptr)
0410          return false;
0411       return fActionPtr->HasRun();
0412    }
0413 };
0414 
0415 template <typename T>
0416 void RResultPtr<T>::TriggerRun()
0417 {
0418    fLoopManager->Run();
0419 }
0420 
0421 template <class T1, class T2>
0422 bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs)
0423 {
0424    return lhs.fObjPtr == rhs.fObjPtr;
0425 }
0426 
0427 template <class T1, class T2>
0428 bool operator!=(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs)
0429 {
0430    return lhs.fObjPtr != rhs.fObjPtr;
0431 }
0432 
0433 template <class T1>
0434 bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs)
0435 {
0436    return lhs.fObjPtr == rhs;
0437 }
0438 
0439 template <class T1>
0440 bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs)
0441 {
0442    return lhs == rhs.fObjPtr;
0443 }
0444 
0445 template <class T1>
0446 bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs)
0447 {
0448    return lhs.fObjPtr != rhs;
0449 }
0450 
0451 template <class T1>
0452 bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs)
0453 {
0454    return lhs != rhs.fObjPtr;
0455 }
0456 
0457 } // namespace RDF
0458 
0459 namespace Detail {
0460 namespace RDF {
0461 /// Create a RResultPtr and set its pointer to the corresponding RAction
0462 /// This overload is invoked by non-jitted actions, as they have access to RAction before constructing RResultPtr.
0463 template <typename T>
0464 RResultPtr<T>
0465 MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &lm, std::shared_ptr<RDFInternal::RActionBase> actionPtr)
0466 {
0467    return RResultPtr<T>(r, &lm, std::move(actionPtr));
0468 }
0469 
0470 ////////////////////////////////////////////////////////////////////////////////
0471 /// \brief Retrieve a mergeable value from an RDataFrame action.
0472 /// \param[in] rptr lvalue reference of an RResultPtr object.
0473 /// \returns An RMergeableValue holding the result of the action, wrapped in an
0474 ///          `std::unique_ptr`.
0475 ///
0476 /// This function triggers the execution of the RDataFrame computation graph.
0477 /// Then retrieves an RMergeableValue object created with the result wrapped by
0478 /// the RResultPtr argument. The user obtains ownership of the mergeable, which
0479 /// in turn holds a copy of the result of the action. The RResultPtr is not
0480 /// destroyed in the process and will still retain (shared) ownership of the
0481 /// original result.
0482 ///
0483 /// Example usage:
0484 /// ~~~{.cpp}
0485 /// using namespace ROOT::Detail::RDF;
0486 /// ROOT::RDataFrame d("myTree", "file_*.root");
0487 /// auto h = d.Histo1D("Branch_A");
0488 /// auto mergeablehisto = GetMergeableValue(h);
0489 /// ~~~
0490 template <typename T>
0491 std::unique_ptr<RMergeableValue<T>> GetMergeableValue(RResultPtr<T> &rptr)
0492 {
0493    rptr.ThrowIfNull();
0494    if (!rptr.fActionPtr->HasRun())
0495       rptr.TriggerRun(); // Prevents from using `const` specifier in parameter
0496    return std::unique_ptr<RMergeableValue<T>>{
0497       static_cast<RMergeableValue<T> *>(rptr.fActionPtr->GetMergeableValue().release())};
0498 }
0499 } // namespace RDF
0500 } // namespace Detail
0501 } // namespace ROOT
0502 
0503 #endif // ROOT_RRESULTPTR