|
|
|||
Warning, file /include/root/ROOT/RResultPtr.hxx was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017 0002 0003 /************************************************************************* 0004 * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers. * 0005 * All rights reserved. * 0006 * * 0007 * For the licensing terms see $ROOTSYS/LICENSE. * 0008 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0009 *************************************************************************/ 0010 0011 #ifndef ROOT_RRESULTPTR 0012 #define ROOT_RRESULTPTR 0013 0014 #include "ROOT/RDF/RActionBase.hxx" 0015 #include "RtypesCore.h" 0016 #include "ROOT/RDF/RLoopManager.hxx" 0017 #include "ROOT/TypeTraits.hxx" 0018 #include "TError.h" // Warning 0019 0020 #include <memory> 0021 #include <functional> 0022 #include <type_traits> // std::is_constructible 0023 0024 namespace ROOT { 0025 namespace RDF { 0026 template <typename T> 0027 class RResultPtr; 0028 0029 namespace Experimental { 0030 template <typename T> 0031 class RResultMap; 0032 0033 template <typename T> 0034 RResultMap<T> VariationsFor(RResultPtr<T> resPtr); 0035 } // namespace Experimental 0036 0037 template <typename Proxied, typename DataSource> 0038 class RInterface; 0039 } // namespace RDF 0040 0041 namespace Internal { 0042 namespace RDF { 0043 class GraphCreatorHelper; 0044 /** 0045 * \brief Creates a new RResultPtr with a cloned action. 0046 * 0047 * \tparam T The type of the result held by the RResultPtr. 0048 * \param inptr The pointer. 0049 * \return A new pointer with a cloned action. 0050 */ 0051 template <typename T> 0052 ROOT::RDF::RResultPtr<T> CloneResultAndAction(const ROOT::RDF::RResultPtr<T> &inptr) 0053 { 0054 // We call the copy constructor, to copy also the metadata of certain 0055 // result types, e.g. a for a TH1D we have to create a new histogram with 0056 // the same binning and axis limits. 0057 std::shared_ptr<T> copiedResult{new T{*inptr.fObjPtr}}; 0058 return ROOT::RDF::RResultPtr<T>(copiedResult, inptr.fLoopManager, 0059 inptr.fActionPtr->CloneAction(reinterpret_cast<void *>(&copiedResult))); 0060 } 0061 0062 using SnapshotPtr_t = ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager, void>>; 0063 /** 0064 * \brief Creates a new RResultPtr with a cloned Snapshot action. 0065 * 0066 * \param inptr The pointer. 0067 * \param outputFileName A new name for the output file of the cloned action. 0068 * \return A new pointer with a cloned action. 0069 * 0070 * This overload is needed since cloning a Snapshot node usually also involves 0071 * changing the name of the output file, otherwise the cloned Snapshot would 0072 * overwrite the same file. 0073 */ 0074 SnapshotPtr_t CloneResultAndAction(const SnapshotPtr_t &inptr, const std::string &outputFileName); 0075 } // namespace RDF 0076 } // namespace Internal 0077 0078 namespace Detail { 0079 namespace RDF { 0080 using ROOT::RDF::RResultPtr; 0081 // Fwd decl for RResultPtr 0082 template <typename T> 0083 RResultPtr<T> MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &df, 0084 std::shared_ptr<ROOT::Internal::RDF::RActionBase> actionPtr); 0085 0086 // Fwd decl for GetMergeableValue 0087 template <typename T> 0088 class RMergeableValue; 0089 0090 template <typename T> 0091 std::unique_ptr<RMergeableValue<T>> GetMergeableValue(RResultPtr<T> &rptr); 0092 } // namespace RDF 0093 } // namespace Detail 0094 namespace RDF { 0095 namespace RDFInternal = ROOT::Internal::RDF; 0096 namespace RDFDetail = ROOT::Detail::RDF; 0097 namespace TTraits = ROOT::TypeTraits; 0098 0099 /// Smart pointer for the return type of actions. 0100 /** 0101 \class ROOT::RDF::RResultPtr 0102 \ingroup dataframe 0103 \brief A wrapper around the result of RDataFrame actions able to trigger calculations lazily. 0104 \tparam T Type of the action result 0105 0106 A wrapper around a shared_ptr which allows to access the result of RDataFrame actions. 0107 The underlying object can be accessed by dereferencing the RResultPtr: 0108 ~~~{.cpp} 0109 ROOT::RDF::RResultPtr<TH1D> histo = rdf.Histo1D(...); 0110 histo->Draw(); // Starts running the event loop 0111 ~~~ 0112 Upon invocation of the arrow operator or dereferencing (`operator*`), the 0113 loop on the events and calculations of all scheduled actions are executed 0114 if needed. 0115 It is possible to iterate on the result proxy if the proxied object is a collection. 0116 ~~~{.cpp} 0117 for (auto& myItem : myResultProxy) { ... }; 0118 ~~~ 0119 If iteration is not supported by the type of the proxied object, a compilation error is thrown. 0120 0121 When shared ownership to the result is desired, a copy of the underlying shared_ptr can be obtained: 0122 ~~~{.cpp} 0123 std::shared_ptr<TH1D> ProduceResult(const char *columnname) { 0124 auto ht = rdf.Histo1D(*h, columname); 0125 return ht.GetSharedPtr(); 0126 } 0127 ~~~ 0128 Note that this will run the event loop. If this is not desired, the RResultPtr can be copied. 0129 */ 0130 template <typename T> 0131 class RResultPtr { 0132 // friend declarations 0133 template <typename T1> 0134 friend class RResultPtr; 0135 0136 template <typename T1> 0137 friend RResultPtr<T1> RDFDetail::MakeResultPtr(const std::shared_ptr<T1> &, ::ROOT::Detail::RDF::RLoopManager &, 0138 std::shared_ptr<RDFInternal::RActionBase>); 0139 0140 template <typename T1> 0141 friend ROOT::RDF::Experimental::RResultMap<T1> ROOT::RDF::Experimental::VariationsFor(RResultPtr<T1> resPtr); 0142 0143 template <class T1, class T2> 0144 friend bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs); 0145 template <class T1, class T2> 0146 friend bool operator!=(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs); 0147 template <class T1> 0148 friend bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs); 0149 template <class T1> 0150 friend bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs); 0151 template <class T1> 0152 friend bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs); 0153 template <class T1> 0154 friend bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs); 0155 friend std::unique_ptr<RDFDetail::RMergeableValue<T>> RDFDetail::GetMergeableValue<T>(RResultPtr<T> &rptr); 0156 0157 friend class ROOT::Internal::RDF::GraphDrawing::GraphCreatorHelper; 0158 0159 friend class RResultHandle; 0160 0161 friend RResultPtr<T> ROOT::Internal::RDF::CloneResultAndAction<T>(const RResultPtr<T> &inptr); 0162 friend ROOT::Internal::RDF::SnapshotPtr_t 0163 ROOT::Internal::RDF::CloneResultAndAction(const ROOT::Internal::RDF::SnapshotPtr_t &inptr, 0164 const std::string &outputFileName); 0165 /// \cond HIDDEN_SYMBOLS 0166 template <typename V, bool hasBeginEnd = TTraits::HasBeginAndEnd<V>::value> 0167 struct RIterationHelper { 0168 using Iterator_t = void; 0169 void GetBegin(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask begin for this class."); } 0170 void GetEnd(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask end for this class."); } 0171 }; 0172 0173 template <typename V> 0174 struct RIterationHelper<V, true> { 0175 using Iterator_t = decltype(std::begin(std::declval<V>())); 0176 static Iterator_t GetBegin(const V &v) { return std::begin(v); }; 0177 static Iterator_t GetEnd(const V &v) { return std::end(v); }; 0178 }; 0179 /// \endcond 0180 0181 /// Non-owning pointer to the RLoopManager at the root of this computation graph. 0182 /// The RLoopManager is guaranteed to be always in scope if fLoopManager is not a nullptr. 0183 RDFDetail::RLoopManager *fLoopManager = nullptr; 0184 std::shared_ptr<T> fObjPtr; ///< Shared pointer encapsulating the wrapped result 0185 /// Owning pointer to the action that will produce this result. 0186 /// Ownership is shared with other copies of this ResultPtr. 0187 std::shared_ptr<RDFInternal::RActionBase> fActionPtr; 0188 0189 /// Triggers the event loop in the RLoopManager 0190 void TriggerRun(); 0191 0192 void ThrowIfNull() 0193 { 0194 if (fObjPtr == nullptr) 0195 throw std::runtime_error("Trying to access the contents of a null RResultPtr."); 0196 } 0197 0198 RResultPtr(std::shared_ptr<T> objPtr, RDFDetail::RLoopManager *lm, 0199 std::shared_ptr<RDFInternal::RActionBase> actionPtr) 0200 : fLoopManager(lm), fObjPtr(std::move(objPtr)), fActionPtr(std::move(actionPtr)) 0201 { 0202 } 0203 0204 public: 0205 using Value_t = T; ///< Convenience alias to simplify access to proxied type 0206 static constexpr ULong64_t kOnce = 0ull; ///< Convenience definition to express a callback must be executed once 0207 0208 RResultPtr() = default; 0209 RResultPtr(const RResultPtr &) = default; 0210 RResultPtr(RResultPtr &&) = default; 0211 RResultPtr &operator=(const RResultPtr &) = default; 0212 RResultPtr &operator=(RResultPtr &&) = default; 0213 explicit operator bool() const { return bool(fObjPtr); } 0214 0215 /// Convert a RResultPtr<T2> to a RResultPtr<T>. 0216 /// 0217 /// Useful e.g. to store a number of RResultPtr<TH1D> and RResultPtr<TH2D> in a std::vector<RResultPtr<TH1>>. 0218 /// The requirements on T2 and T are the same as for conversion between std::shared_ptr<T2> and std::shared_ptr<T>. 0219 template <typename T2, 0220 std::enable_if_t<std::is_constructible<std::shared_ptr<T>, std::shared_ptr<T2>>::value, int> = 0> 0221 RResultPtr(const RResultPtr<T2> &r) : fLoopManager(r.fLoopManager), fObjPtr(r.fObjPtr), fActionPtr(r.fActionPtr) 0222 { 0223 } 0224 0225 /// Produce the encapsulated result, and return a shared pointer to it. 0226 /// If RDataFrame hasn't produced the result yet, triggers the event loop and execution 0227 /// of all actions booked in the associated RLoopManager. 0228 /// \note To share a "lazy" handle to the result without running the event loop, copy the RResultPtr. 0229 std::shared_ptr<T> GetSharedPtr() 0230 { 0231 if (fActionPtr != nullptr && !fActionPtr->HasRun()) 0232 TriggerRun(); 0233 return fObjPtr; 0234 } 0235 0236 /// Get a const reference to the encapsulated object. 0237 /// Triggers event loop and execution of all actions booked in the associated RLoopManager. 0238 const T &GetValue() 0239 { 0240 ThrowIfNull(); 0241 return *GetSharedPtr(); 0242 } 0243 0244 /// Get the pointer to the encapsulated object. 0245 /// Triggers event loop and execution of all actions booked in the associated RLoopManager. 0246 /// \note Ownership is not transferred to the caller. 0247 T *GetPtr() { return GetSharedPtr().get(); } 0248 0249 /// Get a reference to the encapsulated object. 0250 /// Triggers event loop and execution of all actions booked in the associated RLoopManager. 0251 T &operator*() 0252 { 0253 ThrowIfNull(); 0254 return *GetSharedPtr(); 0255 } 0256 0257 /// Get a pointer to the encapsulated object. 0258 /// Triggers event loop and execution of all actions booked in the associated RLoopManager. 0259 /// \note Ownership is not transferred to the caller. 0260 T *operator->() 0261 { 0262 ThrowIfNull(); 0263 return GetSharedPtr().get(); 0264 } 0265 0266 /// Return an iterator to the beginning of the contained object if this makes 0267 /// sense, throw a compilation error otherwise 0268 typename RIterationHelper<T>::Iterator_t begin() 0269 { 0270 ThrowIfNull(); 0271 if (!fActionPtr->HasRun()) 0272 TriggerRun(); 0273 return RIterationHelper<T>::GetBegin(*fObjPtr); 0274 } 0275 0276 /// Return an iterator to the end of the contained object if this makes 0277 /// sense, throw a compilation error otherwise 0278 typename RIterationHelper<T>::Iterator_t end() 0279 { 0280 ThrowIfNull(); 0281 if (!fActionPtr->HasRun()) 0282 TriggerRun(); 0283 return RIterationHelper<T>::GetEnd(*fObjPtr); 0284 } 0285 0286 // clang-format off 0287 /// Register a callback that RDataFrame will execute "everyNEvents" on a partial result. 0288 /// 0289 /// \param[in] everyNEvents Frequency at which the callback will be called, as a number of events processed 0290 /// \param[in] callback a callable with signature `void(Value_t&)` where Value_t is the type of the value contained in this RResultPtr 0291 /// \return this RResultPtr, to allow chaining of OnPartialResultSlot with other calls 0292 /// 0293 /// The callback must be a callable (lambda, function, functor class...) that takes a reference to the result type as 0294 /// argument and returns nothing. RDataFrame will invoke registered callbacks passing partial action results as 0295 /// arguments to them (e.g. a histogram filled with a part of the selected events, a counter incremented only up to a 0296 /// certain point, a mean over a subset of the events and so forth). 0297 /// 0298 /// Callbacks can be used e.g. to inspect partial results of the analysis while the event loop is running. For 0299 /// example one can draw an up-to-date version of a result histogram every 100 entries like this: 0300 /// \code{.cpp} 0301 /// auto h = tdf.Histo1D("x"); 0302 /// TCanvas c("c","x hist"); 0303 /// h.OnPartialResult(100, [&c](TH1D &h_) { c.cd(); h_.Draw(); c.Update(); }); 0304 /// h->Draw(); // event loop runs here, this `Draw` is executed after the event loop is finished 0305 /// \endcode 0306 /// 0307 /// A value of 0 for everyNEvents indicates the callback must be executed only once, before running the event loop. 0308 /// A conveniece definition `kOnce` is provided to make this fact more expressive in user code (see snippet below). 0309 /// Multiple callbacks can be registered with the same RResultPtr (i.e. results of RDataFrame actions) and will 0310 /// be executed sequentially. Callbacks are executed in the order they were registered. 0311 /// The type of the value contained in a RResultPtr is also available as RResultPtr<T>::Value_t, e.g. 0312 /// \code{.cpp} 0313 /// auto h = tdf.Histo1D("x"); 0314 /// // h.kOnce is 0 0315 /// // decltype(h)::Value_t is TH1D 0316 /// \endcode 0317 /// 0318 /// When implicit multi-threading is enabled, the callback: 0319 /// - will never be executed by multiple threads concurrently: it needs not be thread-safe. For example the snippet 0320 /// above that draws the partial histogram on a canvas works seamlessly in multi-thread event loops. 0321 /// - will be executed by the first worker that arrives at the callback, and then only be executed when the same result 0322 /// is updated. For example, if dataframe uses N internal copies of a result, it will always be the `i`th < N object 0323 /// that is passed into the callback. 0324 /// - will always be executed "everyNEvents": the partial result passed into the callback will have accumulated N more 0325 /// events, irrespective of the progress that other worker threads make. 0326 /// - might be executed by a different worker thread at different times: the value of `std::this_thread::get_id()` 0327 /// might change between calls. 0328 /// 0329 /// To register a callback that is called by _each_ worker thread (concurrently) every N events one can use 0330 /// OnPartialResultSlot(). 0331 // clang-format on 0332 RResultPtr<T> &OnPartialResult(ULong64_t everyNEvents, std::function<void(T &)> callback) 0333 { 0334 ThrowIfNull(); 0335 auto actionPtr = fActionPtr; 0336 constexpr auto kUninit = std::numeric_limits<unsigned int>::max(); 0337 auto activeSlot = std::make_shared<std::atomic_uint>(kUninit); 0338 auto c = [=](unsigned int slot) { 0339 if (activeSlot->load() == kUninit) { 0340 // Try to grab the right to run the callback for our slot: 0341 unsigned int expected = kUninit; 0342 activeSlot->compare_exchange_strong(expected, slot); 0343 } 0344 if (activeSlot->load() != slot) 0345 return; 0346 0347 auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot)); 0348 callback(*partialResult); 0349 }; 0350 fLoopManager->RegisterCallback(everyNEvents, std::move(c)); 0351 return *this; 0352 } 0353 0354 // clang-format off 0355 /// Register a callback that RDataFrame will execute in each worker thread concurrently on that thread's partial result. 0356 /// 0357 /// \param[in] everyNEvents Frequency at which the callback will be called by each thread, as a number of events processed 0358 /// \param[in] callback A callable with signature `void(unsigned int, Value_t&)` where Value_t is the type of the value contained in this RResultPtr 0359 /// \return this RResultPtr, to allow chaining of OnPartialResultSlot with other calls 0360 /// 0361 /// See `OnPartialResult` for a generic explanation of the callback mechanism. 0362 /// Compared to `OnPartialResult`, this method has two major differences: 0363 /// - all worker threads invoke the callback once every specified number of events. The event count is per-thread, 0364 /// and callback invocation might happen concurrently (i.e. the callback must be thread-safe) 0365 /// - the callable must take an extra `unsigned int` parameter corresponding to a multi-thread "processing slot": 0366 /// this is a "helper value" to simplify writing thread-safe callbacks: different worker threads might invoke the 0367 /// callback concurrently but always with different `slot` numbers. 0368 /// - a value of 0 for everyNEvents indicates the callback must be executed once _per slot_. 0369 /// 0370 /// For example, the following snippet prints out a thread-safe progress bar of the events processed by RDataFrame 0371 /// \code 0372 /// auto c = tdf.Count(); // any action would do, but `Count` is the most lightweight 0373 /// std::string progress; 0374 /// std::mutex bar_mutex; 0375 /// c.OnPartialResultSlot(nEvents / 100, [&progress, &bar_mutex](unsigned int, ULong64_t &) { 0376 /// std::lock_guard<std::mutex> lg(bar_mutex); 0377 /// progress.push_back('#'); 0378 /// std::cout << "\r[" << std::left << std::setw(100) << progress << ']' << std::flush; 0379 /// }); 0380 /// std::cout << "Analysis running..." << std::endl; 0381 /// *c; // trigger the event loop by accessing an action's result 0382 /// std::cout << "\nDone!" << std::endl; 0383 /// \endcode 0384 // clang-format on 0385 RResultPtr<T> &OnPartialResultSlot(ULong64_t everyNEvents, std::function<void(unsigned int, T &)> callback) 0386 { 0387 ThrowIfNull(); 0388 auto actionPtr = fActionPtr; 0389 auto c = [actionPtr, callback](unsigned int slot) { 0390 auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot)); 0391 callback(slot, *partialResult); 0392 }; 0393 fLoopManager->RegisterCallback(everyNEvents, std::move(c)); 0394 return *this; 0395 } 0396 0397 // clang-format off 0398 /// Check whether the result has already been computed 0399 /// 0400 /// ~~~{.cpp} 0401 /// auto res = df.Count(); 0402 /// res.IsReady(); // false, access will trigger event loop 0403 /// std::cout << *res << std::endl; // triggers event loop 0404 /// res.IsReady(); // true 0405 /// ~~~ 0406 // clang-format on 0407 bool IsReady() const 0408 { 0409 if (fActionPtr == nullptr) 0410 return false; 0411 return fActionPtr->HasRun(); 0412 } 0413 }; 0414 0415 template <typename T> 0416 void RResultPtr<T>::TriggerRun() 0417 { 0418 fLoopManager->Run(); 0419 } 0420 0421 template <class T1, class T2> 0422 bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs) 0423 { 0424 return lhs.fObjPtr == rhs.fObjPtr; 0425 } 0426 0427 template <class T1, class T2> 0428 bool operator!=(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs) 0429 { 0430 return lhs.fObjPtr != rhs.fObjPtr; 0431 } 0432 0433 template <class T1> 0434 bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs) 0435 { 0436 return lhs.fObjPtr == rhs; 0437 } 0438 0439 template <class T1> 0440 bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs) 0441 { 0442 return lhs == rhs.fObjPtr; 0443 } 0444 0445 template <class T1> 0446 bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs) 0447 { 0448 return lhs.fObjPtr != rhs; 0449 } 0450 0451 template <class T1> 0452 bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs) 0453 { 0454 return lhs != rhs.fObjPtr; 0455 } 0456 0457 } // namespace RDF 0458 0459 namespace Detail { 0460 namespace RDF { 0461 /// Create a RResultPtr and set its pointer to the corresponding RAction 0462 /// This overload is invoked by non-jitted actions, as they have access to RAction before constructing RResultPtr. 0463 template <typename T> 0464 RResultPtr<T> 0465 MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &lm, std::shared_ptr<RDFInternal::RActionBase> actionPtr) 0466 { 0467 return RResultPtr<T>(r, &lm, std::move(actionPtr)); 0468 } 0469 0470 //////////////////////////////////////////////////////////////////////////////// 0471 /// \brief Retrieve a mergeable value from an RDataFrame action. 0472 /// \param[in] rptr lvalue reference of an RResultPtr object. 0473 /// \returns An RMergeableValue holding the result of the action, wrapped in an 0474 /// `std::unique_ptr`. 0475 /// 0476 /// This function triggers the execution of the RDataFrame computation graph. 0477 /// Then retrieves an RMergeableValue object created with the result wrapped by 0478 /// the RResultPtr argument. The user obtains ownership of the mergeable, which 0479 /// in turn holds a copy of the result of the action. The RResultPtr is not 0480 /// destroyed in the process and will still retain (shared) ownership of the 0481 /// original result. 0482 /// 0483 /// Example usage: 0484 /// ~~~{.cpp} 0485 /// using namespace ROOT::Detail::RDF; 0486 /// ROOT::RDataFrame d("myTree", "file_*.root"); 0487 /// auto h = d.Histo1D("Branch_A"); 0488 /// auto mergeablehisto = GetMergeableValue(h); 0489 /// ~~~ 0490 template <typename T> 0491 std::unique_ptr<RMergeableValue<T>> GetMergeableValue(RResultPtr<T> &rptr) 0492 { 0493 rptr.ThrowIfNull(); 0494 if (!rptr.fActionPtr->HasRun()) 0495 rptr.TriggerRun(); // Prevents from using `const` specifier in parameter 0496 return std::unique_ptr<RMergeableValue<T>>{ 0497 static_cast<RMergeableValue<T> *>(rptr.fActionPtr->GetMergeableValue().release())}; 0498 } 0499 } // namespace RDF 0500 } // namespace Detail 0501 } // namespace ROOT 0502 0503 #endif // ROOT_TRESULTPROXY
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|