|
||||
File indexing completed on 2025-01-18 10:10:34
0001 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017 0002 0003 /************************************************************************* 0004 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. * 0005 * All rights reserved. * 0006 * * 0007 * For the licensing terms see $ROOTSYS/LICENSE. * 0008 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0009 *************************************************************************/ 0010 0011 #ifndef ROOT_RDF_TINTERFACE 0012 #define ROOT_RDF_TINTERFACE 0013 0014 #include "ROOT/RDataSource.hxx" 0015 #include "ROOT/RDF/ActionHelpers.hxx" 0016 #include "ROOT/RDF/HistoModels.hxx" 0017 #include "ROOT/RDF/InterfaceUtils.hxx" 0018 #include "ROOT/RDF/RColumnRegister.hxx" 0019 #include "ROOT/RDF/RDefine.hxx" 0020 #include "ROOT/RDF/RDefinePerSample.hxx" 0021 #include "ROOT/RDF/RFilter.hxx" 0022 #include "ROOT/RDF/RInterfaceBase.hxx" 0023 #include "ROOT/RDF/RVariation.hxx" 0024 #include "ROOT/RDF/RLazyDSImpl.hxx" 0025 #include "ROOT/RDF/RLoopManager.hxx" 0026 #include "ROOT/RDF/RRange.hxx" 0027 #include "ROOT/RDF/Utils.hxx" 0028 #include "ROOT/RDF/RDFDescription.hxx" 0029 #include "ROOT/RDF/RVariationsDescription.hxx" 0030 #include "ROOT/RResultPtr.hxx" 0031 #include "ROOT/RSnapshotOptions.hxx" 0032 #include <string_view> 0033 #include "ROOT/RVec.hxx" 0034 #include "ROOT/TypeTraits.hxx" 0035 #include "RtypesCore.h" // for ULong64_t 0036 #include "TDirectory.h" 0037 #include "TH1.h" // For Histo actions 0038 #include "TH2.h" // For Histo actions 0039 #include "TH3.h" // For Histo actions 0040 #include "THn.h" 0041 #include "TProfile.h" 0042 #include "TProfile2D.h" 0043 #include "TStatistic.h" 0044 0045 #include <algorithm> 0046 #include <cstddef> 0047 #include <initializer_list> 0048 #include <iterator> // std::back_inserter 0049 #include <limits> 0050 #include <memory> 0051 #include <set> 0052 #include <sstream> 0053 #include <stdexcept> 0054 #include <string> 0055 #include <type_traits> // is_same, enable_if 
0056 #include <typeinfo> 0057 #include <unordered_set> 0058 #include <utility> // std::index_sequence 0059 #include <vector> 0060 0061 class TGraph; 0062 0063 // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface 0064 namespace ROOT { 0065 void DisableImplicitMT(); 0066 bool IsImplicitMTEnabled(); 0067 void EnableImplicitMT(UInt_t numthreads); 0068 class RDataFrame; 0069 } // namespace ROOT 0070 namespace cling { 0071 std::string printValue(ROOT::RDataFrame *tdf); 0072 } 0073 0074 namespace ROOT { 0075 namespace RDF { 0076 namespace RDFDetail = ROOT::Detail::RDF; 0077 namespace RDFInternal = ROOT::Internal::RDF; 0078 namespace TTraits = ROOT::TypeTraits; 0079 0080 template <typename Proxied, typename DataSource> 0081 class RInterface; 0082 0083 using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>; 0084 } // namespace RDF 0085 0086 namespace Internal { 0087 namespace RDF { 0088 class GraphCreatorHelper; 0089 void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange); 0090 void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec); 0091 void TriggerRun(ROOT::RDF::RNode node); 0092 } // namespace RDF 0093 } // namespace Internal 0094 0095 namespace RDF { 0096 0097 // clang-format off 0098 /** 0099 * \class ROOT::RDF::RInterface 0100 * \ingroup dataframe 0101 * \brief The public interface to the RDataFrame federation of classes. 0102 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually. 0103 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default. 0104 * 0105 * The documentation of each method features a one liner illustrating how to use the method, for example showing how 0106 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user. 
0107 */ 0108 // clang-format on 0109 template <typename Proxied, typename DataSource = void> 0110 class RInterface : public RInterfaceBase { 0111 using DS_t = DataSource; 0112 using RFilterBase = RDFDetail::RFilterBase; 0113 using RRangeBase = RDFDetail::RRangeBase; 0114 using RLoopManager = RDFDetail::RLoopManager; 0115 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt 0116 friend class RDFInternal::GraphDrawing::GraphCreatorHelper; 0117 0118 template <typename T, typename W> 0119 friend class RInterface; 0120 0121 friend void RDFInternal::TriggerRun(RNode node); 0122 friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange); 0123 friend void RDFInternal::ChangeSpec(const RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec); 0124 0125 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface. 0126 0127 public: 0128 //////////////////////////////////////////////////////////////////////////// 0129 /// \brief Copy-assignment operator for RInterface. 0130 RInterface &operator=(const RInterface &) = default; 0131 0132 //////////////////////////////////////////////////////////////////////////// 0133 /// \brief Copy-ctor for RInterface. 0134 RInterface(const RInterface &) = default; 0135 0136 //////////////////////////////////////////////////////////////////////////// 0137 /// \brief Move-ctor for RInterface. 0138 RInterface(RInterface &&) = default; 0139 0140 //////////////////////////////////////////////////////////////////////////// 0141 /// \brief Move-assignment operator for RInterface. 0142 RInterface &operator=(RInterface &&) = default; 0143 0144 //////////////////////////////////////////////////////////////////////////// 0145 /// \brief Build a RInterface from a RLoopManager. 0146 /// This constructor is only available for RInterface<RLoopManager>. 
0147 template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>> 0148 RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied) 0149 { 0150 } 0151 0152 //////////////////////////////////////////////////////////////////////////// 0153 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode. 0154 /// Different RDataFrame methods return different C++ types. All nodes, however, 0155 /// can be cast to this common type at the cost of a small performance penalty. 0156 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them 0157 /// around via (non-template, C++11) helper functions. 0158 /// Example usage: 0159 /// ~~~{.cpp} 0160 /// // a function that conditionally adds a Range to a RDataFrame node. 0161 /// RNode MaybeAddRange(RNode df, bool mustAddRange) 0162 /// { 0163 /// return mustAddRange ? df.Range(1) : df; 0164 /// } 0165 /// // use as : 0166 /// ROOT::RDataFrame df(10); 0167 /// auto maybeRanged = MaybeAddRange(df, true); 0168 /// ~~~ 0169 /// Note that it is not a problem to pass RNode's by value. 0170 operator RNode() const 0171 { 0172 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister); 0173 } 0174 0175 //////////////////////////////////////////////////////////////////////////// 0176 /// \brief Append a filter to the call graph. 0177 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool` 0178 /// signalling whether the event has passed the selection (true) or not (false). 0179 /// \param[in] columns Names of the columns/branches in input to the filter function. 0180 /// \param[in] name Optional name of this filter. See `Report`. 0181 /// \return the filter node of the computation graph. 
0182 /// 0183 /// Append a filter node at the point of the call graph corresponding to the 0184 /// object this method is called on. 0185 /// The callable `f` should not have side-effects (e.g. modification of an 0186 /// external or static variable) to ensure correct results when implicit 0187 /// multi-threading is active. 0188 /// 0189 /// RDataFrame only evaluates filters when necessary: if multiple filters 0190 /// are chained one after another, they are executed in order and the first 0191 /// one returning false causes the event to be discarded. 0192 /// Even if multiple actions or transformations depend on the same filter, 0193 /// it is executed once per entry. If its result is requested more than 0194 /// once, the cached result is served. 0195 /// 0196 /// ### Example usage: 0197 /// ~~~{.cpp} 0198 /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y" 0199 /// auto filtered = df.Filter(myCut, {"x", "y"}); 0200 /// 0201 /// // String: it must contain valid C++ except that column names can be used instead of variable names 0202 /// auto filtered = df.Filter("x*y > 0"); 0203 /// ~~~ 0204 /// 0205 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested 0206 /// scope), RDataFrame _will not_ add another one in front of the expression. 
So this will not work: 0207 /// ~~~{.cpp} 0208 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))") 0209 /// ~~~ 0210 /// but instead this will: 0211 /// ~~~{.cpp} 0212 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))") 0213 /// ~~~ 0214 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> 0215 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> 0216 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "") 0217 { 0218 RDFInternal::CheckFilter(f); 0219 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types; 0220 constexpr auto nColumns = ColTypes_t::list_size; 0221 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); 0222 CheckAndFillDSColumns(validColumnNames, ColTypes_t()); 0223 0224 using F_t = RDFDetail::RFilter<F, Proxied>; 0225 0226 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name); 0227 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister); 0228 } 0229 0230 //////////////////////////////////////////////////////////////////////////// 0231 /// \brief Append a filter to the call graph. 0232 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool` 0233 /// signalling whether the event has passed the selection (true) or not (false). 0234 /// \param[in] name Optional name of this filter. See `Report`. 0235 /// \return the filter node of the computation graph. 0236 /// 0237 /// Refer to the first overload of this method for the full documentation. 0238 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> 0239 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, std::string_view name) 0240 { 0241 // The sfinae is there in order to pick up the overloaded method which accepts two strings 0242 // rather than this template method. 
0243 return Filter(f, {}, name); 0244 } 0245 0246 //////////////////////////////////////////////////////////////////////////// 0247 /// \brief Append a filter to the call graph. 0248 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool` 0249 /// signalling whether the event has passed the selection (true) or not (false). 0250 /// \param[in] columns Names of the columns/branches in input to the filter function. 0251 /// \return the filter node of the computation graph. 0252 /// 0253 /// Refer to the first overload of this method for the full documentation. 0254 template <typename F> 0255 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns) 0256 { 0257 return Filter(f, ColumnNames_t{columns}); 0258 } 0259 0260 //////////////////////////////////////////////////////////////////////////// 0261 /// \brief Append a filter to the call graph. 0262 /// \param[in] expression The filter expression in C++ 0263 /// \param[in] name Optional name of this filter. See `Report`. 0264 /// \return the filter node of the computation graph. 0265 /// 0266 /// The expression is just-in-time compiled and used to filter entries. It must 0267 /// be valid C++ syntax in which variable names are substituted with the names 0268 /// of branches/columns. 0269 /// 0270 /// ### Example usage: 0271 /// ~~~{.cpp} 0272 /// auto filtered_df = df.Filter("myCollection.size() > 3"); 0273 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minumum collection size"); 0274 /// ~~~ 0275 /// 0276 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested 0277 /// scope), RDataFrame _will not_ add another one in front of the expression. 
So this will not work: 0278 /// ~~~{.cpp} 0279 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))") 0280 /// ~~~ 0281 /// but instead this will: 0282 /// ~~~{.cpp} 0283 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))") 0284 /// ~~~ 0285 RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "") 0286 { 0287 // deleted by the jitted call to JitFilterHelper 0288 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 0289 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type; 0290 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister); 0291 const auto jittedFilter = 0292 RDFInternal::BookFilterJit(upcastNodeOnHeap, name, expression, fLoopManager->GetBranchNames(), fColRegister, 0293 fLoopManager->GetTree(), fDataSource); 0294 0295 return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), *fLoopManager, fColRegister); 0296 } 0297 0298 // clang-format off 0299 //////////////////////////////////////////////////////////////////////////// 0300 /// \brief Define a new column. 0301 /// \param[in] name The name of the defined column. 0302 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0303 /// \param[in] columns Names of the columns/branches in input to the producer function. 0304 /// \return the first node of the computation graph for which the new quantity is defined. 0305 /// 0306 /// Define a column that will be visible from all subsequent nodes 0307 /// of the functional chain. The `expression` is only evaluated for entries that pass 0308 /// all the preceding filters. 0309 /// A new variable is created called `name`, accessible as if it was contained 0310 /// in the dataset from subsequent transformations/actions. 
0311 /// 0312 /// Use cases include: 0313 /// * caching the results of complex calculations for easy and efficient multiple access 0314 /// * extraction of quantities of interest from complex objects 0315 /// 0316 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph. 0317 /// 0318 /// ### Example usage: 0319 /// ~~~{.cpp} 0320 /// // assuming a function with signature: 0321 /// double myComplexCalculation(const RVec<float> &muon_pts); 0322 /// // we can pass it directly to Define 0323 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"}); 0324 /// // alternatively, we can pass the body of the function as a string, as in Filter: 0325 /// auto df_with_define = df.Define("newColumn", "x*x + y*y"); 0326 /// ~~~ 0327 /// 0328 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested 0329 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work: 0330 /// ~~~{.cpp} 0331 /// df.Define("x2", "Map(v, [](float e) { return e*e; })") 0332 /// ~~~ 0333 /// but instead this will: 0334 /// ~~~{.cpp} 0335 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })") 0336 /// ~~~ 0337 template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> 0338 RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0339 { 0340 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define"); 0341 } 0342 // clang-format on 0343 0344 // clang-format off 0345 //////////////////////////////////////////////////////////////////////////// 0346 /// \brief Define a new column with a value dependent on the processing slot. 0347 /// \param[in] name The name of the defined column. 
0348 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0349 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number). 0350 /// \return the first node of the computation graph for which the new quantity is defined. 0351 /// 0352 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner. 0353 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types 0354 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer 0355 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with 0356 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. 0357 /// 0358 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant: 0359 /// ~~~{.cpp} 0360 /// int function(unsigned int, double, double); 0361 /// df.Define("x", function, {"rdfslot_", "column1", "column2"}) 0362 /// df.DefineSlot("x", function, {"column1", "column2"}) 0363 /// ~~~ 0364 /// 0365 /// See Define() for more information. 0366 template <typename F> 0367 RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0368 { 0369 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot"); 0370 } 0371 // clang-format on 0372 0373 // clang-format off 0374 //////////////////////////////////////////////////////////////////////////// 0375 /// \brief Define a new column with a value dependent on the processing slot and the current entry. 0376 /// \param[in] name The name of the defined column. 
0377 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0378 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry). 0379 /// \return the first node of the computation graph for which the new quantity is defined. 0380 /// 0381 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom 0382 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...` 0383 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned 0384 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with 0385 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter 0386 /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread. 0387 /// 0388 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant: 0389 /// ~~~{.cpp} 0390 /// int function(unsigned int, ULong64_t, double, double); 0391 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"}) 0392 /// DefineSlotEntry("x", function, {"column1", "column2"}) 0393 /// ~~~ 0394 /// 0395 /// See Define() for more information. 0396 template <typename F> 0397 RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0398 { 0399 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns, 0400 "DefineSlotEntry"); 0401 } 0402 // clang-format on 0403 0404 //////////////////////////////////////////////////////////////////////////// 0405 /// \brief Define a new column. 
0406 /// \param[in] name The name of the defined column. 0407 /// \param[in] expression An expression in C++ which represents the defined value 0408 /// \return the first node of the computation graph for which the new quantity is defined. 0409 /// 0410 /// The expression is just-in-time compiled and used to produce the column entries. 0411 /// It must be valid C++ syntax in which variable names are substituted with the names 0412 /// of branches/columns. 0413 /// 0414 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested 0415 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work: 0416 /// ~~~{.cpp} 0417 /// df.Define("x2", "Map(v, [](float e) { return e*e; })") 0418 /// ~~~ 0419 /// but instead this will: 0420 /// ~~~{.cpp} 0421 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })") 0422 /// ~~~ 0423 /// 0424 /// Refer to the first overload of this method for the full documentation. 0425 RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression) 0426 { 0427 constexpr auto where = "Define"; 0428 RDFInternal::CheckValidCppVarName(name, where); 0429 // these checks must be done before jitting lest we throw exceptions in jitted code 0430 RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(), 0431 fDataSource ? 
fDataSource->GetColumnNames() : ColumnNames_t{}); 0432 0433 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 0434 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister, 0435 fLoopManager->GetBranchNames(), upcastNodeOnHeap); 0436 0437 RDFInternal::RColumnRegister newCols(fColRegister); 0438 newCols.AddDefine(std::move(jittedDefine)); 0439 0440 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 0441 0442 return newInterface; 0443 } 0444 0445 //////////////////////////////////////////////////////////////////////////// 0446 /// \brief Overwrite the value and/or type of an existing column. 0447 /// \param[in] name The name of the column to redefine. 0448 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0449 /// \param[in] columns Names of the columns/branches in input to the expression. 0450 /// \return the first node of the computation graph for which the quantity is redefined. 0451 /// 0452 /// The old value of the column can be used as an input for the expression. 0453 /// 0454 /// An exception is thrown in case the column to redefine does not already exist. 0455 /// See Define() for more information. 0456 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> 0457 RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0458 { 0459 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine"); 0460 } 0461 0462 // clang-format off 0463 //////////////////////////////////////////////////////////////////////////// 0464 /// \brief Overwrite the value and/or type of an existing column. 0465 /// \param[in] name The name of the column to redefine. 
0466 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0467 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot). 0468 /// \return the first node of the computation graph for which the new quantity is defined. 0469 /// 0470 /// The old value of the column can be used as an input for the expression. 0471 /// An exception is thrown in case the column to redefine does not already exist. 0472 /// 0473 /// See DefineSlot() for more information. 0474 // clang-format on 0475 template <typename F> 0476 RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0477 { 0478 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot"); 0479 } 0480 0481 // clang-format off 0482 //////////////////////////////////////////////////////////////////////////// 0483 /// \brief Overwrite the value and/or type of an existing column. 0484 /// \param[in] name The name of the column to redefine. 0485 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0486 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry). 0487 /// \return the first node of the computation graph for which the new quantity is defined. 0488 /// 0489 /// The old value of the column can be used as an input for the expression. 0490 /// An exception is thrown in case the column to re-define does not already exist. 0491 /// 0492 /// See DefineSlotEntry() for more information. 
0493 // clang-format on 0494 template <typename F> 0495 RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0496 { 0497 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns, 0498 "RedefineSlotEntry"); 0499 } 0500 0501 //////////////////////////////////////////////////////////////////////////// 0502 /// \brief Overwrite the value and/or type of an existing column. 0503 /// \param[in] name The name of the column to redefine. 0504 /// \param[in] expression An expression in C++ which represents the defined value 0505 /// \return the first node of the computation graph for which the new quantity is defined. 0506 /// 0507 /// The expression is just-in-time compiled and used to produce the column entries. 0508 /// It must be valid C++ syntax in which variable names are substituted with the names 0509 /// of branches/columns. 0510 /// 0511 /// The old value of the column can be used as an input for the expression. 0512 /// An exception is thrown in case the column to re-define does not already exist. 0513 /// 0514 /// Aliases cannot be overridden. See the corresponding Define() overload for more information. 0515 RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression) 0516 { 0517 constexpr auto where = "Redefine"; 0518 RDFInternal::CheckValidCppVarName(name, where); 0519 RDFInternal::CheckForDefinition(where, name, fColRegister, fLoopManager->GetBranchNames(), 0520 fDataSource ? 
fDataSource->GetColumnNames() : ColumnNames_t{}); 0521 RDFInternal::CheckForNoVariations(where, name, fColRegister); 0522 0523 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 0524 auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister, 0525 fLoopManager->GetBranchNames(), upcastNodeOnHeap); 0526 0527 RDFInternal::RColumnRegister newCols(fColRegister); 0528 newCols.AddDefine(std::move(jittedDefine)); 0529 0530 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 0531 0532 return newInterface; 0533 } 0534 0535 // clang-format off 0536 //////////////////////////////////////////////////////////////////////////// 0537 /// \brief Define a new column that is updated when the input sample changes. 0538 /// \param[in] name The name of the defined column. 0539 /// \param[in] expression A C++ callable that computes the new value of the defined column. 0540 /// \return the first node of the computation graph for which the new quantity is defined. 0541 /// 0542 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)` 0543 /// where: 0544 /// - `T` is the type of the defined column 0545 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify 0546 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame. 0547 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is 0548 /// being processed (see the class docs for more information). 0549 /// 0550 /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being 0551 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample 0552 /// starts rather than at every entry. 
0553 /// 0554 /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often. 0555 /// 0556 /// ### Example usage: 0557 /// ~~~{.cpp} 0558 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}}; 0559 /// df.DefinePerSample("weightbysample", 0560 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) 0561 /// { return id.Contains("sample1") ? 1.0f : 2.0f; }); 0562 /// ~~~ 0563 // clang-format on 0564 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch 0565 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type> 0566 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression) 0567 { 0568 RDFInternal::CheckValidCppVarName(name, "DefinePerSample"); 0569 RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(), 0570 fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{}); 0571 0572 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t)); 0573 if (retTypeName.empty()) { 0574 // The type is not known to the interpreter. 0575 // We must not error out here, but if/when this column is used in jitted code 0576 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t)); 0577 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; 0578 } 0579 0580 auto newColumn = 0581 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager); 0582 0583 RDFInternal::RColumnRegister newCols(fColRegister); 0584 newCols.AddDefine(std::move(newColumn)); 0585 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 0586 return newInterface; 0587 } 0588 0589 // clang-format off 0590 //////////////////////////////////////////////////////////////////////////// 0591 /// \brief Define a new column that is updated when the input sample changes. 
0592 /// \param[in] name The name of the defined column. 0593 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value. 0594 /// \return the first node of the computation graph for which the new quantity is defined. 0595 /// 0596 /// The expression is just-in-time compiled and used to produce the column entries. 0597 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is 0598 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the 0599 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information. 0600 /// 0601 /// ### Example usage: 0602 /// ~~~{.py} 0603 /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root']) 0604 /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f') 0605 /// ~~~ 0606 /// 0607 /// \note 0608 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this 0609 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as 0610 /// input parameters. This is for example the correct way to call this overload when working in PyROOT: 0611 /// ~~~{.py} 0612 /// ROOT.gInterpreter.Declare( 0613 /// """ 0614 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){ 0615 /// return id.Contains("sample1") ? 
1.0f : 2.0f; 0616 /// } 0617 /// """) 0618 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"]) 0619 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)") 0620 /// ~~~ 0621 /// 0622 /// \note 0623 /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain 0624 /// column names other than those mentioned above: the expression is evaluated once before the processing of the 0625 /// sample even starts, so column values are not accessible. 0626 // clang-format on 0627 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression) 0628 { 0629 RDFInternal::CheckValidCppVarName(name, "DefinePerSample"); 0630 // these checks must be done before jitting lest we throw exceptions in jitted code 0631 RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(), 0632 fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{}); 0633 0634 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 0635 auto jittedDefine = 0636 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap); 0637 0638 RDFInternal::RColumnRegister newCols(fColRegister); 0639 newCols.AddDefine(std::move(jittedDefine)); 0640 0641 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 0642 0643 return newInterface; 0644 } 0645 0646 /// \brief Register systematic variations for a single existing column using custom variation tags. 0647 /// \param[in] colName name of the column for which varied values are provided. 0648 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0649 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0650 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 
0651 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0652 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 0653 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0654 /// 0655 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to 0656 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for 0657 /// results that depend on any varied quantity, a map/dictionary of varied results can be produced with 0658 /// ROOT::RDF::Experimental::VariationsFor (see the example below). 0659 /// 0660 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and 0661 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or 0662 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of 0663 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below. 0664 /// 0665 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt. 0666 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"): 0667 /// ~~~{.cpp} 0668 /// auto nominal_hx = 0669 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"down", "up"}) 0670 /// .Filter("pt > k") 0671 /// .Define("x", someFunc, {"pt"}) 0672 /// .Histo1D("x"); 0673 /// 0674 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0675 /// hx["nominal"].Draw(); 0676 /// hx["pt:down"].Draw("SAME"); 0677 /// hx["pt:up"].Draw("SAME"); 0678 /// ~~~ 0679 /// RDataFrame computes all variations as part of a single loop over the data. 
0680 /// In particular, this means that I/O and computation of values shared 0681 /// among variations only happen once for all variations. Thus, the event loop 0682 /// run-time typically scales much better than linearly with the number of 0683 /// variations. 0684 /// 0685 /// RDataFrame lazily computes the varied values required to produce the 0686 /// outputs of \ref ROOT::RDF::Experimental::VariationsFor "VariationsFor()". If \ref 0687 /// ROOT::RDF::Experimental::VariationsFor "VariationsFor()" was not called for a result, the computations are only 0688 /// run for the nominal case. 0689 /// 0690 /// See other overloads for examples when variations are added for multiple existing columns, 0691 /// or when the tags are auto-generated instead of being directly defined. 0692 template <typename F> 0693 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, 0694 const std::vector<std::string> &variationTags, std::string_view variationName = "") 0695 { 0696 std::vector<std::string> colNames{{std::string(colName)}}; 0697 const std::string theVariationName{variationName.empty() ? colName : variationName}; 0698 0699 return VaryImpl<true>(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags, 0700 theVariationName); 0701 } 0702 0703 /// \brief Register systematic variations for a single existing column using auto-generated variation tags. 0704 /// \param[in] colName name of the column for which varied values are provided. 0705 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0706 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0707 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0708 /// \param[in] inputColumns the names of the columns to be passed to the callable. 
0709 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 0710 /// `"1"`, etc. 0711 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0712 /// colName is used if none is provided. 0713 /// 0714 /// This overload of Vary takes an nVariations parameter instead of a list of tag names. 0715 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N` 0716 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`. 0717 /// 0718 /// Example usage: 0719 /// ~~~{.cpp} 0720 /// auto nominal_hx = 0721 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, 2) 0722 /// .Histo1D("x"); 0723 /// 0724 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0725 /// hx["nominal"].Draw(); 0726 /// hx["x:0"].Draw("SAME"); 0727 /// hx["x:1"].Draw("SAME"); 0728 /// ~~~ 0729 /// 0730 /// \sa This Vary() overload for more information. 0731 template <typename F> 0732 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, 0733 std::size_t nVariations, std::string_view variationName = "") 0734 { 0735 R__ASSERT(nVariations > 0 && "Must have at least one variation."); 0736 0737 std::vector<std::string> variationTags; 0738 variationTags.reserve(nVariations); 0739 for (std::size_t i = 0u; i < nVariations; ++i) 0740 variationTags.emplace_back(std::to_string(i)); 0741 0742 const std::string theVariationName{variationName.empty() ? colName : variationName}; 0743 0744 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName); 0745 } 0746 0747 /// \brief Register systematic variations for multiple existing columns using custom variation tags. 0748 /// \param[in] colNames set of names of the columns for which varied values are provided. 
0749 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0750 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0751 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0752 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0753 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 0754 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"` 0755 /// 0756 /// This overload of Vary takes a list of column names as first argument and 0757 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each 0758 /// affected column. The `variationTags` are defined as `{"down", "up"}`. 0759 /// 0760 /// Example usage: 0761 /// ~~~{.cpp} 0762 /// // produce variations "ptAndEta:down" and "ptAndEta:up" 0763 /// auto nominal_hx = 0764 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously 0765 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; }, 0766 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied 0767 /// {"down", "up"}, // variation tags 0768 /// "ptAndEta") // variation name 0769 /// .Histo1D("pt", "eta"); 0770 /// 0771 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0772 /// hx["nominal"].Draw(); 0773 /// hx["ptAndEta:down"].Draw("SAME"); 0774 /// hx["ptAndEta:up"].Draw("SAME"); 0775 /// ~~~ 0776 /// 0777 /// \sa This Vary() overload for more information. 
0778 0779 template <typename F> 0780 RInterface<Proxied, DS_t> 0781 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, 0782 const std::vector<std::string> &variationTags, std::string_view variationName) 0783 { 0784 return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName); 0785 } 0786 0787 /// \brief Register systematic variations for multiple existing columns using custom variation tags. 0788 /// \param[in] colNames set of names of the columns for which varied values are provided. 0789 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0790 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0791 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0792 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0793 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 0794 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0795 /// colName is used if none is provided. 0796 /// 0797 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list 0798 /// is avoided. 0799 /// 0800 /// \sa This Vary() overload for more information. 0801 template <typename F> 0802 RInterface<Proxied, DS_t> 0803 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns, 0804 const std::vector<std::string> &variationTags, std::string_view variationName) 0805 { 0806 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName); 0807 } 0808 0809 /// \brief Register systematic variations for multiple existing columns using auto-generated tags. 
0810 /// \param[in] colNames set of names of the columns for which varied values are provided. 0811 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0812 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0813 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0814 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0815 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 0816 /// `"1"`, etc. 0817 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0818 /// colName is used if none is provided. 0819 /// 0820 /// This overload of Vary takes a list of column names as first argument. 0821 /// It takes an `nVariations` parameter instead of a list of tag names (`variationTags`). Tag names 0822 /// will be auto-generated as the sequence 0...``nVariations-1``. 0823 /// 0824 /// Example usage: 0825 /// ~~~{.cpp} 0826 /// auto nominal_hx = 0827 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously 0828 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; }, 0829 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied 0830 /// 2, // auto-generated variation tags 0831 /// "ptAndEta") // variation name 0832 /// .Histo1D("pt", "eta"); 0833 /// 0834 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0835 /// hx["nominal"].Draw(); 0836 /// hx["ptAndEta:0"].Draw("SAME"); 0837 /// hx["ptAndEta:1"].Draw("SAME"); 0838 /// ~~~ 0839 /// 0840 /// \sa This Vary() overload for more information. 
0841 template <typename F> 0842 RInterface<Proxied, DS_t> 0843 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, 0844 std::size_t nVariations, std::string_view variationName) 0845 { 0846 R__ASSERT(nVariations > 0 && "Must have at least one variation."); 0847 0848 std::vector<std::string> variationTags; 0849 variationTags.reserve(nVariations); 0850 for (std::size_t i = 0u; i < nVariations; ++i) 0851 variationTags.emplace_back(std::to_string(i)); 0852 0853 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName); 0854 } 0855 0856 /// \brief Register systematic variations for for multiple existing columns using custom variation tags. 0857 /// \param[in] colNames set of names of the columns for which varied values are provided. 0858 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0859 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0860 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0861 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0862 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0863 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 0864 /// `"1"`, etc. 0865 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0866 /// colName is used if none is provided. 0867 /// 0868 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list 0869 /// is avoided. 0870 /// 0871 /// \sa This Vary() overload for more information. 
0872 template <typename F> 0873 RInterface<Proxied, DS_t> 0874 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns, 0875 std::size_t nVariations, std::string_view variationName) 0876 { 0877 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName); 0878 } 0879 0880 /// \brief Register systematic variations for a single existing column using custom variation tags. 0881 /// \param[in] colName name of the column for which varied values are provided. 0882 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied 0883 /// values for the specified column. 0884 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 0885 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0886 /// colName is used if none is provided. 0887 /// 0888 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time 0889 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are 0890 /// defined as `{"down", "up"}`. 0891 /// ~~~{.cpp} 0892 /// auto nominal_hx = 0893 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"}) 0894 /// .Filter("pt > k") 0895 /// .Define("x", someFunc, {"pt"}) 0896 /// .Histo1D("x"); 0897 /// 0898 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0899 /// hx["nominal"].Draw(); 0900 /// hx["pt:down"].Draw("SAME"); 0901 /// hx["pt:up"].Draw("SAME"); 0902 /// ~~~ 0903 /// 0904 /// \sa This Vary() overload for more information. 
0905 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, 0906 const std::vector<std::string> &variationTags, std::string_view variationName = "") 0907 { 0908 std::vector<std::string> colNames{{std::string(colName)}}; 0909 const std::string theVariationName{variationName.empty() ? colName : variationName}; 0910 0911 return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=*/true); 0912 } 0913 0914 /// \brief Register systematic variations for a single existing column using auto-generated variation tags. 0915 /// \param[in] colName name of the column for which varied values are provided. 0916 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied 0917 /// values for the specified column. 0918 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 0919 /// `"1"`, etc. 0920 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0921 /// colName is used if none is provided. 0922 /// 0923 /// This overload adds the possibility for the expression used to evaluate the varied values to be a just-in-time 0924 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are 0925 /// auto-generated. 0926 /// ~~~{.cpp} 0927 /// auto nominal_hx = 0928 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2) 0929 /// .Histo1D("pt"); 0930 /// 0931 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0932 /// hx["nominal"].Draw(); 0933 /// hx["pt:0"].Draw("SAME"); 0934 /// hx["pt:1"].Draw("SAME"); 0935 /// ~~~ 0936 /// 0937 /// \sa This Vary() overload for more information. 
0938 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, 0939 std::string_view variationName = "") 0940 { 0941 std::vector<std::string> variationTags; 0942 variationTags.reserve(nVariations); 0943 for (std::size_t i = 0u; i < nVariations; ++i) 0944 variationTags.emplace_back(std::to_string(i)); 0945 0946 return Vary(colName, expression, std::move(variationTags), variationName); 0947 } 0948 0949 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags. 0950 /// \param[in] colNames set of names of the columns for which varied values are provided. 0951 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied 0952 /// values for the specified columns. 0953 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 0954 /// `"1"`, etc. 0955 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0956 /// 0957 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time 0958 /// compiled. It takes an nVariations parameter instead of a list of tag names. 0959 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N` 0960 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`. 0961 /// The example below shows how Vary() is used while dealing with multiple columns. 
0962 /// 0963 /// ~~~{.cpp} 0964 /// auto nominal_hx = 0965 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy") 0966 /// .Histo1D("x", "y"); 0967 /// 0968 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0969 /// hx["nominal"].Draw(); 0970 /// hx["xy:0"].Draw("SAME"); 0971 /// hx["xy:1"].Draw("SAME"); 0972 /// ~~~ 0973 /// 0974 /// \sa This Vary() overload for more information. 0975 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression, 0976 std::size_t nVariations, std::string_view variationName) 0977 { 0978 std::vector<std::string> variationTags; 0979 variationTags.reserve(nVariations); 0980 for (std::size_t i = 0u; i < nVariations; ++i) 0981 variationTags.emplace_back(std::to_string(i)); 0982 0983 return Vary(colNames, expression, std::move(variationTags), variationName); 0984 } 0985 0986 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags. 0987 /// \param[in] colNames set of names of the columns for which varied values are provided. 0988 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied 0989 /// values for the specified column. 0990 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 0991 /// `"1"`, etc. 0992 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0993 /// colName is used if none is provided. 0994 /// 0995 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list 0996 /// is avoided. 0997 /// 0998 /// \sa This Vary() overload for more information. 
0999 RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression, 1000 std::size_t nVariations, std::string_view variationName) 1001 { 1002 return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName); 1003 } 1004 1005 /// \brief Register systematic variations for multiple existing columns using custom variation tags. 1006 /// \param[in] colNames set of names of the columns for which varied values are provided. 1007 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied 1008 /// values for the specified columns. 1009 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 1010 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 1011 /// 1012 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time 1013 /// compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as 1014 /// `{"down", "up"}`. 1015 /// ~~~{.cpp} 1016 /// auto nominal_hx = 1017 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy") 1018 /// .Histo1D("x", "y"); 1019 /// 1020 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 1021 /// hx["nominal"].Draw(); 1022 /// hx["xy:down"].Draw("SAME"); 1023 /// hx["xy:up"].Draw("SAME"); 1024 /// ~~~ 1025 /// 1026 /// \sa This Vary() overload for more information. 
1027 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression, 1028 const std::vector<std::string> &variationTags, std::string_view variationName) 1029 { 1030 return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false); 1031 } 1032 1033 //////////////////////////////////////////////////////////////////////////// 1034 /// \brief Allow to refer to a column with a different name. 1035 /// \param[in] alias name of the column alias 1036 /// \param[in] columnName of the column to be aliased 1037 /// \return the first node of the computation graph for which the alias is available. 1038 /// 1039 /// Aliasing an alias is supported. 1040 /// 1041 /// ### Example usage: 1042 /// ~~~{.cpp} 1043 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!"); 1044 /// ~~~ 1045 RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName) 1046 { 1047 // The symmetry with Define is clear. We want to: 1048 // - Create globally the alias and return this very node, unchanged 1049 // - Make aliases accessible based on chains and not globally 1050 1051 // Helper to find out if a name is a column 1052 auto &dsColumnNames = fDataSource ? 
fDataSource->GetColumnNames() : ColumnNames_t{}; 1053 1054 constexpr auto where = "Alias"; 1055 RDFInternal::CheckValidCppVarName(alias, where); 1056 // If the alias name is a column name, there is a problem 1057 RDFInternal::CheckForRedefinition(where, alias, fColRegister, fLoopManager->GetBranchNames(), dsColumnNames); 1058 1059 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0]; 1060 1061 RDFInternal::RColumnRegister newCols(fColRegister); 1062 newCols.AddAlias(alias, validColumnName); 1063 1064 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 1065 1066 return newInterface; 1067 } 1068 1069 //////////////////////////////////////////////////////////////////////////// 1070 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. 1071 /// \tparam ColumnTypes variadic list of branch/column types. 1072 /// \param[in] treename The name of the output TTree. 1073 /// \param[in] filename The name of the output TFile. 1074 /// \param[in] columnList The list of names of the columns/branches to be written. 1075 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree. 1076 /// \return a `RDataFrame` that wraps the snapshotted dataset. 1077 /// 1078 /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.') 1079 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot. 1080 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also 1081 /// written out and it appears before the array in the columnList. 1082 /// 1083 /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other 1084 /// types of inputs, all columns returned by GetColumnNames() will be written out. 
If friend trees or chains are 1085 /// present, by default all friend top-level branches that have names that do not collide with 1086 /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out 1087 /// friend branches with the same names of branches in the main TTree/TChain with names of the form 1088 /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain. 1089 /// 1090 /// ### Writing to a sub-directory 1091 /// 1092 /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to 1093 /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` write TTree `t` in the 1094 /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed). 1095 /// 1096 /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of 1097 /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with 1098 /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong 1099 /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will 1100 /// error out if such a "shuffled" TTree is used in a friendship. 1101 /// 1102 /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in 1103 /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with 1104 /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk. 1105 /// 1106 /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns 1107 /// made available by some data sources (e.g. 
RNTupleDS) that represent the size of column `columnname`, and are 1108 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an 1109 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`. 1110 /// 1111 /// ### Example invocations: 1112 /// 1113 /// ~~~{.cpp} 1114 /// // without specifying template parameters (column types automatically deduced) 1115 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"}); 1116 /// 1117 /// // specifying template parameters ("x" is `int`, "y" is `float`) 1118 /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"}); 1119 /// ~~~ 1120 /// 1121 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in 1122 /// `RSnapshotOptions`: 1123 /// ~~~{.cpp} 1124 /// RSnapshotOptions opts; 1125 /// opts.fLazy = true; 1126 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts); 1127 /// ~~~ 1128 template <typename... ColumnTypes> 1129 RResultPtr<RInterface<RLoopManager>> 1130 Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList, 1131 const RSnapshotOptions &options = RSnapshotOptions()) 1132 { 1133 return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options); 1134 } 1135 1136 //////////////////////////////////////////////////////////////////////////// 1137 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. 1138 /// \param[in] treename The name of the output TTree. 1139 /// \param[in] filename The name of the output TFile. 1140 /// \param[in] columnList The list of names of the columns/branches to be written. 1141 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree. 1142 /// \return a `RDataFrame` that wraps the snapshotted dataset. 1143 /// 1144 /// This function returns a `RDataFrame` built with the output tree as a source. 
1145 /// The types of the columns are automatically inferred and do not need to be specified. 1146 /// 1147 /// See above for a more complete description and example usages. 1148 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, 1149 const ColumnNames_t &columnList, 1150 const RSnapshotOptions &options = RSnapshotOptions()) 1151 { 1152 // like columnList but with `#var` columns removed 1153 auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot"); 1154 // like columnListWithoutSizeColumns but with aliases resolved 1155 auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes); 1156 RDFInternal::CheckForDuplicateSnapshotColumns(colListNoAliases); 1157 // like validCols but with missing size branches required by array branches added in the right positions 1158 const auto pairOfColumnLists = 1159 RDFInternal::AddSizeBranches(fLoopManager->GetBranchNames(), fLoopManager->GetTree(), 1160 std::move(colListNoAliases), std::move(colListNoPoundSizes)); 1161 const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first; 1162 const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second; 1163 1164 1165 const auto fullTreeName = treename; 1166 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName); 1167 treename = parsedTreePath.fTreeName; 1168 const auto &dirname = parsedTreePath.fDirName; 1169 1170 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>( 1171 RDFInternal::SnapshotHelperArgs{std::string(filename), std::string(dirname), std::string(treename), 1172 colListWithAliasesAndSizeBranches, options}); 1173 1174 ::TDirectory::TContext ctxt; 1175 1176 // The CreateLMFromTTree function by default opens the file passed as input 1177 // to check for the presence of the TTree inside. But at this moment the 1178 // filename we are using here corresponds to a file which does not exist yet, 1179 // i.e. 
the output file of the Snapshot call. Thus, checkFile=false will 1180 // prevent the function from trying to open a non-existent file. 1181 auto newRDF = std::make_shared<RInterface<RLoopManager>>(ROOT::Detail::RDF::CreateLMFromTTree( 1182 fullTreeName, filename, colListNoAliasesWithSizeBranches, /*checkFile*/ false)); 1183 1184 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>( 1185 colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr, 1186 colListNoAliasesWithSizeBranches.size()); 1187 1188 if (!options.fLazy) 1189 *resPtr; 1190 return resPtr; 1191 } 1192 1193 // clang-format off 1194 //////////////////////////////////////////////////////////////////////////// 1195 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. 1196 /// \param[in] treename The name of the output TTree. 1197 /// \param[in] filename The name of the output TFile. 1198 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns. 1199 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree 1200 /// \return a `RDataFrame` that wraps the snapshotted dataset. 1201 /// 1202 /// This function returns a `RDataFrame` built with the output tree as a source. 1203 /// The types of the columns are automatically inferred and do not need to be specified. 1204 /// 1205 /// See above for a more complete description and example usages. 
1206 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, 1207 std::string_view columnNameRegexp = "", 1208 const RSnapshotOptions &options = RSnapshotOptions()) 1209 { 1210 const auto definedColumns = fColRegister.GenerateColumnNames(); 1211 auto *tree = fLoopManager->GetTree(); 1212 const auto treeBranchNames = tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{}; 1213 const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{}; 1214 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those 1215 ColumnNames_t dsColumnsWithoutSizeColumns; 1216 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns), 1217 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; }); 1218 ColumnNames_t columnNames; 1219 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size()); 1220 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end()); 1221 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end()); 1222 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end()); 1223 1224 // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd. 1225 // RemoveDuplicates should preserve ordering of the columns: it might be meaningful. 
1226 RDFInternal::RemoveDuplicates(columnNames); 1227 1228 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot"); 1229 return Snapshot(treename, filename, selectedColumns, options); 1230 } 1231 // clang-format on 1232 1233 // clang-format off 1234 //////////////////////////////////////////////////////////////////////////// 1235 /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`. 1236 /// \param[in] treename The name of the output TTree. 1237 /// \param[in] filename The name of the output TFile. 1238 /// \param[in] columnList The list of names of the columns/branches to be written. 1239 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree. 1240 /// \return a `RDataFrame` that wraps the snapshotted dataset. 1241 /// 1242 /// This function returns a `RDataFrame` built with the output tree as a source. 1243 /// The types of the columns are automatically inferred and do not need to be specified. 1244 /// 1245 /// See above for a more complete description and example usages. 1246 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, 1247 std::initializer_list<std::string> columnList, 1248 const RSnapshotOptions &options = RSnapshotOptions()) 1249 { 1250 ColumnNames_t selectedColumns(columnList); 1251 return Snapshot(treename, filename, selectedColumns, options); 1252 } 1253 // clang-format on 1254 1255 //////////////////////////////////////////////////////////////////////////// 1256 /// \brief Save selected columns in memory. 1257 /// \tparam ColumnTypes variadic list of branch/column types. 1258 /// \param[in] columnList columns to be cached in memory. 1259 /// \return a `RDataFrame` that wraps the cached dataset. 1260 /// 1261 /// This action returns a new `RDataFrame` object, completely detached from 1262 /// the originating `RDataFrame`. 
The new dataframe only contains the cached 1263 /// columns and stores their content in memory for fast, zero-copy subsequent access. 1264 /// 1265 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that 1266 /// fits in memory and that will be accessed many times. 1267 /// 1268 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns 1269 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are 1270 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an 1271 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`. 1272 /// 1273 /// ### Example usage: 1274 /// 1275 /// **Types and columns specified:** 1276 /// ~~~{.cpp} 1277 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"}); 1278 /// ~~~ 1279 /// 1280 /// **Types inferred and columns specified (this invocation relies on jitting):** 1281 /// ~~~{.cpp} 1282 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"}); 1283 /// ~~~ 1284 /// 1285 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):** 1286 /// ~~~{.cpp} 1287 /// auto cache_all_cols_df = df.Cache(myRegexp); 1288 /// ~~~ 1289 template <typename... ColumnTypes> 1290 RInterface<RLoopManager> Cache(const ColumnNames_t &columnList) 1291 { 1292 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>(); 1293 return CacheImpl<ColumnTypes...>(columnList, staticSeq); 1294 } 1295 1296 //////////////////////////////////////////////////////////////////////////// 1297 /// \brief Save selected columns in memory. 1298 /// \param[in] columnList columns to be cached in memory 1299 /// \return a `RDataFrame` that wraps the cached dataset. 1300 /// 1301 /// See the previous overloads for more information. 
1302 RInterface<RLoopManager> Cache(const ColumnNames_t &columnList) 1303 { 1304 // Early return: if the list of columns is empty, just return an empty RDF 1305 // If we proceed, the jitted call will not compile! 1306 if (columnList.empty()) { 1307 auto nEntries = *this->Count(); 1308 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries)); 1309 return emptyRDF; 1310 } 1311 1312 std::stringstream cacheCall; 1313 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr); 1314 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager, 1315 fColRegister); 1316 // build a string equivalent to 1317 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))" 1318 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0)); 1319 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>(" 1320 << RDFInternal::PrettyPrintAddr(&resRDF) 1321 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>(" 1322 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<"; 1323 1324 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache"); 1325 1326 const auto validColumnNames = 1327 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns); 1328 const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(), fDataSource, 1329 "Cache", /*vector2rvec=*/false); 1330 for (const auto &colType : colTypes) 1331 cacheCall << colType << ", "; 1332 if (!columnListWithoutSizeColumns.empty()) 1333 cacheCall.seekp(-2, cacheCall.cur); // remove the last ", 1334 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t 1335 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));"; 1336 1337 // book the code to jit with the RLoopManager and trigger the event 
loop 1338 fLoopManager->ToJitExec(cacheCall.str()); 1339 fLoopManager->Jit(); 1340 1341 return resRDF; 1342 } 1343 1344 //////////////////////////////////////////////////////////////////////////// 1345 /// \brief Save selected columns in memory. 1346 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns. 1347 /// \return a `RDataFrame` that wraps the cached dataset. 1348 /// 1349 /// The existing columns are matched against the regular expression. If the string provided 1350 /// is empty, all columns are selected. See the previous overloads for more information. 1351 RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "") 1352 { 1353 const auto definedColumns = fColRegister.GenerateColumnNames(); 1354 auto *tree = fLoopManager->GetTree(); 1355 const auto treeBranchNames = 1356 tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{}; 1357 const auto dsColumns = fDataSource ? 
fDataSource->GetColumnNames() : ColumnNames_t{}; 1358 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those 1359 ColumnNames_t dsColumnsWithoutSizeColumns; 1360 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns), 1361 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; }); 1362 ColumnNames_t columnNames; 1363 columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size()); 1364 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end()); 1365 columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end()); 1366 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end()); 1367 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache"); 1368 return Cache(selectedColumns); 1369 } 1370 1371 //////////////////////////////////////////////////////////////////////////// 1372 /// \brief Save selected columns in memory. 1373 /// \param[in] columnList columns to be cached in memory. 1374 /// \return a `RDataFrame` that wraps the cached dataset. 1375 /// 1376 /// See the previous overloads for more information. 1377 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList) 1378 { 1379 ColumnNames_t selectedColumns(columnList); 1380 return Cache(selectedColumns); 1381 } 1382 1383 // clang-format off 1384 //////////////////////////////////////////////////////////////////////////// 1385 /// \brief Creates a node that filters entries based on range: [begin, end). 1386 /// \param[in] begin Initial entry number considered for this range. 1387 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset. 1388 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0. 
1389 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries. 1390 /// 1391 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset. 1392 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported. 1393 /// 1394 /// ### Example usage: 1395 /// ~~~{.cpp} 1396 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries 1397 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards 1398 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3 1399 /// ~~~ 1400 // clang-format on 1401 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1) 1402 { 1403 // check invariants 1404 if (stride == 0 || (end != 0 && end < begin)) 1405 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin."); 1406 CheckIMTDisabled("Range"); 1407 1408 using Range_t = RDFDetail::RRange<Proxied>; 1409 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr); 1410 RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister); 1411 return newInterface; 1412 } 1413 1414 // clang-format off 1415 //////////////////////////////////////////////////////////////////////////// 1416 /// \brief Creates a node that filters entries based on range. 1417 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset. 1418 /// \return a node of the computation graph for which the range is defined. 1419 /// 1420 /// See the other Range overload for a detailed description. 
1421 // clang-format on 1422 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); } 1423 1424 // clang-format off 1425 //////////////////////////////////////////////////////////////////////////// 1426 /// \brief Execute a user-defined function on each entry (*instant action*). 1427 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations. 1428 /// \param[in] columns Names of the columns/branches in input to the user function. 1429 /// 1430 /// The callable `f` is invoked once per entry. This is an *instant action*: 1431 /// upon invocation, an event loop as well as execution of all scheduled actions 1432 /// is triggered. 1433 /// Users are responsible for the thread-safety of this callable when executing 1434 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT). 1435 /// 1436 /// ### Example usage: 1437 /// ~~~{.cpp} 1438 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"}); 1439 /// ~~~ 1440 // clang-format on 1441 template <typename F> 1442 void Foreach(F f, const ColumnNames_t &columns = {}) 1443 { 1444 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay; 1445 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type; 1446 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns); 1447 } 1448 1449 // clang-format off 1450 //////////////////////////////////////////////////////////////////////////// 1451 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*). 1452 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations. 1453 /// \param[in] columns Names of the columns/branches in input to the user function. 
1454 /// 1455 /// Same as `Foreach`, but the user-defined function takes an extra 1456 /// `unsigned int` as its first parameter, the *processing slot index*. 1457 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`, 1458 /// for each thread of execution. 1459 /// This is meant as a helper in writing thread-safe `Foreach` 1460 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`. 1461 /// The user-defined processing callable is able to follow different 1462 /// *streams of processing* indexed by the first parameter. 1463 /// `ForeachSlot` works just as well with single-thread execution: in that 1464 /// case `slot` will always be `0`. 1465 /// 1466 /// ### Example usage: 1467 /// ~~~{.cpp} 1468 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"}); 1469 /// ~~~ 1470 // clang-format on 1471 template <typename F> 1472 void ForeachSlot(F f, const ColumnNames_t &columns = {}) 1473 { 1474 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>; 1475 constexpr auto nColumns = ColTypes_t::list_size; 1476 1477 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); 1478 CheckAndFillDSColumns(validColumnNames, ColTypes_t()); 1479 1480 using Helper_t = RDFInternal::ForeachSlotHelper<F>; 1481 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 1482 1483 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister); 1484 1485 fLoopManager->Run(); 1486 } 1487 1488 // clang-format off 1489 //////////////////////////////////////////////////////////////////////////// 1490 /// \brief Execute a user-defined reduce operation on the values of a column. 1491 /// \tparam F The type of the reduce callable. Automatically deduced. 1492 /// \tparam T The type of the column to apply the reduction to. Automatically deduced. 
1493 /// \param[in] f A callable with signature `T(T,T)` 1494 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead. 1495 /// \return the reduced quantity wrapped in a ROOT::RDF:RResultPtr. 1496 /// 1497 /// A reduction takes two values of a column and merges them into one (e.g. 1498 /// by summing them, taking the maximum, etc). This action performs the 1499 /// specified reduction operation on all processed column values, returning 1500 /// a single value of the same type. The callable f must satisfy the general 1501 /// requirements of a *processing function* besides having signature `T(T,T)` 1502 /// where `T` is the type of column columnName. 1503 /// 1504 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a 1505 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific 1506 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this 1507 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce` 1508 /// overload. 1509 /// 1510 /// ### Example usage: 1511 /// ~~~{.cpp} 1512 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol"); 1513 /// ~~~ 1514 /// 1515 /// This action is *lazy*: upon invocation of this method the calculation is 1516 /// booked but not executed. Also see RResultPtr. 1517 // clang-format on 1518 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type> 1519 RResultPtr<T> Reduce(F f, std::string_view columnName = "") 1520 { 1521 static_assert( 1522 std::is_default_constructible<T>::value, 1523 "reduce object cannot be default-constructed. 
Please provide an initialisation value (redIdentity)"); 1524 return Reduce(std::move(f), columnName, T()); 1525 } 1526 1527 //////////////////////////////////////////////////////////////////////////// 1528 /// \brief Execute a user-defined reduce operation on the values of a column. 1529 /// \tparam F The type of the reduce callable. Automatically deduced. 1530 /// \tparam T The type of the column to apply the reduction to. Automatically deduced. 1531 /// \param[in] f A callable with signature `T(T,T)` 1532 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead. 1533 /// \param[in] redIdentity The reduced object of each thread is initialized to this value. 1534 /// \return the reduced quantity wrapped in a RResultPtr. 1535 /// 1536 /// ### Example usage: 1537 /// ~~~{.cpp} 1538 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42); 1539 /// ~~~ 1540 /// See the description of the first Reduce overload for more information. 1541 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type> 1542 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity) 1543 { 1544 return Aggregate(f, f, columnName, redIdentity); 1545 } 1546 1547 //////////////////////////////////////////////////////////////////////////// 1548 /// \brief Return the number of entries processed (*lazy action*). 1549 /// \return the number of entries wrapped in a RResultPtr. 1550 /// 1551 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`). 1552 /// This action is *lazy*: upon invocation of this method the calculation is 1553 /// booked but not executed. Also see RResultPtr. 
1554 /// 1555 /// ### Example usage: 1556 /// ~~~{.cpp} 1557 /// auto nEntriesAfterCuts = myFilteredDf.Count(); 1558 /// ~~~ 1559 /// 1560 RResultPtr<ULong64_t> Count() 1561 { 1562 const auto nSlots = fLoopManager->GetNSlots(); 1563 auto cSPtr = std::make_shared<ULong64_t>(0); 1564 using Helper_t = RDFInternal::CountHelper; 1565 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 1566 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr, 1567 RDFInternal::RColumnRegister(fColRegister)); 1568 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action)); 1569 } 1570 1571 //////////////////////////////////////////////////////////////////////////// 1572 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default). 1573 /// \tparam T The type of the column. 1574 /// \tparam COLL The type of collection used to store the values. 1575 /// \param[in] column The name of the column to collect the values of. 1576 /// \return the content of the selected column wrapped in a RResultPtr. 1577 /// 1578 /// The collection type to be specified for C-style array columns is `RVec<T>`: 1579 /// in this case the returned collection is a `std::vector<RVec<T>>`. 1580 /// ### Example usage: 1581 /// ~~~{.cpp} 1582 /// // In this case intCol is a std::vector<int> 1583 /// auto intCol = rdf.Take<int>("integerColumn"); 1584 /// // Same content as above but in this case taken as a RVec<int> 1585 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn"); 1586 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections 1587 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt"); 1588 /// ~~~ 1589 /// This action is *lazy*: upon invocation of this method the calculation is 1590 /// booked but not executed. Also see RResultPtr. 
1591 template <typename T, typename COLL = std::vector<T>> 1592 RResultPtr<COLL> Take(std::string_view column = "") 1593 { 1594 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)}); 1595 1596 const auto validColumnNames = GetValidatedColumnNames(1, columns); 1597 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>()); 1598 1599 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>; 1600 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 1601 auto valuesPtr = std::make_shared<COLL>(); 1602 const auto nSlots = fLoopManager->GetNSlots(); 1603 1604 auto action = 1605 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister); 1606 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action)); 1607 } 1608 1609 //////////////////////////////////////////////////////////////////////////// 1610 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*). 1611 /// \tparam V The type of the column used to fill the histogram. 1612 /// \param[in] model The returned histogram will be constructed using this as a model. 1613 /// \param[in] vName The name of the column that will fill the histogram. 1614 /// \return the monodimensional histogram wrapped in a RResultPtr. 1615 /// 1616 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram 1617 /// is filled with each one of the elements of the container. In case multiple columns of container type 1618 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but 1619 /// possibly different lengths between events). 1620 /// This action is *lazy*: upon invocation of this method the calculation is 1621 /// booked but not executed. Also see RResultPtr. 
1622 /// 1623 /// ### Example usage: 1624 /// ~~~{.cpp} 1625 /// // Deduce column type (this invocation needs jitting internally) 1626 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn"); 1627 /// // Explicit column type 1628 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn"); 1629 /// ~~~ 1630 /// 1631 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory 1632 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 1633 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 1634 template <typename V = RDFDetail::RInferredType> 1635 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "") 1636 { 1637 const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)}); 1638 1639 const auto validatedColumns = GetValidatedColumnNames(1, userColumns); 1640 1641 std::shared_ptr<::TH1D> h(nullptr); 1642 { 1643 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 1644 h = model.GetHistogram(); 1645 h->SetDirectory(nullptr); 1646 } 1647 1648 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin()) 1649 RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h); 1650 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr); 1651 } 1652 1653 //////////////////////////////////////////////////////////////////////////// 1654 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*). 1655 /// \tparam V The type of the column used to fill the histogram. 1656 /// \param[in] vName The name of the column that will fill the histogram. 1657 /// \return the monodimensional histogram wrapped in a RResultPtr. 1658 /// 1659 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.). 
1660 /// The "name" and "title" strings are built starting from the input column name. 1661 /// See the description of the first Histo1D() overload for more details. 1662 /// 1663 /// ### Example usage: 1664 /// ~~~{.cpp} 1665 /// // Deduce column type (this invocation needs jitting internally) 1666 /// auto myHist1 = myDf.Histo1D("myColumn"); 1667 /// // Explicit column type 1668 /// auto myHist2 = myDf.Histo1D<float>("myColumn"); 1669 /// ~~~ 1670 template <typename V = RDFDetail::RInferredType> 1671 RResultPtr<::TH1D> Histo1D(std::string_view vName) 1672 { 1673 const auto h_name = std::string(vName); 1674 const auto h_title = h_name + ";" + h_name + ";count"; 1675 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName); 1676 } 1677 1678 //////////////////////////////////////////////////////////////////////////// 1679 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*). 1680 /// \tparam V The type of the column used to fill the histogram. 1681 /// \tparam W The type of the column used as weights. 1682 /// \param[in] model The returned histogram will be constructed using this as a model. 1683 /// \param[in] vName The name of the column that will fill the histogram. 1684 /// \param[in] wName The name of the column that will provide the weights. 1685 /// \return the monodimensional histogram wrapped in a RResultPtr. 1686 /// 1687 /// See the description of the first Histo1D() overload for more details. 
1688 /// 1689 /// ### Example usage: 1690 /// ~~~{.cpp} 1691 /// // Deduce column type (this invocation needs jitting internally) 1692 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight"); 1693 /// // Explicit column type 1694 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight"); 1695 /// ~~~ 1696 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 1697 RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName) 1698 { 1699 const std::vector<std::string_view> columnViews = {vName, wName}; 1700 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 1701 ? ColumnNames_t() 1702 : ColumnNames_t(columnViews.begin(), columnViews.end()); 1703 std::shared_ptr<::TH1D> h(nullptr); 1704 { 1705 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 1706 h = model.GetHistogram(); 1707 } 1708 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr); 1709 } 1710 1711 //////////////////////////////////////////////////////////////////////////// 1712 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*). 1713 /// \tparam V The type of the column used to fill the histogram. 1714 /// \tparam W The type of the column used as weights. 1715 /// \param[in] vName The name of the column that will fill the histogram. 1716 /// \param[in] wName The name of the column that will provide the weights. 1717 /// \return the monodimensional histogram wrapped in a RResultPtr. 1718 /// 1719 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.). 1720 /// The "name" and "title" strings are built starting from the input column names. 1721 /// See the description of the first Histo1D() overload for more details. 
1722 /// 1723 /// ### Example usage: 1724 /// ~~~{.cpp} 1725 /// // Deduce column types (this invocation needs jitting internally) 1726 /// auto myHist1 = myDf.Histo1D("myValue", "myweight"); 1727 /// // Explicit column types 1728 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight"); 1729 /// ~~~ 1730 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 1731 RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName) 1732 { 1733 // We build name and title based on the value and weight column names 1734 std::string str_vName{vName}; 1735 std::string str_wName{wName}; 1736 const auto h_name = str_vName + "_weighted_" + str_wName; 1737 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName; 1738 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName); 1739 } 1740 1741 //////////////////////////////////////////////////////////////////////////// 1742 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*). 1743 /// \tparam V The type of the column used to fill the histogram. 1744 /// \tparam W The type of the column used as weights. 1745 /// \param[in] model The returned histogram will be constructed using this as a model. 1746 /// \return the monodimensional histogram wrapped in a RResultPtr. 1747 /// 1748 /// This overload will use the first two default columns as column names. 1749 /// See the description of the first Histo1D() overload for more details. 1750 template <typename V, typename W> 1751 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}) 1752 { 1753 return Histo1D<V, W>(model, "", ""); 1754 } 1755 1756 //////////////////////////////////////////////////////////////////////////// 1757 /// \brief Fill and return a two-dimensional histogram (*lazy action*). 1758 /// \tparam V1 The type of the column used to fill the x axis of the histogram. 
/// \tparam V2 The type of the column used to fill the y axis of the histogram.
/// \param[in] model The returned histogram will be constructed using this as a model.
/// \param[in] v1Name The name of the column that will fill the x axis.
/// \param[in] v2Name The name of the column that will fill the y axis.
/// \return the bidimensional histogram wrapped in a RResultPtr.
///
/// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
/// is filled with each one of the elements of the container. In case multiple columns of container type
/// are provided (e.g. values and weights) they must have the same length for each one of the events (but
/// possibly different lengths between events).
/// This action is *lazy*: upon invocation of this method the calculation is
/// booked but not executed. Also see RResultPtr.
///
/// ### Example usage:
/// ~~~{.cpp}
/// // Deduce column types (this invocation needs jitting internally)
/// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
/// // Explicit column types
/// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
/// ~~~
///
///
/// \note Unlike other ROOT interfaces, the returned histogram is not associated to gDirectory
/// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
/// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1784 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType> 1785 RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "") 1786 { 1787 std::shared_ptr<::TH2D> h(nullptr); 1788 { 1789 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 1790 h = model.GetHistogram(); 1791 } 1792 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { 1793 throw std::runtime_error("2D histograms with no axes limits are not supported yet."); 1794 } 1795 const std::vector<std::string_view> columnViews = {v1Name, v2Name}; 1796 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 1797 ? ColumnNames_t() 1798 : ColumnNames_t(columnViews.begin(), columnViews.end()); 1799 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr); 1800 } 1801 1802 //////////////////////////////////////////////////////////////////////////// 1803 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*). 1804 /// \tparam V1 The type of the column used to fill the x axis of the histogram. 1805 /// \tparam V2 The type of the column used to fill the y axis of the histogram. 1806 /// \tparam W The type of the column used for the weights of the histogram. 1807 /// \param[in] model The returned histogram will be constructed using this as a model. 1808 /// \param[in] v1Name The name of the column that will fill the x axis. 1809 /// \param[in] v2Name The name of the column that will fill the y axis. 1810 /// \param[in] wName The name of the column that will provide the weights. 1811 /// \return the bidimensional histogram wrapped in a RResultPtr. 1812 /// 1813 /// This action is *lazy*: upon invocation of this method the calculation is 1814 /// booked but not executed. Also see RResultPtr. 
1815 /// 1816 /// ### Example usage: 1817 /// ~~~{.cpp} 1818 /// // Deduce column types (this invocation needs jitting internally) 1819 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight"); 1820 /// // Explicit column types 1821 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight"); 1822 /// ~~~ 1823 /// 1824 /// See the documentation of the first Histo2D() overload for more details. 1825 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 1826 typename W = RDFDetail::RInferredType> 1827 RResultPtr<::TH2D> 1828 Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName) 1829 { 1830 std::shared_ptr<::TH2D> h(nullptr); 1831 { 1832 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 1833 h = model.GetHistogram(); 1834 } 1835 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { 1836 throw std::runtime_error("2D histograms with no axes limits are not supported yet."); 1837 } 1838 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName}; 1839 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 1840 ? ColumnNames_t() 1841 : ColumnNames_t(columnViews.begin(), columnViews.end()); 1842 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr); 1843 } 1844 1845 template <typename V1, typename V2, typename W> 1846 RResultPtr<::TH2D> Histo2D(const TH2DModel &model) 1847 { 1848 return Histo2D<V1, V2, W>(model, "", "", ""); 1849 } 1850 1851 //////////////////////////////////////////////////////////////////////////// 1852 /// \brief Fill and return a three-dimensional histogram (*lazy action*). 1853 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. 
/// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
/// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
/// \param[in] model The returned histogram will be constructed using this as a model.
/// \param[in] v1Name The name of the column that will fill the x axis.
/// \param[in] v2Name The name of the column that will fill the y axis.
/// \param[in] v3Name The name of the column that will fill the z axis.
/// \return the tridimensional histogram wrapped in a RResultPtr.
///
/// This action is *lazy*: upon invocation of this method the calculation is
/// booked but not executed. Also see RResultPtr.
///
/// ### Example usage:
/// ~~~{.cpp}
/// // Deduce column types (this invocation needs jitting internally)
/// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
///                             "myValueX", "myValueY", "myValueZ");
/// // Explicit column types
/// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
///                                                    "myValueX", "myValueY", "myValueZ");
/// ~~~
///
/// \note Unlike other ROOT interfaces, the returned histogram is not associated to gDirectory
/// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
/// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1878 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 1879 typename V3 = RDFDetail::RInferredType> 1880 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "", 1881 std::string_view v3Name = "") 1882 { 1883 std::shared_ptr<::TH3D> h(nullptr); 1884 { 1885 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 1886 h = model.GetHistogram(); 1887 } 1888 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { 1889 throw std::runtime_error("3D histograms with no axes limits are not supported yet."); 1890 } 1891 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name}; 1892 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 1893 ? ColumnNames_t() 1894 : ColumnNames_t(columnViews.begin(), columnViews.end()); 1895 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr); 1896 } 1897 1898 //////////////////////////////////////////////////////////////////////////// 1899 /// \brief Fill and return a three-dimensional histogram (*lazy action*). 1900 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. 1901 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present. 1902 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present. 1903 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present. 1904 /// \param[in] model The returned histogram will be constructed using this as a model. 1905 /// \param[in] v1Name The name of the column that will fill the x axis. 1906 /// \param[in] v2Name The name of the column that will fill the y axis. 1907 /// \param[in] v3Name The name of the column that will fill the z axis. 1908 /// \param[in] wName The name of the column that will provide the weights. 
1909 /// \return the tridimensional histogram wrapped in a RResultPtr. 1910 /// 1911 /// This action is *lazy*: upon invocation of this method the calculation is 1912 /// booked but not executed. Also see RResultPtr. 1913 /// 1914 /// ### Example usage: 1915 /// ~~~{.cpp} 1916 /// // Deduce column types (this invocation needs jitting internally) 1917 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, 1918 /// "myValueX", "myValueY", "myValueZ", "myWeight"); 1919 /// // Explicit column types 1920 /// using d_t = double; 1921 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, 1922 /// "myValueX", "myValueY", "myValueZ", "myWeight"); 1923 /// ~~~ 1924 /// 1925 /// 1926 /// See the documentation of the first Histo2D() overload for more details. 1927 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 1928 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 1929 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, 1930 std::string_view v3Name, std::string_view wName) 1931 { 1932 std::shared_ptr<::TH3D> h(nullptr); 1933 { 1934 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 1935 h = model.GetHistogram(); 1936 } 1937 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { 1938 throw std::runtime_error("3D histograms with no axes limits are not supported yet."); 1939 } 1940 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName}; 1941 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 1942 ? 
ColumnNames_t() 1943 : ColumnNames_t(columnViews.begin(), columnViews.end()); 1944 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr); 1945 } 1946 1947 template <typename V1, typename V2, typename V3, typename W> 1948 RResultPtr<::TH3D> Histo3D(const TH3DModel &model) 1949 { 1950 return Histo3D<V1, V2, V3, W>(model, "", "", "", ""); 1951 } 1952 1953 //////////////////////////////////////////////////////////////////////////// 1954 /// \brief Fill and return an N-dimensional histogram (*lazy action*). 1955 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not 1956 /// present. 1957 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the 1958 /// object. 1959 /// \param[in] model The returned histogram will be constructed using this as a model. 1960 /// \param[in] columnList 1961 /// A list containing the names of the columns that will be passed when calling `Fill`. 1962 /// (N columns for unweighted filling, or N+1 columns for weighted filling) 1963 /// \return the N-dimensional histogram wrapped in a RResultPtr. 1964 /// 1965 /// This action is *lazy*: upon invocation of this method the calculation is 1966 /// booked but not executed. See RResultPtr documentation. 1967 /// 1968 /// ### Example usage: 1969 /// ~~~{.cpp} 1970 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4, 1971 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}}, 1972 /// {"col0", "col1", "col2", "col3"}); 1973 /// ~~~ 1974 /// 1975 template <typename FirstColumn, typename... 
OtherColumns> // need FirstColumn to disambiguate overloads 1976 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList) 1977 { 1978 std::shared_ptr<::THnD> h(nullptr); 1979 { 1980 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 1981 h = model.GetHistogram(); 1982 1983 if (int(columnList.size()) == (h->GetNdimensions() + 1)) { 1984 h->Sumw2(); 1985 } else if (int(columnList.size()) != h->GetNdimensions()) { 1986 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); 1987 } 1988 } 1989 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h, 1990 fProxiedPtr); 1991 } 1992 1993 //////////////////////////////////////////////////////////////////////////// 1994 /// \brief Fill and return an N-dimensional histogram (*lazy action*). 1995 /// \param[in] model The returned histogram will be constructed using this as a model. 1996 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill` 1997 /// (N columns for unweighted filling, or N+1 columns for weighted filling) 1998 /// \return the N-dimensional histogram wrapped in a RResultPtr. 1999 /// 2000 /// This action is *lazy*: upon invocation of this method the calculation is 2001 /// booked but not executed. Also see RResultPtr. 
2002 /// 2003 /// ### Example usage: 2004 /// ~~~{.cpp} 2005 /// auto myFilledObj = myDf.HistoND({"name","title", 4, 2006 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}}, 2007 /// {"col0", "col1", "col2", "col3"}); 2008 /// ~~~ 2009 /// 2010 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList) 2011 { 2012 std::shared_ptr<::THnD> h(nullptr); 2013 { 2014 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2015 h = model.GetHistogram(); 2016 2017 if (int(columnList.size()) == (h->GetNdimensions() + 1)) { 2018 h->Sumw2(); 2019 } else if (int(columnList.size()) != h->GetNdimensions()) { 2020 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); 2021 } 2022 } 2023 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr, 2024 columnList.size()); 2025 } 2026 2027 //////////////////////////////////////////////////////////////////////////// 2028 /// \brief Fill and return a TGraph object (*lazy action*). 2029 /// \tparam X The type of the column used to fill the x axis. 2030 /// \tparam Y The type of the column used to fill the y axis. 2031 /// \param[in] x The name of the column that will fill the x axis. 2032 /// \param[in] y The name of the column that will fill the y axis. 2033 /// \return the TGraph wrapped in a RResultPtr. 2034 /// 2035 /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph 2036 /// is filled with each one of the elements of the container. 2037 /// If Multithreading is enabled, the order in which points are inserted is undefined. 2038 /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing. 2039 /// A name and a title to the TGraph is given based on the input column names. 2040 /// 2041 /// This action is *lazy*: upon invocation of this method the calculation is 2042 /// booked but not executed. Also see RResultPtr. 
2043 /// 2044 /// ### Example usage: 2045 /// ~~~{.cpp} 2046 /// // Deduce column types (this invocation needs jitting internally) 2047 /// auto myGraph1 = myDf.Graph("xValues", "yValues"); 2048 /// // Explicit column types 2049 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues"); 2050 /// ~~~ 2051 /// 2052 /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory 2053 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 2054 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 2055 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType> 2056 RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "") 2057 { 2058 auto graph = std::make_shared<::TGraph>(); 2059 const std::vector<std::string_view> columnViews = {x, y}; 2060 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2061 ? ColumnNames_t() 2062 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2063 2064 const auto validatedColumns = GetValidatedColumnNames(2, userColumns); 2065 2066 // We build a default name and title based on the input columns 2067 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0]; 2068 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0]; 2069 graph->SetNameTitle(g_name.c_str(), g_title.c_str()); 2070 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str()); 2071 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str()); 2072 2073 return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr); 2074 } 2075 2076 //////////////////////////////////////////////////////////////////////////// 2077 /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*). 2078 /// \param[in] x The name of the column that will fill the x axis. 
/// \param[in] y The name of the column that will fill the y axis.
/// \param[in] exl The name of the column of X low errors
/// \param[in] exh The name of the column of X high errors
/// \param[in] eyl The name of the column of Y low errors
/// \param[in] eyh The name of the column of Y high errors
/// \return the TGraphAsymmErrors wrapped in a RResultPtr.
///
/// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
/// is filled with each one of the elements of the container.
/// If Multithreading is enabled, the order in which points are inserted is undefined.
///
/// This action is *lazy*: upon invocation of this method the calculation is
/// booked but not executed. Also see RResultPtr.
///
/// ### Example usage:
/// ~~~{.cpp}
/// // Deduce column types (this invocation needs jitting internally)
/// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
/// // Explicit column types
/// using f = float;
/// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
/// ~~~
///
/// \note Unlike other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
/// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
/// if result graphs go out of scope before the end of the program, ROOT might display a blank canvas).
2105 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType, 2106 typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType, 2107 typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType> 2108 RResultPtr<::TGraphAsymmErrors> 2109 GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "", 2110 std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "") 2111 { 2112 auto graph = std::make_shared<::TGraphAsymmErrors>(); 2113 const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh}; 2114 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2115 ? ColumnNames_t() 2116 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2117 2118 const auto validatedColumns = GetValidatedColumnNames(6, userColumns); 2119 2120 // We build a default name and title based on the input columns 2121 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0]; 2122 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0]; 2123 graph->SetNameTitle(g_name.c_str(), g_title.c_str()); 2124 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str()); 2125 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str()); 2126 2127 return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph, 2128 graph, fProxiedPtr); 2129 } 2130 2131 //////////////////////////////////////////////////////////////////////////// 2132 /// \brief Fill and return a one-dimensional profile (*lazy action*). 2133 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present. 2134 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present. 2135 /// \param[in] model The model to be considered to build the new return value. 
2136 /// \param[in] v1Name The name of the column that will fill the x axis. 2137 /// \param[in] v2Name The name of the column that will fill the y axis. 2138 /// \return the monodimensional profile wrapped in a RResultPtr. 2139 /// 2140 /// This action is *lazy*: upon invocation of this method the calculation is 2141 /// booked but not executed. Also see RResultPtr. 2142 /// 2143 /// ### Example usage: 2144 /// ~~~{.cpp} 2145 /// // Deduce column types (this invocation needs jitting internally) 2146 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues"); 2147 /// // Explicit column types 2148 /// auto myProf2 = myDf.Graph<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues"); 2149 /// ~~~ 2150 /// 2151 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory 2152 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 2153 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 2154 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType> 2155 RResultPtr<::TProfile> 2156 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "") 2157 { 2158 std::shared_ptr<::TProfile> h(nullptr); 2159 { 2160 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2161 h = model.GetProfile(); 2162 } 2163 2164 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { 2165 throw std::runtime_error("Profiles with no axes limits are not supported yet."); 2166 } 2167 const std::vector<std::string_view> columnViews = {v1Name, v2Name}; 2168 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2169 ? 
ColumnNames_t() 2170 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2171 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr); 2172 } 2173 2174 //////////////////////////////////////////////////////////////////////////// 2175 /// \brief Fill and return a one-dimensional profile (*lazy action*). 2176 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present. 2177 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present. 2178 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present. 2179 /// \param[in] model The model to be considered to build the new return value. 2180 /// \param[in] v1Name The name of the column that will fill the x axis. 2181 /// \param[in] v2Name The name of the column that will fill the y axis. 2182 /// \param[in] wName The name of the column that will provide the weights. 2183 /// \return the monodimensional profile wrapped in a RResultPtr. 2184 /// 2185 /// This action is *lazy*: upon invocation of this method the calculation is 2186 /// booked but not executed. Also see RResultPtr. 2187 /// 2188 /// ### Example usage: 2189 /// ~~~{.cpp} 2190 /// // Deduce column types (this invocation needs jitting internally) 2191 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight"); 2192 /// // Explicit column types 2193 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.}, 2194 /// "xValues", "yValues", "weight"); 2195 /// ~~~ 2196 /// 2197 /// See the first Profile1D() overload for more details. 
2198 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 2199 typename W = RDFDetail::RInferredType> 2200 RResultPtr<::TProfile> 2201 Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName) 2202 { 2203 std::shared_ptr<::TProfile> h(nullptr); 2204 { 2205 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2206 h = model.GetProfile(); 2207 } 2208 2209 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { 2210 throw std::runtime_error("Profile histograms with no axes limits are not supported yet."); 2211 } 2212 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName}; 2213 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2214 ? ColumnNames_t() 2215 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2216 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr); 2217 } 2218 2219 //////////////////////////////////////////////////////////////////////////// 2220 /// \brief Fill and return a one-dimensional profile (*lazy action*). 2221 /// See the first Profile1D() overload for more details. 2222 template <typename V1, typename V2, typename W> 2223 RResultPtr<::TProfile> Profile1D(const TProfile1DModel &model) 2224 { 2225 return Profile1D<V1, V2, W>(model, "", "", ""); 2226 } 2227 2228 //////////////////////////////////////////////////////////////////////////// 2229 /// \brief Fill and return a two-dimensional profile (*lazy action*). 2230 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. 2231 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present. 2232 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present. 2233 /// \param[in] model The returned profile will be constructed using this as a model. 
/// \param[in] v1Name The name of the column that will fill the x axis.
/// \param[in] v2Name The name of the column that will fill the y axis.
/// \param[in] v3Name The name of the column that will fill the z axis.
/// \return the bidimensional profile wrapped in a RResultPtr.
///
/// This action is *lazy*: upon invocation of this method the calculation is
/// booked but not executed. Also see RResultPtr.
///
/// ### Example usage:
/// ~~~{.cpp}
/// // Deduce column types (this invocation needs jitting internally)
/// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
///                               "xValues", "yValues", "zValues");
/// // Explicit column types
/// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
///                                                   "xValues", "yValues", "zValues");
/// ~~~
///
/// \note Unlike other ROOT interfaces, the returned profile is not associated to gDirectory
/// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
/// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2255 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 2256 typename V3 = RDFDetail::RInferredType> 2257 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "", 2258 std::string_view v2Name = "", std::string_view v3Name = "") 2259 { 2260 std::shared_ptr<::TProfile2D> h(nullptr); 2261 { 2262 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2263 h = model.GetProfile(); 2264 } 2265 2266 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { 2267 throw std::runtime_error("2D profiles with no axes limits are not supported yet."); 2268 } 2269 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name}; 2270 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2271 ? ColumnNames_t() 2272 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2273 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr); 2274 } 2275 2276 //////////////////////////////////////////////////////////////////////////// 2277 /// \brief Fill and return a two-dimensional profile (*lazy action*). 2278 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. 2279 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present. 2280 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present. 2281 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present. 2282 /// \param[in] model The returned histogram will be constructed using this as a model. 2283 /// \param[in] v1Name The name of the column that will fill the x axis. 2284 /// \param[in] v2Name The name of the column that will fill the y axis. 2285 /// \param[in] v3Name The name of the column that will fill the z axis. 2286 /// \param[in] wName The name of the column that will provide the weights. 
2287 /// \return the bidimensional profile wrapped in a RResultPtr. 2288 /// 2289 /// This action is *lazy*: upon invocation of this method the calculation is 2290 /// booked but not executed. Also see RResultPtr. 2291 /// 2292 /// ### Example usage: 2293 /// ~~~{.cpp} 2294 /// // Deduce column types (this invocation needs jitting internally) 2295 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, 2296 /// "xValues", "yValues", "zValues", "weight"); 2297 /// // Explicit column types 2298 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, 2299 /// "xValues", "yValues", "zValues", "weight"); 2300 /// ~~~ 2301 /// 2302 /// See the first Profile2D() overload for more details. 2303 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 2304 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 2305 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, 2306 std::string_view v3Name, std::string_view wName) 2307 { 2308 std::shared_ptr<::TProfile2D> h(nullptr); 2309 { 2310 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2311 h = model.GetProfile(); 2312 } 2313 2314 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { 2315 throw std::runtime_error("2D profiles with no axes limits are not supported yet."); 2316 } 2317 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName}; 2318 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2319 ? ColumnNames_t() 2320 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2321 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr); 2322 } 2323 2324 /// \brief Fill and return a two-dimensional profile (*lazy action*). 2325 /// See the first Profile2D() overload for more details. 
2326 template <typename V1, typename V2, typename V3, typename W> 2327 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model) 2328 { 2329 return Profile2D<V1, V2, V3, W>(model, "", "", "", ""); 2330 } 2331 2332 //////////////////////////////////////////////////////////////////////////// 2333 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*). 2334 /// 2335 /// Type T must provide at least: 2336 /// - a copy-constructor 2337 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList 2338 /// (these types can also be passed as template parameters to this method) 2339 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the 2340 /// objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that 2341 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in 2342 /// the TCollection*). 2343 /// 2344 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present. 2345 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object. 2346 /// \tparam T The type of the object to fill. Automatically deduced. 2347 /// \param[in] model The model to be considered to build the new return value. 2348 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill` 2349 /// \return the filled object wrapped in a RResultPtr. 2350 /// 2351 /// The user gives up ownership of the model object. 2352 /// The list of column names to be used for filling must always be specified. 2353 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. 2354 /// Also see RResultPtr. 
2355 /// 2356 /// ### Example usage: 2357 /// ~~~{.cpp} 2358 /// MyClass obj; 2359 /// // Deduce column types (this invocation needs jitting internally, and in this case 2360 /// // MyClass needs to be known to the interpreter) 2361 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"}); 2362 /// // explicit column types 2363 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"}); 2364 /// ~~~ 2365 /// 2366 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T> 2367 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList) 2368 { 2369 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model)); 2370 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) { 2371 throw std::runtime_error("The absence of axes limits is not supported yet."); 2372 } 2373 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr, 2374 columnList.size()); 2375 } 2376 2377 //////////////////////////////////////////////////////////////////////////// 2378 /// \brief Return a TStatistic object, filled once per event (*lazy action*). 2379 /// 2380 /// \tparam V The type of the value column 2381 /// \param[in] value The name of the column with the values to fill the statistics with. 2382 /// \return the filled TStatistic object wrapped in a RResultPtr. 
2383 /// 2384 /// ### Example usage: 2385 /// ~~~{.cpp} 2386 /// // Deduce column type (this invocation needs jitting internally) 2387 /// auto stats0 = myDf.Stats("values"); 2388 /// // Explicit column type 2389 /// auto stats1 = myDf.Stats<float>("values"); 2390 /// ~~~ 2391 /// 2392 template <typename V = RDFDetail::RInferredType> 2393 RResultPtr<TStatistic> Stats(std::string_view value = "") 2394 { 2395 ColumnNames_t columns; 2396 if (!value.empty()) { 2397 columns.emplace_back(std::string(value)); 2398 } 2399 const auto validColumnNames = GetValidatedColumnNames(1, columns); 2400 if (std::is_same<V, RDFDetail::RInferredType>::value) { 2401 return Fill(TStatistic(), validColumnNames); 2402 } else { 2403 return Fill<V>(TStatistic(), validColumnNames); 2404 } 2405 } 2406 2407 //////////////////////////////////////////////////////////////////////////// 2408 /// \brief Return a TStatistic object, filled once per event (*lazy action*). 2409 /// 2410 /// \tparam V The type of the value column 2411 /// \tparam W The type of the weight column 2412 /// \param[in] value The name of the column with the values to fill the statistics with. 2413 /// \param[in] weight The name of the column with the weights to fill the statistics with. 2414 /// \return the filled TStatistic object wrapped in a RResultPtr. 
2415 /// 2416 /// ### Example usage: 2417 /// ~~~{.cpp} 2418 /// // Deduce column types (this invocation needs jitting internally) 2419 /// auto stats0 = myDf.Stats("values", "weights"); 2420 /// // Explicit column types 2421 /// auto stats1 = myDf.Stats<int, float>("values", "weights"); 2422 /// ~~~ 2423 /// 2424 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 2425 RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight) 2426 { 2427 ColumnNames_t columns{std::string(value), std::string(weight)}; 2428 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value; 2429 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value; 2430 const auto validColumnNames = GetValidatedColumnNames(2, columns); 2431 // We have 3 cases: 2432 // 1. Both types are inferred: we use Fill and let the jit kick in. 2433 // 2. One of the two types is explicit and the other one is inferred: the case is not supported. 2434 // 3. Both types are explicit: we invoke the fully compiled Fill method. 2435 if (vIsInferred && wIsInferred) { 2436 return Fill(TStatistic(), validColumnNames); 2437 } else if (vIsInferred != wIsInferred) { 2438 std::string error("The "); 2439 error += vIsInferred ? "value " : "weight "; 2440 error += "column type is explicit, while the "; 2441 error += vIsInferred ? "weight " : "value "; 2442 error += " is specified to be inferred. This case is not supported: please specify both types or none."; 2443 throw std::runtime_error(error); 2444 } else { 2445 return Fill<V, W>(TStatistic(), validColumnNames); 2446 } 2447 } 2448 2449 //////////////////////////////////////////////////////////////////////////// 2450 /// \brief Return the minimum of processed column values (*lazy action*). 2451 /// \tparam T The type of the branch/column. 2452 /// \param[in] columnName The name of the branch/column to be treated. 
2453 /// \return the minimum value of the selected column wrapped in a RResultPtr. 2454 /// 2455 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2456 /// template specialization of this method. 2457 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise. 2458 /// 2459 /// This action is *lazy*: upon invocation of this method the calculation is 2460 /// booked but not executed. Also see RResultPtr. 2461 /// 2462 /// ### Example usage: 2463 /// ~~~{.cpp} 2464 /// // Deduce column type (this invocation needs jitting internally) 2465 /// auto minVal0 = myDf.Min("values"); 2466 /// // Explicit column type 2467 /// auto minVal1 = myDf.Min<double>("values"); 2468 /// ~~~ 2469 /// 2470 template <typename T = RDFDetail::RInferredType> 2471 RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "") 2472 { 2473 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2474 using RetType_t = RDFDetail::MinReturnType_t<T>; 2475 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max()); 2476 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr); 2477 } 2478 2479 //////////////////////////////////////////////////////////////////////////// 2480 /// \brief Return the maximum of processed column values (*lazy action*). 2481 /// \tparam T The type of the branch/column. 2482 /// \param[in] columnName The name of the branch/column to be treated. 2483 /// \return the maximum value of the selected column wrapped in a RResultPtr. 2484 /// 2485 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2486 /// template specialization of this method. 2487 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise. 
2488 /// 2489 /// This action is *lazy*: upon invocation of this method the calculation is 2490 /// booked but not executed. Also see RResultPtr. 2491 /// 2492 /// ### Example usage: 2493 /// ~~~{.cpp} 2494 /// // Deduce column type (this invocation needs jitting internally) 2495 /// auto maxVal0 = myDf.Max("values"); 2496 /// // Explicit column type 2497 /// auto maxVal1 = myDf.Max<double>("values"); 2498 /// ~~~ 2499 /// 2500 template <typename T = RDFDetail::RInferredType> 2501 RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "") 2502 { 2503 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2504 using RetType_t = RDFDetail::MaxReturnType_t<T>; 2505 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest()); 2506 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr); 2507 } 2508 2509 //////////////////////////////////////////////////////////////////////////// 2510 /// \brief Return the mean of processed column values (*lazy action*). 2511 /// \tparam T The type of the branch/column. 2512 /// \param[in] columnName The name of the branch/column to be treated. 2513 /// \return the mean value of the selected column wrapped in a RResultPtr. 2514 /// 2515 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2516 /// template specialization of this method. 2517 /// 2518 /// This action is *lazy*: upon invocation of this method the calculation is 2519 /// booked but not executed. Also see RResultPtr. 
2520 /// 2521 /// ### Example usage: 2522 /// ~~~{.cpp} 2523 /// // Deduce column type (this invocation needs jitting internally) 2524 /// auto meanVal0 = myDf.Mean("values"); 2525 /// // Explicit column type 2526 /// auto meanVal1 = myDf.Mean<double>("values"); 2527 /// ~~~ 2528 /// 2529 template <typename T = RDFDetail::RInferredType> 2530 RResultPtr<double> Mean(std::string_view columnName = "") 2531 { 2532 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2533 auto meanV = std::make_shared<double>(0); 2534 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr); 2535 } 2536 2537 //////////////////////////////////////////////////////////////////////////// 2538 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*). 2539 /// \tparam T The type of the branch/column. 2540 /// \param[in] columnName The name of the branch/column to be treated. 2541 /// \return the standard deviation value of the selected column wrapped in a RResultPtr. 2542 /// 2543 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2544 /// template specialization of this method. 2545 /// 2546 /// This action is *lazy*: upon invocation of this method the calculation is 2547 /// booked but not executed. Also see RResultPtr. 2548 /// 2549 /// ### Example usage: 2550 /// ~~~{.cpp} 2551 /// // Deduce column type (this invocation needs jitting internally) 2552 /// auto stdDev0 = myDf.StdDev("values"); 2553 /// // Explicit column type 2554 /// auto stdDev1 = myDf.StdDev<double>("values"); 2555 /// ~~~ 2556 /// 2557 template <typename T = RDFDetail::RInferredType> 2558 RResultPtr<double> StdDev(std::string_view columnName = "") 2559 { 2560 const auto userColumns = columnName.empty() ? 
ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2561 auto stdDeviationV = std::make_shared<double>(0); 2562 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr); 2563 } 2564 2565 // clang-format off 2566 //////////////////////////////////////////////////////////////////////////// 2567 /// \brief Return the sum of processed column values (*lazy action*). 2568 /// \tparam T The type of the branch/column. 2569 /// \param[in] columnName The name of the branch/column. 2570 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible. 2571 /// \return the sum of the selected column wrapped in a RResultPtr. 2572 /// 2573 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2574 /// template specialization of this method. 2575 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise. 2576 /// 2577 /// This action is *lazy*: upon invocation of this method the calculation is 2578 /// booked but not executed. Also see RResultPtr. 2579 /// 2580 /// ### Example usage: 2581 /// ~~~{.cpp} 2582 /// // Deduce column type (this invocation needs jitting internally) 2583 /// auto sum0 = myDf.Sum("values"); 2584 /// // Explicit column type 2585 /// auto sum1 = myDf.Sum<double>("values"); 2586 /// ~~~ 2587 /// 2588 template <typename T = RDFDetail::RInferredType> 2589 RResultPtr<RDFDetail::SumReturnType_t<T>> 2590 Sum(std::string_view columnName = "", 2591 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{}) 2592 { 2593 const auto userColumns = columnName.empty() ? 
ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2594 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue); 2595 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr); 2596 } 2597 // clang-format on 2598 2599 //////////////////////////////////////////////////////////////////////////// 2600 /// \brief Gather filtering statistics. 2601 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr. 2602 /// 2603 /// Calling `Report` on the main `RDataFrame` object gathers stats for 2604 /// all named filters in the call graph. Calling this method on a 2605 /// stored chain state (i.e. a graph node different from the first) gathers 2606 /// the stats for all named filters in the chain section between the original 2607 /// `RDataFrame` and that node (included). Stats are gathered in the same 2608 /// order as the named filters have been added to the graph. 2609 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the 2610 /// effects cuts had. 2611 /// 2612 /// This action is *lazy*: upon invocation of 2613 /// this method the calculation is booked but not executed. See RResultPtr 2614 /// documentation. 2615 /// 2616 /// ### Example usage: 2617 /// ~~~{.cpp} 2618 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2"); 2619 /// auto cutReport = filtered3.Report(); 2620 /// cutReport->Print(); 2621 /// ~~~ 2622 /// 2623 RResultPtr<RCutFlowReport> Report() 2624 { 2625 bool returnEmptyReport = false; 2626 // if this is a RInterface<RLoopManager> on which `Define` has been called, users 2627 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which 2628 // certainly does not contain named filters. 2629 // The number 4 takes into account the implicit columns for entry and slot number 2630 // and their aliases (2 + 2, i.e. 
{r,t}dfentry_ and {r,t}dfslot_) 2631 if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GenerateColumnNames().size() > 4) 2632 returnEmptyReport = true; 2633 2634 auto rep = std::make_shared<RCutFlowReport>(); 2635 using Helper_t = RDFInternal::ReportHelper<Proxied>; 2636 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 2637 2638 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}), 2639 fProxiedPtr, RDFInternal::RColumnRegister(fColRegister)); 2640 2641 return MakeResultPtr(rep, *fLoopManager, std::move(action)); 2642 } 2643 2644 /// \brief Returns the names of the filters created. 2645 /// \return the container of filters names. 2646 /// 2647 /// If called on a root node, all the filters in the computation graph will 2648 /// be printed. For any other node, only the filters upstream of that node. 2649 /// Filters without a name are printed as "Unnamed Filter" 2650 /// This is not an action nor a transformation, just a query to the RDataFrame object. 2651 /// 2652 /// ### Example usage: 2653 /// ~~~{.cpp} 2654 /// auto filtNames = d.GetFilterNames(); 2655 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl; 2656 /// ~~~ 2657 /// 2658 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); } 2659 2660 // clang-format off 2661 //////////////////////////////////////////////////////////////////////////// 2662 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot. 2663 /// \tparam F The type of the aggregator callable. Automatically deduced. 2664 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced. 2665 /// \tparam T The type of the column to apply the reduction to. Automatically deduced. 
2666 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable 2667 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread 2668 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead. 2669 /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted) 2670 /// \return the result of the aggregation wrapped in a RResultPtr. 2671 /// 2672 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is 2673 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted. 2674 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and 2675 /// the value of the column columnName. 2676 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable. 2677 /// Otherwise the signature of aggregator must be `void(U&,T)`. 2678 /// 2679 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions. 2680 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two. 2681 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0]. 2682 /// 2683 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr. 
2684 /// 2685 /// Example usage: 2686 /// ~~~{.cpp} 2687 /// auto aggregator = [](double acc, double x) { return acc * x; }; 2688 /// ROOT::EnableImplicitMT(); 2689 /// // If multithread is enabled, the aggregator function will be called by more threads 2690 /// // and will produce a vector of partial accumulators. 2691 /// // The merger function performs the final aggregation of these partial results. 2692 /// auto merger = [](std::vector<double> &accumulators) { 2693 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) { 2694 /// accumulators[0] *= accumulators[i]; 2695 /// } 2696 /// }; 2697 /// 2698 /// // The accumulator is initialized at this value by every thread. 2699 /// double initValue = 1.; 2700 /// 2701 /// // Multiplies all elements of the column "x" 2702 /// auto result = d.Aggregate(aggregator, merger, "x", initValue); 2703 /// ~~~ 2704 // clang-format on 2705 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type, 2706 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, 2707 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay, 2708 typename U = TTraits::TakeFirstParameter_t<ArgTypes>, 2709 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>> 2710 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity) 2711 { 2712 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay()); 2713 const auto columns = columnName.empty() ? 
ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2714 2715 const auto validColumnNames = GetValidatedColumnNames(1, columns); 2716 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>()); 2717 2718 auto accObjPtr = std::make_shared<U>(aggIdentity); 2719 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>; 2720 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 2721 auto action = std::make_unique<Action_t>( 2722 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames, 2723 fProxiedPtr, fColRegister); 2724 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action)); 2725 } 2726 2727 // clang-format off 2728 //////////////////////////////////////////////////////////////////////////// 2729 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot. 2730 /// \tparam F The type of the aggregator callable. Automatically deduced. 2731 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced. 2732 /// \tparam T The type of the column to apply the reduction to. Automatically deduced. 2733 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable 2734 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread 2735 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead. 2736 /// \return the result of the aggregation wrapped in a RResultPtr. 2737 /// 2738 /// See previous Aggregate overload for more information. 
2739 // clang-format on 2740 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type, 2741 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, 2742 typename U = TTraits::TakeFirstParameter_t<ArgTypes>, 2743 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>> 2744 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "") 2745 { 2746 static_assert( 2747 std::is_default_constructible<U>::value, 2748 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)"); 2749 return Aggregate(std::move(aggregator), std::move(merger), columnName, U()); 2750 } 2751 2752 // clang-format off 2753 //////////////////////////////////////////////////////////////////////////// 2754 /// \brief Book execution of a custom action using a user-defined helper object. 2755 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present. 2756 /// \tparam OtherColumns A list of the types of the other columns used by this action 2757 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose. 2758 /// \param[in] helper The Action Helper to be scheduled. 2759 /// \param[in] columns The names of the columns on which the helper acts. 2760 /// \return the result of the helper wrapped in a RResultPtr. 2761 /// 2762 /// This method books a custom action for execution. The behavior of the action is completely dependent on the 2763 /// Helper object provided by the caller. The required interface for the helper is described below (more 2764 /// methods that the ones required can be present, e.g. 
a constructor that takes the number of worker threads is usually useful): 2765 /// 2766 /// ### Mandatory interface 2767 /// 2768 /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>` 2769 /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible. 2770 /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged. 2771 /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type 2772 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called 2773 /// _before_ Initialize(), because the RResultPtr is constructed before the event loop is started. 2774 /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations. 2775 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper, 2776 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations). 2777 /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event 2778 /// loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader 2779 /// that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations; 2780 /// it should not usually serve any purpose for the Helper. This method is often no-op for simple helpers. 2781 /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method 2782 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value: 2783 /// this parameter is there to facilitate writing thread-safe helpers. 
The other arguments will be the values of 2784 /// the requested columns for the particular entry being processed. 2785 /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result. 2786 /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in 2787 /// diagnostics, SaveGraph(), etc. 2788 /// 2789 /// ### Optional methods 2790 /// 2791 /// If these methods are implemented they enable extra functionality as per the description below. 2792 /// 2793 /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'. 2794 /// Different threads might call this method concurrently, but will do so with different 'slot' numbers. 2795 /// RDataFrame leverages this method to implement RResultPtr::OnPartialResult(). 2796 /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the 2797 /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing 2798 /// of every sample, as in DefinePerSample(). 2799 /// * `Helper MakeNew(void *newResult)`: if implemented, it enables varying the action's result with VariationsFor(). It takes a 2800 /// type-erased new result that can be safely cast to a `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should 2801 /// be used as the action's output result. 2802 /// 2803 /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled 2804 /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter. 2805 /// 2806 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr. 
2807 /// 2808 /// ### Examples 2809 /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper. 2810 /// 2811 /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx. 2812 /// 2813 // clang-format on 2814 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper> 2815 RResultPtr<typename std::decay_t<Helper>::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {}) 2816 { 2817 using HelperT = std::decay_t<Helper>; 2818 // TODO add more static sanity checks on Helper 2819 using AH = RDFDetail::RActionImpl<HelperT>; 2820 static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value, 2821 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>"); 2822 2823 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper)); 2824 auto resPtr = hPtr->GetResultPtr(); 2825 2826 if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) { 2827 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{}); 2828 } else { 2829 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr, 2830 fProxiedPtr, columns.size()); 2831 } 2832 } 2833 2834 //////////////////////////////////////////////////////////////////////////// 2835 /// \brief Provides a representation of the columns in the dataset. 2836 /// \tparam ColumnTypes variadic list of branch/column types. 2837 /// \param[in] columnList Names of the columns to be displayed. 2838 /// \param[in] nRows Number of events for each column to be displayed. 2839 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row. 2840 /// \return the `RDisplay` instance wrapped in a RResultPtr. 
2841 /// 2842 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular 2843 /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will 2844 /// return a complete version through `RDisplay::AsString()`. 2845 /// 2846 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see 2847 /// RResultPtr. 2848 /// 2849 /// Example usage: 2850 /// ~~~{.cpp} 2851 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries 2852 /// auto d1 = rdf.Display(""); 2853 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries 2854 /// auto d2 = d.Display({"x", "y"}, 128); 2855 /// // Printing the short representations, the event loop will run 2856 /// d1->Print(); 2857 /// d2->Print(); 2858 /// ~~~ 2859 template <typename... ColumnTypes> 2860 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) 2861 { 2862 CheckIMTDisabled("Display"); 2863 auto newCols = columnList; 2864 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column 2865 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements); 2866 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>; 2867 // Need to add ULong64_t type corresponding to the first column rdfentry_ 2868 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>( 2869 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr); 2870 } 2871 2872 //////////////////////////////////////////////////////////////////////////// 2873 /// \brief Provides a representation of the columns in the dataset. 2874 /// \param[in] columnList Names of the columns to be displayed. 
2875 /// \param[in] nRows Number of events for each column to be displayed. 2876 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row. 2877 /// \return the `RDisplay` instance wrapped in a RResultPtr. 2878 /// 2879 /// This overload automatically infers the column types. 2880 /// See the previous overloads for further details. 2881 /// 2882 /// Invoked when no types are specified to Display 2883 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) 2884 { 2885 CheckIMTDisabled("Display"); 2886 auto newCols = columnList; 2887 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column 2888 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements); 2889 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>; 2890 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>( 2891 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr, 2892 columnList.size() + 1); 2893 } 2894 2895 //////////////////////////////////////////////////////////////////////////// 2896 /// \brief Provides a representation of the columns in the dataset. 2897 /// \param[in] columnNameRegexp A regular expression to select the columns. 2898 /// \param[in] nRows Number of events for each column to be displayed. 2899 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row. 2900 /// \return the `RDisplay` instance wrapped in a RResultPtr. 2901 /// 2902 /// The existing columns are matched against the regular expression. If the string provided 2903 /// is empty, all columns are selected. 2904 /// See the previous overloads for further details. 
2905 RResultPtr<RDisplay> 2906 Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10) 2907 { 2908 const auto columnNames = GetColumnNames(); 2909 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display"); 2910 return Display(selectedColumns, nRows, nMaxCollectionElements); 2911 } 2912 2913 //////////////////////////////////////////////////////////////////////////// 2914 /// \brief Provides a representation of the columns in the dataset. 2915 /// \param[in] columnList Names of the columns to be displayed. 2916 /// \param[in] nRows Number of events for each column to be displayed. 2917 /// \param[in] nMaxCollectionElements Number of maximum elements in collection. 2918 /// \return the `RDisplay` instance wrapped in a RResultPtr. 2919 /// 2920 /// See the previous overloads for further details. 2921 RResultPtr<RDisplay> 2922 Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) 2923 { 2924 ColumnNames_t selectedColumns(columnList); 2925 return Display(selectedColumns, nRows, nMaxCollectionElements); 2926 } 2927 2928 private: 2929 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type> 2930 std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>> 2931 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where) 2932 { 2933 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine 2934 RDFInternal::CheckValidCppVarName(name, where); 2935 RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(), 2936 fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{}); 2937 } else { 2938 RDFInternal::CheckForDefinition(where, name, fColRegister, fLoopManager->GetBranchNames(), 2939 fDataSource ? 
fDataSource->GetColumnNames() : ColumnNames_t{}); 2940 RDFInternal::CheckForNoVariations(where, name, fColRegister); 2941 } 2942 2943 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types; 2944 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf< 2945 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type; 2946 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf< 2947 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type; 2948 2949 constexpr auto nColumns = ColTypes_t::list_size; 2950 2951 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); 2952 CheckAndFillDSColumns(validColumnNames, ColTypes_t()); 2953 2954 // Declare return type to the interpreter, for future use by jitted actions 2955 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType)); 2956 if (retTypeName.empty()) { 2957 // The type is not known to the interpreter. 2958 // We must not error out here, but if/when this column is used in jitted code 2959 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType)); 2960 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; 2961 } 2962 2963 using NewCol_t = RDFDetail::RDefine<F, DefineType>; 2964 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames, 2965 fColRegister, *fLoopManager); 2966 2967 RDFInternal::RColumnRegister newCols(fColRegister); 2968 newCols.AddDefine(std::move(newColumn)); 2969 2970 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 2971 2972 return newInterface; 2973 } 2974 2975 // This overload is chosen when the callable passed to Define or DefineSlot returns void. 2976 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because 2977 // this way compilation of `Define` has no way to continue after throwing the error. 
2978 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type, 2979 bool IsFStringConv = std::is_convertible<F, std::string>::value, 2980 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value> 2981 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>> 2982 DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &) 2983 { 2984 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value, 2985 "Error in `Define`: type returned by expression is not default-constructible"); 2986 return *this; // never reached 2987 } 2988 2989 template <typename... ColumnTypes> 2990 RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view fullTreeName, std::string_view filename, 2991 const ColumnNames_t &columnList, const RSnapshotOptions &options) 2992 { 2993 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot"); 2994 2995 RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size()); 2996 // validCols has aliases resolved, while columnListWithoutSizeColumns still has aliases in it. 
2997 const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns); 2998 RDFInternal::CheckForDuplicateSnapshotColumns(validCols); 2999 CheckAndFillDSColumns(validCols, TTraits::TypeList<ColumnTypes...>()); 3000 3001 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName); 3002 const auto &treename = parsedTreePath.fTreeName; 3003 const auto &dirname = parsedTreePath.fDirName; 3004 3005 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{ 3006 std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options}); 3007 3008 ::TDirectory::TContext ctxt; 3009 3010 // The CreateLMFromTTree function by default opens the file passed as input 3011 // to check for the presence of the TTree inside. But at this moment the 3012 // filename we are using here corresponds to a file which does not exist yet, 3013 // i.e. the output file of the Snapshot call. Thus, checkFile=false will 3014 // prevent the function from trying to open a non-existent file. 3015 auto newRDF = std::make_shared<RInterface<RLoopManager>>(ROOT::Detail::RDF::CreateLMFromTTree( 3016 fullTreeName, filename, /*defaultColumns=*/columnListWithoutSizeColumns, /*checkFile=*/false)); 3017 3018 // The Snapshot helper will use validCols (with aliases resolved) as input columns, and 3019 // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column names. 3020 auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs, 3021 fProxiedPtr); 3022 3023 if (!options.fLazy) 3024 *resPtr; 3025 return resPtr; 3026 } 3027 3028 //////////////////////////////////////////////////////////////////////////// 3029 /// \brief Implementation of cache. 3030 template <typename... ColTypes, std::size_t... 
S> 3031 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>) 3032 { 3033 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot"); 3034 3035 // Check at compile time that the columns types are copy constructible 3036 constexpr bool areCopyConstructible = 3037 RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value; 3038 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet."); 3039 3040 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size()); 3041 3042 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...); 3043 auto ds = std::make_unique<RLazyDS<ColTypes...>>( 3044 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...); 3045 3046 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns)); 3047 3048 return cachedRDF; 3049 } 3050 3051 template <bool IsSingleColumn, typename F> 3052 RInterface<Proxied, DS_t> 3053 VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, 3054 const std::vector<std::string> &variationTags, std::string_view variationName) 3055 { 3056 using F_t = std::decay_t<F>; 3057 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types; 3058 using RetType = typename TTraits::CallableTraits<F_t>::ret_type; 3059 constexpr auto nColumns = ColTypes_t::list_size; 3060 3061 SanityChecksForVary<RetType>(colNames, variationTags, variationName); 3062 3063 const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns); 3064 CheckAndFillDSColumns(validColumnNames, ColTypes_t{}); 3065 3066 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType)); 3067 if (retTypeName.empty()) { 3068 // The type is not known to the interpreter, but we don't want to error out 3069 // here, rather if/when 
this column is used in jitted code, so we inject a broken but telling type name. 3070 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType)); 3071 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; 3072 } 3073 3074 auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>( 3075 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager, 3076 validColumnNames); 3077 3078 RDFInternal::RColumnRegister newCols(fColRegister); 3079 newCols.AddVariation(std::move(variation)); 3080 3081 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 3082 3083 return newInterface; 3084 } 3085 3086 RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression, 3087 const std::vector<std::string> &variationTags, 3088 std::string_view variationName, bool isSingleColumn) 3089 { 3090 R__ASSERT(!variationTags.empty() && "Must have at least one variation."); 3091 R__ASSERT(!colNames.empty() && "Must have at least one varied column."); 3092 R__ASSERT(!variationName.empty() && "Must provide a variation name."); 3093 3094 for (auto &colName : colNames) { 3095 RDFInternal::CheckValidCppVarName(colName, "Vary"); 3096 RDFInternal::CheckForDefinition("Vary", colName, fColRegister, fLoopManager->GetBranchNames(), 3097 fDataSource ? 
fDataSource->GetColumnNames() : ColumnNames_t{}); 3098 } 3099 RDFInternal::CheckValidCppVarName(variationName, "Vary"); 3100 3101 // when varying multiple columns, they must be different columns 3102 if (colNames.size() > 1) { 3103 std::set<std::string> uniqueCols(colNames.begin(), colNames.end()); 3104 if (uniqueCols.size() != colNames.size()) 3105 throw std::logic_error("A column name was passed to the same Vary invocation multiple times."); 3106 } 3107 3108 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 3109 auto jittedVariation = 3110 RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, fDataSource, 3111 fColRegister, fLoopManager->GetBranchNames(), upcastNodeOnHeap, isSingleColumn); 3112 3113 RDFInternal::RColumnRegister newColRegister(fColRegister); 3114 newColRegister.AddVariation(std::move(jittedVariation)); 3115 3116 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister)); 3117 3118 return newInterface; 3119 } 3120 3121 template <typename Helper, typename ActionResultType> 3122 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr, 3123 const std::shared_ptr<Helper> &hPtr, 3124 TTraits::TypeList<RDFDetail::RInferredType>) 3125 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{}) 3126 { 3127 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u); 3128 } 3129 3130 template <typename Helper, typename ActionResultType, typename... Others> 3131 RResultPtr<ActionResultType> 3132 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &, 3133 const std::shared_ptr<Helper>& /*hPtr*/, 3134 Others...) 3135 { 3136 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires " 3137 "columns! 
The action helper type was ") + 3138 typeid(Helper).name()); 3139 return {}; 3140 } 3141 3142 protected: 3143 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, 3144 const RDFInternal::RColumnRegister &colRegister) 3145 : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied) 3146 { 3147 } 3148 3149 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; } 3150 }; 3151 3152 } // namespace RDF 3153 3154 } // namespace ROOT 3155 3156 #endif // ROOT_RDF_INTERFACE
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |