|
|
|||
File indexing completed on 2025-12-27 11:36:24
0001 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017 0002 0003 /************************************************************************* 0004 * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers. * 0005 * All rights reserved. * 0006 * * 0007 * For the licensing terms see $ROOTSYS/LICENSE. * 0008 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0009 *************************************************************************/ 0010 0011 #ifndef ROOT_RDF_TINTERFACE 0012 #define ROOT_RDF_TINTERFACE 0013 0014 #include "ROOT/RDataSource.hxx" 0015 #include "ROOT/RDF/ActionHelpers.hxx" 0016 #include "ROOT/RDF/HistoModels.hxx" 0017 #include "ROOT/RDF/InterfaceUtils.hxx" 0018 #include "ROOT/RDF/RColumnRegister.hxx" 0019 #include "ROOT/RDF/RDefaultValueFor.hxx" 0020 #include "ROOT/RDF/RDefine.hxx" 0021 #include "ROOT/RDF/RDefinePerSample.hxx" 0022 #include "ROOT/RDF/RFilter.hxx" 0023 #include "ROOT/RDF/RInterfaceBase.hxx" 0024 #include "ROOT/RDF/RVariation.hxx" 0025 #include "ROOT/RDF/RLazyDSImpl.hxx" 0026 #include "ROOT/RDF/RLoopManager.hxx" 0027 #include "ROOT/RDF/RRange.hxx" 0028 #include "ROOT/RDF/RFilterWithMissingValues.hxx" 0029 #include "ROOT/RDF/Utils.hxx" 0030 #include "ROOT/RDF/RDFDescription.hxx" 0031 #include "ROOT/RDF/RVariationsDescription.hxx" 0032 #include "ROOT/RResultPtr.hxx" 0033 #include "ROOT/RSnapshotOptions.hxx" 0034 #include <string_view> 0035 #include "ROOT/RVec.hxx" 0036 #include "ROOT/TypeTraits.hxx" 0037 #include "RtypesCore.h" // for ULong64_t 0038 #include "TDirectory.h" 0039 #include "TH1.h" // For Histo actions 0040 #include "TH2.h" // For Histo actions 0041 #include "TH3.h" // For Histo actions 0042 #include "THn.h" 0043 #include "THnSparse.h" 0044 #include "TProfile.h" 0045 #include "TProfile2D.h" 0046 #include "TStatistic.h" 0047 0048 // TODO: Needed to show the info message in Snapshot, remove in 6.40 0049 #include "ROOT/RLogger.hxx" 0050 #include "ROOT/RVersion.hxx" 0051 #include "TEnv.h" 0052 #include <cstdlib> 0053 
#include <cstring> 0054 0055 #include <algorithm> 0056 #include <cstddef> 0057 #include <initializer_list> 0058 #include <iterator> // std::back_inserter 0059 #include <limits> 0060 #include <memory> 0061 #include <set> 0062 #include <sstream> 0063 #include <stdexcept> 0064 #include <string> 0065 #include <type_traits> // is_same, enable_if 0066 #include <typeinfo> 0067 #include <unordered_set> 0068 #include <utility> // std::index_sequence 0069 #include <vector> 0070 #include <any> 0071 0072 class TGraph; 0073 0074 // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface 0075 namespace ROOT { 0076 void DisableImplicitMT(); 0077 bool IsImplicitMTEnabled(); 0078 void EnableImplicitMT(UInt_t numthreads); 0079 class RDataFrame; 0080 } // namespace ROOT 0081 namespace cling { 0082 std::string printValue(ROOT::RDataFrame *tdf); 0083 } 0084 0085 namespace ROOT { 0086 namespace RDF { 0087 namespace RDFDetail = ROOT::Detail::RDF; 0088 namespace RDFInternal = ROOT::Internal::RDF; 0089 namespace TTraits = ROOT::TypeTraits; 0090 0091 template <typename Proxied, typename DataSource> 0092 class RInterface; 0093 0094 using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>; 0095 } // namespace RDF 0096 0097 namespace Internal { 0098 namespace RDF { 0099 class GraphCreatorHelper; 0100 void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange); 0101 void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end); 0102 void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec); 0103 void TriggerRun(ROOT::RDF::RNode node); 0104 std::string GetDataSourceLabel(const ROOT::RDF::RNode &node); 0105 void SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline); 0106 } // namespace RDF 0107 } // namespace Internal 0108 0109 namespace RDF { 0110 0111 // clang-format off 0112 /** 0113 * \class ROOT::RDF::RInterface 0114 * \ingroup dataframe 0115 * 
\brief The public interface to the RDataFrame federation of classes. 0116 * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually. 0117 * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default. 0118 * 0119 * The documentation of each method features a one liner illustrating how to use the method, for example showing how 0120 * the majority of the template parameters are automatically deduced requiring no or very little effort by the user. 0121 */ 0122 // clang-format on 0123 template <typename Proxied, typename DataSource = void> 0124 class RInterface : public RInterfaceBase { 0125 using DS_t = DataSource; 0126 using RFilterBase = RDFDetail::RFilterBase; 0127 using RRangeBase = RDFDetail::RRangeBase; 0128 using RLoopManager = RDFDetail::RLoopManager; 0129 friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt 0130 friend class RDFInternal::GraphDrawing::GraphCreatorHelper; 0131 0132 template <typename T, typename W> 0133 friend class RInterface; 0134 0135 friend void RDFInternal::TriggerRun(RNode node); 0136 friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange); 0137 friend void RDFInternal::ChangeBeginAndEndEntries(const RNode &node, Long64_t start, Long64_t end); 0138 friend void RDFInternal::ChangeSpec(const RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec); 0139 friend std::string ROOT::Internal::RDF::GetDataSourceLabel(const RNode &node); 0140 friend void ROOT::Internal::RDF::SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline); 0141 std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface. 0142 0143 public: 0144 //////////////////////////////////////////////////////////////////////////// 0145 /// \brief Copy-assignment operator for RInterface. 
0146 RInterface &operator=(const RInterface &) = default; 0147 0148 //////////////////////////////////////////////////////////////////////////// 0149 /// \brief Copy-ctor for RInterface. 0150 RInterface(const RInterface &) = default; 0151 0152 //////////////////////////////////////////////////////////////////////////// 0153 /// \brief Move-ctor for RInterface. 0154 RInterface(RInterface &&) = default; 0155 0156 //////////////////////////////////////////////////////////////////////////// 0157 /// \brief Move-assignment operator for RInterface. 0158 RInterface &operator=(RInterface &&) = default; 0159 0160 //////////////////////////////////////////////////////////////////////////// 0161 /// \brief Build a RInterface from a RLoopManager. 0162 /// This constructor is only available for RInterface<RLoopManager>. 0163 template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>> 0164 RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied) 0165 { 0166 } 0167 0168 //////////////////////////////////////////////////////////////////////////// 0169 /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode. 0170 /// Different RDataFrame methods return different C++ types. All nodes, however, 0171 /// can be cast to this common type at the cost of a small performance penalty. 0172 /// This allows, for example, storing RDataFrame nodes in a vector, or passing them 0173 /// around via (non-template, C++11) helper functions. 0174 /// Example usage: 0175 /// ~~~{.cpp} 0176 /// // a function that conditionally adds a Range to a RDataFrame node. 0177 /// RNode MaybeAddRange(RNode df, bool mustAddRange) 0178 /// { 0179 /// return mustAddRange ? df.Range(1) : df; 0180 /// } 0181 /// // use as : 0182 /// ROOT::RDataFrame df(10); 0183 /// auto maybeRanged = MaybeAddRange(df, true); 0184 /// ~~~ 0185 /// Note that it is not a problem to pass RNode's by value. 
0186 operator RNode() const 0187 { 0188 return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister); 0189 } 0190 0191 //////////////////////////////////////////////////////////////////////////// 0192 /// \brief Append a filter to the call graph. 0193 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool` 0194 /// signalling whether the event has passed the selection (true) or not (false). 0195 /// \param[in] columns Names of the columns/branches in input to the filter function. 0196 /// \param[in] name Optional name of this filter. See `Report`. 0197 /// \return the filter node of the computation graph. 0198 /// 0199 /// Append a filter node at the point of the call graph corresponding to the 0200 /// object this method is called on. 0201 /// The callable `f` should not have side-effects (e.g. modification of an 0202 /// external or static variable) to ensure correct results when implicit 0203 /// multi-threading is active. 0204 /// 0205 /// RDataFrame only evaluates filters when necessary: if multiple filters 0206 /// are chained one after another, they are executed in order and the first 0207 /// one returning false causes the event to be discarded. 0208 /// Even if multiple actions or transformations depend on the same filter, 0209 /// it is executed once per entry. If its result is requested more than 0210 /// once, the cached result is served. 0211 /// 0212 /// ### Example usage: 0213 /// ~~~{.cpp} 0214 /// // C++ callable (function, functor class, lambda...) 
that takes two parameters of the types of "x" and "y" 0215 /// auto filtered = df.Filter(myCut, {"x", "y"}); 0216 /// 0217 /// // String: it must contain valid C++ except that column names can be used instead of variable names 0218 /// auto filtered = df.Filter("x*y > 0"); 0219 /// ~~~ 0220 /// 0221 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested 0222 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work: 0223 /// ~~~{.cpp} 0224 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))") 0225 /// ~~~ 0226 /// but instead this will: 0227 /// ~~~{.cpp} 0228 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))") 0229 /// ~~~ 0230 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> 0231 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> 0232 Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "") 0233 { 0234 RDFInternal::CheckFilter(f); 0235 using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types; 0236 constexpr auto nColumns = ColTypes_t::list_size; 0237 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); 0238 CheckAndFillDSColumns(validColumnNames, ColTypes_t()); 0239 0240 using F_t = RDFDetail::RFilter<F, Proxied>; 0241 0242 auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name); 0243 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister); 0244 } 0245 0246 //////////////////////////////////////////////////////////////////////////// 0247 /// \brief Append a filter to the call graph. 0248 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool` 0249 /// signalling whether the event has passed the selection (true) or not (false). 0250 /// \param[in] name Optional name of this filter. See `Report`. 
0251 /// \return the filter node of the computation graph. 0252 /// 0253 /// Refer to the first overload of this method for the full documentation. 0254 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> 0255 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, std::string_view name) 0256 { 0257 // The sfinae is there in order to pick up the overloaded method which accepts two strings 0258 // rather than this template method. 0259 return Filter(f, {}, name); 0260 } 0261 0262 //////////////////////////////////////////////////////////////////////////// 0263 /// \brief Append a filter to the call graph. 0264 /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool` 0265 /// signalling whether the event has passed the selection (true) or not (false). 0266 /// \param[in] columns Names of the columns/branches in input to the filter function. 0267 /// \return the filter node of the computation graph. 0268 /// 0269 /// Refer to the first overload of this method for the full documentation. 0270 template <typename F> 0271 RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns) 0272 { 0273 return Filter(f, ColumnNames_t{columns}); 0274 } 0275 0276 //////////////////////////////////////////////////////////////////////////// 0277 /// \brief Append a filter to the call graph. 0278 /// \param[in] expression The filter expression in C++ 0279 /// \param[in] name Optional name of this filter. See `Report`. 0280 /// \return the filter node of the computation graph. 0281 /// 0282 /// The expression is just-in-time compiled and used to filter entries. It must 0283 /// be valid C++ syntax in which variable names are substituted with the names 0284 /// of branches/columns. 
0285 /// 0286 /// ### Example usage: 0287 /// ~~~{.cpp} 0288 /// auto filtered_df = df.Filter("myCollection.size() > 3"); 0289 /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minumum collection size"); 0290 /// ~~~ 0291 /// 0292 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested 0293 /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work: 0294 /// ~~~{.cpp} 0295 /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))") 0296 /// ~~~ 0297 /// but instead this will: 0298 /// ~~~{.cpp} 0299 /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))") 0300 /// ~~~ 0301 RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "") 0302 { 0303 // deleted by the jitted call to JitFilterHelper 0304 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 0305 using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type; 0306 RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister); 0307 const auto jittedFilter = 0308 RDFInternal::BookFilterJit(upcastNodeOnHeap, name, expression, fColRegister, nullptr, GetDataSource()); 0309 0310 return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), *fLoopManager, fColRegister); 0311 } 0312 0313 //////////////////////////////////////////////////////////////////////////// 0314 /// \brief Discard entries with missing values 0315 /// \param[in] column Column name whose entries with missing values should be discarded 0316 /// \return The filter node of the computation graph 0317 /// 0318 /// This operation is useful in case an entry of the dataset is incomplete, 0319 /// i.e. if one or more of the columns do not have valid values. 
If the value 0320 /// of the input column is missing for an entry, the entire entry will be 0321 /// discarded from the rest of this branch of the computation graph. 0322 /// 0323 /// Use cases include: 0324 /// * When processing multiple files, one or more of them is missing a column 0325 /// * In horizontal joining with entry matching, a certain dataset has no 0326 /// match for the current entry. 0327 /// 0328 /// ### Example usage: 0329 /// 0330 /// \code{.py} 0331 /// # Assume a dataset with columns [idx, x] matching another dataset with 0332 /// # columns [idx, y]. For idx == 42, the right-hand dataset has no match 0333 /// df = ROOT.RDataFrame(dataset) 0334 /// df_nomissing = df.FilterAvailable("idx").Define("z", "x + y") 0335 /// colz = df_nomissing.Take[int]("z") 0336 /// \endcode 0337 /// 0338 /// \code{.cpp} 0339 /// // Assume a dataset with columns [idx, x] matching another dataset with 0340 /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match 0341 /// ROOT::RDataFrame df{dataset}; 0342 /// auto df_nomissing = df.FilterAvailable("idx") 0343 /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"}); 0344 /// auto colz = df_nomissing.Take<int>("z"); 0345 /// \endcode 0346 /// 0347 /// \note See FilterMissing() if you want to keep only the entries with 0348 /// missing values instead. 0349 RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterAvailable(std::string_view column) 0350 { 0351 const auto columns = ColumnNames_t{column.data()}; 0352 // For now disable this functionality in case of an empty data source and 0353 // the column name was not defined previously. 
0354 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS") 0355 throw std::runtime_error("Unknown column: \"" + std::string(column) + "\""); 0356 using F_t = RDFDetail::RFilterWithMissingValues<Proxied>; 0357 auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ true, fProxiedPtr, fColRegister, columns); 0358 CheckAndFillDSColumns(columns, TTraits::TypeList<void>{}); 0359 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister); 0360 } 0361 0362 //////////////////////////////////////////////////////////////////////////// 0363 /// \brief Keep only the entries that have missing values. 0364 /// \param[in] column Column name whose entries with missing values should be kept 0365 /// \return The filter node of the computation graph 0366 /// 0367 /// This operation is useful in case an entry of the dataset is incomplete, 0368 /// i.e. if one or more of the columns do not have valid values. It only 0369 /// keeps the entries for which the value of the input column is missing. 0370 /// 0371 /// Use cases include: 0372 /// * When processing multiple files, one or more of them is missing a column 0373 /// * In horizontal joining with entry matching, a certain dataset has no 0374 /// match for the current entry. 
0375 /// 0376 /// ### Example usage: 0377 /// 0378 /// \code{.py} 0379 /// # Assume a dataset made of two files vertically chained together, one has 0380 /// # column "x" and the other has column "y" 0381 /// df = ROOT.RDataFrame(dataset) 0382 /// df_valid_col_x = df.FilterMissing("y") 0383 /// df_valid_col_y = df.FilterMissing("x") 0384 /// display_x = df_valid_col_x.Display(("x",)) 0385 /// display_y = df_valid_col_y.Display(("y",)) 0386 /// \endcode 0387 /// 0388 /// \code{.cpp} 0389 /// // Assume a dataset made of two files vertically chained together, one has 0390 /// // column "x" and the other has column "y" 0391 /// ROOT.RDataFrame df{dataset}; 0392 /// auto df_valid_col_x = df.FilterMissing("y"); 0393 /// auto df_valid_col_y = df.FilterMissing("x"); 0394 /// auto display_x = df_valid_col_x.Display<int>({"x"}); 0395 /// auto display_y = df_valid_col_y.Display<int>({"y"}); 0396 /// \endcode 0397 /// 0398 /// \note See FilterAvailable() if you want to discard the entries in case 0399 /// there is a missing value instead. 0400 RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterMissing(std::string_view column) 0401 { 0402 const auto columns = ColumnNames_t{column.data()}; 0403 // For now disable this functionality in case of an empty data source and 0404 // the column name was not defined previously. 0405 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS") 0406 throw std::runtime_error("Unknown column: \"" + std::string(column) + "\""); 0407 using F_t = RDFDetail::RFilterWithMissingValues<Proxied>; 0408 auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ false, fProxiedPtr, fColRegister, columns); 0409 CheckAndFillDSColumns(columns, TTraits::TypeList<void>{}); 0410 return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister); 0411 } 0412 0413 // clang-format off 0414 //////////////////////////////////////////////////////////////////////////// 0415 /// \brief Define a new column. 
0416 /// \param[in] name The name of the defined column. 0417 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0418 /// \param[in] columns Names of the columns/branches in input to the producer function. 0419 /// \return the first node of the computation graph for which the new quantity is defined. 0420 /// 0421 /// Define a column that will be visible from all subsequent nodes 0422 /// of the functional chain. The `expression` is only evaluated for entries that pass 0423 /// all the preceding filters. 0424 /// A new variable is created called `name`, accessible as if it was contained 0425 /// in the dataset from subsequent transformations/actions. 0426 /// 0427 /// Use cases include: 0428 /// * caching the results of complex calculations for easy and efficient multiple access 0429 /// * extraction of quantities of interest from complex objects 0430 /// 0431 /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph. 0432 /// 0433 /// ### Example usage: 0434 /// ~~~{.cpp} 0435 /// // assuming a function with signature: 0436 /// double myComplexCalculation(const RVec<float> &muon_pts); 0437 /// // we can pass it directly to Define 0438 /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"}); 0439 /// // alternatively, we can pass the body of the function as a string, as in Filter: 0440 /// auto df_with_define = df.Define("newColumn", "x*x + y*y"); 0441 /// ~~~ 0442 /// 0443 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested 0444 /// scope), RDataFrame _will not_ add another one in front of the expression. 
So this will not work: 0445 /// ~~~{.cpp} 0446 /// df.Define("x2", "Map(v, [](float e) { return e*e; })") 0447 /// ~~~ 0448 /// but instead this will: 0449 /// ~~~{.cpp} 0450 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })") 0451 /// ~~~ 0452 template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> 0453 RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0454 { 0455 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define"); 0456 } 0457 // clang-format on 0458 0459 // clang-format off 0460 //////////////////////////////////////////////////////////////////////////// 0461 /// \brief Define a new column with a value dependent on the processing slot. 0462 /// \param[in] name The name of the defined column. 0463 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0464 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number). 0465 /// \return the first node of the computation graph for which the new quantity is defined. 0466 /// 0467 /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner. 0468 /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types 0469 /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer 0470 /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with 0471 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. 0472 /// Note that there is no guarantee as to how often each slot will be reached during the event loop. 
0473 /// 0474 /// The following two calls are equivalent, although `DefineSlot` is slightly more performant: 0475 /// ~~~{.cpp} 0476 /// int function(unsigned int, double, double); 0477 /// df.Define("x", function, {"rdfslot_", "column1", "column2"}) 0478 /// df.DefineSlot("x", function, {"column1", "column2"}) 0479 /// ~~~ 0480 /// 0481 /// See Define() for more information. 0482 template <typename F> 0483 RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0484 { 0485 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot"); 0486 } 0487 // clang-format on 0488 0489 // clang-format off 0490 //////////////////////////////////////////////////////////////////////////// 0491 /// \brief Define a new column with a value dependent on the processing slot and the current entry. 0492 /// \param[in] name The name of the defined column. 0493 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0494 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry). 0495 /// \return the first node of the computation graph for which the new quantity is defined. 0496 /// 0497 /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom 0498 /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...` 0499 /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned 0500 /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with 0501 /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. 
0502 /// Note that there is no guarantee as to how often each slot will be reached during the event loop. 0503 /// The second parameter is reserved for a `ULong64_t` representing the current entry being processed by the current thread. 0504 /// 0505 /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant: 0506 /// ~~~{.cpp} 0507 /// int function(unsigned int, ULong64_t, double, double); 0508 /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"}) 0509 /// DefineSlotEntry("x", function, {"column1", "column2"}) 0510 /// ~~~ 0511 /// 0512 /// See Define() for more information. 0513 template <typename F> 0514 RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0515 { 0516 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns, 0517 "DefineSlotEntry"); 0518 } 0519 // clang-format on 0520 0521 //////////////////////////////////////////////////////////////////////////// 0522 /// \brief Define a new column. 0523 /// \param[in] name The name of the defined column. 0524 /// \param[in] expression An expression in C++ which represents the defined value 0525 /// \return the first node of the computation graph for which the new quantity is defined. 0526 /// 0527 /// The expression is just-in-time compiled and used to produce the column entries. 0528 /// It must be valid C++ syntax in which variable names are substituted with the names 0529 /// of branches/columns. 0530 /// 0531 /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested 0532 /// scope), RDataFrame _will not_ add another one in front of the expression. 
So this will not work: 0533 /// ~~~{.cpp} 0534 /// df.Define("x2", "Map(v, [](float e) { return e*e; })") 0535 /// ~~~ 0536 /// but instead this will: 0537 /// ~~~{.cpp} 0538 /// df.Define("x2", "return Map(v, [](float e) { return e*e; })") 0539 /// ~~~ 0540 /// 0541 /// Refer to the first overload of this method for the full documentation. 0542 RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression) 0543 { 0544 constexpr auto where = "Define"; 0545 RDFInternal::CheckValidCppVarName(name, where); 0546 // these checks must be done before jitting lest we throw exceptions in jitted code 0547 RDFInternal::CheckForRedefinition(where, name, fColRegister, 0548 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); 0549 0550 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 0551 auto jittedDefine = 0552 RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister, upcastNodeOnHeap); 0553 0554 RDFInternal::RColumnRegister newCols(fColRegister); 0555 newCols.AddDefine(std::move(jittedDefine)); 0556 0557 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 0558 0559 return newInterface; 0560 } 0561 0562 //////////////////////////////////////////////////////////////////////////// 0563 /// \brief Overwrite the value and/or type of an existing column. 0564 /// \param[in] name The name of the column to redefine. 0565 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0566 /// \param[in] columns Names of the columns/branches in input to the expression. 0567 /// \return the first node of the computation graph for which the quantity is redefined. 0568 /// 0569 /// The old value of the column can be used as an input for the expression. 
0570 /// 0571 /// An exception is thrown in case the column to redefine does not already exist. 0572 /// See Define() for more information. 0573 template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0> 0574 RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0575 { 0576 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine"); 0577 } 0578 0579 // clang-format off 0580 //////////////////////////////////////////////////////////////////////////// 0581 /// \brief Overwrite the value and/or type of an existing column. 0582 /// \param[in] name The name of the column to redefine. 0583 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0584 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot). 0585 /// \return the first node of the computation graph for which the new quantity is defined. 0586 /// 0587 /// The old value of the column can be used as an input for the expression. 0588 /// An exception is thrown in case the column to redefine does not already exist. 0589 /// 0590 /// See DefineSlot() for more information. 0591 // clang-format on 0592 template <typename F> 0593 RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0594 { 0595 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot"); 0596 } 0597 0598 // clang-format off 0599 //////////////////////////////////////////////////////////////////////////// 0600 /// \brief Overwrite the value and/or type of an existing column. 0601 /// \param[in] name The name of the column to redefine. 
0602 /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column. 0603 /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry). 0604 /// \return the first node of the computation graph for which the new quantity is defined. 0605 /// 0606 /// The old value of the column can be used as an input for the expression. 0607 /// An exception is thrown in case the column to re-define does not already exist. 0608 /// 0609 /// See DefineSlotEntry() for more information. 0610 // clang-format on 0611 template <typename F> 0612 RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {}) 0613 { 0614 return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns, 0615 "RedefineSlotEntry"); 0616 } 0617 0618 //////////////////////////////////////////////////////////////////////////// 0619 /// \brief Overwrite the value and/or type of an existing column. 0620 /// \param[in] name The name of the column to redefine. 0621 /// \param[in] expression An expression in C++ which represents the defined value 0622 /// \return the first node of the computation graph for which the new quantity is defined. 0623 /// 0624 /// The expression is just-in-time compiled and used to produce the column entries. 0625 /// It must be valid C++ syntax in which variable names are substituted with the names 0626 /// of branches/columns. 0627 /// 0628 /// The old value of the column can be used as an input for the expression. 0629 /// An exception is thrown in case the column to re-define does not already exist. 0630 /// 0631 /// Aliases cannot be overridden. See the corresponding Define() overload for more information. 
0632 RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression) 0633 { 0634 constexpr auto where = "Redefine"; 0635 RDFInternal::CheckValidCppVarName(name, where); 0636 RDFInternal::CheckForDefinition(where, name, fColRegister, 0637 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); 0638 RDFInternal::CheckForNoVariations(where, name, fColRegister); 0639 0640 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 0641 auto jittedDefine = 0642 RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister, upcastNodeOnHeap); 0643 0644 RDFInternal::RColumnRegister newCols(fColRegister); 0645 newCols.AddDefine(std::move(jittedDefine)); 0646 0647 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 0648 0649 return newInterface; 0650 } 0651 0652 //////////////////////////////////////////////////////////////////////////// 0653 /// \brief In case the value in the given column is missing, provide a default value 0654 /// \tparam T The type of the column 0655 /// \param[in] column Column name where missing values should be replaced by the given default value 0656 /// \param[in] defaultValue Value to provide instead of a missing value 0657 /// \return The node of the graph that will provide a default value 0658 /// 0659 /// This operation is useful in case an entry of the dataset is incomplete, 0660 /// i.e. if one or more of the columns do not have valid values. It does not 0661 /// modify the values of the column, but in case any entry is missing, it 0662 /// will provide the default value to downstream nodes instead. 0663 /// 0664 /// Use cases include: 0665 /// * When processing multiple files, one or more of them is missing a column 0666 /// * In horizontal joining with entry matching, a certain dataset has no 0667 /// match for the current entry. 
0668 /// 0669 /// ### Example usage: 0670 /// 0671 /// \code{.cpp} 0672 /// // Assume a dataset with columns [idx, x] matching another dataset with 0673 /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match 0674 /// ROOT::RDataFrame df{dataset}; 0675 /// auto df_default = df.DefaultValueFor("y", 33) 0676 /// .Define("z", [](int x, int y) { return x + y; }, {"x", "y"}); 0677 /// auto colz = df_default.Take<int>("z"); 0678 /// \endcode 0679 /// 0680 /// \code{.py} 0681 /// df = ROOT.RDataFrame(dataset) 0682 /// df_default = df.DefaultValueFor("y", 33).Define("z", "x + y") 0683 /// colz = df_default.Take[int]("z") 0684 /// \endcode 0685 template <typename T> 0686 RInterface<Proxied, DS_t> DefaultValueFor(std::string_view column, const T &defaultValue) 0687 { 0688 constexpr auto where{"DefaultValueFor"}; 0689 RDFInternal::CheckForNoVariations(where, column, fColRegister); 0690 // For now disable this functionality in case of an empty data source and 0691 // the column name was not defined previously. 0692 if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS") 0693 RDFInternal::CheckForDefinition(where, column, fColRegister, 0694 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); 0695 0696 // Declare return type to the interpreter, for future use by jitted actions 0697 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(T)); 0698 if (retTypeName.empty()) { 0699 // The type is not known to the interpreter. 
0700 // We must not error out here, but if/when this column is used in jitted code 0701 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(T)); 0702 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; 0703 } 0704 0705 const auto validColumnNames = ColumnNames_t{column.data()}; 0706 auto newColumn = std::make_shared<ROOT::Internal::RDF::RDefaultValueFor<T>>( 0707 column, retTypeName, defaultValue, validColumnNames, fColRegister, *fLoopManager); 0708 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>{}); 0709 0710 RDFInternal::RColumnRegister newCols(fColRegister); 0711 newCols.AddDefine(std::move(newColumn)); 0712 0713 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 0714 0715 return newInterface; 0716 } 0717 0718 // clang-format off 0719 //////////////////////////////////////////////////////////////////////////// 0720 /// \brief Define a new column that is updated when the input sample changes. 0721 /// \param[in] name The name of the defined column. 0722 /// \param[in] expression A C++ callable that computes the new value of the defined column. 0723 /// \return the first node of the computation graph for which the new quantity is defined. 0724 /// 0725 /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)` 0726 /// where: 0727 /// - `T` is the type of the defined column 0728 /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify 0729 /// the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame. 0730 /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is 0731 /// being processed (see the class docs for more information). 0732 /// 0733 /// DefinePerSample() is useful to e.g. 
define a quantity that depends on which TTree in which TFile is being 0734 /// processed or to inject a callback into the event loop that is only called when the processing of a new sample 0735 /// starts rather than at every entry. 0736 /// 0737 /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often. 0738 /// 0739 /// ### Example usage: 0740 /// ~~~{.cpp} 0741 /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}}; 0742 /// df.DefinePerSample("weightbysample", 0743 /// [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) 0744 /// { return id.Contains("sample1") ? 1.0f : 2.0f; }); 0745 /// ~~~ 0746 // clang-format on 0747 // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch 0748 template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type> 0749 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression) 0750 { 0751 RDFInternal::CheckValidCppVarName(name, "DefinePerSample"); 0752 RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, 0753 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); 0754 0755 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t)); 0756 if (retTypeName.empty()) { 0757 // The type is not known to the interpreter. 
0758 // We must not error out here, but if/when this column is used in jitted code 0759 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t)); 0760 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; 0761 } 0762 0763 auto newColumn = 0764 std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager); 0765 0766 RDFInternal::RColumnRegister newCols(fColRegister); 0767 newCols.AddDefine(std::move(newColumn)); 0768 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 0769 return newInterface; 0770 } 0771 0772 // clang-format off 0773 //////////////////////////////////////////////////////////////////////////// 0774 /// \brief Define a new column that is updated when the input sample changes. 0775 /// \param[in] name The name of the defined column. 0776 /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value. 0777 /// \return the first node of the computation graph for which the new quantity is defined. 0778 /// 0779 /// The expression is just-in-time compiled and used to produce the column entries. 0780 /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is 0781 /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the 0782 /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information. 0783 /// 0784 /// ### Example usage: 0785 /// ~~~{.py} 0786 /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root']) 0787 /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 
1.0f : 2.0f') 0788 /// ~~~ 0789 /// 0790 /// \note 0791 /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this 0792 /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as 0793 /// input parameters. This is for example the correct way to call this overload when working in PyROOT: 0794 /// ~~~{.py} 0795 /// ROOT.gInterpreter.Declare( 0796 /// """ 0797 /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){ 0798 /// return id.Contains("sample1") ? 1.0f : 2.0f; 0799 /// } 0800 /// """) 0801 /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"]) 0802 /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)") 0803 /// ~~~ 0804 /// 0805 /// \note 0806 /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain 0807 /// column names other than those mentioned above: the expression is evaluated once before the processing of the 0808 /// sample even starts, so column values are not accessible. 0809 // clang-format on 0810 RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression) 0811 { 0812 RDFInternal::CheckValidCppVarName(name, "DefinePerSample"); 0813 // these checks must be done before jitting lest we throw exceptions in jitted code 0814 RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, 0815 GetDataSource() ? 
GetDataSource()->GetColumnNames() : ColumnNames_t{}); 0816 0817 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 0818 auto jittedDefine = 0819 RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap); 0820 0821 RDFInternal::RColumnRegister newCols(fColRegister); 0822 newCols.AddDefine(std::move(jittedDefine)); 0823 0824 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 0825 0826 return newInterface; 0827 } 0828 0829 /// \brief Register systematic variations for a single existing column using custom variation tags. 0830 /// \param[in] colName name of the column for which varied values are provided. 0831 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0832 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0833 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0834 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0835 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 0836 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0837 /// 0838 /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to 0839 /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for 0840 /// results that depend on any varied quantity, a map/dictionary of varied results can be produced with 0841 /// ROOT::RDF::Experimental::VariationsFor (see the example below). 
0842 /// 0843 /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and 0844 /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or 0845 /// indirect dependencies of the column values on some registered variations). The keys will be a composition of 0846 /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below. 0847 /// 0848 /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt. 0849 /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"): 0850 /// ~~~{.cpp} 0851 /// auto nominal_hx = 0852 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"down", "up"}) 0853 /// .Filter("pt > k") 0854 /// .Define("x", someFunc, {"pt"}) 0855 /// .Histo1D("x"); 0856 /// 0857 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0858 /// hx["nominal"].Draw(); 0859 /// hx["pt:down"].Draw("SAME"); 0860 /// hx["pt:up"].Draw("SAME"); 0861 /// ~~~ 0862 /// RDataFrame computes all variations as part of a single loop over the data. 0863 /// In particular, this means that I/O and computation of values shared 0864 /// among variations only happen once for all variations. Thus, the event loop 0865 /// run-time typically scales much better than linearly with the number of 0866 /// variations. 0867 /// 0868 /// RDataFrame lazily computes the varied values required to produce the 0869 /// outputs of \ref ROOT::RDF::Experimental::VariationsFor "VariationsFor()". If \ref 0870 /// ROOT::RDF::Experimental::VariationsFor "VariationsFor()" was not called for a result, the computations are only 0871 /// run for the nominal case. 0872 /// 0873 /// See other overloads for examples when variations are added for multiple existing columns, 0874 /// or when the tags are auto-generated instead of being directly defined. 
0875 template <typename F> 0876 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, 0877 const std::vector<std::string> &variationTags, std::string_view variationName = "") 0878 { 0879 std::vector<std::string> colNames{{std::string(colName)}}; 0880 const std::string theVariationName{variationName.empty() ? colName : variationName}; 0881 0882 return VaryImpl<true>(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags, 0883 theVariationName); 0884 } 0885 0886 /// \brief Register systematic variations for a single existing column using auto-generated variation tags. 0887 /// \param[in] colName name of the column for which varied values are provided. 0888 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0889 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0890 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0891 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0892 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 0893 /// `"1"`, etc. 0894 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0895 /// colName is used if none is provided. 0896 /// 0897 /// This overload of Vary takes an nVariations parameter instead of a list of tag names. 0898 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N` 0899 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`. 
0900 /// 0901 /// Example usage: 0902 /// ~~~{.cpp} 0903 /// auto nominal_hx = 0904 /// df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, 2) 0905 /// .Histo1D("x"); 0906 /// 0907 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0908 /// hx["nominal"].Draw(); 0909 /// hx["x:0"].Draw("SAME"); 0910 /// hx["x:1"].Draw("SAME"); 0911 /// ~~~ 0912 /// 0913 /// \note See also This Vary() overload for more information. 0914 template <typename F> 0915 RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns, 0916 std::size_t nVariations, std::string_view variationName = "") 0917 { 0918 R__ASSERT(nVariations > 0 && "Must have at least one variation."); 0919 0920 std::vector<std::string> variationTags; 0921 variationTags.reserve(nVariations); 0922 for (std::size_t i = 0u; i < nVariations; ++i) 0923 variationTags.emplace_back(std::to_string(i)); 0924 0925 const std::string theVariationName{variationName.empty() ? colName : variationName}; 0926 0927 return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName); 0928 } 0929 0930 /// \brief Register systematic variations for multiple existing columns using custom variation tags. 0931 /// \param[in] colNames set of names of the columns for which varied values are provided. 0932 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0933 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0934 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0935 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0936 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 0937 /// \param[in] variationName a generic name for this set of varied values, e.g. 
`"ptvariation"` 0938 /// 0939 /// This overload of Vary takes a list of column names as first argument and 0940 /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each 0941 /// affected column. The `variationTags` are defined as `{"down", "up"}`. 0942 /// 0943 /// Example usage: 0944 /// ~~~{.cpp} 0945 /// // produce variations "ptAndEta:down" and "ptAndEta:up" 0946 /// auto nominal_hx = 0947 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously 0948 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; }, 0949 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied 0950 /// {"down", "up"}, // variation tags 0951 /// "ptAndEta") // variation name 0952 /// .Histo1D("pt", "eta"); 0953 /// 0954 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 0955 /// hx["nominal"].Draw(); 0956 /// hx["ptAndEta:down"].Draw("SAME"); 0957 /// hx["ptAndEta:up"].Draw("SAME"); 0958 /// ~~~ 0959 /// 0960 /// \note See also This Vary() overload for more information. 0961 0962 template <typename F> 0963 RInterface<Proxied, DS_t> 0964 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, 0965 const std::vector<std::string> &variationTags, std::string_view variationName) 0966 { 0967 return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName); 0968 } 0969 0970 /// \brief Register systematic variations for multiple existing columns using custom variation tags. 0971 /// \param[in] colNames set of names of the columns for which varied values are provided. 0972 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0973 /// take any column values as input, similarly to what happens during Filter and Define calls. 
It must 0974 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0975 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0976 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 0977 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 0978 /// colName is used if none is provided. 0979 /// 0980 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list 0981 /// is avoided. 0982 /// 0983 /// \note See also This Vary() overload for more information. 0984 template <typename F> 0985 RInterface<Proxied, DS_t> 0986 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns, 0987 const std::vector<std::string> &variationTags, std::string_view variationName) 0988 { 0989 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName); 0990 } 0991 0992 /// \brief Register systematic variations for multiple existing columns using auto-generated tags. 0993 /// \param[in] colNames set of names of the columns for which varied values are provided. 0994 /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can 0995 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 0996 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 0997 /// \param[in] inputColumns the names of the columns to be passed to the callable. 0998 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 0999 /// `"1"`, etc. 1000 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 1001 /// colName is used if none is provided. 
1002 /// 1003 /// This overload of Vary takes a list of column names as first argument. 1004 /// It takes an `nVariations` parameter instead of a list of tag names (`variationTags`). Tag names 1005 /// will be auto-generated as the sequence 0...``nVariations-1``. 1006 /// 1007 /// Example usage: 1008 /// ~~~{.cpp} 1009 /// auto nominal_hx = 1010 /// df.Vary({"pt", "eta"}, // the columns that will vary simultaneously 1011 /// [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; }, 1012 /// {"pt", "eta"}, // inputs to the Vary expression, independent of what columns are varied 1013 /// 2, // auto-generated variation tags 1014 /// "ptAndEta") // variation name 1015 /// .Histo1D("pt", "eta"); 1016 /// 1017 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 1018 /// hx["nominal"].Draw(); 1019 /// hx["ptAndEta:0"].Draw("SAME"); 1020 /// hx["ptAndEta:1"].Draw("SAME"); 1021 /// ~~~ 1022 /// 1023 /// \note See also This Vary() overload for more information. 1024 template <typename F> 1025 RInterface<Proxied, DS_t> 1026 Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, 1027 std::size_t nVariations, std::string_view variationName) 1028 { 1029 R__ASSERT(nVariations > 0 && "Must have at least one variation."); 1030 1031 std::vector<std::string> variationTags; 1032 variationTags.reserve(nVariations); 1033 for (std::size_t i = 0u; i < nVariations; ++i) 1034 variationTags.emplace_back(std::to_string(i)); 1035 1036 return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName); 1037 } 1038 1039 /// \brief Register systematic variations for for multiple existing columns using custom variation tags. 1040 /// \param[in] colNames set of names of the columns for which varied values are provided. 1041 /// \param[in] expression a callable that evaluates the varied values for the specified columns. 
The callable can 1042 /// take any column values as input, similarly to what happens during Filter and Define calls. It must 1043 /// return an RVec of varied values, one for each variation tag, in the same order as the tags. 1044 /// \param[in] inputColumns the names of the columns to be passed to the callable. 1045 /// \param[in] inputColumns the names of the columns to be passed to the callable. 1046 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 1047 /// `"1"`, etc. 1048 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 1049 /// colName is used if none is provided. 1050 /// 1051 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list 1052 /// is avoided. 1053 /// 1054 /// \note See also This Vary() overload for more information. 1055 template <typename F> 1056 RInterface<Proxied, DS_t> 1057 Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns, 1058 std::size_t nVariations, std::string_view variationName) 1059 { 1060 return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName); 1061 } 1062 1063 /// \brief Register systematic variations for a single existing column using custom variation tags. 1064 /// \param[in] colName name of the column for which varied values are provided. 1065 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied 1066 /// values for the specified column. 1067 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 1068 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 1069 /// colName is used if none is provided. 
1070 /// 1071 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time 1072 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are 1073 /// defined as `{"down", "up"}`. 1074 /// ~~~{.cpp} 1075 /// auto nominal_hx = 1076 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"}) 1077 /// .Filter("pt > k") 1078 /// .Define("x", someFunc, {"pt"}) 1079 /// .Histo1D("x"); 1080 /// 1081 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 1082 /// hx["nominal"].Draw(); 1083 /// hx["pt:down"].Draw("SAME"); 1084 /// hx["pt:up"].Draw("SAME"); 1085 /// ~~~ 1086 /// 1087 /// \note See also This Vary() overload for more information. 1088 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, 1089 const std::vector<std::string> &variationTags, std::string_view variationName = "") 1090 { 1091 std::vector<std::string> colNames{{std::string(colName)}}; 1092 const std::string theVariationName{variationName.empty() ? colName : variationName}; 1093 1094 return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=*/true); 1095 } 1096 1097 /// \brief Register systematic variations for a single existing column using auto-generated variation tags. 1098 /// \param[in] colName name of the column for which varied values are provided. 1099 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied 1100 /// values for the specified column. 1101 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 1102 /// `"1"`, etc. 1103 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 1104 /// colName is used if none is provided. 
1105 /// 1106 /// This overload adds the possibility for the expression used to evaluate the varied values to be a just-in-time 1107 /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are 1108 /// auto-generated. 1109 /// ~~~{.cpp} 1110 /// auto nominal_hx = 1111 /// df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2) 1112 /// .Histo1D("pt"); 1113 /// 1114 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 1115 /// hx["nominal"].Draw(); 1116 /// hx["pt:0"].Draw("SAME"); 1117 /// hx["pt:1"].Draw("SAME"); 1118 /// ~~~ 1119 /// 1120 /// \note See also This Vary() overload for more information. 1121 RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations, 1122 std::string_view variationName = "") 1123 { 1124 std::vector<std::string> variationTags; 1125 variationTags.reserve(nVariations); 1126 for (std::size_t i = 0u; i < nVariations; ++i) 1127 variationTags.emplace_back(std::to_string(i)); 1128 1129 return Vary(colName, expression, std::move(variationTags), variationName); 1130 } 1131 1132 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags. 1133 /// \param[in] colNames set of names of the columns for which varied values are provided. 1134 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied 1135 /// values for the specified columns. 1136 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 1137 /// `"1"`, etc. 1138 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 1139 /// 1140 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time 1141 /// compiled. It takes an nVariations parameter instead of a list of tag names. 
1142 /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N` 1143 /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`. 1144 /// The example below shows how Vary() is used while dealing with multiple columns. 1145 /// 1146 /// ~~~{.cpp} 1147 /// auto nominal_hx = 1148 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy") 1149 /// .Histo1D("x", "y"); 1150 /// 1151 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 1152 /// hx["nominal"].Draw(); 1153 /// hx["xy:0"].Draw("SAME"); 1154 /// hx["xy:1"].Draw("SAME"); 1155 /// ~~~ 1156 /// 1157 /// \note See also This Vary() overload for more information. 1158 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression, 1159 std::size_t nVariations, std::string_view variationName) 1160 { 1161 std::vector<std::string> variationTags; 1162 variationTags.reserve(nVariations); 1163 for (std::size_t i = 0u; i < nVariations; ++i) 1164 variationTags.emplace_back(std::to_string(i)); 1165 1166 return Vary(colNames, expression, std::move(variationTags), variationName); 1167 } 1168 1169 /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags. 1170 /// \param[in] colNames set of names of the columns for which varied values are provided. 1171 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied 1172 /// values for the specified column. 1173 /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`, 1174 /// `"1"`, etc. 1175 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 1176 /// colName is used if none is provided. 
1177 /// 1178 /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list 1179 /// is avoided. 1180 /// 1181 /// \note See also This Vary() overload for more information. 1182 RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression, 1183 std::size_t nVariations, std::string_view variationName) 1184 { 1185 return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName); 1186 } 1187 1188 /// \brief Register systematic variations for multiple existing columns using custom variation tags. 1189 /// \param[in] colNames set of names of the columns for which varied values are provided. 1190 /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied 1191 /// values for the specified columns. 1192 /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`. 1193 /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`. 1194 /// 1195 /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time 1196 /// compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as 1197 /// `{"down", "up"}`. 1198 /// ~~~{.cpp} 1199 /// auto nominal_hx = 1200 /// df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy") 1201 /// .Histo1D("x", "y"); 1202 /// 1203 /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx); 1204 /// hx["nominal"].Draw(); 1205 /// hx["xy:down"].Draw("SAME"); 1206 /// hx["xy:up"].Draw("SAME"); 1207 /// ~~~ 1208 /// 1209 /// \note See also This Vary() overload for more information. 
1210 RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression, 1211 const std::vector<std::string> &variationTags, std::string_view variationName) 1212 { 1213 return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false); 1214 } 1215 1216 //////////////////////////////////////////////////////////////////////////// 1217 /// \brief Allow to refer to a column with a different name. 1218 /// \param[in] alias name of the column alias 1219 /// \param[in] columnName of the column to be aliased 1220 /// \return the first node of the computation graph for which the alias is available. 1221 /// 1222 /// Aliasing an alias is supported. 1223 /// 1224 /// ### Example usage: 1225 /// ~~~{.cpp} 1226 /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!"); 1227 /// ~~~ 1228 RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName) 1229 { 1230 // The symmetry with Define is clear. We want to: 1231 // - Create globally the alias and return this very node, unchanged 1232 // - Make aliases accessible based on chains and not globally 1233 1234 // Helper to find out if a name is a column 1235 auto &dsColumnNames = GetDataSource() ? 
GetDataSource()->GetColumnNames() : ColumnNames_t{}; 1236 1237 constexpr auto where = "Alias"; 1238 RDFInternal::CheckValidCppVarName(alias, where); 1239 // If the alias name is a column name, there is a problem 1240 RDFInternal::CheckForRedefinition(where, alias, fColRegister, dsColumnNames); 1241 1242 const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0]; 1243 1244 RDFInternal::RColumnRegister newCols(fColRegister); 1245 newCols.AddAlias(alias, validColumnName); 1246 1247 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 1248 1249 return newInterface; 1250 } 1251 1252 //////////////////////////////////////////////////////////////////////////// 1253 /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`. 1254 /// \deprecated Use other overloads that do not require template arguments. 1255 /// \tparam ColumnTypes variadic list of branch/column types. 1256 /// \param[in] treename The name of the output TTree or RNTuple. 1257 /// \param[in] filename The name of the output TFile. 1258 /// \param[in] columnList The list of names of the columns/branches/fields to be written. 1259 /// \param[in] options RSnapshotOptions struct with extra options to pass to the output TFile and TTree/RNTuple. 1260 /// \return a `RDataFrame` that wraps the snapshotted dataset. 1261 /// 1262 template <typename... 
ColumnTypes> 1263 R__DEPRECATED( 1264 6, 40, "Snapshot does not need template arguments anymore, you can safely remove them from this function call.") 1265 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, 1266 const ColumnNames_t &columnList, 1267 const RSnapshotOptions &options = RSnapshotOptions()) 1268 { 1269 return Snapshot(treename, filename, columnList, options); 1270 } 1271 1272 //////////////////////////////////////////////////////////////////////////// 1273 /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`. 1274 /// \param[in] treename The name of the output TTree or RNTuple. 1275 /// \param[in] filename The name of the output TFile. 1276 /// \param[in] columnList The list of names of the columns/branches/fields to be written. 1277 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple. 1278 /// \return a `RDataFrame` that wraps the snapshotted dataset. 1279 /// 1280 /// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source. 1281 /// The types of the columns are automatically inferred and do not need to be specified. 1282 /// 1283 /// Support for writing of nested branches/fields is limited (although RDataFrame is able to read them) and dot ('.') 1284 /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot. 1285 /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also 1286 /// written out and it appears before the array in the columnList. 1287 /// 1288 /// By default, in case of TTree, TChain or RNTuple inputs, Snapshot will try to write out all top-level branches. 1289 /// For other types of inputs, all columns returned by GetColumnNames() will be written out. 
/// Systematic variations of
/// columns will be included if the corresponding flag is set in RSnapshotOptions. See \ref snapshot-with-variations
/// "Snapshot with Variations" for more details. If friend trees or chains are present, by default all friend
/// top-level branches that have names that do not collide with names of branches in the main TTree/TChain will be
/// written out. Since v6.24, Snapshot will also write out friend branches with the same names of branches in the
/// main TTree/TChain with names of the form
/// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
///
/// ### Writing to a sub-directory
///
/// Snapshot supports writing the TTree or RNTuple in a sub-directory inside the TFile. It is sufficient to specify
/// the directory path as part of the TTree or RNTuple name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree
/// `t` in the sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
///
/// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
/// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled
/// with respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in
/// wrong associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
/// error out if such a "shuffled" TTree is used in a friendship.
///
/// \note In case no events are written out (e.g. because no event passes all filters), Snapshot will still write the
/// requested output TTree or RNTuple to the file, with all the branches requested to preserve the dataset schema.
///
/// \note Snapshot will refuse to process columns with names of the form `#columnname`.
These are special columns 1313 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are 1314 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an 1315 /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`. 1316 /// 1317 /// ### Example invocations: 1318 /// 1319 /// ~~~{.cpp} 1320 /// // No need to specify column types, they are automatically deduced thanks 1321 /// // to information coming from the data source 1322 /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"}); 1323 /// ~~~ 1324 /// 1325 /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in 1326 /// `RSnapshotOptions`: 1327 /// ~~~{.cpp} 1328 /// RSnapshotOptions opts; 1329 /// opts.fLazy = true; 1330 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts); 1331 /// ~~~ 1332 /// 1333 /// To snapshot to the RNTuple data format, the `fOutputFormat` option in `RSnapshotOptions` needs to be set 1334 /// accordingly: 1335 /// ~~~{.cpp} 1336 /// RSnapshotOptions opts; 1337 /// opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple; 1338 /// df.Snapshot("outputNTuple", "outputFile.root", {"x"}, opts); 1339 /// ~~~ 1340 /// 1341 /// Snapshot systematic variations resulting from a Vary() call (see details \ref snapshot-with-variations "here"): 1342 /// ~~~{.cpp} 1343 /// RSnapshotOptions opts; 1344 /// opts.fIncludeVariations = true; 1345 /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts); 1346 /// ~~~ 1347 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, 1348 const ColumnNames_t &columnList, 1349 const RSnapshotOptions &options = RSnapshotOptions()) 1350 { 1351 // TODO: Remove before releasing 6.40.00 1352 #if ROOT_VERSION_CODE >= ROOT_VERSION(6, 40, 0) 1353 static_assert(false && "Remove information about change of Snapshot defaut compression settings."); 1354 
#endif 1355 [[maybe_unused]] static bool once = []() { 1356 if (const char *suppress = std::getenv("ROOT_RDF_SNAPSHOT_INFO")) 1357 if (std::strcmp(suppress, "0") == 0) 1358 return true; 1359 if (const char *suppress = gEnv->GetValue("ROOT.RDF.Snapshot.Info", "1")) 1360 if (std::strcmp(suppress, "0") == 0) 1361 return true; 1362 RLogScopedVerbosity showInfo{ROOT::Detail::RDF::RDFLogChannel(), ROOT::ELogLevel::kInfo}; 1363 R__LOG_INFO(ROOT::Detail::RDF::RDFLogChannel()) 1364 << "\n\tIn ROOT 6.38, the default compression settings of Snapshot have been changed from 101 (ZLIB with " 1365 "compression level 1, the TTree default) to 505 (ZSTD with compression level 5). This change may result " 1366 "in smaller Snapshot output dataset size by default. In order to suppress this message, set " 1367 "'ROOT_RDF_SNAPSHOT_INFO=0' in your environment or set 'ROOT.RDF.Snapshot.Info: 0' in your .rootrc " 1368 "file."; 1369 return true; 1370 }(); 1371 // like columnList but with `#var` columns removed 1372 auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot"); 1373 // like columnListWithoutSizeColumns but with aliases resolved 1374 auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes); 1375 RDFInternal::CheckForDuplicateSnapshotColumns(colListNoAliases); 1376 // like validCols but with missing size branches required by array branches added in the right positions 1377 const auto pairOfColumnLists = 1378 RDFInternal::AddSizeBranches(GetDataSource(), std::move(colListNoAliases), std::move(colListNoPoundSizes)); 1379 const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first; 1380 const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second; 1381 1382 const auto fullTreeName = treename; 1383 const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName); 1384 treename = parsedTreePath.fTreeName; 1385 const auto &dirname = parsedTreePath.fDirName; 1386 1387 ::TDirectory::TContext 
ctxt; 1388 1389 RResultPtr<RInterface<RLoopManager>> resPtr; 1390 1391 auto retrieveTypeID = [](const std::string &colName, const std::string &colTypeName, 1392 bool isRNTuple = false) -> const std::type_info * { 1393 try { 1394 return &ROOT::Internal::RDF::TypeName2TypeID(colTypeName); 1395 } catch (const std::runtime_error &err) { 1396 if (isRNTuple) 1397 return &typeid(ROOT::Internal::RDF::UseNativeDataType); 1398 1399 if (std::string(err.what()).find("Cannot extract type_info of type") != std::string::npos) { 1400 // We could not find RTTI for this column, thus we cannot write it out at the moment. 1401 std::string trueTypeName{colTypeName}; 1402 if (colTypeName.rfind("CLING_UNKNOWN_TYPE", 0) == 0) 1403 trueTypeName = colTypeName.substr(19); 1404 std::string msg{"No runtime type information is available for column \"" + colName + 1405 "\" with type name \"" + trueTypeName + 1406 "\". Thus, it cannot be written to disk with Snapshot. Make sure to generate and load " 1407 "ROOT dictionaries for the type of this column."}; 1408 1409 throw std::runtime_error(msg); 1410 } else { 1411 throw; 1412 } 1413 } 1414 }; 1415 1416 RDFInternal::CheckSnapshotOptionsFormatCompatibility(options); 1417 1418 if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) { 1419 // The data source of the RNTuple resulting from the Snapshot action does not exist yet here, so we create one 1420 // without a data source for now, and set it once the actual data source can be created (i.e., after 1421 // writing the RNTuple). 
1422 auto newRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(colListNoPoundSizes)); 1423 1424 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{ 1425 std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches, 1426 options, newRDF->GetLoopManager(), GetLoopManager(), true /* fToNTuple */, /*fIncludeVariations=*/false}); 1427 1428 auto &&nColumns = colListNoAliasesWithSizeBranches.size(); 1429 const auto validColumnNames = GetValidatedColumnNames(nColumns, colListNoAliasesWithSizeBranches); 1430 1431 const auto nSlots = fLoopManager->GetNSlots(); 1432 std::vector<const std::type_info *> colTypeIDs; 1433 colTypeIDs.reserve(nColumns); 1434 for (decltype(nColumns) i{}; i < nColumns; i++) { 1435 const auto &colName = validColumnNames[i]; 1436 const auto colTypeName = ROOT::Internal::RDF::ColumnName2ColumnTypeName( 1437 colName, /*tree*/ nullptr, GetDataSource(), fColRegister.GetDefine(colName), options.fVector2RVec); 1438 const std::type_info *colTypeID = retrieveTypeID(colName, colTypeName, /*isRNTuple*/ true); 1439 colTypeIDs.push_back(colTypeID); 1440 } 1441 // Crucial e.g. if the column names do not correspond to already-available column readers created by the data 1442 // source 1443 CheckAndFillDSColumns(validColumnNames, colTypeIDs); 1444 1445 auto action = 1446 RDFInternal::BuildAction(validColumnNames, snapHelperArgs, nSlots, fProxiedPtr, fColRegister, colTypeIDs); 1447 resPtr = MakeResultPtr(newRDF, *GetLoopManager(), std::move(action)); 1448 } else { 1449 if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" && 1450 options.fOutputFormat == ESnapshotOutputFormat::kDefault) { 1451 Warning("Snapshot", 1452 "The default Snapshot output data format is TTree, but the input data format is RNTuple. If you " 1453 "want to Snapshot to RNTuple or suppress this warning, set the appropriate fOutputFormat option in " 1454 "RSnapshotOptions. 
Note that this current default behaviour might change in the future."); 1455 } 1456 1457 // We create an RLoopManager without a data source. This needs to be initialised when the output TTree dataset 1458 // has actually been created and written to TFile, i.e. at the end of the Snapshot execution. 1459 auto newRDF = std::make_shared<RInterface<RLoopManager>>( 1460 std::make_shared<RLoopManager>(colListNoAliasesWithSizeBranches)); 1461 1462 auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{ 1463 std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches, 1464 options, newRDF->GetLoopManager(), GetLoopManager(), false /* fToRNTuple */, options.fIncludeVariations}); 1465 1466 auto &&nColumns = colListNoAliasesWithSizeBranches.size(); 1467 const auto validColumnNames = GetValidatedColumnNames(nColumns, colListNoAliasesWithSizeBranches); 1468 1469 const auto nSlots = fLoopManager->GetNSlots(); 1470 std::vector<const std::type_info *> colTypeIDs; 1471 colTypeIDs.reserve(nColumns); 1472 for (decltype(nColumns) i{}; i < nColumns; i++) { 1473 const auto &colName = validColumnNames[i]; 1474 const auto colTypeName = ROOT::Internal::RDF::ColumnName2ColumnTypeName( 1475 colName, /*tree*/ nullptr, GetDataSource(), fColRegister.GetDefine(colName), options.fVector2RVec); 1476 const std::type_info *colTypeID = retrieveTypeID(colName, colTypeName); 1477 colTypeIDs.push_back(colTypeID); 1478 } 1479 // Crucial e.g. 
if the column names do not correspond to already-available column readers created by the data 1480 // source 1481 CheckAndFillDSColumns(validColumnNames, colTypeIDs); 1482 1483 auto action = 1484 RDFInternal::BuildAction(validColumnNames, snapHelperArgs, nSlots, fProxiedPtr, fColRegister, colTypeIDs); 1485 resPtr = MakeResultPtr(newRDF, *GetLoopManager(), std::move(action)); 1486 } 1487 1488 if (!options.fLazy) 1489 *resPtr; 1490 return resPtr; 1491 } 1492 1493 // clang-format off 1494 //////////////////////////////////////////////////////////////////////////// 1495 /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`. 1496 /// \param[in] treename The name of the output TTree or RNTuple. 1497 /// \param[in] filename The name of the output TFile. 1498 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns. 1499 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple 1500 /// \return a `RDataFrame` that wraps the snapshotted dataset. 1501 /// 1502 /// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source. 1503 /// The types of the columns are automatically inferred and do not need to be specified. 1504 /// 1505 /// See Snapshot(std::string_view, std::string_view, const ColumnNames_t&, const RSnapshotOptions &) for a more complete description and example usages. 1506 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, 1507 std::string_view columnNameRegexp = "", 1508 const RSnapshotOptions &options = RSnapshotOptions()) 1509 { 1510 const auto definedColumns = fColRegister.GenerateColumnNames(); 1511 1512 const auto dsColumns = GetDataSource() ? 
ROOT::Internal::RDF::GetTopLevelFieldNames(*GetDataSource()) : ColumnNames_t{}; 1513 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those 1514 ColumnNames_t dsColumnsWithoutSizeColumns; 1515 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns), 1516 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; }); 1517 ColumnNames_t columnNames; 1518 columnNames.reserve(definedColumns.size() + dsColumnsWithoutSizeColumns.size()); 1519 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end()); 1520 columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end()); 1521 1522 // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd. 1523 // RemoveDuplicates should preserve ordering of the columns: it might be meaningful. 1524 RDFInternal::RemoveDuplicates(columnNames); 1525 1526 auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot"); 1527 1528 if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS") { 1529 RDFInternal::RemoveRNTupleSubFields(selectedColumns); 1530 } 1531 1532 return Snapshot(treename, filename, selectedColumns, options); 1533 } 1534 // clang-format on 1535 1536 // clang-format off 1537 //////////////////////////////////////////////////////////////////////////// 1538 /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`. 1539 /// \param[in] treename The name of the output TTree or RNTuple. 1540 /// \param[in] filename The name of the output TFile. 1541 /// \param[in] columnList The list of names of the columns/branches to be written. 1542 /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple. 1543 /// \return a `RDataFrame` that wraps the snapshotted dataset. 
1544 /// 1545 /// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source. 1546 /// The types of the columns are automatically inferred and do not need to be specified. 1547 /// 1548 /// See Snapshot(std::string_view, std::string_view, const ColumnNames_t&, const RSnapshotOptions &) for a more complete description and example usages. 1549 RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename, 1550 std::initializer_list<std::string> columnList, 1551 const RSnapshotOptions &options = RSnapshotOptions()) 1552 { 1553 ColumnNames_t selectedColumns(columnList); 1554 return Snapshot(treename, filename, selectedColumns, options); 1555 } 1556 // clang-format on 1557 1558 //////////////////////////////////////////////////////////////////////////// 1559 /// \brief Save selected columns in memory. 1560 /// \tparam ColumnTypes variadic list of branch/column types. 1561 /// \param[in] columnList columns to be cached in memory. 1562 /// \return a `RDataFrame` that wraps the cached dataset. 1563 /// 1564 /// This action returns a new `RDataFrame` object, completely detached from 1565 /// the originating `RDataFrame`. The new dataframe only contains the cached 1566 /// columns and stores their content in memory for fast, zero-copy subsequent access. 1567 /// 1568 /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that 1569 /// fits in memory and that will be accessed many times. 1570 /// 1571 /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns 1572 /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are 1573 /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an 1574 /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`. 
1575 /// 1576 /// ### Example usage: 1577 /// 1578 /// **Types and columns specified:** 1579 /// ~~~{.cpp} 1580 /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"}); 1581 /// ~~~ 1582 /// 1583 /// **Types inferred and columns specified (this invocation relies on jitting):** 1584 /// ~~~{.cpp} 1585 /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"}); 1586 /// ~~~ 1587 /// 1588 /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):** 1589 /// ~~~{.cpp} 1590 /// auto cache_all_cols_df = df.Cache(myRegexp); 1591 /// ~~~ 1592 template <typename... ColumnTypes> 1593 RInterface<RLoopManager> Cache(const ColumnNames_t &columnList) 1594 { 1595 auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>(); 1596 return CacheImpl<ColumnTypes...>(columnList, staticSeq); 1597 } 1598 1599 //////////////////////////////////////////////////////////////////////////// 1600 /// \brief Save selected columns in memory. 1601 /// \param[in] columnList columns to be cached in memory 1602 /// \return a `RDataFrame` that wraps the cached dataset. 1603 /// 1604 /// See the previous overloads for more information. 1605 RInterface<RLoopManager> Cache(const ColumnNames_t &columnList) 1606 { 1607 // Early return: if the list of columns is empty, just return an empty RDF 1608 // If we proceed, the jitted call will not compile! 
1609 if (columnList.empty()) { 1610 auto nEntries = *this->Count(); 1611 RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries)); 1612 return emptyRDF; 1613 } 1614 1615 std::stringstream cacheCall; 1616 auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr); 1617 RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager, 1618 fColRegister); 1619 // build a string equivalent to 1620 // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))" 1621 RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0)); 1622 cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>(" 1623 << RDFInternal::PrettyPrintAddr(&resRDF) 1624 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>(" 1625 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<"; 1626 1627 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache"); 1628 1629 const auto validColumnNames = 1630 GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns); 1631 const auto colTypes = 1632 GetValidatedArgTypes(validColumnNames, fColRegister, nullptr, GetDataSource(), "Cache", /*vector2RVec=*/false); 1633 for (const auto &colType : colTypes) 1634 cacheCall << colType << ", "; 1635 if (!columnListWithoutSizeColumns.empty()) 1636 cacheCall.seekp(-2, cacheCall.cur); // remove the last ", 1637 cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t 1638 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));"; 1639 1640 // book the code to jit with the RLoopManager and trigger the event loop 1641 fLoopManager->ToJitExec(cacheCall.str()); 1642 fLoopManager->Jit(); 1643 1644 return resRDF; 1645 } 1646 1647 //////////////////////////////////////////////////////////////////////////// 1648 /// \brief Save selected 
columns in memory. 1649 /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns. 1650 /// \return a `RDataFrame` that wraps the cached dataset. 1651 /// 1652 /// The existing columns are matched against the regular expression. If the string provided 1653 /// is empty, all columns are selected. See the previous overloads for more information. 1654 RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "") 1655 { 1656 const auto definedColumns = fColRegister.GenerateColumnNames(); 1657 const auto dsColumns = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}; 1658 // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those 1659 ColumnNames_t dsColumnsWithoutSizeColumns; 1660 std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns), 1661 [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; }); 1662 ColumnNames_t columnNames; 1663 columnNames.reserve(definedColumns.size() + dsColumns.size()); 1664 columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end()); 1665 columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end()); 1666 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache"); 1667 return Cache(selectedColumns); 1668 } 1669 1670 //////////////////////////////////////////////////////////////////////////// 1671 /// \brief Save selected columns in memory. 1672 /// \param[in] columnList columns to be cached in memory. 1673 /// \return a `RDataFrame` that wraps the cached dataset. 1674 /// 1675 /// See the previous overloads for more information. 
1676 RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList) 1677 { 1678 ColumnNames_t selectedColumns(columnList); 1679 return Cache(selectedColumns); 1680 } 1681 1682 // clang-format off 1683 //////////////////////////////////////////////////////////////////////////// 1684 /// \brief Creates a node that filters entries based on range: [begin, end). 1685 /// \param[in] begin Initial entry number considered for this range. 1686 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset. 1687 /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0. 1688 /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries. 1689 /// 1690 /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset. 1691 /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported. 
1692 /// 1693 /// ### Example usage: 1694 /// ~~~{.cpp} 1695 /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries 1696 /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards 1697 /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3 1698 /// ~~~ 1699 // clang-format on 1700 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1) 1701 { 1702 // check invariants 1703 if (stride == 0 || (end != 0 && end < begin)) 1704 throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin."); 1705 CheckIMTDisabled("Range"); 1706 1707 using Range_t = RDFDetail::RRange<Proxied>; 1708 auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr); 1709 RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister); 1710 return newInterface; 1711 } 1712 1713 // clang-format off 1714 //////////////////////////////////////////////////////////////////////////// 1715 /// \brief Creates a node that filters entries based on range. 1716 /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset. 1717 /// \return a node of the computation graph for which the range is defined. 1718 /// 1719 /// See the other Range overload for a detailed description. 1720 // clang-format on 1721 RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); } 1722 1723 // clang-format off 1724 //////////////////////////////////////////////////////////////////////////// 1725 /// \brief Execute a user-defined function on each entry (*instant action*). 1726 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations. 1727 /// \param[in] columns Names of the columns/branches in input to the user function. 
1728 /// 1729 /// The callable `f` is invoked once per entry. This is an *instant action*: 1730 /// upon invocation, an event loop as well as execution of all scheduled actions 1731 /// is triggered. 1732 /// Users are responsible for the thread-safety of this callable when executing 1733 /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT). 1734 /// 1735 /// ### Example usage: 1736 /// ~~~{.cpp} 1737 /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"}); 1738 /// ~~~ 1739 // clang-format on 1740 template <typename F> 1741 void Foreach(F f, const ColumnNames_t &columns = {}) 1742 { 1743 using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay; 1744 using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type; 1745 ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns); 1746 } 1747 1748 // clang-format off 1749 //////////////////////////////////////////////////////////////////////////// 1750 /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*). 1751 /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations. 1752 /// \param[in] columns Names of the columns/branches in input to the user function. 1753 /// 1754 /// Same as `Foreach`, but the user-defined function takes an extra 1755 /// `unsigned int` as its first parameter, the *processing slot index*. 1756 /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`, 1757 /// for each thread of execution. 1758 /// This is meant as a helper in writing thread-safe `Foreach` 1759 /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`. 1760 /// The user-defined processing callable is able to follow different 1761 /// *streams of processing* indexed by the first parameter. 
1762 /// `ForeachSlot` works just as well with single-thread execution: in that 1763 /// case `slot` will always be `0`. 1764 /// 1765 /// ### Example usage: 1766 /// ~~~{.cpp} 1767 /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"}); 1768 /// ~~~ 1769 // clang-format on 1770 template <typename F> 1771 void ForeachSlot(F f, const ColumnNames_t &columns = {}) 1772 { 1773 using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>; 1774 constexpr auto nColumns = ColTypes_t::list_size; 1775 1776 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); 1777 CheckAndFillDSColumns(validColumnNames, ColTypes_t()); 1778 1779 using Helper_t = RDFInternal::ForeachSlotHelper<F>; 1780 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 1781 1782 auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister); 1783 1784 fLoopManager->Run(); 1785 } 1786 1787 // clang-format off 1788 //////////////////////////////////////////////////////////////////////////// 1789 /// \brief Execute a user-defined reduce operation on the values of a column. 1790 /// \tparam F The type of the reduce callable. Automatically deduced. 1791 /// \tparam T The type of the column to apply the reduction to. Automatically deduced. 1792 /// \param[in] f A callable with signature `T(T,T)` 1793 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead. 1794 /// \return the reduced quantity wrapped in a ROOT::RDF:RResultPtr. 1795 /// 1796 /// A reduction takes two values of a column and merges them into one (e.g. 1797 /// by summing them, taking the maximum, etc). This action performs the 1798 /// specified reduction operation on all processed column values, returning 1799 /// a single value of the same type. 
The callable f must satisfy the general 1800 /// requirements of a *processing function* besides having signature `T(T,T)` 1801 /// where `T` is the type of column columnName. 1802 /// 1803 /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a 1804 /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific 1805 /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this 1806 /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce` 1807 /// overload. 1808 /// 1809 /// ### Example usage: 1810 /// ~~~{.cpp} 1811 /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol"); 1812 /// ~~~ 1813 /// 1814 /// This action is *lazy*: upon invocation of this method the calculation is 1815 /// booked but not executed. Also see RResultPtr. 1816 // clang-format on 1817 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type> 1818 RResultPtr<T> Reduce(F f, std::string_view columnName = "") 1819 { 1820 static_assert( 1821 std::is_default_constructible<T>::value, 1822 "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)"); 1823 return Reduce(std::move(f), columnName, T()); 1824 } 1825 1826 //////////////////////////////////////////////////////////////////////////// 1827 /// \brief Execute a user-defined reduce operation on the values of a column. 1828 /// \tparam F The type of the reduce callable. Automatically deduced. 1829 /// \tparam T The type of the column to apply the reduction to. Automatically deduced. 1830 /// \param[in] f A callable with signature `T(T,T)` 1831 /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead. 1832 /// \param[in] redIdentity The reduced object of each thread is initialized to this value. 
1833 /// \return the reduced quantity wrapped in a RResultPtr. 1834 /// 1835 /// ### Example usage: 1836 /// ~~~{.cpp} 1837 /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42); 1838 /// ~~~ 1839 /// See the description of the first Reduce overload for more information. 1840 template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type> 1841 RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity) 1842 { 1843 return Aggregate(f, f, columnName, redIdentity); 1844 } 1845 1846 //////////////////////////////////////////////////////////////////////////// 1847 /// \brief Return the number of entries processed (*lazy action*). 1848 /// \return the number of entries wrapped in a RResultPtr. 1849 /// 1850 /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`). 1851 /// This action is *lazy*: upon invocation of this method the calculation is 1852 /// booked but not executed. Also see RResultPtr. 1853 /// 1854 /// ### Example usage: 1855 /// ~~~{.cpp} 1856 /// auto nEntriesAfterCuts = myFilteredDf.Count(); 1857 /// ~~~ 1858 /// 1859 RResultPtr<ULong64_t> Count() 1860 { 1861 const auto nSlots = fLoopManager->GetNSlots(); 1862 auto cSPtr = std::make_shared<ULong64_t>(0); 1863 using Helper_t = RDFInternal::CountHelper; 1864 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 1865 auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr, 1866 RDFInternal::RColumnRegister(fColRegister)); 1867 return MakeResultPtr(cSPtr, *fLoopManager, std::move(action)); 1868 } 1869 1870 //////////////////////////////////////////////////////////////////////////// 1871 /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default). 1872 /// \tparam T The type of the column. 1873 /// \tparam COLL The type of collection used to store the values. 
1874 /// \param[in] column The name of the column to collect the values of. 1875 /// \return the content of the selected column wrapped in a RResultPtr. 1876 /// 1877 /// The collection type to be specified for C-style array columns is `RVec<T>`: 1878 /// in this case the returned collection is a `std::vector<RVec<T>>`. 1879 /// ### Example usage: 1880 /// ~~~{.cpp} 1881 /// // In this case intCol is a std::vector<int> 1882 /// auto intCol = rdf.Take<int>("integerColumn"); 1883 /// // Same content as above but in this case taken as a RVec<int> 1884 /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn"); 1885 /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections 1886 /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt"); 1887 /// ~~~ 1888 /// This action is *lazy*: upon invocation of this method the calculation is 1889 /// booked but not executed. Also see RResultPtr. 1890 template <typename T, typename COLL = std::vector<T>> 1891 RResultPtr<COLL> Take(std::string_view column = "") 1892 { 1893 const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)}); 1894 1895 const auto validColumnNames = GetValidatedColumnNames(1, columns); 1896 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>()); 1897 1898 using Helper_t = RDFInternal::TakeHelper<T, T, COLL>; 1899 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 1900 auto valuesPtr = std::make_shared<COLL>(); 1901 const auto nSlots = fLoopManager->GetNSlots(); 1902 1903 auto action = 1904 std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister); 1905 return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action)); 1906 } 1907 1908 //////////////////////////////////////////////////////////////////////////// 1909 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*). 1910 /// \tparam V The type of the column used to fill the histogram. 
1911 /// \param[in] model The returned histogram will be constructed using this as a model. 1912 /// \param[in] vName The name of the column that will fill the histogram. 1913 /// \return the monodimensional histogram wrapped in a RResultPtr. 1914 /// 1915 /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram 1916 /// is filled with each one of the elements of the container. In case multiple columns of container type 1917 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but 1918 /// possibly different lengths between events). 1919 /// This action is *lazy*: upon invocation of this method the calculation is 1920 /// booked but not executed. Also see RResultPtr. 1921 /// 1922 /// ### Example usage: 1923 /// ~~~{.cpp} 1924 /// // Deduce column type (this invocation needs jitting internally) 1925 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn"); 1926 /// // Explicit column type 1927 /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn"); 1928 /// ~~~ 1929 /// 1930 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory 1931 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 1932 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 1933 template <typename V = RDFDetail::RInferredType> 1934 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "") 1935 { 1936 const auto userColumns = vName.empty() ? 
ColumnNames_t() : ColumnNames_t({std::string(vName)}); 1937 1938 const auto validatedColumns = GetValidatedColumnNames(1, userColumns); 1939 1940 std::shared_ptr<::TH1D> h(nullptr); 1941 { 1942 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 1943 h = model.GetHistogram(); 1944 } 1945 1946 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin()) 1947 h->SetCanExtend(::TH1::kAllAxes); 1948 return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr); 1949 } 1950 1951 //////////////////////////////////////////////////////////////////////////// 1952 /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*). 1953 /// \tparam V The type of the column used to fill the histogram. 1954 /// \param[in] vName The name of the column that will fill the histogram. 1955 /// \return the monodimensional histogram wrapped in a RResultPtr. 1956 /// 1957 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.). 1958 /// The "name" and "title" strings are built starting from the input column name. 1959 /// See the description of the first Histo1D() overload for more details. 1960 /// 1961 /// ### Example usage: 1962 /// ~~~{.cpp} 1963 /// // Deduce column type (this invocation needs jitting internally) 1964 /// auto myHist1 = myDf.Histo1D("myColumn"); 1965 /// // Explicit column type 1966 /// auto myHist2 = myDf.Histo1D<float>("myColumn"); 1967 /// ~~~ 1968 template <typename V = RDFDetail::RInferredType> 1969 RResultPtr<::TH1D> Histo1D(std::string_view vName) 1970 { 1971 const auto h_name = std::string(vName); 1972 const auto h_title = h_name + ";" + h_name + ";count"; 1973 return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName); 1974 } 1975 1976 //////////////////////////////////////////////////////////////////////////// 1977 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*). 
1978 /// \tparam V The type of the column used to fill the histogram. 1979 /// \tparam W The type of the column used as weights. 1980 /// \param[in] model The returned histogram will be constructed using this as a model. 1981 /// \param[in] vName The name of the column that will fill the histogram. 1982 /// \param[in] wName The name of the column that will provide the weights. 1983 /// \return the monodimensional histogram wrapped in a RResultPtr. 1984 /// 1985 /// See the description of the first Histo1D() overload for more details. 1986 /// 1987 /// ### Example usage: 1988 /// ~~~{.cpp} 1989 /// // Deduce column type (this invocation needs jitting internally) 1990 /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight"); 1991 /// // Explicit column type 1992 /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight"); 1993 /// ~~~ 1994 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 1995 RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName) 1996 { 1997 const std::vector<std::string_view> columnViews = {vName, wName}; 1998 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 1999 ? ColumnNames_t() 2000 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2001 std::shared_ptr<::TH1D> h(nullptr); 2002 { 2003 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2004 h = model.GetHistogram(); 2005 } 2006 2007 if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin()) 2008 h->SetCanExtend(::TH1::kAllAxes); 2009 return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr); 2010 } 2011 2012 //////////////////////////////////////////////////////////////////////////// 2013 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*). 
2014 /// \tparam V The type of the column used to fill the histogram. 2015 /// \tparam W The type of the column used as weights. 2016 /// \param[in] vName The name of the column that will fill the histogram. 2017 /// \param[in] wName The name of the column that will provide the weights. 2018 /// \return the monodimensional histogram wrapped in a RResultPtr. 2019 /// 2020 /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.). 2021 /// The "name" and "title" strings are built starting from the input column names. 2022 /// See the description of the first Histo1D() overload for more details. 2023 /// 2024 /// ### Example usage: 2025 /// ~~~{.cpp} 2026 /// // Deduce column types (this invocation needs jitting internally) 2027 /// auto myHist1 = myDf.Histo1D("myValue", "myweight"); 2028 /// // Explicit column types 2029 /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight"); 2030 /// ~~~ 2031 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 2032 RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName) 2033 { 2034 // We build name and title based on the value and weight column names 2035 std::string str_vName{vName}; 2036 std::string str_wName{wName}; 2037 const auto h_name = str_vName + "_weighted_" + str_wName; 2038 const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName; 2039 return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName); 2040 } 2041 2042 //////////////////////////////////////////////////////////////////////////// 2043 /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*). 2044 /// \tparam V The type of the column used to fill the histogram. 2045 /// \tparam W The type of the column used as weights. 2046 /// \param[in] model The returned histogram will be constructed using this as a model. 
2047 /// \return the monodimensional histogram wrapped in a RResultPtr. 2048 /// 2049 /// This overload will use the first two default columns as column names. 2050 /// See the description of the first Histo1D() overload for more details. 2051 template <typename V, typename W> 2052 RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}) 2053 { 2054 return Histo1D<V, W>(model, "", ""); 2055 } 2056 2057 //////////////////////////////////////////////////////////////////////////// 2058 /// \brief Fill and return a two-dimensional histogram (*lazy action*). 2059 /// \tparam V1 The type of the column used to fill the x axis of the histogram. 2060 /// \tparam V2 The type of the column used to fill the y axis of the histogram. 2061 /// \param[in] model The returned histogram will be constructed using this as a model. 2062 /// \param[in] v1Name The name of the column that will fill the x axis. 2063 /// \param[in] v2Name The name of the column that will fill the y axis. 2064 /// \return the bidimensional histogram wrapped in a RResultPtr. 2065 /// 2066 /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram 2067 /// is filled with each one of the elements of the container. In case multiple columns of container type 2068 /// are provided (e.g. values and weights) they must have the same length for each one of the events (but 2069 /// possibly different lengths between events). 2070 /// This action is *lazy*: upon invocation of this method the calculation is 2071 /// booked but not executed. Also see RResultPtr. 
2072 /// 2073 /// ### Example usage: 2074 /// ~~~{.cpp} 2075 /// // Deduce column types (this invocation needs jitting internally) 2076 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY"); 2077 /// // Explicit column types 2078 /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY"); 2079 /// ~~~ 2080 /// 2081 /// 2082 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory 2083 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 2084 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 2085 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType> 2086 RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "") 2087 { 2088 std::shared_ptr<::TH2D> h(nullptr); 2089 { 2090 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2091 h = model.GetHistogram(); 2092 } 2093 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { 2094 throw std::runtime_error("2D histograms with no axes limits are not supported yet."); 2095 } 2096 const std::vector<std::string_view> columnViews = {v1Name, v2Name}; 2097 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2098 ? ColumnNames_t() 2099 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2100 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr); 2101 } 2102 2103 //////////////////////////////////////////////////////////////////////////// 2104 /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*). 2105 /// \tparam V1 The type of the column used to fill the x axis of the histogram. 2106 /// \tparam V2 The type of the column used to fill the y axis of the histogram. 
2107 /// \tparam W The type of the column used for the weights of the histogram. 2108 /// \param[in] model The returned histogram will be constructed using this as a model. 2109 /// \param[in] v1Name The name of the column that will fill the x axis. 2110 /// \param[in] v2Name The name of the column that will fill the y axis. 2111 /// \param[in] wName The name of the column that will provide the weights. 2112 /// \return the bidimensional histogram wrapped in a RResultPtr. 2113 /// 2114 /// This action is *lazy*: upon invocation of this method the calculation is 2115 /// booked but not executed. Also see RResultPtr. 2116 /// 2117 /// ### Example usage: 2118 /// ~~~{.cpp} 2119 /// // Deduce column types (this invocation needs jitting internally) 2120 /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight"); 2121 /// // Explicit column types 2122 /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight"); 2123 /// ~~~ 2124 /// 2125 /// See the documentation of the first Histo2D() overload for more details. 2126 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 2127 typename W = RDFDetail::RInferredType> 2128 RResultPtr<::TH2D> 2129 Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName) 2130 { 2131 std::shared_ptr<::TH2D> h(nullptr); 2132 { 2133 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2134 h = model.GetHistogram(); 2135 } 2136 if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) { 2137 throw std::runtime_error("2D histograms with no axes limits are not supported yet."); 2138 } 2139 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName}; 2140 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2141 ? 
ColumnNames_t() 2142 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2143 return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr); 2144 } 2145 2146 template <typename V1, typename V2, typename W> 2147 RResultPtr<::TH2D> Histo2D(const TH2DModel &model) 2148 { 2149 return Histo2D<V1, V2, W>(model, "", "", ""); 2150 } 2151 2152 //////////////////////////////////////////////////////////////////////////// 2153 /// \brief Fill and return a three-dimensional histogram (*lazy action*). 2154 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. 2155 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present. 2156 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present. 2157 /// \param[in] model The returned histogram will be constructed using this as a model. 2158 /// \param[in] v1Name The name of the column that will fill the x axis. 2159 /// \param[in] v2Name The name of the column that will fill the y axis. 2160 /// \param[in] v3Name The name of the column that will fill the z axis. 2161 /// \return the tridimensional histogram wrapped in a RResultPtr. 2162 /// 2163 /// This action is *lazy*: upon invocation of this method the calculation is 2164 /// booked but not executed. Also see RResultPtr. 
2165 /// 2166 /// ### Example usage: 2167 /// ~~~{.cpp} 2168 /// // Deduce column types (this invocation needs jitting internally) 2169 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, 2170 /// "myValueX", "myValueY", "myValueZ"); 2171 /// // Explicit column types 2172 /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, 2173 /// "myValueX", "myValueY", "myValueZ"); 2174 /// ~~~ 2175 /// \note If three-dimensional histograms consume too much memory in multithreaded runs, the cloning of TH3D 2176 /// per thread can be reduced using ROOT::RDF::Experimental::ThreadsPerTH3(). See the section "Memory Usage" in 2177 /// the RDataFrame description. 2178 /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory 2179 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 2180 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 2181 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 2182 typename V3 = RDFDetail::RInferredType> 2183 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "", 2184 std::string_view v3Name = "") 2185 { 2186 std::shared_ptr<::TH3D> h(nullptr); 2187 { 2188 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2189 h = model.GetHistogram(); 2190 } 2191 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { 2192 throw std::runtime_error("3D histograms with no axes limits are not supported yet."); 2193 } 2194 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name}; 2195 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2196 ? 
ColumnNames_t() 2197 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2198 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr); 2199 } 2200 2201 //////////////////////////////////////////////////////////////////////////// 2202 /// \brief Fill and return a three-dimensional histogram (*lazy action*). 2203 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. 2204 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present. 2205 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present. 2206 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present. 2207 /// \param[in] model The returned histogram will be constructed using this as a model. 2208 /// \param[in] v1Name The name of the column that will fill the x axis. 2209 /// \param[in] v2Name The name of the column that will fill the y axis. 2210 /// \param[in] v3Name The name of the column that will fill the z axis. 2211 /// \param[in] wName The name of the column that will provide the weights. 2212 /// \return the tridimensional histogram wrapped in a RResultPtr. 2213 /// 2214 /// This action is *lazy*: upon invocation of this method the calculation is 2215 /// booked but not executed. Also see RResultPtr. 
2216 /// 2217 /// ### Example usage: 2218 /// ~~~{.cpp} 2219 /// // Deduce column types (this invocation needs jitting internally) 2220 /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, 2221 /// "myValueX", "myValueY", "myValueZ", "myWeight"); 2222 /// // Explicit column types 2223 /// using d_t = double; 2224 /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.}, 2225 /// "myValueX", "myValueY", "myValueZ", "myWeight"); 2226 /// ~~~ 2227 /// 2228 /// 2229 /// See the documentation of the first Histo2D() overload for more details. 2230 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 2231 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 2232 RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name, 2233 std::string_view v3Name, std::string_view wName) 2234 { 2235 std::shared_ptr<::TH3D> h(nullptr); 2236 { 2237 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2238 h = model.GetHistogram(); 2239 } 2240 if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) { 2241 throw std::runtime_error("3D histograms with no axes limits are not supported yet."); 2242 } 2243 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName}; 2244 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2245 ? 
ColumnNames_t() 2246 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2247 return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr); 2248 } 2249 2250 template <typename V1, typename V2, typename V3, typename W> 2251 RResultPtr<::TH3D> Histo3D(const TH3DModel &model) 2252 { 2253 return Histo3D<V1, V2, V3, W>(model, "", "", "", ""); 2254 } 2255 2256 //////////////////////////////////////////////////////////////////////////// 2257 /// \brief Fill and return an N-dimensional histogram (*lazy action*). 2258 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not 2259 /// present. 2260 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the 2261 /// object. 2262 /// \param[in] model The returned histogram will be constructed using this as a model. 2263 /// \param[in] columnList 2264 /// A list containing the names of the columns that will be passed when calling `Fill`. 2265 /// (N columns for unweighted filling, or N+1 columns for weighted filling) 2266 /// \return the N-dimensional histogram wrapped in a RResultPtr. 2267 /// 2268 /// This action is *lazy*: upon invocation of this method the calculation is 2269 /// booked but not executed. See RResultPtr documentation. 2270 /// 2271 /// ### Example usage: 2272 /// ~~~{.cpp} 2273 /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4, 2274 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}}, 2275 /// {"col0", "col1", "col2", "col3"}); 2276 /// ~~~ 2277 /// 2278 template <typename FirstColumn, typename... 
OtherColumns> // need FirstColumn to disambiguate overloads 2279 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList) 2280 { 2281 std::shared_ptr<::THnD> h(nullptr); 2282 { 2283 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2284 h = model.GetHistogram(); 2285 2286 if (int(columnList.size()) == (h->GetNdimensions() + 1)) { 2287 h->Sumw2(); 2288 } else if (int(columnList.size()) != h->GetNdimensions()) { 2289 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); 2290 } 2291 } 2292 return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h, 2293 fProxiedPtr); 2294 } 2295 2296 //////////////////////////////////////////////////////////////////////////// 2297 /// \brief Fill and return an N-dimensional histogram (*lazy action*). 2298 /// \param[in] model The returned histogram will be constructed using this as a model. 2299 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill` 2300 /// (N columns for unweighted filling, or N+1 columns for weighted filling) 2301 /// \return the N-dimensional histogram wrapped in a RResultPtr. 2302 /// 2303 /// This action is *lazy*: upon invocation of this method the calculation is 2304 /// booked but not executed. Also see RResultPtr. 
2305 /// 2306 /// ### Example usage: 2307 /// ~~~{.cpp} 2308 /// auto myFilledObj = myDf.HistoND({"name","title", 4, 2309 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}}, 2310 /// {"col0", "col1", "col2", "col3"}); 2311 /// ~~~ 2312 /// 2313 RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList) 2314 { 2315 std::shared_ptr<::THnD> h(nullptr); 2316 { 2317 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2318 h = model.GetHistogram(); 2319 2320 if (int(columnList.size()) == (h->GetNdimensions() + 1)) { 2321 h->Sumw2(); 2322 } else if (int(columnList.size()) != h->GetNdimensions()) { 2323 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); 2324 } 2325 } 2326 return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr, 2327 columnList.size()); 2328 } 2329 2330 //////////////////////////////////////////////////////////////////////////// 2331 /// \brief Fill and return a sparse N-dimensional histogram (*lazy action*). 2332 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not 2333 /// present. 2334 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the 2335 /// object. 2336 /// \param[in] model The returned histogram will be constructed using this as a model. 2337 /// \param[in] columnList 2338 /// A list containing the names of the columns that will be passed when calling `Fill`. 2339 /// (N columns for unweighted filling, or N+1 columns for weighted filling) 2340 /// \return the N-dimensional histogram wrapped in a RResultPtr. 2341 /// 2342 /// This action is *lazy*: upon invocation of this method the calculation is 2343 /// booked but not executed. See RResultPtr documentation. 
2344 /// 2345 /// ### Example usage: 2346 /// ~~~{.cpp} 2347 /// auto myFilledObj = myDf.HistoNSparseD<float, float, float, float>({"name","title", 4, 2348 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}}, 2349 /// {"col0", "col1", "col2", "col3"}); 2350 /// ~~~ 2351 /// 2352 template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads 2353 RResultPtr<::THnSparseD> HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList) 2354 { 2355 std::shared_ptr<::THnSparseD> h(nullptr); 2356 { 2357 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2358 h = model.GetHistogram(); 2359 2360 if (int(columnList.size()) == (h->GetNdimensions() + 1)) { 2361 h->Sumw2(); 2362 } else if (int(columnList.size()) != h->GetNdimensions()) { 2363 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); 2364 } 2365 } 2366 return CreateAction<RDFInternal::ActionTags::HistoNSparseD, FirstColumn, OtherColumns...>(columnList, h, h, 2367 fProxiedPtr); 2368 } 2369 2370 //////////////////////////////////////////////////////////////////////////// 2371 /// \brief Fill and return a sparse N-dimensional histogram (*lazy action*). 2372 /// \param[in] model The returned histogram will be constructed using this as a model. 2373 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill` 2374 /// (N columns for unweighted filling, or N+1 columns for weighted filling) 2375 /// \return the N-dimensional histogram wrapped in a RResultPtr. 2376 /// 2377 /// This action is *lazy*: upon invocation of this method the calculation is 2378 /// booked but not executed. Also see RResultPtr. 
2379 /// 2380 /// ### Example usage: 2381 /// ~~~{.cpp} 2382 /// auto myFilledObj = myDf.HistoNSparseD({"name","title", 4, 2383 /// {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}}, 2384 /// {"col0", "col1", "col2", "col3"}); 2385 /// ~~~ 2386 /// 2387 RResultPtr<::THnSparseD> HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList) 2388 { 2389 std::shared_ptr<::THnSparseD> h(nullptr); 2390 { 2391 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2392 h = model.GetHistogram(); 2393 2394 if (int(columnList.size()) == (h->GetNdimensions() + 1)) { 2395 h->Sumw2(); 2396 } else if (int(columnList.size()) != h->GetNdimensions()) { 2397 throw std::runtime_error("Wrong number of columns for the specified number of histogram axes."); 2398 } 2399 } 2400 return CreateAction<RDFInternal::ActionTags::HistoNSparseD, RDFDetail::RInferredType>( 2401 columnList, h, h, fProxiedPtr, columnList.size()); 2402 } 2403 2404 //////////////////////////////////////////////////////////////////////////// 2405 /// \brief Fill and return a TGraph object (*lazy action*). 2406 /// \tparam X The type of the column used to fill the x axis. 2407 /// \tparam Y The type of the column used to fill the y axis. 2408 /// \param[in] x The name of the column that will fill the x axis. 2409 /// \param[in] y The name of the column that will fill the y axis. 2410 /// \return the TGraph wrapped in a RResultPtr. 2411 /// 2412 /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph 2413 /// is filled with each one of the elements of the container. 2414 /// If Multithreading is enabled, the order in which points are inserted is undefined. 2415 /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing. 2416 /// A name and a title to the TGraph is given based on the input column names. 2417 /// 2418 /// This action is *lazy*: upon invocation of this method the calculation is 2419 /// booked but not executed. 
Also see RResultPtr. 2420 /// 2421 /// ### Example usage: 2422 /// ~~~{.cpp} 2423 /// // Deduce column types (this invocation needs jitting internally) 2424 /// auto myGraph1 = myDf.Graph("xValues", "yValues"); 2425 /// // Explicit column types 2426 /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues"); 2427 /// ~~~ 2428 /// 2429 /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory 2430 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 2431 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 2432 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType> 2433 RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "") 2434 { 2435 auto graph = std::make_shared<::TGraph>(); 2436 const std::vector<std::string_view> columnViews = {x, y}; 2437 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2438 ? ColumnNames_t() 2439 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2440 2441 const auto validatedColumns = GetValidatedColumnNames(2, userColumns); 2442 2443 // We build a default name and title based on the input columns 2444 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0]; 2445 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0]; 2446 graph->SetNameTitle(g_name.c_str(), g_title.c_str()); 2447 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str()); 2448 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str()); 2449 2450 return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr); 2451 } 2452 2453 //////////////////////////////////////////////////////////////////////////// 2454 /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*). 2455 /// \param[in] x The name of the column that will fill the x axis. 
2456 /// \param[in] y The name of the column that will fill the y axis. 2457 /// \param[in] exl The name of the column of X low errors 2458 /// \param[in] exh The name of the column of X high errors 2459 /// \param[in] eyl The name of the column of Y low errors 2460 /// \param[in] eyh The name of the column of Y high errors 2461 /// \return the TGraphAsymmErrors wrapped in a RResultPtr. 2462 /// 2463 /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph 2464 /// is filled with each one of the elements of the container. 2465 /// If Multithreading is enabled, the order in which points are inserted is undefined. 2466 /// 2467 /// This action is *lazy*: upon invocation of this method the calculation is 2468 /// booked but not executed. Also see RResultPtr. 2469 /// 2470 /// ### Example usage: 2471 /// ~~~{.cpp} 2472 /// // Deduce column types (this invocation needs jitting internally) 2473 /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh"); 2474 /// // Explicit column types 2475 /// using f = float 2476 /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh"); 2477 /// ~~~ 2478 /// 2479 /// `GraphAsymmErrors` should also be used for the cases in which values associated only with 2480 /// one of the axes have associated errors. For example, only `ey` exist and `ex` are equal to zero. 
2481 /// In such cases, user should do the following: 2482 /// ~~~{.cpp} 2483 /// // Create a column of zeros in RDataFrame 2484 /// auto rdf_withzeros = rdf.Define("zero", "0"); 2485 /// // or alternatively: 2486 /// auto rdf_withzeros = rdf.Define("zero", []() -> double { return 0.;}); 2487 /// // Create the graph with y errors only 2488 /// auto rdf_errorsOnYOnly = rdf_withzeros.GraphAsymmErrors("xValues", "yValues", "zero", "zero", "eyl", "eyh"); 2489 /// ~~~ 2490 /// 2491 /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory 2492 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 2493 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 2494 template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType, 2495 typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType, 2496 typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType> 2497 RResultPtr<::TGraphAsymmErrors> 2498 GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "", 2499 std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "") 2500 { 2501 auto graph = std::make_shared<::TGraphAsymmErrors>(); 2502 const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh}; 2503 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2504 ? 
ColumnNames_t() 2505 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2506 2507 const auto validatedColumns = GetValidatedColumnNames(6, userColumns); 2508 2509 // We build a default name and title based on the input columns 2510 const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0]; 2511 const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0]; 2512 graph->SetNameTitle(g_name.c_str(), g_title.c_str()); 2513 graph->GetXaxis()->SetTitle(validatedColumns[0].c_str()); 2514 graph->GetYaxis()->SetTitle(validatedColumns[1].c_str()); 2515 2516 return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph, 2517 graph, fProxiedPtr); 2518 } 2519 2520 //////////////////////////////////////////////////////////////////////////// 2521 /// \brief Fill and return a one-dimensional profile (*lazy action*). 2522 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present. 2523 /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present. 2524 /// \param[in] model The model to be considered to build the new return value. 2525 /// \param[in] v1Name The name of the column that will fill the x axis. 2526 /// \param[in] v2Name The name of the column that will fill the y axis. 2527 /// \return the monodimensional profile wrapped in a RResultPtr. 2528 /// 2529 /// This action is *lazy*: upon invocation of this method the calculation is 2530 /// booked but not executed. Also see RResultPtr. 
2531 /// 2532 /// ### Example usage: 2533 /// ~~~{.cpp} 2534 /// // Deduce column types (this invocation needs jitting internally) 2535 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues"); 2536 /// // Explicit column types 2537 /// auto myProf2 = myDf.Graph<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues"); 2538 /// ~~~ 2539 /// 2540 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory 2541 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 2542 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 2543 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType> 2544 RResultPtr<::TProfile> 2545 Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "") 2546 { 2547 std::shared_ptr<::TProfile> h(nullptr); 2548 { 2549 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2550 h = model.GetProfile(); 2551 } 2552 2553 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { 2554 throw std::runtime_error("Profiles with no axes limits are not supported yet."); 2555 } 2556 const std::vector<std::string_view> columnViews = {v1Name, v2Name}; 2557 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2558 ? ColumnNames_t() 2559 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2560 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr); 2561 } 2562 2563 //////////////////////////////////////////////////////////////////////////// 2564 /// \brief Fill and return a one-dimensional profile (*lazy action*). 2565 /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present. 2566 /// \tparam V2 The type of the column the values of which are used to fill the profile. 
Inferred if not present. 2567 /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present. 2568 /// \param[in] model The model to be considered to build the new return value. 2569 /// \param[in] v1Name The name of the column that will fill the x axis. 2570 /// \param[in] v2Name The name of the column that will fill the y axis. 2571 /// \param[in] wName The name of the column that will provide the weights. 2572 /// \return the monodimensional profile wrapped in a RResultPtr. 2573 /// 2574 /// This action is *lazy*: upon invocation of this method the calculation is 2575 /// booked but not executed. Also see RResultPtr. 2576 /// 2577 /// ### Example usage: 2578 /// ~~~{.cpp} 2579 /// // Deduce column types (this invocation needs jitting internally) 2580 /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight"); 2581 /// // Explicit column types 2582 /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.}, 2583 /// "xValues", "yValues", "weight"); 2584 /// ~~~ 2585 /// 2586 /// See the first Profile1D() overload for more details. 2587 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 2588 typename W = RDFDetail::RInferredType> 2589 RResultPtr<::TProfile> 2590 Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName) 2591 { 2592 std::shared_ptr<::TProfile> h(nullptr); 2593 { 2594 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2595 h = model.GetProfile(); 2596 } 2597 2598 if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) { 2599 throw std::runtime_error("Profile histograms with no axes limits are not supported yet."); 2600 } 2601 const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName}; 2602 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2603 ? 
ColumnNames_t() 2604 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2605 return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr); 2606 } 2607 2608 //////////////////////////////////////////////////////////////////////////// 2609 /// \brief Fill and return a one-dimensional profile (*lazy action*). 2610 /// See the first Profile1D() overload for more details. 2611 template <typename V1, typename V2, typename W> 2612 RResultPtr<::TProfile> Profile1D(const TProfile1DModel &model) 2613 { 2614 return Profile1D<V1, V2, W>(model, "", "", ""); 2615 } 2616 2617 //////////////////////////////////////////////////////////////////////////// 2618 /// \brief Fill and return a two-dimensional profile (*lazy action*). 2619 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. 2620 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present. 2621 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present. 2622 /// \param[in] model The returned profile will be constructed using this as a model. 2623 /// \param[in] v1Name The name of the column that will fill the x axis. 2624 /// \param[in] v2Name The name of the column that will fill the y axis. 2625 /// \param[in] v3Name The name of the column that will fill the z axis. 2626 /// \return the bidimensional profile wrapped in a RResultPtr. 2627 /// 2628 /// This action is *lazy*: upon invocation of this method the calculation is 2629 /// booked but not executed. Also see RResultPtr. 
2630 /// 2631 /// ### Example usage: 2632 /// ~~~{.cpp} 2633 /// // Deduce column types (this invocation needs jitting internally) 2634 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, 2635 /// "xValues", "yValues", "zValues"); 2636 /// // Explicit column types 2637 /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, 2638 /// "xValues", "yValues", "zValues"); 2639 /// ~~~ 2640 /// 2641 /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory 2642 /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that 2643 /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas). 2644 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 2645 typename V3 = RDFDetail::RInferredType> 2646 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "", 2647 std::string_view v2Name = "", std::string_view v3Name = "") 2648 { 2649 std::shared_ptr<::TProfile2D> h(nullptr); 2650 { 2651 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2652 h = model.GetProfile(); 2653 } 2654 2655 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { 2656 throw std::runtime_error("2D profiles with no axes limits are not supported yet."); 2657 } 2658 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name}; 2659 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2660 ? ColumnNames_t() 2661 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2662 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr); 2663 } 2664 2665 //////////////////////////////////////////////////////////////////////////// 2666 /// \brief Fill and return a two-dimensional profile (*lazy action*). 
2667 /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present. 2668 /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present. 2669 /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present. 2670 /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present. 2671 /// \param[in] model The returned histogram will be constructed using this as a model. 2672 /// \param[in] v1Name The name of the column that will fill the x axis. 2673 /// \param[in] v2Name The name of the column that will fill the y axis. 2674 /// \param[in] v3Name The name of the column that will fill the z axis. 2675 /// \param[in] wName The name of the column that will provide the weights. 2676 /// \return the bidimensional profile wrapped in a RResultPtr. 2677 /// 2678 /// This action is *lazy*: upon invocation of this method the calculation is 2679 /// booked but not executed. Also see RResultPtr. 2680 /// 2681 /// ### Example usage: 2682 /// ~~~{.cpp} 2683 /// // Deduce column types (this invocation needs jitting internally) 2684 /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, 2685 /// "xValues", "yValues", "zValues", "weight"); 2686 /// // Explicit column types 2687 /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20}, 2688 /// "xValues", "yValues", "zValues", "weight"); 2689 /// ~~~ 2690 /// 2691 /// See the first Profile2D() overload for more details. 
2692 template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType, 2693 typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 2694 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name, 2695 std::string_view v3Name, std::string_view wName) 2696 { 2697 std::shared_ptr<::TProfile2D> h(nullptr); 2698 { 2699 ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError); 2700 h = model.GetProfile(); 2701 } 2702 2703 if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) { 2704 throw std::runtime_error("2D profiles with no axes limits are not supported yet."); 2705 } 2706 const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName}; 2707 const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews) 2708 ? ColumnNames_t() 2709 : ColumnNames_t(columnViews.begin(), columnViews.end()); 2710 return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr); 2711 } 2712 2713 /// \brief Fill and return a two-dimensional profile (*lazy action*). 2714 /// See the first Profile2D() overload for more details. 2715 template <typename V1, typename V2, typename V3, typename W> 2716 RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model) 2717 { 2718 return Profile2D<V1, V2, V3, W>(model, "", "", "", ""); 2719 } 2720 2721 //////////////////////////////////////////////////////////////////////////// 2722 /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*). 
2723 /// 2724 /// Type T must provide at least: 2725 /// - a copy-constructor 2726 /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList 2727 /// (these types can also be passed as template parameters to this method) 2728 /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the 2729 /// objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that 2730 /// if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in 2731 /// the TCollection*). 2732 /// 2733 /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present. 2734 /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object. 2735 /// \tparam T The type of the object to fill. Automatically deduced. 2736 /// \param[in] model The model to be considered to build the new return value. 2737 /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill` 2738 /// \return the filled object wrapped in a RResultPtr. 2739 /// 2740 /// The user gives up ownership of the model object. 2741 /// The list of column names to be used for filling must always be specified. 2742 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. 2743 /// Also see RResultPtr. 
2744 /// 2745 /// ### Example usage: 2746 /// ~~~{.cpp} 2747 /// MyClass obj; 2748 /// // Deduce column types (this invocation needs jitting internally, and in this case 2749 /// // MyClass needs to be known to the interpreter) 2750 /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"}); 2751 /// // explicit column types 2752 /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"}); 2753 /// ~~~ 2754 /// 2755 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T> 2756 RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList) 2757 { 2758 auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model)); 2759 if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) { 2760 throw std::runtime_error("The absence of axes limits is not supported yet."); 2761 } 2762 return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr, 2763 columnList.size()); 2764 } 2765 2766 //////////////////////////////////////////////////////////////////////////// 2767 /// \brief Return a TStatistic object, filled once per event (*lazy action*). 2768 /// 2769 /// \tparam V The type of the value column 2770 /// \param[in] value The name of the column with the values to fill the statistics with. 2771 /// \return the filled TStatistic object wrapped in a RResultPtr. 
2772 /// 2773 /// ### Example usage: 2774 /// ~~~{.cpp} 2775 /// // Deduce column type (this invocation needs jitting internally) 2776 /// auto stats0 = myDf.Stats("values"); 2777 /// // Explicit column type 2778 /// auto stats1 = myDf.Stats<float>("values"); 2779 /// ~~~ 2780 /// 2781 template <typename V = RDFDetail::RInferredType> 2782 RResultPtr<TStatistic> Stats(std::string_view value = "") 2783 { 2784 ColumnNames_t columns; 2785 if (!value.empty()) { 2786 columns.emplace_back(std::string(value)); 2787 } 2788 const auto validColumnNames = GetValidatedColumnNames(1, columns); 2789 if (std::is_same<V, RDFDetail::RInferredType>::value) { 2790 return Fill(TStatistic(), validColumnNames); 2791 } else { 2792 return Fill<V>(TStatistic(), validColumnNames); 2793 } 2794 } 2795 2796 //////////////////////////////////////////////////////////////////////////// 2797 /// \brief Return a TStatistic object, filled once per event (*lazy action*). 2798 /// 2799 /// \tparam V The type of the value column 2800 /// \tparam W The type of the weight column 2801 /// \param[in] value The name of the column with the values to fill the statistics with. 2802 /// \param[in] weight The name of the column with the weights to fill the statistics with. 2803 /// \return the filled TStatistic object wrapped in a RResultPtr. 
2804 /// 2805 /// ### Example usage: 2806 /// ~~~{.cpp} 2807 /// // Deduce column types (this invocation needs jitting internally) 2808 /// auto stats0 = myDf.Stats("values", "weights"); 2809 /// // Explicit column types 2810 /// auto stats1 = myDf.Stats<int, float>("values", "weights"); 2811 /// ~~~ 2812 /// 2813 template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType> 2814 RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight) 2815 { 2816 ColumnNames_t columns{std::string(value), std::string(weight)}; 2817 constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value; 2818 constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value; 2819 const auto validColumnNames = GetValidatedColumnNames(2, columns); 2820 // We have 3 cases: 2821 // 1. Both types are inferred: we use Fill and let the jit kick in. 2822 // 2. One of the two types is explicit and the other one is inferred: the case is not supported. 2823 // 3. Both types are explicit: we invoke the fully compiled Fill method. 2824 if (vIsInferred && wIsInferred) { 2825 return Fill(TStatistic(), validColumnNames); 2826 } else if (vIsInferred != wIsInferred) { 2827 std::string error("The "); 2828 error += vIsInferred ? "value " : "weight "; 2829 error += "column type is explicit, while the "; 2830 error += vIsInferred ? "weight " : "value "; 2831 error += " is specified to be inferred. This case is not supported: please specify both types or none."; 2832 throw std::runtime_error(error); 2833 } else { 2834 return Fill<V, W>(TStatistic(), validColumnNames); 2835 } 2836 } 2837 2838 //////////////////////////////////////////////////////////////////////////// 2839 /// \brief Return the minimum of processed column values (*lazy action*). 2840 /// \tparam T The type of the branch/column. 2841 /// \param[in] columnName The name of the branch/column to be treated. 
2842 /// \return the minimum value of the selected column wrapped in a RResultPtr. 2843 /// 2844 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2845 /// template specialization of this method. 2846 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise. 2847 /// 2848 /// This action is *lazy*: upon invocation of this method the calculation is 2849 /// booked but not executed. Also see RResultPtr. 2850 /// 2851 /// ### Example usage: 2852 /// ~~~{.cpp} 2853 /// // Deduce column type (this invocation needs jitting internally) 2854 /// auto minVal0 = myDf.Min("values"); 2855 /// // Explicit column type 2856 /// auto minVal1 = myDf.Min<double>("values"); 2857 /// ~~~ 2858 /// 2859 template <typename T = RDFDetail::RInferredType> 2860 RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "") 2861 { 2862 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2863 using RetType_t = RDFDetail::MinReturnType_t<T>; 2864 auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max()); 2865 return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr); 2866 } 2867 2868 //////////////////////////////////////////////////////////////////////////// 2869 /// \brief Return the maximum of processed column values (*lazy action*). 2870 /// \tparam T The type of the branch/column. 2871 /// \param[in] columnName The name of the branch/column to be treated. 2872 /// \return the maximum value of the selected column wrapped in a RResultPtr. 2873 /// 2874 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2875 /// template specialization of this method. 2876 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise. 
2877 /// 2878 /// This action is *lazy*: upon invocation of this method the calculation is 2879 /// booked but not executed. Also see RResultPtr. 2880 /// 2881 /// ### Example usage: 2882 /// ~~~{.cpp} 2883 /// // Deduce column type (this invocation needs jitting internally) 2884 /// auto maxVal0 = myDf.Max("values"); 2885 /// // Explicit column type 2886 /// auto maxVal1 = myDf.Max<double>("values"); 2887 /// ~~~ 2888 /// 2889 template <typename T = RDFDetail::RInferredType> 2890 RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "") 2891 { 2892 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2893 using RetType_t = RDFDetail::MaxReturnType_t<T>; 2894 auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest()); 2895 return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr); 2896 } 2897 2898 //////////////////////////////////////////////////////////////////////////// 2899 /// \brief Return the mean of processed column values (*lazy action*). 2900 /// \tparam T The type of the branch/column. 2901 /// \param[in] columnName The name of the branch/column to be treated. 2902 /// \return the mean value of the selected column wrapped in a RResultPtr. 2903 /// 2904 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2905 /// template specialization of this method. 2906 /// Note that internally, the summations are executed with Kahan sums in double precision, irrespective 2907 /// of the type of column that is read. 2908 /// 2909 /// This action is *lazy*: upon invocation of this method the calculation is 2910 /// booked but not executed. Also see RResultPtr. 
2911 /// 2912 /// ### Example usage: 2913 /// ~~~{.cpp} 2914 /// // Deduce column type (this invocation needs jitting internally) 2915 /// auto meanVal0 = myDf.Mean("values"); 2916 /// // Explicit column type 2917 /// auto meanVal1 = myDf.Mean<double>("values"); 2918 /// ~~~ 2919 /// 2920 template <typename T = RDFDetail::RInferredType> 2921 RResultPtr<double> Mean(std::string_view columnName = "") 2922 { 2923 const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2924 auto meanV = std::make_shared<double>(0); 2925 return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr); 2926 } 2927 2928 //////////////////////////////////////////////////////////////////////////// 2929 /// \brief Return the unbiased standard deviation of processed column values (*lazy action*). 2930 /// \tparam T The type of the branch/column. 2931 /// \param[in] columnName The name of the branch/column to be treated. 2932 /// \return the standard deviation value of the selected column wrapped in a RResultPtr. 2933 /// 2934 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2935 /// template specialization of this method. 2936 /// 2937 /// This action is *lazy*: upon invocation of this method the calculation is 2938 /// booked but not executed. Also see RResultPtr. 2939 /// 2940 /// ### Example usage: 2941 /// ~~~{.cpp} 2942 /// // Deduce column type (this invocation needs jitting internally) 2943 /// auto stdDev0 = myDf.StdDev("values"); 2944 /// // Explicit column type 2945 /// auto stdDev1 = myDf.StdDev<double>("values"); 2946 /// ~~~ 2947 /// 2948 template <typename T = RDFDetail::RInferredType> 2949 RResultPtr<double> StdDev(std::string_view columnName = "") 2950 { 2951 const auto userColumns = columnName.empty() ? 
ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2952 auto stdDeviationV = std::make_shared<double>(0); 2953 return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr); 2954 } 2955 2956 // clang-format off 2957 //////////////////////////////////////////////////////////////////////////// 2958 /// \brief Return the sum of processed column values (*lazy action*). 2959 /// \tparam T The type of the branch/column. 2960 /// \param[in] columnName The name of the branch/column. 2961 /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible. 2962 /// \return the sum of the selected column wrapped in a RResultPtr. 2963 /// 2964 /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct 2965 /// template specialization of this method. 2966 /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise. 2967 /// 2968 /// This action is *lazy*: upon invocation of this method the calculation is 2969 /// booked but not executed. Also see RResultPtr. 2970 /// 2971 /// ### Example usage: 2972 /// ~~~{.cpp} 2973 /// // Deduce column type (this invocation needs jitting internally) 2974 /// auto sum0 = myDf.Sum("values"); 2975 /// // Explicit column type 2976 /// auto sum1 = myDf.Sum<double>("values"); 2977 /// ~~~ 2978 /// 2979 template <typename T = RDFDetail::RInferredType> 2980 RResultPtr<RDFDetail::SumReturnType_t<T>> 2981 Sum(std::string_view columnName = "", 2982 const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{}) 2983 { 2984 const auto userColumns = columnName.empty() ? 
ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 2985 auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue); 2986 return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr); 2987 } 2988 // clang-format on 2989 2990 //////////////////////////////////////////////////////////////////////////// 2991 /// \brief Gather filtering statistics. 2992 /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr. 2993 /// 2994 /// Calling `Report` on the main `RDataFrame` object gathers stats for 2995 /// all named filters in the call graph. Calling this method on a 2996 /// stored chain state (i.e. a graph node different from the first) gathers 2997 /// the stats for all named filters in the chain section between the original 2998 /// `RDataFrame` and that node (included). Stats are gathered in the same 2999 /// order as the named filters have been added to the graph. 3000 /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the 3001 /// effects cuts had. 3002 /// 3003 /// This action is *lazy*: upon invocation of 3004 /// this method the calculation is booked but not executed. See RResultPtr 3005 /// documentation. 3006 /// 3007 /// ### Example usage: 3008 /// ~~~{.cpp} 3009 /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2"); 3010 /// auto cutReport = filtered3.Report(); 3011 /// cutReport->Print(); 3012 /// ~~~ 3013 /// 3014 RResultPtr<RCutFlowReport> Report() 3015 { 3016 bool returnEmptyReport = false; 3017 // if this is a RInterface<RLoopManager> on which `Define` has been called, users 3018 // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which 3019 // certainly does not contain named filters. 3020 // The number 4 takes into account the implicit columns for entry and slot number 3021 // and their aliases (2 + 2, i.e. 
{r,t}dfentry_ and {r,t}dfslot_) 3022 if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GenerateColumnNames().size() > 4) 3023 returnEmptyReport = true; 3024 3025 auto rep = std::make_shared<RCutFlowReport>(); 3026 using Helper_t = RDFInternal::ReportHelper<Proxied>; 3027 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 3028 3029 auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}), 3030 fProxiedPtr, RDFInternal::RColumnRegister(fColRegister)); 3031 3032 return MakeResultPtr(rep, *fLoopManager, std::move(action)); 3033 } 3034 3035 /// \brief Returns the names of the filters created. 3036 /// \return the container of filters names. 3037 /// 3038 /// If called on a root node, all the filters in the computation graph will 3039 /// be printed. For any other node, only the filters upstream of that node. 3040 /// Filters without a name are printed as "Unnamed Filter" 3041 /// This is not an action nor a transformation, just a query to the RDataFrame object. 3042 /// 3043 /// ### Example usage: 3044 /// ~~~{.cpp} 3045 /// auto filtNames = d.GetFilterNames(); 3046 /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl; 3047 /// ~~~ 3048 /// 3049 std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); } 3050 3051 // clang-format off 3052 //////////////////////////////////////////////////////////////////////////// 3053 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot. 3054 /// \tparam F The type of the aggregator callable. Automatically deduced. 3055 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced. 3056 /// \tparam T The type of the column to apply the reduction to. Automatically deduced. 
3057 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable 3058 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread 3059 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead. 3060 /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted) 3061 /// \return the result of the aggregation wrapped in a RResultPtr. 3062 /// 3063 /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is 3064 /// initialized to aggIdentity or default-constructed if aggIdentity is omitted. 3065 /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and 3066 /// the value of the column columnName. 3067 /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable. 3068 /// Otherwise the signature of aggregator must be `void(U&,T)`. 3069 /// 3070 /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions. 3071 /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two. 3072 /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0]. 3073 /// 3074 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr. 
3075 /// 3076 /// Example usage: 3077 /// ~~~{.cpp} 3078 /// auto aggregator = [](double acc, double x) { return acc * x; }; 3079 /// ROOT::EnableImplicitMT(); 3080 /// // If multithread is enabled, the aggregator function will be called by more threads 3081 /// // and will produce a vector of partial accumulators. 3082 /// // The merger function performs the final aggregation of these partial results. 3083 /// auto merger = [](std::vector<double> &accumulators) { 3084 /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) { 3085 /// accumulators[0] *= accumulators[i]; 3086 /// } 3087 /// }; 3088 /// 3089 /// // The accumulator is initialized at this value by every thread. 3090 /// double initValue = 1.; 3091 /// 3092 /// // Multiplies all elements of the column "x" 3093 /// auto result = d.Aggregate(aggregator, merger, "x", initValue); 3094 /// ~~~ 3095 // clang-format on 3096 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type, 3097 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, 3098 typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay, 3099 typename U = TTraits::TakeFirstParameter_t<ArgTypes>, 3100 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>> 3101 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity) 3102 { 3103 RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay()); 3104 const auto columns = columnName.empty() ? 
ColumnNames_t() : ColumnNames_t({std::string(columnName)}); 3105 3106 const auto validColumnNames = GetValidatedColumnNames(1, columns); 3107 CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>()); 3108 3109 auto accObjPtr = std::make_shared<U>(aggIdentity); 3110 using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>; 3111 using Action_t = RDFInternal::RAction<Helper_t, Proxied>; 3112 auto action = std::make_unique<Action_t>( 3113 Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames, 3114 fProxiedPtr, fColRegister); 3115 return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action)); 3116 } 3117 3118 // clang-format off 3119 //////////////////////////////////////////////////////////////////////////// 3120 /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot. 3121 /// \tparam F The type of the aggregator callable. Automatically deduced. 3122 /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced. 3123 /// \tparam T The type of the column to apply the reduction to. Automatically deduced. 3124 /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable 3125 /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread 3126 /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead. 3127 /// \return the result of the aggregation wrapped in a RResultPtr. 3128 /// 3129 /// See previous Aggregate overload for more information. 
3130 // clang-format on 3131 template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type, 3132 typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types, 3133 typename U = TTraits::TakeFirstParameter_t<ArgTypes>, 3134 typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>> 3135 RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "") 3136 { 3137 static_assert( 3138 std::is_default_constructible<U>::value, 3139 "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)"); 3140 return Aggregate(std::move(aggregator), std::move(merger), columnName, U()); 3141 } 3142 3143 // clang-format off 3144 //////////////////////////////////////////////////////////////////////////// 3145 /// \brief Book execution of a custom action using a user-defined helper object. 3146 /// \tparam FirstColumn The type of the first column used by this action. Inferred together with OtherColumns if not present. 3147 /// \tparam OtherColumns A list of the types of the other columns used by this action 3148 /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose. 3149 /// \param[in] helper The Action Helper to be scheduled. 3150 /// \param[in] columns The names of the columns on which the helper acts. 3151 /// \return the result of the helper wrapped in a RResultPtr. 3152 /// 3153 /// This method books a custom action for execution. The behavior of the action is completely dependent on the 3154 /// Helper object provided by the caller. The required interface for the helper is described below (more 3155 /// methods that the ones required can be present, e.g. 
a constructor that takes the number of worker threads is usually useful): 3156 /// 3157 /// ### Mandatory interface 3158 /// 3159 /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>` 3160 /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible. 3161 /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged. 3162 /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type 3163 /// Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called 3164 /// _before_ Initialize(), because the RResultPtr is constructed before the event loop is started. 3165 /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations. 3166 /// It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper, 3167 /// or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations). 3168 /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event 3169 /// loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader 3170 /// that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations 3171 /// it should not usually serve any purpose for the Helper. This method is often no-op for simple helpers. 3172 /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method 3173 /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value: 3174 /// this parameter is there to facilitate writing thread-safe helpers. 
The other arguments will be the values of 3175 /// the requested columns for the particular entry being processed. 3176 /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result. 3177 /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in 3178 /// diagnostics, SaveGraph(), etc. 3179 /// 3180 /// ### Optional methods 3181 /// 3182 /// If these methods are implemented they enable extra functionality as per the description below. 3183 /// 3184 /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'. 3185 /// Different threads might call this method concurrently, but will do so with different 'slot' numbers. 3186 /// RDataFrame leverages this method to implement RResultPtr::OnPartialResult(). 3187 /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the 3188 /// appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing 3189 /// of every sample, as in DefinePerSample(). 3190 /// * `Helper MakeNew(void *newResult, std::string_view variation = "nominal")`: if implemented, it enables varying 3191 /// the action's result with VariationsFor(). It takes a type-erased new result that can be safely cast to a 3192 /// `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should be used as the action's output result. 3193 /// The function optionally takes the name of the current variation which could be useful in customizing its behaviour. 3194 /// 3195 /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled 3196 /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter. 
3197 /// 3198 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr. 3199 /// 3200 /// ### Examples 3201 /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper. 3202 /// 3203 /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx. 3204 /// 3205 // clang-format on 3206 template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper> 3207 RResultPtr<typename std::decay_t<Helper>::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {}) 3208 { 3209 using HelperT = std::decay_t<Helper>; 3210 // TODO add more static sanity checks on Helper 3211 using AH = RDFDetail::RActionImpl<HelperT>; 3212 static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value, 3213 "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>"); 3214 3215 auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper)); 3216 auto resPtr = hPtr->GetResultPtr(); 3217 3218 if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) { 3219 return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{}); 3220 } else { 3221 return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr, 3222 fProxiedPtr, columns.size()); 3223 } 3224 } 3225 3226 //////////////////////////////////////////////////////////////////////////// 3227 /// \brief Provides a representation of the columns in the dataset. 3228 /// \tparam ColumnTypes variadic list of branch/column types. 3229 /// \param[in] columnList Names of the columns to be displayed. 3230 /// \param[in] nRows Number of events for each column to be displayed. 3231 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row. 
3232 /// \return the `RDisplay` instance wrapped in a RResultPtr. 3233 /// 3234 /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular 3235 /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will 3236 /// return a complete version through `RDisplay::AsString()`. 3237 /// 3238 /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see 3239 /// RResultPtr. 3240 /// 3241 /// Example usage: 3242 /// ~~~{.cpp} 3243 /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries 3244 /// auto d1 = rdf.Display(""); 3245 /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries 3246 /// auto d2 = d.Display({"x", "y"}, 128); 3247 /// // Printing the short representations, the event loop will run 3248 /// d1->Print(); 3249 /// d2->Print(); 3250 /// ~~~ 3251 template <typename... ColumnTypes> 3252 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) 3253 { 3254 CheckIMTDisabled("Display"); 3255 auto newCols = columnList; 3256 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column 3257 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements); 3258 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>; 3259 // Need to add ULong64_t type corresponding to the first column rdfentry_ 3260 return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>( 3261 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr); 3262 } 3263 3264 //////////////////////////////////////////////////////////////////////////// 3265 /// \brief Provides a representation of the columns in the dataset. 
3266 /// \param[in] columnList Names of the columns to be displayed. 3267 /// \param[in] nRows Number of events for each column to be displayed. 3268 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row. 3269 /// \return the `RDisplay` instance wrapped in a RResultPtr. 3270 /// 3271 /// This overload automatically infers the column types. 3272 /// See the previous overloads for further details. 3273 /// 3274 /// Invoked when no types are specified to Display 3275 RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) 3276 { 3277 CheckIMTDisabled("Display"); 3278 auto newCols = columnList; 3279 newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column 3280 auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements); 3281 using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>; 3282 return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>( 3283 std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr, 3284 columnList.size() + 1); 3285 } 3286 3287 //////////////////////////////////////////////////////////////////////////// 3288 /// \brief Provides a representation of the columns in the dataset. 3289 /// \param[in] columnNameRegexp A regular expression to select the columns. 3290 /// \param[in] nRows Number of events for each column to be displayed. 3291 /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row. 3292 /// \return the `RDisplay` instance wrapped in a RResultPtr. 3293 /// 3294 /// The existing columns are matched against the regular expression. If the string provided 3295 /// is empty, all columns are selected. 3296 /// See the previous overloads for further details. 
3297 RResultPtr<RDisplay> 3298 Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10) 3299 { 3300 const auto columnNames = GetColumnNames(); 3301 const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display"); 3302 return Display(selectedColumns, nRows, nMaxCollectionElements); 3303 } 3304 3305 //////////////////////////////////////////////////////////////////////////// 3306 /// \brief Provides a representation of the columns in the dataset. 3307 /// \param[in] columnList Names of the columns to be displayed. 3308 /// \param[in] nRows Number of events for each column to be displayed. 3309 /// \param[in] nMaxCollectionElements Number of maximum elements in collection. 3310 /// \return the `RDisplay` instance wrapped in a RResultPtr. 3311 /// 3312 /// See the previous overloads for further details. 3313 RResultPtr<RDisplay> 3314 Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10) 3315 { 3316 ColumnNames_t selectedColumns(columnList); 3317 return Display(selectedColumns, nRows, nMaxCollectionElements); 3318 } 3319 3320 private: 3321 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type> 3322 std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>> 3323 DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where) 3324 { 3325 if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine 3326 RDFInternal::CheckValidCppVarName(name, where); 3327 RDFInternal::CheckForRedefinition(where, name, fColRegister, 3328 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); 3329 } else { 3330 RDFInternal::CheckForDefinition(where, name, fColRegister, 3331 GetDataSource() ? 
GetDataSource()->GetColumnNames() : ColumnNames_t{}); 3332 RDFInternal::CheckForNoVariations(where, name, fColRegister); 3333 } 3334 3335 using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types; 3336 using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf< 3337 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type; 3338 using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf< 3339 std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type; 3340 3341 constexpr auto nColumns = ColTypes_t::list_size; 3342 3343 const auto validColumnNames = GetValidatedColumnNames(nColumns, columns); 3344 CheckAndFillDSColumns(validColumnNames, ColTypes_t()); 3345 3346 // Declare return type to the interpreter, for future use by jitted actions 3347 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType)); 3348 if (retTypeName.empty()) { 3349 // The type is not known to the interpreter. 3350 // We must not error out here, but if/when this column is used in jitted code 3351 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType)); 3352 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; 3353 } 3354 3355 using NewCol_t = RDFDetail::RDefine<F, DefineType>; 3356 auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames, 3357 fColRegister, *fLoopManager); 3358 3359 RDFInternal::RColumnRegister newCols(fColRegister); 3360 newCols.AddDefine(std::move(newColumn)); 3361 3362 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 3363 3364 return newInterface; 3365 } 3366 3367 // This overload is chosen when the callable passed to Define or DefineSlot returns void. 3368 // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because 3369 // this way compilation of `Define` has no way to continue after throwing the error. 
3370 template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type, 3371 bool IsFStringConv = std::is_convertible<F, std::string>::value, 3372 bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value> 3373 std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>> 3374 DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &) 3375 { 3376 static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value, 3377 "Error in `Define`: type returned by expression is not default-constructible"); 3378 return *this; // never reached 3379 } 3380 3381 //////////////////////////////////////////////////////////////////////////// 3382 /// \brief Implementation of cache. 3383 template <typename... ColTypes, std::size_t... S> 3384 RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>) 3385 { 3386 const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot"); 3387 3388 // Check at compile time that the columns types are copy constructible 3389 constexpr bool areCopyConstructible = 3390 RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value; 3391 static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet."); 3392 3393 RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size()); 3394 3395 auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...); 3396 auto ds = std::make_unique<RLazyDS<ColTypes...>>( 3397 std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...); 3398 3399 RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns)); 3400 3401 return cachedRDF; 3402 } 3403 3404 template <bool IsSingleColumn, typename F> 3405 RInterface<Proxied, DS_t> 3406 VaryImpl(const 
std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns, 3407 const std::vector<std::string> &variationTags, std::string_view variationName) 3408 { 3409 using F_t = std::decay_t<F>; 3410 using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types; 3411 using RetType = typename TTraits::CallableTraits<F_t>::ret_type; 3412 constexpr auto nColumns = ColTypes_t::list_size; 3413 3414 SanityChecksForVary<RetType>(colNames, variationTags, variationName); 3415 3416 const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns); 3417 CheckAndFillDSColumns(validColumnNames, ColTypes_t{}); 3418 3419 auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType)); 3420 if (retTypeName.empty()) { 3421 // The type is not known to the interpreter, but we don't want to error out 3422 // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name. 3423 const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType)); 3424 retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType; 3425 } 3426 3427 auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>( 3428 colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager, 3429 validColumnNames); 3430 3431 RDFInternal::RColumnRegister newCols(fColRegister); 3432 newCols.AddVariation(std::move(variation)); 3433 3434 RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); 3435 3436 return newInterface; 3437 } 3438 3439 RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression, 3440 const std::vector<std::string> &variationTags, 3441 std::string_view variationName, bool isSingleColumn) 3442 { 3443 R__ASSERT(!variationTags.empty() && "Must have at least one variation."); 3444 R__ASSERT(!colNames.empty() && "Must have at least one varied column."); 3445 R__ASSERT(!variationName.empty() && "Must 
provide a variation name."); 3446 3447 for (auto &colName : colNames) { 3448 RDFInternal::CheckValidCppVarName(colName, "Vary"); 3449 RDFInternal::CheckForDefinition("Vary", colName, fColRegister, 3450 GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{}); 3451 } 3452 RDFInternal::CheckValidCppVarName(variationName, "Vary"); 3453 3454 // when varying multiple columns, they must be different columns 3455 if (colNames.size() > 1) { 3456 std::set<std::string> uniqueCols(colNames.begin(), colNames.end()); 3457 if (uniqueCols.size() != colNames.size()) 3458 throw std::logic_error("A column name was passed to the same Vary invocation multiple times."); 3459 } 3460 3461 auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr)); 3462 auto jittedVariation = 3463 RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, 3464 GetDataSource(), fColRegister, upcastNodeOnHeap, isSingleColumn); 3465 3466 RDFInternal::RColumnRegister newColRegister(fColRegister); 3467 newColRegister.AddVariation(std::move(jittedVariation)); 3468 3469 RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister)); 3470 3471 return newInterface; 3472 } 3473 3474 template <typename Helper, typename ActionResultType> 3475 auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr, 3476 const std::shared_ptr<Helper> &hPtr, 3477 TTraits::TypeList<RDFDetail::RInferredType>) 3478 -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{}) 3479 { 3480 return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u); 3481 } 3482 3483 template <typename Helper, typename ActionResultType, typename... Others> 3484 RResultPtr<ActionResultType> 3485 CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &, 3486 const std::shared_ptr<Helper>& /*hPtr*/, 3487 Others...) 
3488 { 3489 throw std::logic_error(std::string("An action was booked with no input columns, but the action requires " 3490 "columns! The action helper type was ") + 3491 typeid(Helper).name()); 3492 return {}; 3493 } 3494 3495 protected: 3496 RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm, 3497 const RDFInternal::RColumnRegister &colRegister) 3498 : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied) 3499 { 3500 } 3501 3502 const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; } 3503 }; 3504 3505 } // namespace RDF 3506 3507 } // namespace ROOT 3508 3509 #endif // ROOT_RDF_INTERFACE
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|