Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:10:34

0001 // Author: Enrico Guiraud, Danilo Piparo CERN  03/2017
0002 
0003 /*************************************************************************
0004  * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010 
0011 #ifndef ROOT_RDF_TINTERFACE
0012 #define ROOT_RDF_TINTERFACE
0013 
0014 #include "ROOT/RDataSource.hxx"
0015 #include "ROOT/RDF/ActionHelpers.hxx"
0016 #include "ROOT/RDF/HistoModels.hxx"
0017 #include "ROOT/RDF/InterfaceUtils.hxx"
0018 #include "ROOT/RDF/RColumnRegister.hxx"
0019 #include "ROOT/RDF/RDefine.hxx"
0020 #include "ROOT/RDF/RDefinePerSample.hxx"
0021 #include "ROOT/RDF/RFilter.hxx"
0022 #include "ROOT/RDF/RInterfaceBase.hxx"
0023 #include "ROOT/RDF/RVariation.hxx"
0024 #include "ROOT/RDF/RLazyDSImpl.hxx"
0025 #include "ROOT/RDF/RLoopManager.hxx"
0026 #include "ROOT/RDF/RRange.hxx"
0027 #include "ROOT/RDF/Utils.hxx"
0028 #include "ROOT/RDF/RDFDescription.hxx"
0029 #include "ROOT/RDF/RVariationsDescription.hxx"
0030 #include "ROOT/RResultPtr.hxx"
0031 #include "ROOT/RSnapshotOptions.hxx"
0032 #include <string_view>
0033 #include "ROOT/RVec.hxx"
0034 #include "ROOT/TypeTraits.hxx"
0035 #include "RtypesCore.h" // for ULong64_t
0036 #include "TDirectory.h"
0037 #include "TH1.h" // For Histo actions
0038 #include "TH2.h" // For Histo actions
0039 #include "TH3.h" // For Histo actions
0040 #include "THn.h"
0041 #include "TProfile.h"
0042 #include "TProfile2D.h"
0043 #include "TStatistic.h"
0044 
0045 #include <algorithm>
0046 #include <cstddef>
0047 #include <initializer_list>
#include <iterator> // std::back_inserter
0049 #include <limits>
0050 #include <memory>
0051 #include <set>
0052 #include <sstream>
0053 #include <stdexcept>
0054 #include <string>
0055 #include <type_traits> // is_same, enable_if
0056 #include <typeinfo>
0057 #include <unordered_set>
0058 #include <utility> // std::index_sequence
0059 #include <vector>
0060 
0061 class TGraph;
0062 
0063 // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
0064 namespace ROOT {
0065 void DisableImplicitMT();
0066 bool IsImplicitMTEnabled();
0067 void EnableImplicitMT(UInt_t numthreads);
0068 class RDataFrame;
0069 } // namespace ROOT
0070 namespace cling {
0071 std::string printValue(ROOT::RDataFrame *tdf);
0072 }
0073 
0074 namespace ROOT {
0075 namespace RDF {
0076 namespace RDFDetail = ROOT::Detail::RDF;
0077 namespace RDFInternal = ROOT::Internal::RDF;
0078 namespace TTraits = ROOT::TypeTraits;
0079 
0080 template <typename Proxied, typename DataSource>
0081 class RInterface;
0082 
0083 using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
0084 } // namespace RDF
0085 
0086 namespace Internal {
0087 namespace RDF {
0088 class GraphCreatorHelper;
0089 void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
0090 void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec);
0091 void TriggerRun(ROOT::RDF::RNode node);
0092 } // namespace RDF
0093 } // namespace Internal
0094 
0095 namespace RDF {
0096 
0097 // clang-format off
0098 /**
0099  * \class ROOT::RDF::RInterface
0100  * \ingroup dataframe
0101  * \brief The public interface to the RDataFrame federation of classes.
0102  * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
0103  * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
0104  *
0105  * The documentation of each method features a one liner illustrating how to use the method, for example showing how
0106  * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
0107  */
0108 // clang-format on
0109 template <typename Proxied, typename DataSource = void>
0110 class RInterface : public RInterfaceBase {
0111    using DS_t = DataSource;
0112    using RFilterBase = RDFDetail::RFilterBase;
0113    using RRangeBase = RDFDetail::RRangeBase;
0114    using RLoopManager = RDFDetail::RLoopManager;
0115    friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
0116    friend class RDFInternal::GraphDrawing::GraphCreatorHelper;
0117 
0118    template <typename T, typename W>
0119    friend class RInterface;
0120 
0121    friend void RDFInternal::TriggerRun(RNode node);
0122    friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
0123    friend void RDFInternal::ChangeSpec(const RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec);
0124 
0125    std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
0126 
0127 public:
0128    ////////////////////////////////////////////////////////////////////////////
0129    /// \brief Copy-assignment operator for RInterface.
0130    RInterface &operator=(const RInterface &) = default;
0131 
0132    ////////////////////////////////////////////////////////////////////////////
0133    /// \brief Copy-ctor for RInterface.
0134    RInterface(const RInterface &) = default;
0135 
0136    ////////////////////////////////////////////////////////////////////////////
0137    /// \brief Move-ctor for RInterface.
0138    RInterface(RInterface &&) = default;
0139 
0140    ////////////////////////////////////////////////////////////////////////////
0141    /// \brief Move-assignment operator for RInterface.
0142    RInterface &operator=(RInterface &&) = default;
0143 
0144    ////////////////////////////////////////////////////////////////////////////
0145    /// \brief Build a RInterface from a RLoopManager.
0146    /// This constructor is only available for RInterface<RLoopManager>.
0147    template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
0148    RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied)
0149    {
0150    }
0151 
0152    ////////////////////////////////////////////////////////////////////////////
0153    /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
0154    /// Different RDataFrame methods return different C++ types. All nodes, however,
0155    /// can be cast to this common type at the cost of a small performance penalty.
0156    /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
0157    /// around via (non-template, C++11) helper functions.
0158    /// Example usage:
0159    /// ~~~{.cpp}
0160    /// // a function that conditionally adds a Range to a RDataFrame node.
0161    /// RNode MaybeAddRange(RNode df, bool mustAddRange)
0162    /// {
0163    ///    return mustAddRange ? df.Range(1) : df;
0164    /// }
0165    /// // use as :
0166    /// ROOT::RDataFrame df(10);
0167    /// auto maybeRanged = MaybeAddRange(df, true);
0168    /// ~~~
0169    /// Note that it is not a problem to pass RNode's by value.
0170    operator RNode() const
0171    {
0172       return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister);
0173    }
0174 
0175    ////////////////////////////////////////////////////////////////////////////
0176    /// \brief Append a filter to the call graph.
0177    /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
0178    /// signalling whether the event has passed the selection (true) or not (false).
0179    /// \param[in] columns Names of the columns/branches in input to the filter function.
0180    /// \param[in] name Optional name of this filter. See `Report`.
0181    /// \return the filter node of the computation graph.
0182    ///
0183    /// Append a filter node at the point of the call graph corresponding to the
0184    /// object this method is called on.
0185    /// The callable `f` should not have side-effects (e.g. modification of an
0186    /// external or static variable) to ensure correct results when implicit
0187    /// multi-threading is active.
0188    ///
0189    /// RDataFrame only evaluates filters when necessary: if multiple filters
0190    /// are chained one after another, they are executed in order and the first
0191    /// one returning false causes the event to be discarded.
0192    /// Even if multiple actions or transformations depend on the same filter,
0193    /// it is executed once per entry. If its result is requested more than
0194    /// once, the cached result is served.
0195    ///
0196    /// ### Example usage:
0197    /// ~~~{.cpp}
0198    /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
0199    /// auto filtered = df.Filter(myCut, {"x", "y"});
0200    ///
0201    /// // String: it must contain valid C++ except that column names can be used instead of variable names
0202    /// auto filtered = df.Filter("x*y > 0");
0203    /// ~~~
0204    ///
0205    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0206    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0207    /// ~~~{.cpp}
0208    /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0209    /// ~~~
0210    /// but instead this will:
0211    /// ~~~{.cpp}
0212    /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0213    /// ~~~
0214    template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0215    RInterface<RDFDetail::RFilter<F, Proxied>, DS_t>
0216    Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
0217    {
0218       RDFInternal::CheckFilter(f);
0219       using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
0220       constexpr auto nColumns = ColTypes_t::list_size;
0221       const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
0222       CheckAndFillDSColumns(validColumnNames, ColTypes_t());
0223 
0224       using F_t = RDFDetail::RFilter<F, Proxied>;
0225 
0226       auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
0227       return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
0228    }
0229 
0230    ////////////////////////////////////////////////////////////////////////////
0231    /// \brief Append a filter to the call graph.
0232    /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
0233    /// signalling whether the event has passed the selection (true) or not (false).
0234    /// \param[in] name Optional name of this filter. See `Report`.
0235    /// \return the filter node of the computation graph.
0236    ///
0237    /// Refer to the first overload of this method for the full documentation.
0238    template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0239    RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, std::string_view name)
0240    {
0241       // The sfinae is there in order to pick up the overloaded method which accepts two strings
0242       // rather than this template method.
0243       return Filter(f, {}, name);
0244    }
0245 
0246    ////////////////////////////////////////////////////////////////////////////
0247    /// \brief Append a filter to the call graph.
0248    /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
0249    /// signalling whether the event has passed the selection (true) or not (false).
0250    /// \param[in] columns Names of the columns/branches in input to the filter function.
0251    /// \return the filter node of the computation graph.
0252    ///
0253    /// Refer to the first overload of this method for the full documentation.
0254    template <typename F>
0255    RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
0256    {
0257       return Filter(f, ColumnNames_t{columns});
0258    }
0259 
0260    ////////////////////////////////////////////////////////////////////////////
0261    /// \brief Append a filter to the call graph.
0262    /// \param[in] expression The filter expression in C++
0263    /// \param[in] name Optional name of this filter. See `Report`.
0264    /// \return the filter node of the computation graph.
0265    ///
0266    /// The expression is just-in-time compiled and used to filter entries. It must
0267    /// be valid C++ syntax in which variable names are substituted with the names
0268    /// of branches/columns.
0269    ///
0270    /// ### Example usage:
0271    /// ~~~{.cpp}
0272    /// auto filtered_df = df.Filter("myCollection.size() > 3");
   /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
0274    /// ~~~
0275    ///
0276    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0277    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0278    /// ~~~{.cpp}
0279    /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0280    /// ~~~
0281    /// but instead this will:
0282    /// ~~~{.cpp}
0283    /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0284    /// ~~~
0285    RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
0286    {
0287       // deleted by the jitted call to JitFilterHelper
0288       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0289       using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
0290       RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister);
0291       const auto jittedFilter =
0292          RDFInternal::BookFilterJit(upcastNodeOnHeap, name, expression, fLoopManager->GetBranchNames(), fColRegister,
0293                                     fLoopManager->GetTree(), fDataSource);
0294 
0295       return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), *fLoopManager, fColRegister);
0296    }
0297 
0298    // clang-format off
0299    ////////////////////////////////////////////////////////////////////////////
0300    /// \brief Define a new column.
0301    /// \param[in] name The name of the defined column.
0302    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0303    /// \param[in] columns Names of the columns/branches in input to the producer function.
0304    /// \return the first node of the computation graph for which the new quantity is defined.
0305    ///
0306    /// Define a column that will be visible from all subsequent nodes
0307    /// of the functional chain. The `expression` is only evaluated for entries that pass
0308    /// all the preceding filters.
0309    /// A new variable is created called `name`, accessible as if it was contained
0310    /// in the dataset from subsequent transformations/actions.
0311    ///
0312    /// Use cases include:
0313    /// * caching the results of complex calculations for easy and efficient multiple access
0314    /// * extraction of quantities of interest from complex objects
0315    ///
0316    /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
0317    ///
0318    /// ### Example usage:
0319    /// ~~~{.cpp}
0320    /// // assuming a function with signature:
0321    /// double myComplexCalculation(const RVec<float> &muon_pts);
0322    /// // we can pass it directly to Define
0323    /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
0324    /// // alternatively, we can pass the body of the function as a string, as in Filter:
0325    /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
0326    /// ~~~
0327    ///
0328    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0329    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0330    /// ~~~{.cpp}
0331    /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
0332    /// ~~~
0333    /// but instead this will:
0334    /// ~~~{.cpp}
0335    /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
0336    /// ~~~
0337    template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0338    RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
0339    {
0340       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define");
0341    }
0342    // clang-format on
0343 
0344    // clang-format off
0345    ////////////////////////////////////////////////////////////////////////////
0346    /// \brief Define a new column with a value dependent on the processing slot.
0347    /// \param[in] name The name of the defined column.
0348    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0349    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
0350    /// \return the first node of the computation graph for which the new quantity is defined.
0351    ///
0352    /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
0353    /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
0354    /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
0355    /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
0356    /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
0357    ///
0358    /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
0359    /// ~~~{.cpp}
0360    /// int function(unsigned int, double, double);
0361    /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
0362    /// df.DefineSlot("x", function, {"column1", "column2"})
0363    /// ~~~
0364    ///
0365    /// See Define() for more information.
0366    template <typename F>
0367    RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
0368    {
0369       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot");
0370    }
0371    // clang-format on
0372 
0373    // clang-format off
0374    ////////////////////////////////////////////////////////////////////////////
0375    /// \brief Define a new column with a value dependent on the processing slot and the current entry.
0376    /// \param[in] name The name of the defined column.
0377    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0378    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
0379    /// \return the first node of the computation graph for which the new quantity is defined.
0380    ///
0381    /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
0382    /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
0383    /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
0384    /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
0385    /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1. The second parameter
0386    /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
0387    ///
0388    /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
0389    /// ~~~{.cpp}
0390    /// int function(unsigned int, ULong64_t, double, double);
0391    /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
0392    /// DefineSlotEntry("x", function, {"column1", "column2"})
0393    /// ~~~
0394    ///
0395    /// See Define() for more information.
0396    template <typename F>
0397    RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
0398    {
0399       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
0400                                                                         "DefineSlotEntry");
0401    }
0402    // clang-format on
0403 
0404    ////////////////////////////////////////////////////////////////////////////
0405    /// \brief Define a new column.
0406    /// \param[in] name The name of the defined column.
0407    /// \param[in] expression An expression in C++ which represents the defined value
0408    /// \return the first node of the computation graph for which the new quantity is defined.
0409    ///
0410    /// The expression is just-in-time compiled and used to produce the column entries.
0411    /// It must be valid C++ syntax in which variable names are substituted with the names
0412    /// of branches/columns.
0413    ///
0414    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0415    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0416    /// ~~~{.cpp}
0417    /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
0418    /// ~~~
0419    /// but instead this will:
0420    /// ~~~{.cpp}
0421    /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
0422    /// ~~~
0423    ///
0424    /// Refer to the first overload of this method for the full documentation.
0425    RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
0426    {
0427       constexpr auto where = "Define";
0428       RDFInternal::CheckValidCppVarName(name, where);
0429       // these checks must be done before jitting lest we throw exceptions in jitted code
0430       RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
0431                                         fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
0432 
0433       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0434       auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
0435                                                      fLoopManager->GetBranchNames(), upcastNodeOnHeap);
0436 
0437       RDFInternal::RColumnRegister newCols(fColRegister);
0438       newCols.AddDefine(std::move(jittedDefine));
0439 
0440       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
0441 
0442       return newInterface;
0443    }
0444 
0445    ////////////////////////////////////////////////////////////////////////////
0446    /// \brief Overwrite the value and/or type of an existing column.
0447    /// \param[in] name The name of the column to redefine.
0448    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0449    /// \param[in] columns Names of the columns/branches in input to the expression.
0450    /// \return the first node of the computation graph for which the quantity is redefined.
0451    ///
0452    /// The old value of the column can be used as an input for the expression.
0453    ///
0454    /// An exception is thrown in case the column to redefine does not already exist.
0455    /// See Define() for more information.
0456    template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0457    RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {})
0458    {
0459       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine");
0460    }
0461 
0462    // clang-format off
0463    ////////////////////////////////////////////////////////////////////////////
0464    /// \brief Overwrite the value and/or type of an existing column.
0465    /// \param[in] name The name of the column to redefine.
0466    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0467    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
0468    /// \return the first node of the computation graph for which the new quantity is defined.
0469    ///
0470    /// The old value of the column can be used as an input for the expression.
0471    /// An exception is thrown in case the column to redefine does not already exist.
0472    ///
0473    /// See DefineSlot() for more information.
0474    // clang-format on
0475    template <typename F>
0476    RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
0477    {
0478       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot");
0479    }
0480 
0481    // clang-format off
0482    ////////////////////////////////////////////////////////////////////////////
0483    /// \brief Overwrite the value and/or type of an existing column.
0484    /// \param[in] name The name of the column to redefine.
0485    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0486    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
0487    /// \return the first node of the computation graph for which the new quantity is defined.
0488    ///
0489    /// The old value of the column can be used as an input for the expression.
0490    /// An exception is thrown in case the column to re-define does not already exist.
0491    ///
0492    /// See DefineSlotEntry() for more information.
0493    // clang-format on
0494    template <typename F>
0495    RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
0496    {
0497       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
0498                                                                         "RedefineSlotEntry");
0499    }
0500 
0501    ////////////////////////////////////////////////////////////////////////////
0502    /// \brief Overwrite the value and/or type of an existing column.
0503    /// \param[in] name The name of the column to redefine.
0504    /// \param[in] expression An expression in C++ which represents the defined value
0505    /// \return the first node of the computation graph for which the new quantity is defined.
0506    ///
0507    /// The expression is just-in-time compiled and used to produce the column entries.
0508    /// It must be valid C++ syntax in which variable names are substituted with the names
0509    /// of branches/columns.
0510    ///
0511    /// The old value of the column can be used as an input for the expression.
0512    /// An exception is thrown in case the column to re-define does not already exist.
0513    ///
0514    /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
0515    RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression)
0516    {
0517       constexpr auto where = "Redefine";
0518       RDFInternal::CheckValidCppVarName(name, where);
0519       RDFInternal::CheckForDefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
0520                                       fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
0521       RDFInternal::CheckForNoVariations(where, name, fColRegister);
0522 
0523       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0524       auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, fColRegister,
0525                                                      fLoopManager->GetBranchNames(), upcastNodeOnHeap);
0526 
0527       RDFInternal::RColumnRegister newCols(fColRegister);
0528       newCols.AddDefine(std::move(jittedDefine));
0529 
0530       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
0531 
0532       return newInterface;
0533    }
0534 
0535    // clang-format off
0536    ////////////////////////////////////////////////////////////////////////////
0537    /// \brief Define a new column that is updated when the input sample changes.
0538    /// \param[in] name The name of the defined column.
0539    /// \param[in] expression A C++ callable that computes the new value of the defined column.
0540    /// \return the first node of the computation graph for which the new quantity is defined.
0541    ///
0542    /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
0543    /// where:
0544    /// - `T` is the type of the defined column
0545    /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
0546    ///   the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
0547    /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
0548    ///   being processed (see the class docs for more information).
0549    ///
0550    /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
0551    /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
0552    /// starts rather than at every entry.
0553    ///
0554    /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.
0555    ///
0556    /// ### Example usage:
0557    /// ~~~{.cpp}
0558    /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
0559    /// df.DefinePerSample("weightbysample",
0560    ///                    [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
0561    ///                    { return id.Contains("sample1") ? 1.0f : 2.0f; });
0562    /// ~~~
0563    // clang-format on
0564    // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
0565    template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
0566    RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression)
0567    {
0568       RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
0569       RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(),
0570                                         fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
0571 
0572       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
0573       if (retTypeName.empty()) {
0574          // The type is not known to the interpreter.
0575          // We must not error out here, but if/when this column is used in jitted code
0576          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
0577          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
0578       }
0579 
0580       auto newColumn =
0581          std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
0582 
0583       RDFInternal::RColumnRegister newCols(fColRegister);
0584       newCols.AddDefine(std::move(newColumn));
0585       RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
0586       return newInterface;
0587    }
0588 
0589    // clang-format off
0590    ////////////////////////////////////////////////////////////////////////////
0591    /// \brief Define a new column that is updated when the input sample changes.
0592    /// \param[in] name The name of the defined column.
0593    /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
0594    /// \return the first node of the computation graph for which the new quantity is defined.
0595    ///
0596    /// The expression is just-in-time compiled and used to produce the column entries.
0597    /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
0598    /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
0599    /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
0600    ///
0601    /// ### Example usage:
0602    /// ~~~{.py}
0603    /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
0604    /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
0605    /// ~~~
0606    ///
0607    /// \note
0608    /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
0609    /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
0610    /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
0611    /// ~~~{.py}
0612    /// ROOT.gInterpreter.Declare(
0613    /// """
0614    /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
0615    ///    return id.Contains("sample1") ? 1.0f : 2.0f;
0616    /// }
0617    /// """)
0618    /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
0619    /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
0620    /// ~~~
0621    ///
0622    /// \note
0623    /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
0624    /// column names other than those mentioned above: the expression is evaluated once before the processing of the
0625    /// sample even starts, so column values are not accessible.
0626    // clang-format on
0627    RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression)
0628    {
0629       RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
0630       // these checks must be done before jitting lest we throw exceptions in jitted code
0631       RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(),
0632                                         fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
0633 
0634       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0635       auto jittedDefine =
0636          RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, upcastNodeOnHeap);
0637 
0638       RDFInternal::RColumnRegister newCols(fColRegister);
0639       newCols.AddDefine(std::move(jittedDefine));
0640 
0641       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
0642 
0643       return newInterface;
0644    }
0645 
0646    /// \brief Register systematic variations for a single existing column using custom variation tags.
0647    /// \param[in] colName name of the column for which varied values are provided.
0648    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0649    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0650    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0651    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0652    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
   /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
   ///            colName is used if none is provided.
0654    ///
0655    /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
0656    /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
0657    /// results that depend on any varied quantity, a map/dictionary of varied results can be produced with
0658    /// ROOT::RDF::Experimental::VariationsFor (see the example below).
0659    ///
0660    /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
0661    /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
0662    /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
0663    /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
0664    ///
0665    /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
0666    /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
0667    /// ~~~{.cpp}
0668    /// auto nominal_hx =
   ///     df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, {"down", "up"})
0670    ///       .Filter("pt > k")
0671    ///       .Define("x", someFunc, {"pt"})
0672    ///       .Histo1D("x");
0673    ///
0674    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0675    /// hx["nominal"].Draw();
0676    /// hx["pt:down"].Draw("SAME");
0677    /// hx["pt:up"].Draw("SAME");
0678    /// ~~~
0679    /// RDataFrame computes all variations as part of a single loop over the data.
0680    /// In particular, this means that I/O and computation of values shared
0681    /// among variations only happen once for all variations. Thus, the event loop
0682    /// run-time typically scales much better than linearly with the number of
0683    /// variations.
0684    ///
0685    /// RDataFrame lazily computes the varied values required to produce the
0686    /// outputs of \ref ROOT::RDF::Experimental::VariationsFor "VariationsFor()". If \ref
0687    /// ROOT::RDF::Experimental::VariationsFor "VariationsFor()" was not called for a result, the computations are only
0688    /// run for the nominal case.
0689    ///
0690    /// See other overloads for examples when variations are added for multiple existing columns,
0691    /// or when the tags are auto-generated instead of being directly defined.
0692    template <typename F>
0693    RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
0694                                   const std::vector<std::string> &variationTags, std::string_view variationName = "")
0695    {
0696       std::vector<std::string> colNames{{std::string(colName)}};
0697       const std::string theVariationName{variationName.empty() ? colName : variationName};
0698 
0699       return VaryImpl<true>(std::move(colNames), std::forward<F>(expression), inputColumns, variationTags,
0700                             theVariationName);
0701    }
0702 
0703    /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
0704    /// \param[in] colName name of the column for which varied values are provided.
0705    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0706    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0707    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0708    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0709    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
0710    /// `"1"`, etc. 
0711    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0712    ///            colName is used if none is provided.
0713    ///
0714    /// This overload of Vary takes an nVariations parameter instead of a list of tag names.
0715    /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
0716    /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
0717    ///
0718    /// Example usage:
0719    /// ~~~{.cpp}
   /// auto nominal_hx =
   ///   df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, 2)
   ///     .Histo1D("pt");
   ///
   /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
   /// hx["nominal"].Draw();
   /// hx["pt:0"].Draw("SAME");
   /// hx["pt:1"].Draw("SAME");
0728    /// ~~~
0729    ///
0730    /// \sa This Vary() overload for more information.
0731    template <typename F>
0732    RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
0733                                   std::size_t nVariations, std::string_view variationName = "")
0734    {
0735       R__ASSERT(nVariations > 0 && "Must have at least one variation.");
0736 
0737       std::vector<std::string> variationTags;
0738       variationTags.reserve(nVariations);
0739       for (std::size_t i = 0u; i < nVariations; ++i)
0740          variationTags.emplace_back(std::to_string(i));
0741 
0742       const std::string theVariationName{variationName.empty() ? colName : variationName};
0743 
0744       return Vary(colName, std::forward<F>(expression), inputColumns, std::move(variationTags), theVariationName);
0745    }
0746 
0747    /// \brief Register systematic variations for multiple existing columns using custom variation tags.
0748    /// \param[in] colNames set of names of the columns for which varied values are provided.
0749    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0750    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0751    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0752    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0753    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
   /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0755    ///
0756    /// This overload of Vary takes a list of column names as first argument and
0757    /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
0758    /// affected column. The `variationTags` are defined as `{"down", "up"}`.
0759    ///
0760    /// Example usage:
0761    /// ~~~{.cpp}
0762    /// // produce variations "ptAndEta:down" and "ptAndEta:up"
0763    /// auto nominal_hx =
0764    ///   df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
0765    ///         [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
0766    ///         {"pt", "eta"},  // inputs to the Vary expression, independent of what columns are varied
0767    ///         {"down", "up"}, // variation tags
0768    ///         "ptAndEta")    // variation name
0769    ///     .Histo1D("pt", "eta");
0770    ///
0771    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0772    /// hx["nominal"].Draw();
0773    /// hx["ptAndEta:down"].Draw("SAME");
0774    /// hx["ptAndEta:up"].Draw("SAME");
0775    /// ~~~
0776    ///
0777    /// \sa This Vary() overload for more information.
0778 
0779    template <typename F>
0780    RInterface<Proxied, DS_t>
0781    Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
0782         const std::vector<std::string> &variationTags, std::string_view variationName)
0783    {
0784       return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName);
0785    }
0786 
0787    /// \brief Register systematic variations for multiple existing columns using custom variation tags.
0788    /// \param[in] colNames set of names of the columns for which varied values are provided.
0789    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0790    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0791    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0792    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0793    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
   /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
   ///            This overload has no default for it: a name must always be passed explicitly.
0796    ///
0797    /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
0798    /// is avoided.
0799    ///
0800    /// \sa This Vary() overload for more information.
0801    template <typename F>
0802    RInterface<Proxied, DS_t>
0803    Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
0804         const std::vector<std::string> &variationTags, std::string_view variationName)
0805    {
0806       return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName);
0807    }
0808 
0809    /// \brief Register systematic variations for multiple existing columns using auto-generated tags.
0810    /// \param[in] colNames set of names of the columns for which varied values are provided.
0811    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0812    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0813    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0814    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0815    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
0816    /// `"1"`, etc. 
   /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
   ///            This overload has no default for it: a name must always be passed explicitly.
0819    ///
0820    /// This overload of Vary takes a list of column names as first argument.
0821    /// It takes an `nVariations` parameter instead of a list of tag names (`variationTags`). Tag names
0822    /// will be auto-generated as the sequence 0...``nVariations-1``.
0823    ///
0824    /// Example usage:
0825    /// ~~~{.cpp}
0826    /// auto nominal_hx =
0827    ///   df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
0828    ///         [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
0829    ///         {"pt", "eta"},  // inputs to the Vary expression, independent of what columns are varied
0830    ///         2, // auto-generated variation tags
0831    ///         "ptAndEta")    // variation name
0832    ///     .Histo1D("pt", "eta");
0833    ///
0834    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0835    /// hx["nominal"].Draw();
0836    /// hx["ptAndEta:0"].Draw("SAME");
0837    /// hx["ptAndEta:1"].Draw("SAME");
0838    /// ~~~
0839    ///
0840    /// \sa This Vary() overload for more information.
0841    template <typename F>
0842    RInterface<Proxied, DS_t>
0843    Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
0844         std::size_t nVariations, std::string_view variationName)
0845    {
0846       R__ASSERT(nVariations > 0 && "Must have at least one variation.");
0847 
0848       std::vector<std::string> variationTags;
0849       variationTags.reserve(nVariations);
0850       for (std::size_t i = 0u; i < nVariations; ++i)
0851          variationTags.emplace_back(std::to_string(i));
0852 
0853       return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(variationTags), variationName);
0854    }
0855 
   /// \brief Register systematic variations for multiple existing columns using auto-generated tags.
   /// \param[in] colNames set of names of the columns for which varied values are provided.
   /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
   ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
   ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
   /// \param[in] inputColumns the names of the columns to be passed to the callable.
   /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
   /// `"1"`, etc.
   /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
   ///            This overload has no default for it: a name must always be passed explicitly.
0867    ///
0868    /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
0869    /// is avoided.
0870    ///
0871    /// \sa This Vary() overload for more information.
0872    template <typename F>
0873    RInterface<Proxied, DS_t>
0874    Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
0875         std::size_t nVariations, std::string_view variationName)
0876    {
0877       return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName);
0878    }
0879 
0880    /// \brief Register systematic variations for a single existing column using custom variation tags.
0881    /// \param[in] colName name of the column for which varied values are provided.
0882    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
0883    ///            values for the specified column.
0884    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
0885    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0886    ///            colName is used if none is provided.
0887    ///
0888    /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
0889    /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
0890    /// defined as `{"down", "up"}`.
0891    /// ~~~{.cpp}
0892    /// auto nominal_hx =
0893    ///     df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
0894    ///       .Filter("pt > k")
0895    ///       .Define("x", someFunc, {"pt"})
0896    ///       .Histo1D("x");
0897    ///
0898    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0899    /// hx["nominal"].Draw();
0900    /// hx["pt:down"].Draw("SAME");
0901    /// hx["pt:up"].Draw("SAME");
0902    /// ~~~
0903    ///
0904    /// \sa This Vary() overload for more information.
0905    RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression,
0906                                   const std::vector<std::string> &variationTags, std::string_view variationName = "")
0907    {
0908       std::vector<std::string> colNames{{std::string(colName)}};
0909       const std::string theVariationName{variationName.empty() ? colName : variationName};
0910 
0911       return JittedVaryImpl(colNames, expression, variationTags, theVariationName, /*isSingleColumn=*/true);
0912    }
0913 
0914    /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
0915    /// \param[in] colName name of the column for which varied values are provided.
0916    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
0917    ///            values for the specified column.
0918    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
0919    /// `"1"`, etc. 
0920    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0921    ///            colName is used if none is provided.
0922    ///
   /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
   /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
   /// auto-generated.
0926    /// ~~~{.cpp}
0927    /// auto nominal_hx =
0928    ///     df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2)
0929    ///       .Histo1D("pt");
0930    ///
0931    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0932    /// hx["nominal"].Draw();
0933    /// hx["pt:0"].Draw("SAME");
0934    /// hx["pt:1"].Draw("SAME");
0935    /// ~~~
0936    ///
0937    /// \sa This Vary() overload for more information.
0938    RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
0939                                   std::string_view variationName = "")
0940    {
0941       std::vector<std::string> variationTags;
0942       variationTags.reserve(nVariations);
0943       for (std::size_t i = 0u; i < nVariations; ++i)
0944          variationTags.emplace_back(std::to_string(i));
0945 
0946       return Vary(colName, expression, std::move(variationTags), variationName);
0947    }
0948 
0949    /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
0950    /// \param[in] colNames set of names of the columns for which varied values are provided.
   /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
   ///            values for the specified columns.
0953    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
0954    /// `"1"`, etc. 
0955    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0956    ///
0957    /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
0958    /// compiled. It takes an nVariations parameter instead of a list of tag names.
0959    /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
0960    /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
0961    /// The example below shows how Vary() is used while dealing with multiple columns.
0962    ///
0963    /// ~~~{.cpp}
0964    /// auto nominal_hx =
0965    ///     df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
0966    ///       .Histo1D("x", "y");
0967    ///
0968    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0969    /// hx["nominal"].Draw();
0970    /// hx["xy:0"].Draw("SAME");
0971    /// hx["xy:1"].Draw("SAME");
0972    /// ~~~
0973    ///
0974    /// \sa This Vary() overload for more information.
0975    RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
0976                                   std::size_t nVariations, std::string_view variationName)
0977    {
0978       std::vector<std::string> variationTags;
0979       variationTags.reserve(nVariations);
0980       for (std::size_t i = 0u; i < nVariations; ++i)
0981          variationTags.emplace_back(std::to_string(i));
0982 
0983       return Vary(colNames, expression, std::move(variationTags), variationName);
0984    }
0985 
0986    /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
0987    /// \param[in] colNames set of names of the columns for which varied values are provided.
   /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the
   ///            varied values for the specified columns.
   /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
   /// `"1"`, etc.
   /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
   ///            This overload has no default for it: a name must always be passed explicitly.
0994    ///
0995    /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
0996    /// is avoided.
0997    ///
0998    /// \sa This Vary() overload for more information.
0999    RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression,
1000                                   std::size_t nVariations, std::string_view variationName)
1001    {
1002       return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName);
1003    }
1004 
1005    /// \brief Register systematic variations for multiple existing columns using custom variation tags.
1006    /// \param[in] colNames set of names of the columns for which varied values are provided.
   /// \param[in] expression a string containing valid C++ code that evaluates to an RVec of RVecs containing the varied
   ///            values for the specified columns.
1009    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1010    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1011    ///
1012    /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1013    /// compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as
1014    /// `{"down", "up"}`.
1015    /// ~~~{.cpp}
1016    /// auto nominal_hx =
1017    ///     df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
1018    ///       .Histo1D("x", "y");
1019    ///
1020    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1021    /// hx["nominal"].Draw();
1022    /// hx["xy:down"].Draw("SAME");
1023    /// hx["xy:up"].Draw("SAME");
1024    /// ~~~
1025    ///
1026    /// \sa This Vary() overload for more information.
1027    RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1028                                   const std::vector<std::string> &variationTags, std::string_view variationName)
1029    {
1030       return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false);
1031    }
1032 
1033    ////////////////////////////////////////////////////////////////////////////
1034    /// \brief Allow to refer to a column with a different name.
   /// \param[in] alias name of the column alias
   /// \param[in] columnName name of the column to be aliased
1037    /// \return the first node of the computation graph for which the alias is available.
1038    ///
1039    /// Aliasing an alias is supported.
1040    ///
1041    /// ### Example usage:
1042    /// ~~~{.cpp}
1043    /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
1044    /// ~~~
1045    RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
1046    {
1047       // The symmetry with Define is clear. We want to:
1048       // - Create globally the alias and return this very node, unchanged
1049       // - Make aliases accessible based on chains and not globally
1050 
1051       // Helper to find out if a name is a column
1052       auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1053 
1054       constexpr auto where = "Alias";
1055       RDFInternal::CheckValidCppVarName(alias, where);
1056       // If the alias name is a column name, there is a problem
1057       RDFInternal::CheckForRedefinition(where, alias, fColRegister, fLoopManager->GetBranchNames(), dsColumnNames);
1058 
1059       const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
1060 
1061       RDFInternal::RColumnRegister newCols(fColRegister);
1062       newCols.AddAlias(alias, validColumnName);
1063 
1064       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
1065 
1066       return newInterface;
1067    }
1068 
1069    ////////////////////////////////////////////////////////////////////////////
1070    /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1071    /// \tparam ColumnTypes variadic list of branch/column types.
1072    /// \param[in] treename The name of the output TTree.
1073    /// \param[in] filename The name of the output TFile.
1074    /// \param[in] columnList The list of names of the columns/branches to be written.
1075    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1076    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1077    ///
1078    /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
1079    /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
1080    /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
1081    /// written out and it appears before the array in the columnList.
1082    ///
1083    /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
1084    /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
1085    /// present, by default all friend top-level branches that have names that do not collide with
1086    /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
1087    /// friend branches with the same names of branches in the main TTree/TChain with names of the form
1088    /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
1089    ///
1090    /// ### Writing to a sub-directory
1091    ///
1092    /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to
1093    /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree `t` in the
1094    /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
1095    ///
1096    /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
1097    /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled with
1098    /// respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in wrong
1099    /// associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
1100    /// error out if such a "shuffled" TTree is used in a friendship.
1101    ///
1102    /// \note In case no events are written out (e.g. because no event passes all filters) the behavior of Snapshot in
1103    /// single-thread and multi-thread runs is different: in single-thread runs, Snapshot will write out a TTree with
1104    /// the specified name and zero entries; in multi-thread runs, no TTree object will be written out to disk.
1105    ///
1106    /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
1107    /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1108    /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1109    /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
1110    ///
1111    /// ### Example invocations:
1112    ///
1113    /// ~~~{.cpp}
1114    /// // without specifying template parameters (column types automatically deduced)
1115    /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
1116    ///
1117    /// // specifying template parameters ("x" is `int`, "y" is `float`)
1118    /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
1119    /// ~~~
1120    ///
1121    /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
1122    /// `RSnapshotOptions`:
1123    /// ~~~{.cpp}
1124    /// RSnapshotOptions opts;
1125    /// opts.fLazy = true;
1126    /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1127    /// ~~~
1128    template <typename... ColumnTypes>
1129    RResultPtr<RInterface<RLoopManager>>
1130    Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
1131             const RSnapshotOptions &options = RSnapshotOptions())
1132    {
1133       return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
1134    }
1135 
1136    ////////////////////////////////////////////////////////////////////////////
1137    /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1138    /// \param[in] treename The name of the output TTree.
1139    /// \param[in] filename The name of the output TFile.
1140    /// \param[in] columnList The list of names of the columns/branches to be written.
1141    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1142    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1143    ///
1144    /// This function returns a `RDataFrame` built with the output tree as a source.
1145    /// The types of the columns are automatically inferred and do not need to be specified.
1146    ///
1147    /// See above for a more complete description and example usages.
1148    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1149                                                  const ColumnNames_t &columnList,
1150                                                  const RSnapshotOptions &options = RSnapshotOptions())
1151    {
1152       // like columnList but with `#var` columns removed
1153       auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
1154       // like columnListWithoutSizeColumns but with aliases resolved
1155       auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes);
1156       RDFInternal::CheckForDuplicateSnapshotColumns(colListNoAliases);
1157       // like validCols but with missing size branches required by array branches added in the right positions
1158       const auto pairOfColumnLists =
1159          RDFInternal::AddSizeBranches(fLoopManager->GetBranchNames(), fLoopManager->GetTree(),
1160                                       std::move(colListNoAliases), std::move(colListNoPoundSizes));
1161       const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first;
1162       const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second;
1163 
1164 
1165       const auto fullTreeName = treename;
1166       const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
1167       treename = parsedTreePath.fTreeName;
1168       const auto &dirname = parsedTreePath.fDirName;
1169 
1170       auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(
1171          RDFInternal::SnapshotHelperArgs{std::string(filename), std::string(dirname), std::string(treename),
1172                                          colListWithAliasesAndSizeBranches, options});
1173 
1174       ::TDirectory::TContext ctxt;
1175 
1176       // The CreateLMFromTTree function by default opens the file passed as input
1177       // to check for the presence of the TTree inside. But at this moment the
1178       // filename we are using here corresponds to a file which does not exist yet,
1179       // i.e. the output file of the Snapshot call. Thus, checkFile=false will
1180       // prevent the function from trying to open a non-existent file.
1181       auto newRDF = std::make_shared<RInterface<RLoopManager>>(ROOT::Detail::RDF::CreateLMFromTTree(
1182          fullTreeName, filename, colListNoAliasesWithSizeBranches, /*checkFile*/ false));
1183 
1184       auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1185          colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr,
1186          colListNoAliasesWithSizeBranches.size());
1187 
1188       if (!options.fLazy)
1189          *resPtr;
1190       return resPtr;
1191    }
1192 
1193    // clang-format off
1194    ////////////////////////////////////////////////////////////////////////////
1195    /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1196    /// \param[in] treename The name of the output TTree.
1197    /// \param[in] filename The name of the output TFile.
1198    /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1199    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
1200    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1201    ///
1202    /// This function returns a `RDataFrame` built with the output tree as a source.
1203    /// The types of the columns are automatically inferred and do not need to be specified.
1204    ///
1205    /// See above for a more complete description and example usages.
1206    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1207                                                  std::string_view columnNameRegexp = "",
1208                                                  const RSnapshotOptions &options = RSnapshotOptions())
1209    {
1210       const auto definedColumns = fColRegister.GenerateColumnNames();
1211       auto *tree = fLoopManager->GetTree();
1212       const auto treeBranchNames = tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1213       const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1214       // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1215       ColumnNames_t dsColumnsWithoutSizeColumns;
1216       std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1217                    [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1218       ColumnNames_t columnNames;
1219       columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1220       columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1221       columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1222       columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1223 
1224       // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd.
1225       // RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
1226       RDFInternal::RemoveDuplicates(columnNames);
1227 
1228       const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
1229       return Snapshot(treename, filename, selectedColumns, options);
1230    }
1231    // clang-format on
1232 
1233    // clang-format off
1234    ////////////////////////////////////////////////////////////////////////////
1235    /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1236    /// \param[in] treename The name of the output TTree.
1237    /// \param[in] filename The name of the output TFile.
1238    /// \param[in] columnList The list of names of the columns/branches to be written.
1239    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1240    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1241    ///
1242    /// This function returns a `RDataFrame` built with the output tree as a source.
1243    /// The types of the columns are automatically inferred and do not need to be specified.
1244    ///
1245    /// See above for a more complete description and example usages.
1246    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1247                                                  std::initializer_list<std::string> columnList,
1248                                                  const RSnapshotOptions &options = RSnapshotOptions())
1249    {
1250       ColumnNames_t selectedColumns(columnList);
1251       return Snapshot(treename, filename, selectedColumns, options);
1252    }
1253    // clang-format on
1254 
1255    ////////////////////////////////////////////////////////////////////////////
1256    /// \brief Save selected columns in memory.
1257    /// \tparam ColumnTypes variadic list of branch/column types.
1258    /// \param[in] columnList columns to be cached in memory.
1259    /// \return a `RDataFrame` that wraps the cached dataset.
1260    ///
1261    /// This action returns a new `RDataFrame` object, completely detached from
1262    /// the originating `RDataFrame`. The new dataframe only contains the cached
1263    /// columns and stores their content in memory for fast, zero-copy subsequent access.
1264    ///
1265    /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1266    /// fits in memory and that will be accessed many times.
1267    ///
1268    /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1269    /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1270    /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1271    /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1272    ///
1273    /// ### Example usage:
1274    ///
1275    /// **Types and columns specified:**
1276    /// ~~~{.cpp}
1277    /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1278    /// ~~~
1279    ///
1280    /// **Types inferred and columns specified (this invocation relies on jitting):**
1281    /// ~~~{.cpp}
1282    /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1283    /// ~~~
1284    ///
1285    /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1286    /// ~~~{.cpp}
1287    /// auto cache_all_cols_df = df.Cache(myRegexp);
1288    /// ~~~
1289    template <typename... ColumnTypes>
1290    RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
1291    {
1292       auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
1293       return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1294    }
1295 
1296    ////////////////////////////////////////////////////////////////////////////
1297    /// \brief Save selected columns in memory.
1298    /// \param[in] columnList columns to be cached in memory
1299    /// \return a `RDataFrame` that wraps the cached dataset.
1300    ///
1301    /// See the previous overloads for more information.
1302    RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
1303    {
1304       // Early return: if the list of columns is empty, just return an empty RDF
1305       // If we proceed, the jitted call will not compile!
1306       if (columnList.empty()) {
1307          auto nEntries = *this->Count();
1308          RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1309          return emptyRDF;
1310       }
1311 
1312       std::stringstream cacheCall;
1313       auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1314       RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
1315                                                                                       fColRegister);
1316       // build a string equivalent to
1317       // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
1318       RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
1319       cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1320                 << RDFInternal::PrettyPrintAddr(&resRDF)
1321                 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1322                 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1323 
1324       const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1325 
1326       const auto validColumnNames =
1327          GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1328       const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(), fDataSource,
1329                                                  "Cache", /*vector2rvec=*/false);
1330       for (const auto &colType : colTypes)
1331          cacheCall << colType << ", ";
1332       if (!columnListWithoutSizeColumns.empty())
1333          cacheCall.seekp(-2, cacheCall.cur);                         // remove the last ",
1334       cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1335                 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1336 
1337       // book the code to jit with the RLoopManager and trigger the event loop
1338       fLoopManager->ToJitExec(cacheCall.str());
1339       fLoopManager->Jit();
1340 
1341       return resRDF;
1342    }
1343 
1344    ////////////////////////////////////////////////////////////////////////////
1345    /// \brief Save selected columns in memory.
1346    /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A '^' at the beginning and a '$' at the end of the string are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1347    /// \return a `RDataFrame` that wraps the cached dataset.
1348    ///
1349    /// The existing columns are matched against the regular expression. If the string provided
1350    /// is empty, all columns are selected. See the previous overloads for more information.
1351    RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "")
1352    {
1353       const auto definedColumns = fColRegister.GenerateColumnNames();
1354       auto *tree = fLoopManager->GetTree();
1355       const auto treeBranchNames =
1356          tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1357       const auto dsColumns = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
1358       // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1359       ColumnNames_t dsColumnsWithoutSizeColumns;
1360       std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1361                    [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1362       ColumnNames_t columnNames;
1363       columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1364       columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1365       columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1366       columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1367       const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1368       return Cache(selectedColumns);
1369    }
1370 
1371    ////////////////////////////////////////////////////////////////////////////
1372    /// \brief Save selected columns in memory.
1373    /// \param[in] columnList columns to be cached in memory.
1374    /// \return a `RDataFrame` that wraps the cached dataset.
1375    ///
1376    /// See the previous overloads for more information.
1377    RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1378    {
1379       ColumnNames_t selectedColumns(columnList);
1380       return Cache(selectedColumns);
1381    }
1382 
1383    // clang-format off
1384    ////////////////////////////////////////////////////////////////////////////
1385    /// \brief Creates a node that filters entries based on range: [begin, end).
1386    /// \param[in] begin Initial entry number considered for this range.
1387    /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1388    /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1389    /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1390    ///
1391    /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1392    /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1393    ///
1394    /// ### Example usage:
1395    /// ~~~{.cpp}
1396    /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1397    /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1398    /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1399    /// ~~~
1400    // clang-format on
1401    RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1402    {
1403       // check invariants
1404       if (stride == 0 || (end != 0 && end < begin))
1405          throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1406       CheckIMTDisabled("Range");
1407 
1408       using Range_t = RDFDetail::RRange<Proxied>;
1409       auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1410       RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister);
1411       return newInterface;
1412    }
1413 
1414    // clang-format off
1415    ////////////////////////////////////////////////////////////////////////////
1416    /// \brief Creates a node that filters entries based on range.
1417    /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1418    /// \return a node of the computation graph for which the range is defined.
1419    ///
1420    /// See the other Range overload for a detailed description.
1421    // clang-format on
1422    RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1423 
1424    // clang-format off
1425    ////////////////////////////////////////////////////////////////////////////
1426    /// \brief Execute a user-defined function on each entry (*instant action*).
1427    /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1428    /// \param[in] columns Names of the columns/branches in input to the user function.
1429    ///
1430    /// The callable `f` is invoked once per entry. This is an *instant action*:
1431    /// upon invocation, an event loop as well as execution of all scheduled actions
1432    /// is triggered.
1433    /// Users are responsible for the thread-safety of this callable when executing
1434    /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1435    ///
1436    /// ### Example usage:
1437    /// ~~~{.cpp}
1438    /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1439    /// ~~~
1440    // clang-format on
1441    template <typename F>
1442    void Foreach(F f, const ColumnNames_t &columns = {})
1443    {
1444       using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1445       using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1446       ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1447    }
1448 
1449    // clang-format off
1450    ////////////////////////////////////////////////////////////////////////////
1451    /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1452    /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1453    /// \param[in] columns Names of the columns/branches in input to the user function.
1454    ///
1455    /// Same as `Foreach`, but the user-defined function takes an extra
1456    /// `unsigned int` as its first parameter, the *processing slot index*.
1457    /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1458    /// for each thread of execution.
1459    /// This is meant as a helper in writing thread-safe `Foreach`
1460    /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1461    /// The user-defined processing callable is able to follow different
1462    /// *streams of processing* indexed by the first parameter.
1463    /// `ForeachSlot` works just as well with single-thread execution: in that
1464    /// case `slot` will always be `0`.
1465    ///
1466    /// ### Example usage:
1467    /// ~~~{.cpp}
1468    /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1469    /// ~~~
1470    // clang-format on
1471    template <typename F>
1472    void ForeachSlot(F f, const ColumnNames_t &columns = {})
1473    {
1474       using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
1475       constexpr auto nColumns = ColTypes_t::list_size;
1476 
1477       const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1478       CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1479 
1480       using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1481       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1482 
1483       auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1484 
1485       fLoopManager->Run();
1486    }
1487 
1488    // clang-format off
1489    ////////////////////////////////////////////////////////////////////////////
1490    /// \brief Execute a user-defined reduce operation on the values of a column.
1491    /// \tparam F The type of the reduce callable. Automatically deduced.
1492    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1493    /// \param[in] f A callable with signature `T(T,T)`
1494    /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1495    /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
1496    ///
1497    /// A reduction takes two values of a column and merges them into one (e.g.
1498    /// by summing them, taking the maximum, etc). This action performs the
1499    /// specified reduction operation on all processed column values, returning
1500    /// a single value of the same type. The callable f must satisfy the general
1501    /// requirements of a *processing function* besides having signature `T(T,T)`
1502    /// where `T` is the type of column columnName.
1503    ///
1504    /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1505    /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1506    /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1507    /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1508    /// overload.
1509    ///
1510    /// ### Example usage:
1511    /// ~~~{.cpp}
1512    /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1513    /// ~~~
1514    ///
1515    /// This action is *lazy*: upon invocation of this method the calculation is
1516    /// booked but not executed. Also see RResultPtr.
1517    // clang-format on
1518    template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1519    RResultPtr<T> Reduce(F f, std::string_view columnName = "")
1520    {
1521       static_assert(
1522          std::is_default_constructible<T>::value,
1523          "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1524       return Reduce(std::move(f), columnName, T());
1525    }
1526 
1527    ////////////////////////////////////////////////////////////////////////////
1528    /// \brief Execute a user-defined reduce operation on the values of a column.
1529    /// \tparam F The type of the reduce callable. Automatically deduced.
1530    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1531    /// \param[in] f A callable with signature `T(T,T)`
1532    /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1533    /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1534    /// \return the reduced quantity wrapped in a RResultPtr.
1535    ///
1536    /// ### Example usage:
1537    /// ~~~{.cpp}
1538    /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1539    /// ~~~
1540    /// See the description of the first Reduce overload for more information.
1541    template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1542    RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1543    {
1544       return Aggregate(f, f, columnName, redIdentity);
1545    }
1546 
1547    ////////////////////////////////////////////////////////////////////////////
1548    /// \brief Return the number of entries processed (*lazy action*).
1549    /// \return the number of entries wrapped in a RResultPtr.
1550    ///
1551    /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1552    /// This action is *lazy*: upon invocation of this method the calculation is
1553    /// booked but not executed. Also see RResultPtr.
1554    ///
1555    /// ### Example usage:
1556    /// ~~~{.cpp}
1557    /// auto nEntriesAfterCuts = myFilteredDf.Count();
1558    /// ~~~
1559    ///
1560    RResultPtr<ULong64_t> Count()
1561    {
1562       const auto nSlots = fLoopManager->GetNSlots();
1563       auto cSPtr = std::make_shared<ULong64_t>(0);
1564       using Helper_t = RDFInternal::CountHelper;
1565       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1566       auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1567                                                RDFInternal::RColumnRegister(fColRegister));
1568       return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1569    }
1570 
1571    ////////////////////////////////////////////////////////////////////////////
1572    /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1573    /// \tparam T The type of the column.
1574    /// \tparam COLL The type of collection used to store the values.
1575    /// \param[in] column The name of the column to collect the values of.
1576    /// \return the content of the selected column wrapped in a RResultPtr.
1577    ///
1578    /// The collection type to be specified for C-style array columns is `RVec<T>`:
1579    /// in this case the returned collection is a `std::vector<RVec<T>>`.
1580    /// ### Example usage:
1581    /// ~~~{.cpp}
1582    /// // In this case intCol is a std::vector<int>
1583    /// auto intCol = rdf.Take<int>("integerColumn");
1584    /// // Same content as above but in this case taken as a RVec<int>
1585    /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1586    /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1587    /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1588    /// ~~~
1589    /// This action is *lazy*: upon invocation of this method the calculation is
1590    /// booked but not executed. Also see RResultPtr.
1591    template <typename T, typename COLL = std::vector<T>>
1592    RResultPtr<COLL> Take(std::string_view column = "")
1593    {
1594       const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1595 
1596       const auto validColumnNames = GetValidatedColumnNames(1, columns);
1597       CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1598 
1599       using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1600       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1601       auto valuesPtr = std::make_shared<COLL>();
1602       const auto nSlots = fLoopManager->GetNSlots();
1603 
1604       auto action =
1605          std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1606       return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1607    }
1608 
1609    ////////////////////////////////////////////////////////////////////////////
1610    /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1611    /// \tparam V The type of the column used to fill the histogram.
1612    /// \param[in] model The returned histogram will be constructed using this as a model.
1613    /// \param[in] vName The name of the column that will fill the histogram.
1614    /// \return the monodimensional histogram wrapped in a RResultPtr.
1615    ///
1616    /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1617    /// is filled with each one of the elements of the container. In case multiple columns of container type
1618    /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1619    /// possibly different lengths between events).
1620    /// This action is *lazy*: upon invocation of this method the calculation is
1621    /// booked but not executed. Also see RResultPtr.
1622    ///
1623    /// ### Example usage:
1624    /// ~~~{.cpp}
1625    /// // Deduce column type (this invocation needs jitting internally)
1626    /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1627    /// // Explicit column type
1628    /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1629    /// ~~~
1630    ///
1631    /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1632    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1633    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1634    template <typename V = RDFDetail::RInferredType>
1635    RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1636    {
1637       const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1638 
1639       const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1640 
1641       std::shared_ptr<::TH1D> h(nullptr);
1642       {
1643          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1644          h = model.GetHistogram();
1645          h->SetDirectory(nullptr);
1646       }
1647 
1648       if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1649          RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1650       return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr);
1651    }
1652 
1653    ////////////////////////////////////////////////////////////////////////////
1654    /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1655    /// \tparam V The type of the column used to fill the histogram.
1656    /// \param[in] vName The name of the column that will fill the histogram.
1657    /// \return the monodimensional histogram wrapped in a RResultPtr.
1658    ///
1659    /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1660    /// The "name" and "title" strings are built starting from the input column name.
1661    /// See the description of the first Histo1D() overload for more details.
1662    ///
1663    /// ### Example usage:
1664    /// ~~~{.cpp}
1665    /// // Deduce column type (this invocation needs jitting internally)
1666    /// auto myHist1 = myDf.Histo1D("myColumn");
1667    /// // Explicit column type
1668    /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1669    /// ~~~
1670    template <typename V = RDFDetail::RInferredType>
1671    RResultPtr<::TH1D> Histo1D(std::string_view vName)
1672    {
1673       const auto h_name = std::string(vName);
1674       const auto h_title = h_name + ";" + h_name + ";count";
1675       return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1676    }
1677 
1678    ////////////////////////////////////////////////////////////////////////////
1679    /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1680    /// \tparam V The type of the column used to fill the histogram.
1681    /// \tparam W The type of the column used as weights.
1682    /// \param[in] model The returned histogram will be constructed using this as a model.
1683    /// \param[in] vName The name of the column that will fill the histogram.
1684    /// \param[in] wName The name of the column that will provide the weights.
1685    /// \return the monodimensional histogram wrapped in a RResultPtr.
1686    ///
1687    /// See the description of the first Histo1D() overload for more details.
1688    ///
1689    /// ### Example usage:
1690    /// ~~~{.cpp}
1691    /// // Deduce column type (this invocation needs jitting internally)
1692    /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1693    /// // Explicit column type
1694    /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1695    /// ~~~
1696    template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1697    RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
1698    {
1699       const std::vector<std::string_view> columnViews = {vName, wName};
1700       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1701                                   ? ColumnNames_t()
1702                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
1703       std::shared_ptr<::TH1D> h(nullptr);
1704       {
1705          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1706          h = model.GetHistogram();
1707       }
1708       return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr);
1709    }
1710 
1711    ////////////////////////////////////////////////////////////////////////////
1712    /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1713    /// \tparam V The type of the column used to fill the histogram.
1714    /// \tparam W The type of the column used as weights.
1715    /// \param[in] vName The name of the column that will fill the histogram.
1716    /// \param[in] wName The name of the column that will provide the weights.
1717    /// \return the monodimensional histogram wrapped in a RResultPtr.
1718    ///
1719    /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1720    /// The "name" and "title" strings are built starting from the input column names.
1721    /// See the description of the first Histo1D() overload for more details.
1722    ///
1723    /// ### Example usage:
1724    /// ~~~{.cpp}
1725    /// // Deduce column types (this invocation needs jitting internally)
1726    /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1727    /// // Explicit column types
1728    /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1729    /// ~~~
1730    template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1731    RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
1732    {
1733       // We build name and title based on the value and weight column names
1734       std::string str_vName{vName};
1735       std::string str_wName{wName};
1736       const auto h_name = str_vName + "_weighted_" + str_wName;
1737       const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1738       return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1739    }
1740 
1741    ////////////////////////////////////////////////////////////////////////////
1742    /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1743    /// \tparam V The type of the column used to fill the histogram.
1744    /// \tparam W The type of the column used as weights.
1745    /// \param[in] model The returned histogram will be constructed using this as a model.
1746    /// \return the monodimensional histogram wrapped in a RResultPtr.
1747    ///
1748    /// This overload will use the first two default columns as column names.
1749    /// See the description of the first Histo1D() overload for more details.
1750    template <typename V, typename W>
1751    RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1752    {
1753       return Histo1D<V, W>(model, "", "");
1754    }
1755 
1756    ////////////////////////////////////////////////////////////////////////////
1757    /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1758    /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1759    /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1760    /// \param[in] model The returned histogram will be constructed using this as a model.
1761    /// \param[in] v1Name The name of the column that will fill the x axis.
1762    /// \param[in] v2Name The name of the column that will fill the y axis.
1763    /// \return the bidimensional histogram wrapped in a RResultPtr.
1764    ///
1765    /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1766    /// is filled with each one of the elements of the container. In case multiple columns of container type
1767    /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1768    /// possibly different lengths between events).
1769    /// This action is *lazy*: upon invocation of this method the calculation is
1770    /// booked but not executed. Also see RResultPtr.
1771    ///
1772    /// ### Example usage:
1773    /// ~~~{.cpp}
1774    /// // Deduce column types (this invocation needs jitting internally)
1775    /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1776    /// // Explicit column types
1777    /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1778    /// ~~~
1779    ///
1780    ///
1781    /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1782    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1783    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1784    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1785    RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1786    {
1787       std::shared_ptr<::TH2D> h(nullptr);
1788       {
1789          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1790          h = model.GetHistogram();
1791       }
1792       if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1793          throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1794       }
1795       const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1796       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1797                                   ? ColumnNames_t()
1798                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
1799       return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr);
1800    }
1801 
1802    ////////////////////////////////////////////////////////////////////////////
1803    /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
1804    /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1805    /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1806    /// \tparam W The type of the column used for the weights of the histogram.
1807    /// \param[in] model The returned histogram will be constructed using this as a model.
1808    /// \param[in] v1Name The name of the column that will fill the x axis.
1809    /// \param[in] v2Name The name of the column that will fill the y axis.
1810    /// \param[in] wName The name of the column that will provide the weights.
1811    /// \return the bidimensional histogram wrapped in a RResultPtr.
1812    ///
1813    /// This action is *lazy*: upon invocation of this method the calculation is
1814    /// booked but not executed. Also see RResultPtr.
1815    ///
1816    /// ### Example usage:
1817    /// ~~~{.cpp}
1818    /// // Deduce column types (this invocation needs jitting internally)
1819    /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1820    /// // Explicit column types
1821    /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1822    /// ~~~
1823    ///
1824    /// See the documentation of the first Histo2D() overload for more details.
1825    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1826              typename W = RDFDetail::RInferredType>
1827    RResultPtr<::TH2D>
1828    Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
1829    {
1830       std::shared_ptr<::TH2D> h(nullptr);
1831       {
1832          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1833          h = model.GetHistogram();
1834       }
1835       if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1836          throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1837       }
1838       const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1839       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1840                                   ? ColumnNames_t()
1841                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
1842       return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
1843    }
1844 
1845    template <typename V1, typename V2, typename W>
1846    RResultPtr<::TH2D> Histo2D(const TH2DModel &model)
1847    {
1848       return Histo2D<V1, V2, W>(model, "", "", "");
1849    }
1850 
1851    ////////////////////////////////////////////////////////////////////////////
1852    /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1853    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1854    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1855    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1856    /// \param[in] model The returned histogram will be constructed using this as a model.
1857    /// \param[in] v1Name The name of the column that will fill the x axis.
1858    /// \param[in] v2Name The name of the column that will fill the y axis.
1859    /// \param[in] v3Name The name of the column that will fill the z axis.
1860    /// \return the tridimensional histogram wrapped in a RResultPtr.
1861    ///
1862    /// This action is *lazy*: upon invocation of this method the calculation is
1863    /// booked but not executed. Also see RResultPtr.
1864    ///
1865    /// ### Example usage:
1866    /// ~~~{.cpp}
1867    /// // Deduce column types (this invocation needs jitting internally)
1868    /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1869    ///                             "myValueX", "myValueY", "myValueZ");
1870    /// // Explicit column types
1871    /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1872    ///                                                    "myValueX", "myValueY", "myValueZ");
1873    /// ~~~
1874    ///
1875    /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1876    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1877    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1878    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1879              typename V3 = RDFDetail::RInferredType>
1880    RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
1881                               std::string_view v3Name = "")
1882    {
1883       std::shared_ptr<::TH3D> h(nullptr);
1884       {
1885          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1886          h = model.GetHistogram();
1887       }
1888       if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1889          throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1890       }
1891       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1892       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1893                                   ? ColumnNames_t()
1894                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
1895       return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
1896    }
1897 
1898    ////////////////////////////////////////////////////////////////////////////
1899    /// \brief Fill and return a three-dimensional histogram (*lazy action*).
1900    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1901    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1902    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1903    /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1904    /// \param[in] model The returned histogram will be constructed using this as a model.
1905    /// \param[in] v1Name The name of the column that will fill the x axis.
1906    /// \param[in] v2Name The name of the column that will fill the y axis.
1907    /// \param[in] v3Name The name of the column that will fill the z axis.
1908    /// \param[in] wName The name of the column that will provide the weights.
1909    /// \return the tridimensional histogram wrapped in a RResultPtr.
1910    ///
1911    /// This action is *lazy*: upon invocation of this method the calculation is
1912    /// booked but not executed. Also see RResultPtr.
1913    ///
1914    /// ### Example usage:
1915    /// ~~~{.cpp}
1916    /// // Deduce column types (this invocation needs jitting internally)
1917    /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1918    ///                             "myValueX", "myValueY", "myValueZ", "myWeight");
1919    /// // Explicit column types
1920    /// using d_t = double;
1921    /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1922    ///                                                    "myValueX", "myValueY", "myValueZ", "myWeight");
1923    /// ~~~
1924    ///
1925    ///
1926    /// See the documentation of the first Histo3D() overload for more details.
1927    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1928              typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1929    RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
1930                               std::string_view v3Name, std::string_view wName)
1931    {
1932       std::shared_ptr<::TH3D> h(nullptr);
1933       {
1934          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1935          h = model.GetHistogram();
1936       }
1937       if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1938          throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1939       }
1940       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1941       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1942                                   ? ColumnNames_t()
1943                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
1944       return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
1945    }
1946 
1947    template <typename V1, typename V2, typename V3, typename W>
1948    RResultPtr<::TH3D> Histo3D(const TH3DModel &model)
1949    {
1950       return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1951    }
1952 
1953    ////////////////////////////////////////////////////////////////////////////
1954    /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1955    /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
1956    /// present.
1957    /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
1958    /// object.
1959    /// \param[in] model The returned histogram will be constructed using this as a model.
1960    /// \param[in] columnList
1961    /// A list containing the names of the columns that will be passed when calling `Fill`.
1962    ///  (N columns for unweighted filling, or N+1 columns for weighted filling)
1963    /// \return the N-dimensional histogram wrapped in a RResultPtr.
1964    ///
1965    /// This action is *lazy*: upon invocation of this method the calculation is
1966    /// booked but not executed. See RResultPtr documentation.
1967    ///
1968    /// ### Example usage:
1969    /// ~~~{.cpp}
1970    /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
1971    ///                                                {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
1972    ///                                               {"col0", "col1", "col2", "col3"});
1973    /// ~~~
1974    ///
1975    template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
1976    RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
1977    {
1978       std::shared_ptr<::THnD> h(nullptr);
1979       {
1980          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1981          h = model.GetHistogram();
1982 
1983          if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
1984             h->Sumw2();
1985          } else if (int(columnList.size()) != h->GetNdimensions()) {
1986             throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
1987          }
1988       }
1989       return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h,
1990                                                                                           fProxiedPtr);
1991    }
1992 
1993    ////////////////////////////////////////////////////////////////////////////
1994    /// \brief Fill and return an N-dimensional histogram (*lazy action*).
1995    /// \param[in] model The returned histogram will be constructed using this as a model.
1996    /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1997    ///  (N columns for unweighted filling, or N+1 columns for weighted filling)
1998    /// \return the N-dimensional histogram wrapped in a RResultPtr.
1999    ///
2000    /// This action is *lazy*: upon invocation of this method the calculation is
2001    /// booked but not executed. Also see RResultPtr.
2002    ///
2003    /// ### Example usage:
2004    /// ~~~{.cpp}
2005    /// auto myFilledObj = myDf.HistoND({"name","title", 4,
2006    ///                                                {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2007    ///                                               {"col0", "col1", "col2", "col3"});
2008    /// ~~~
2009    ///
2010    RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
2011    {
2012       std::shared_ptr<::THnD> h(nullptr);
2013       {
2014          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2015          h = model.GetHistogram();
2016 
2017          if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2018             h->Sumw2();
2019          } else if (int(columnList.size()) != h->GetNdimensions()) {
2020             throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2021          }
2022       }
2023       return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr,
2024                                                                                       columnList.size());
2025    }
2026 
2027    ////////////////////////////////////////////////////////////////////////////
2028    /// \brief Fill and return a TGraph object (*lazy action*).
2029    /// \tparam X The type of the column used to fill the x axis.
2030    /// \tparam Y The type of the column used to fill the y axis.
2031    /// \param[in] x The name of the column that will fill the x axis.
2032    /// \param[in] y The name of the column that will fill the y axis.
2033    /// \return the TGraph wrapped in a RResultPtr.
2034    ///
2035    /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
2036    /// is filled with each one of the elements of the container.
2037    /// If multithreading is enabled, the order in which points are inserted is undefined.
2038    /// If the graph has to be drawn, users are advised to sort it on x beforehand.
2039    /// The TGraph is given a name and a title based on the input column names.
2040    ///
2041    /// This action is *lazy*: upon invocation of this method the calculation is
2042    /// booked but not executed. Also see RResultPtr.
2043    ///
2044    /// ### Example usage:
2045    /// ~~~{.cpp}
2046    /// // Deduce column types (this invocation needs jitting internally)
2047    /// auto myGraph1 = myDf.Graph("xValues", "yValues");
2048    /// // Explicit column types
2049    /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
2050    /// ~~~
2051    ///
2052    /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
2053    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2054    /// if the result graph goes out of scope before the end of the program, ROOT might display a blank canvas).
2055    template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
2056    RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "")
2057    {
2058       auto graph = std::make_shared<::TGraph>();
2059       const std::vector<std::string_view> columnViews = {x, y};
2060       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2061                                   ? ColumnNames_t()
2062                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2063 
2064       const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
2065 
2066       // We build a default name and title based on the input columns
2067       const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2068       const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2069       graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2070       graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2071       graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2072 
2073       return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr);
2074    }
2075 
2076    ////////////////////////////////////////////////////////////////////////////
2077    /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
2078    /// \param[in] x The name of the column that will fill the x axis.
2079    /// \param[in] y The name of the column that will fill the y axis.
2080    /// \param[in] exl The name of the column of X low errors
2081    /// \param[in] exh The name of the column of X high errors
2082    /// \param[in] eyl The name of the column of Y low errors
2083    /// \param[in] eyh The name of the column of Y high errors
2084    /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
2085    ///
2086    /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
2087    /// is filled with each one of the elements of the container.
2088    /// If Multithreading is enabled, the order in which points are inserted is undefined.
2089    ///
2090    /// This action is *lazy*: upon invocation of this method the calculation is
2091    /// booked but not executed. Also see RResultPtr.
2092    ///
2093    /// ### Example usage:
2094    /// ~~~{.cpp}
2095    /// // Deduce column types (this invocation needs jitting internally)
2096    /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2097    /// // Explicit column types
2098    /// using f = float;
2099    /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2100    /// ~~~
2101    ///
2102    /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
2103    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2104    /// if the result graph goes out of scope before the end of the program, ROOT might display a blank canvas).
2105    template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
2106              typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType,
2107              typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
2108    RResultPtr<::TGraphAsymmErrors>
2109    GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "",
2110                     std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
2111    {
2112       auto graph = std::make_shared<::TGraphAsymmErrors>();
2113       const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
2114       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2115                                   ? ColumnNames_t()
2116                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2117 
2118       const auto validatedColumns = GetValidatedColumnNames(6, userColumns);
2119 
2120       // We build a default name and title based on the input columns
2121       const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2122       const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2123       graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2124       graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2125       graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2126 
2127       return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph,
2128                                                                                                graph, fProxiedPtr);
2129    }
2130 
2131    ////////////////////////////////////////////////////////////////////////////
2132    /// \brief Fill and return a one-dimensional profile (*lazy action*).
2133    /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2134    /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2135    /// \param[in] model The model to be considered to build the new return value.
2136    /// \param[in] v1Name The name of the column that will fill the x axis.
2137    /// \param[in] v2Name The name of the column that will fill the y axis.
2138    /// \return the monodimensional profile wrapped in a RResultPtr.
2139    ///
2140    /// This action is *lazy*: upon invocation of this method the calculation is
2141    /// booked but not executed. Also see RResultPtr.
2142    ///
2143    /// ### Example usage:
2144    /// ~~~{.cpp}
2145    /// // Deduce column types (this invocation needs jitting internally)
2146    /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2147    /// // Explicit column types
2148    /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2149    /// ~~~
2150    ///
2151    /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2152    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2153    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2154    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2155    RResultPtr<::TProfile>
2156    Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2157    {
2158       std::shared_ptr<::TProfile> h(nullptr);
2159       {
2160          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2161          h = model.GetProfile();
2162       }
2163 
2164       if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2165          throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2166       }
2167       const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2168       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2169                                   ? ColumnNames_t()
2170                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2171       return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr);
2172    }
2173 
2174    ////////////////////////////////////////////////////////////////////////////
2175    /// \brief Fill and return a one-dimensional profile (*lazy action*).
2176    /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2177    /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2178    /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
2179    /// \param[in] model The model to be considered to build the new return value.
2180    /// \param[in] v1Name The name of the column that will fill the x axis.
2181    /// \param[in] v2Name The name of the column that will fill the y axis.
2182    /// \param[in] wName The name of the column that will provide the weights.
2183    /// \return the monodimensional profile wrapped in a RResultPtr.
2184    ///
2185    /// This action is *lazy*: upon invocation of this method the calculation is
2186    /// booked but not executed. Also see RResultPtr.
2187    ///
2188    /// ### Example usage:
2189    /// ~~~{.cpp}
2190    /// // Deduce column types (this invocation needs jitting internally)
2191    /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2192    /// // Explicit column types
2193    /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2194    ///                                                   "xValues", "yValues", "weight");
2195    /// ~~~
2196    ///
2197    /// See the first Profile1D() overload for more details.
2198    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2199              typename W = RDFDetail::RInferredType>
2200    RResultPtr<::TProfile>
2201    Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2202    {
2203       std::shared_ptr<::TProfile> h(nullptr);
2204       {
2205          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2206          h = model.GetProfile();
2207       }
2208 
2209       if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2210          throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2211       }
2212       const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2213       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2214                                   ? ColumnNames_t()
2215                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2216       return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2217    }
2218 
2219    ////////////////////////////////////////////////////////////////////////////
2220    /// \brief Fill and return a one-dimensional profile (*lazy action*).
2221    /// See the first Profile1D() overload for more details.
2222    template <typename V1, typename V2, typename W>
2223    RResultPtr<::TProfile> Profile1D(const TProfile1DModel &model)
2224    {
2225       return Profile1D<V1, V2, W>(model, "", "", "");
2226    }
2227 
2228    ////////////////////////////////////////////////////////////////////////////
2229    /// \brief Fill and return a two-dimensional profile (*lazy action*).
2230    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2231    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2232    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2233    /// \param[in] model The returned profile will be constructed using this as a model.
2234    /// \param[in] v1Name The name of the column that will fill the x axis.
2235    /// \param[in] v2Name The name of the column that will fill the y axis.
2236    /// \param[in] v3Name The name of the column that will fill the z axis.
2237    /// \return the bidimensional profile wrapped in a RResultPtr.
2238    ///
2239    /// This action is *lazy*: upon invocation of this method the calculation is
2240    /// booked but not executed. Also see RResultPtr.
2241    ///
2242    /// ### Example usage:
2243    /// ~~~{.cpp}
2244    /// // Deduce column types (this invocation needs jitting internally)
2245    /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2246    ///                               "xValues", "yValues", "zValues");
2247    /// // Explicit column types
2248    /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2249    ///                                                   "xValues", "yValues", "zValues");
2250    /// ~~~
2251    ///
2252    /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2253    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2254    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2255    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2256              typename V3 = RDFDetail::RInferredType>
2257    RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
2258                                       std::string_view v2Name = "", std::string_view v3Name = "")
2259    {
2260       std::shared_ptr<::TProfile2D> h(nullptr);
2261       {
2262          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2263          h = model.GetProfile();
2264       }
2265 
2266       if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2267          throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2268       }
2269       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2270       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2271                                   ? ColumnNames_t()
2272                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2273       return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2274    }
2275 
2276    ////////////////////////////////////////////////////////////////////////////
2277    /// \brief Fill and return a two-dimensional profile (*lazy action*).
2278    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2279    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2280    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2281    /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2282    /// \param[in] model The returned histogram will be constructed using this as a model.
2283    /// \param[in] v1Name The name of the column that will fill the x axis.
2284    /// \param[in] v2Name The name of the column that will fill the y axis.
2285    /// \param[in] v3Name The name of the column that will fill the z axis.
2286    /// \param[in] wName The name of the column that will provide the weights.
2287    /// \return the bidimensional profile wrapped in a RResultPtr.
2288    ///
2289    /// This action is *lazy*: upon invocation of this method the calculation is
2290    /// booked but not executed. Also see RResultPtr.
2291    ///
2292    /// ### Example usage:
2293    /// ~~~{.cpp}
2294    /// // Deduce column types (this invocation needs jitting internally)
2295    /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2296    ///                               "xValues", "yValues", "zValues", "weight");
2297    /// // Explicit column types
2298    /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2299    ///                                                        "xValues", "yValues", "zValues", "weight");
2300    /// ~~~
2301    ///
2302    /// See the first Profile2D() overload for more details.
2303    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2304              typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2305    RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
2306                                       std::string_view v3Name, std::string_view wName)
2307    {
2308       std::shared_ptr<::TProfile2D> h(nullptr);
2309       {
2310          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2311          h = model.GetProfile();
2312       }
2313 
2314       if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2315          throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2316       }
2317       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2318       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2319                                   ? ColumnNames_t()
2320                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2321       return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2322    }
2323 
2324    /// \brief Fill and return a two-dimensional profile (*lazy action*).
2325    /// See the first Profile2D() overload for more details.
2326    template <typename V1, typename V2, typename V3, typename W>
2327    RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model)
2328    {
2329       return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2330    }
2331 
2332    ////////////////////////////////////////////////////////////////////////////
2333    /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2334    ///
2335    /// Type T must provide at least:
2336    /// - a copy-constructor
2337    /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2338    ///   (these types can also be passed as template parameters to this method)
2339    /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2340    ///   objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that
2341    ///   if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2342    ///   the TCollection*).
2343    ///
2344    /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2345    /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2346    /// \tparam T The type of the object to fill. Automatically deduced.
2347    /// \param[in] model The model to be considered to build the new return value.
2348    /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2349    /// \return the filled object wrapped in a RResultPtr.
2350    ///
2351    /// The user gives up ownership of the model object.
2352    /// The list of column names to be used for filling must always be specified.
2353    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2354    /// Also see RResultPtr.
2355    ///
2356    /// ### Example usage:
2357    /// ~~~{.cpp}
2358    /// MyClass obj;
2359    /// // Deduce column types (this invocation needs jitting internally, and in this case
2360    /// // MyClass needs to be known to the interpreter)
2361    /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2362    /// // explicit column types
2363    /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2364    /// ~~~
2365    ///
2366    template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2367    RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2368    {
2369       auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
2370       if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2371          throw std::runtime_error("The absence of axes limits is not supported yet.");
2372       }
2373       return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr,
2374                                                                                        columnList.size());
2375    }
2376 
2377    ////////////////////////////////////////////////////////////////////////////
2378    /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2379    ///
2380    /// \tparam V The type of the value column
2381    /// \param[in] value The name of the column with the values to fill the statistics with.
2382    /// \return the filled TStatistic object wrapped in a RResultPtr.
2383    ///
2384    /// ### Example usage:
2385    /// ~~~{.cpp}
2386    /// // Deduce column type (this invocation needs jitting internally)
2387    /// auto stats0 = myDf.Stats("values");
2388    /// // Explicit column type
2389    /// auto stats1 = myDf.Stats<float>("values");
2390    /// ~~~
2391    ///
2392    template <typename V = RDFDetail::RInferredType>
2393    RResultPtr<TStatistic> Stats(std::string_view value = "")
2394    {
2395       ColumnNames_t columns;
2396       if (!value.empty()) {
2397          columns.emplace_back(std::string(value));
2398       }
2399       const auto validColumnNames = GetValidatedColumnNames(1, columns);
2400       if (std::is_same<V, RDFDetail::RInferredType>::value) {
2401          return Fill(TStatistic(), validColumnNames);
2402       } else {
2403          return Fill<V>(TStatistic(), validColumnNames);
2404       }
2405    }
2406 
2407    ////////////////////////////////////////////////////////////////////////////
2408    /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2409    ///
2410    /// \tparam V The type of the value column
2411    /// \tparam W The type of the weight column
2412    /// \param[in] value The name of the column with the values to fill the statistics with.
2413    /// \param[in] weight The name of the column with the weights to fill the statistics with.
2414    /// \return the filled TStatistic object wrapped in a RResultPtr.
2415    ///
2416    /// ### Example usage:
2417    /// ~~~{.cpp}
2418    /// // Deduce column types (this invocation needs jitting internally)
2419    /// auto stats0 = myDf.Stats("values", "weights");
2420    /// // Explicit column types
2421    /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2422    /// ~~~
2423    ///
2424    template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2425    RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
2426    {
2427       ColumnNames_t columns{std::string(value), std::string(weight)};
2428       constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2429       constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2430       const auto validColumnNames = GetValidatedColumnNames(2, columns);
2431       // We have 3 cases:
2432       // 1. Both types are inferred: we use Fill and let the jit kick in.
2433       // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2434       // 3. Both types are explicit: we invoke the fully compiled Fill method.
2435       if (vIsInferred && wIsInferred) {
2436          return Fill(TStatistic(), validColumnNames);
2437       } else if (vIsInferred != wIsInferred) {
2438          std::string error("The ");
2439          error += vIsInferred ? "value " : "weight ";
2440          error += "column type is explicit, while the ";
2441          error += vIsInferred ? "weight " : "value ";
2442          error += " is specified to be inferred. This case is not supported: please specify both types or none.";
2443          throw std::runtime_error(error);
2444       } else {
2445          return Fill<V, W>(TStatistic(), validColumnNames);
2446       }
2447    }
2448 
2449    ////////////////////////////////////////////////////////////////////////////
2450    /// \brief Return the minimum of processed column values (*lazy action*).
2451    /// \tparam T The type of the branch/column.
2452    /// \param[in] columnName The name of the branch/column to be treated.
2453    /// \return the minimum value of the selected column wrapped in a RResultPtr.
2454    ///
2455    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2456    /// template specialization of this method.
2457    /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2458    ///
2459    /// This action is *lazy*: upon invocation of this method the calculation is
2460    /// booked but not executed. Also see RResultPtr.
2461    ///
2462    /// ### Example usage:
2463    /// ~~~{.cpp}
2464    /// // Deduce column type (this invocation needs jitting internally)
2465    /// auto minVal0 = myDf.Min("values");
2466    /// // Explicit column type
2467    /// auto minVal1 = myDf.Min<double>("values");
2468    /// ~~~
2469    ///
2470    template <typename T = RDFDetail::RInferredType>
2471    RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "")
2472    {
2473       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2474       using RetType_t = RDFDetail::MinReturnType_t<T>;
2475       auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2476       return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr);
2477    }
2478 
2479    ////////////////////////////////////////////////////////////////////////////
2480    /// \brief Return the maximum of processed column values (*lazy action*).
2481    /// \tparam T The type of the branch/column.
2482    /// \param[in] columnName The name of the branch/column to be treated.
2483    /// \return the maximum value of the selected column wrapped in a RResultPtr.
2484    ///
2485    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2486    /// template specialization of this method.
2487    /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2488    ///
2489    /// This action is *lazy*: upon invocation of this method the calculation is
2490    /// booked but not executed. Also see RResultPtr.
2491    ///
2492    /// ### Example usage:
2493    /// ~~~{.cpp}
2494    /// // Deduce column type (this invocation needs jitting internally)
2495    /// auto maxVal0 = myDf.Max("values");
2496    /// // Explicit column type
2497    /// auto maxVal1 = myDf.Max<double>("values");
2498    /// ~~~
2499    ///
2500    template <typename T = RDFDetail::RInferredType>
2501    RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "")
2502    {
2503       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2504       using RetType_t = RDFDetail::MaxReturnType_t<T>;
2505       auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2506       return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr);
2507    }
2508 
2509    ////////////////////////////////////////////////////////////////////////////
2510    /// \brief Return the mean of processed column values (*lazy action*).
2511    /// \tparam T The type of the branch/column.
2512    /// \param[in] columnName The name of the branch/column to be treated.
2513    /// \return the mean value of the selected column wrapped in a RResultPtr.
2514    ///
2515    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2516    /// template specialization of this method.
2517    ///
2518    /// This action is *lazy*: upon invocation of this method the calculation is
2519    /// booked but not executed. Also see RResultPtr.
2520    ///
2521    /// ### Example usage:
2522    /// ~~~{.cpp}
2523    /// // Deduce column type (this invocation needs jitting internally)
2524    /// auto meanVal0 = myDf.Mean("values");
2525    /// // Explicit column type
2526    /// auto meanVal1 = myDf.Mean<double>("values");
2527    /// ~~~
2528    ///
2529    template <typename T = RDFDetail::RInferredType>
2530    RResultPtr<double> Mean(std::string_view columnName = "")
2531    {
2532       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2533       auto meanV = std::make_shared<double>(0);
2534       return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr);
2535    }
2536 
2537    ////////////////////////////////////////////////////////////////////////////
2538    /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2539    /// \tparam T The type of the branch/column.
2540    /// \param[in] columnName The name of the branch/column to be treated.
2541    /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2542    ///
2543    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2544    /// template specialization of this method.
2545    ///
2546    /// This action is *lazy*: upon invocation of this method the calculation is
2547    /// booked but not executed. Also see RResultPtr.
2548    ///
2549    /// ### Example usage:
2550    /// ~~~{.cpp}
2551    /// // Deduce column type (this invocation needs jitting internally)
2552    /// auto stdDev0 = myDf.StdDev("values");
2553    /// // Explicit column type
2554    /// auto stdDev1 = myDf.StdDev<double>("values");
2555    /// ~~~
2556    ///
2557    template <typename T = RDFDetail::RInferredType>
2558    RResultPtr<double> StdDev(std::string_view columnName = "")
2559    {
2560       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2561       auto stdDeviationV = std::make_shared<double>(0);
2562       return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr);
2563    }
2564 
2565    // clang-format off
2566    ////////////////////////////////////////////////////////////////////////////
2567    /// \brief Return the sum of processed column values (*lazy action*).
2568    /// \tparam T The type of the branch/column.
2569    /// \param[in] columnName The name of the branch/column.
2570    /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2571    /// \return the sum of the selected column wrapped in a RResultPtr.
2572    ///
2573    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2574    /// template specialization of this method.
2575    /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2576    ///
2577    /// This action is *lazy*: upon invocation of this method the calculation is
2578    /// booked but not executed. Also see RResultPtr.
2579    ///
2580    /// ### Example usage:
2581    /// ~~~{.cpp}
2582    /// // Deduce column type (this invocation needs jitting internally)
2583    /// auto sum0 = myDf.Sum("values");
2584    /// // Explicit column type
2585    /// auto sum1 = myDf.Sum<double>("values");
2586    /// ~~~
2587    ///
2588    template <typename T = RDFDetail::RInferredType>
2589    RResultPtr<RDFDetail::SumReturnType_t<T>>
2590    Sum(std::string_view columnName = "",
2591        const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2592    {
2593       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2594       auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2595       return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr);
2596    }
2597    // clang-format on
2598 
2599    ////////////////////////////////////////////////////////////////////////////
2600    /// \brief Gather filtering statistics.
2601    /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2602    ///
2603    /// Calling `Report` on the main `RDataFrame` object gathers stats for
2604    /// all named filters in the call graph. Calling this method on a
2605    /// stored chain state (i.e. a graph node different from the first) gathers
2606    /// the stats for all named filters in the chain section between the original
2607    /// `RDataFrame` and that node (included). Stats are gathered in the same
2608    /// order as the named filters have been added to the graph.
2609    /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2610    /// effects cuts had.
2611    ///
2612    /// This action is *lazy*: upon invocation of
2613    /// this method the calculation is booked but not executed. See RResultPtr
2614    /// documentation.
2615    ///
2616    /// ### Example usage:
2617    /// ~~~{.cpp}
2618    /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2619    /// auto cutReport = filtered.Report();
2620    /// cutReport->Print();
2621    /// ~~~
2622    ///
2623    RResultPtr<RCutFlowReport> Report()
2624    {
2625       bool returnEmptyReport = false;
2626       // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2627       // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2628       // certainly does not contain named filters.
2629       // The number 4 takes into account the implicit columns for entry and slot number
2630       // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
2631       if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GenerateColumnNames().size() > 4)
2632          returnEmptyReport = true;
2633 
2634       auto rep = std::make_shared<RCutFlowReport>();
2635       using Helper_t = RDFInternal::ReportHelper<Proxied>;
2636       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2637 
2638       auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}),
2639                                                fProxiedPtr, RDFInternal::RColumnRegister(fColRegister));
2640 
2641       return MakeResultPtr(rep, *fLoopManager, std::move(action));
2642    }
2643 
2644    /// \brief Returns the names of the filters created.
2645    /// \return the container of filters names.
2646    ///
2647    /// If called on a root node, all the filters in the computation graph will
2648    /// be printed. For any other node, only the filters upstream of that node.
2649    /// Filters without a name are printed as "Unnamed Filter"
2650    /// This is not an action nor a transformation, just a query to the RDataFrame object.
2651    ///
2652    /// ### Example usage:
2653    /// ~~~{.cpp}
2654    /// auto filtNames = d.GetFilterNames();
2655    /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2656    /// ~~~
2657    ///
2658    std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2659 
2660    // clang-format off
2661    ////////////////////////////////////////////////////////////////////////////
2662    /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2663    /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2664    /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2665    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2666    /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2667    /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2668    /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2669    /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
2670    /// \return the result of the aggregation wrapped in a RResultPtr.
2671    ///
2672    /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2673    /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2674    /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2675    /// the value of the column columnName.
2676    /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2677    /// Otherwise the signature of aggregator must be `void(U&,T)`.
2678    ///
2679    /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2680    /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2681    /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2682    ///
2683    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2684    ///
2685    /// Example usage:
2686    /// ~~~{.cpp}
2687    /// auto aggregator = [](double acc, double x) { return acc * x; };
2688    /// ROOT::EnableImplicitMT();
2689    /// // If multithread is enabled, the aggregator function will be called by more threads
2690    /// // and will produce a vector of partial accumulators.
2691    /// // The merger function performs the final aggregation of these partial results.
2692    /// auto merger = [](std::vector<double> &accumulators) {
2693    ///    for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2694    ///       accumulators[0] *= accumulators[i];
2695    ///    }
2696    /// };
2697    ///
2698    /// // The accumulator is initialized at this value by every thread.
2699    /// double initValue = 1.;
2700    ///
2701    /// // Multiplies all elements of the column "x"
2702    /// auto result = d.Aggregate(aggregator, merger, "x", initValue);
2703    /// ~~~
2704    // clang-format on
2705    template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2706              typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2707              typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2708              typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2709              typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2710    RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2711    {
2712       RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2713       const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2714 
2715       const auto validColumnNames = GetValidatedColumnNames(1, columns);
2716       CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2717 
2718       auto accObjPtr = std::make_shared<U>(aggIdentity);
2719       using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2720       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2721       auto action = std::make_unique<Action_t>(
2722          Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2723          fProxiedPtr, fColRegister);
2724       return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2725    }
2726 
2727    // clang-format off
2728    ////////////////////////////////////////////////////////////////////////////
2729    /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2730    /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2731    /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2732    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2733    /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2734    /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2735    /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2736    /// \return the result of the aggregation wrapped in a RResultPtr.
2737    ///
2738    /// See previous Aggregate overload for more information.
2739    // clang-format on
2740    template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2741              typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2742              typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2743              typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2744    RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2745    {
2746       static_assert(
2747          std::is_default_constructible<U>::value,
2748          "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2749       return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2750    }
2751 
2752    // clang-format off
2753    ////////////////////////////////////////////////////////////////////////////
2754    /// \brief Book execution of a custom action using a user-defined helper object.
2755    /// \tparam FirstColumn The type of the first column used by this action.  Inferred together with OtherColumns if not present.
2756    /// \tparam OtherColumns A list of the types of the other columns used by this action
2757    /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2758    /// \param[in] helper The Action Helper to be scheduled.
2759    /// \param[in] columns The names of the columns on which the helper acts.
2760    /// \return the result of the helper wrapped in a RResultPtr.
2761    ///
2762    /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2763    /// Helper object provided by the caller. The required interface for the helper is described below (more
2764    /// methods than the ones required can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2765    ///
2766    /// ### Mandatory interface
2767    ///
2768    /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>`
2769    /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible.
2770    /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged.
2771    /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type
2772    ///   Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2773    ///   _before_ Initialize(), because the RResultPtr is constructed before the event loop is started.
2774    /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations.
2775    ///   It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
2776    ///   or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
2777    /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event
2778    ///   loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader
2779    ///   that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations;
2780    ///   it should not usually serve any purpose for the Helper. This method is often a no-op for simple helpers.
2781    /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method
2782    ///   during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2783    ///   this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2784    ///   the requested columns for the particular entry being processed.
2785    /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2786    /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in
2787    ///    diagnostics, SaveGraph(), etc.
2788    ///
2789    /// ### Optional methods
2790    ///
2791    /// If these methods are implemented they enable extra functionality as per the description below.
2792    ///
2793    /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'.
2794    ///   Different threads might call this method concurrently, but will do so with different 'slot' numbers.
2795    ///   RDataFrame leverages this method to implement RResultPtr::OnPartialResult().
2796    /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the
2797    ///   appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
2798    ///   of every sample, as in DefinePerSample().
2799    /// * `Helper MakeNew(void *newResult)`: if implemented, it enables varying the action's result with VariationsFor(). It takes a
2800    ///   type-erased new result that can be safely cast to a `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should
2801    ///   be used as the action's output result.
2802    ///
2803    /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled
2804    /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter.
2805    ///
2806    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2807    ///
2808    /// ### Examples
2809    /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.
2810    ///
2811    /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
2812    ///
2813    // clang-format on
2814    template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
2815    RResultPtr<typename std::decay_t<Helper>::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
2816    {
2817       using HelperT = std::decay_t<Helper>;
2818       // TODO add more static sanity checks on Helper
2819       using AH = RDFDetail::RActionImpl<HelperT>;
2820       static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value,
2821                     "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2822 
2823       auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
2824       auto resPtr = hPtr->GetResultPtr();
2825 
2826       if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) {
2827          return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
2828       } else {
2829          return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
2830                                                                                           fProxiedPtr, columns.size());
2831       }
2832    }
2833 
2834    ////////////////////////////////////////////////////////////////////////////
2835    /// \brief Provides a representation of the columns in the dataset.
2836    /// \tparam ColumnTypes variadic list of branch/column types.
2837    /// \param[in] columnList Names of the columns to be displayed.
2838    /// \param[in] nRows Number of events for each column to be displayed.
2839    /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2840    /// \return the `RDisplay` instance wrapped in a RResultPtr.
2841    ///
2842    /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2843    /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
2844    /// return a complete version through `RDisplay::AsString()`.
2845    ///
2846    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
2847    /// RResultPtr.
2848    ///
2849    /// Example usage:
2850    /// ~~~{.cpp}
2851    /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2852    /// auto d1 = rdf.Display("");
2853    /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2854    /// auto d2 = d.Display({"x", "y"}, 128);
2855    /// // Printing the short representations, the event loop will run
2856    /// d1->Print();
2857    /// d2->Print();
2858    /// ~~~
2859    template <typename... ColumnTypes>
2860    RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2861    {
2862       CheckIMTDisabled("Display");
2863       auto newCols = columnList;
2864       newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
2865       auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
2866       using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
2867       // Need to add ULong64_t type corresponding to the first column rdfentry_
2868       return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(
2869          std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr);
2870    }
2871 
2872    ////////////////////////////////////////////////////////////////////////////
2873    /// \brief Provides a representation of the columns in the dataset.
2874    /// \param[in] columnList Names of the columns to be displayed.
2875    /// \param[in] nRows Number of events for each column to be displayed.
2876    /// \param[in] nMaxCollectionElements  Maximum number of collection elements to display per row.
2877    /// \return the `RDisplay` instance wrapped in a RResultPtr.
2878    ///
2879    /// This overload automatically infers the column types.
2880    /// See the previous overloads for further details.
2881    ///
2882    /// Invoked when no types are specified to Display
2883    RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2884    {
2885       CheckIMTDisabled("Display");
2886       auto newCols = columnList;
2887       newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
2888       auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
2889       using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
2890       return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(
2891          std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr,
2892          columnList.size() + 1);
2893    }
2894 
2895    ////////////////////////////////////////////////////////////////////////////
2896    /// \brief Provides a representation of the columns in the dataset.
2897    /// \param[in] columnNameRegexp A regular expression to select the columns.
2898    /// \param[in] nRows Number of events for each column to be displayed.
2899    /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
2900    /// \return the `RDisplay` instance wrapped in a RResultPtr.
2901    ///
2902    /// The existing columns are matched against the regular expression. If the string provided
2903    /// is empty, all columns are selected.
2904    /// See the previous overloads for further details.
2905    RResultPtr<RDisplay>
2906    Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10)
2907    {
2908       const auto columnNames = GetColumnNames();
2909       const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
2910       return Display(selectedColumns, nRows, nMaxCollectionElements);
2911    }
2912 
2913    ////////////////////////////////////////////////////////////////////////////
2914    /// \brief Provides a representation of the columns in the dataset.
2915    /// \param[in] columnList Names of the columns to be displayed.
2916    /// \param[in] nRows Number of events for each column to be displayed.
2917    /// \param[in] nMaxCollectionElements Number of maximum elements in collection.
2918    /// \return the `RDisplay` instance wrapped in a RResultPtr.
2919    ///
2920    /// See the previous overloads for further details.
2921    RResultPtr<RDisplay>
2922    Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
2923    {
2924       ColumnNames_t selectedColumns(columnList);
2925       return Display(selectedColumns, nRows, nMaxCollectionElements);
2926    }
2927 
2928 private:
2929    template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2930    std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>
2931    DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
2932    {
2933       if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
2934          RDFInternal::CheckValidCppVarName(name, where);
2935          RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
2936                                            fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
2937       } else {
2938          RDFInternal::CheckForDefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
2939                                          fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
2940          RDFInternal::CheckForNoVariations(where, name, fColRegister);
2941       }
2942 
2943       using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2944       using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2945          std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type;
2946       using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2947          std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type;
2948 
2949       constexpr auto nColumns = ColTypes_t::list_size;
2950 
2951       const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2952       CheckAndFillDSColumns(validColumnNames, ColTypes_t());
2953 
2954       // Declare return type to the interpreter, for future use by jitted actions
2955       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2956       if (retTypeName.empty()) {
2957          // The type is not known to the interpreter.
2958          // We must not error out here, but if/when this column is used in jitted code
2959          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2960          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
2961       }
2962 
2963       using NewCol_t = RDFDetail::RDefine<F, DefineType>;
2964       auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
2965                                                   fColRegister, *fLoopManager);
2966 
2967       RDFInternal::RColumnRegister newCols(fColRegister);
2968       newCols.AddDefine(std::move(newColumn));
2969 
2970       RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
2971 
2972       return newInterface;
2973    }
2974 
2975    // This overload is chosen when the callable passed to Define or DefineSlot returns void.
2976    // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2977    // this way compilation of `Define` has no way to continue after throwing the error.
2978    template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2979              bool IsFStringConv = std::is_convertible<F, std::string>::value,
2980              bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2981    std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
2982    DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
2983    {
2984       static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2985                     "Error in `Define`: type returned by expression is not default-constructible");
2986       return *this; // never reached
2987    }
2988 
2989    template <typename... ColumnTypes>
2990    RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view fullTreeName, std::string_view filename,
2991                                                      const ColumnNames_t &columnList, const RSnapshotOptions &options)
2992    {
2993       const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
2994 
2995       RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
2996       // validCols has aliases resolved, while columnListWithoutSizeColumns still has aliases in it.
2997       const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
2998       RDFInternal::CheckForDuplicateSnapshotColumns(validCols);
2999       CheckAndFillDSColumns(validCols, TTraits::TypeList<ColumnTypes...>());
3000 
3001       const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
3002       const auto &treename = parsedTreePath.fTreeName;
3003       const auto &dirname = parsedTreePath.fDirName;
3004 
3005       auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3006          std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options});
3007 
3008       ::TDirectory::TContext ctxt;
3009 
3010       // The CreateLMFromTTree function by default opens the file passed as input
3011       // to check for the presence of the TTree inside. But at this moment the
3012       // filename we are using here corresponds to a file which does not exist yet,
3013       // i.e. the output file of the Snapshot call. Thus, checkFile=false will
3014       // prevent the function from trying to open a non-existent file.
3015       auto newRDF = std::make_shared<RInterface<RLoopManager>>(ROOT::Detail::RDF::CreateLMFromTTree(
3016          fullTreeName, filename, /*defaultColumns=*/columnListWithoutSizeColumns, /*checkFile=*/false));
3017 
3018       // The Snapshot helper will use validCols (with aliases resolved) as input columns, and
3019       // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column names.
3020       auto resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs,
3021                                                                                     fProxiedPtr);
3022 
3023       if (!options.fLazy)
3024          *resPtr;
3025       return resPtr;
3026    }
3027 
3028    ////////////////////////////////////////////////////////////////////////////
3029    /// \brief Implementation of cache.
3030    template <typename... ColTypes, std::size_t... S>
3031    RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
3032    {
3033       const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3034 
3035       // Check at compile time that the columns types are copy constructible
3036       constexpr bool areCopyConstructible =
3037          RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
3038       static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
3039 
3040       RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
3041 
3042       auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3043       auto ds = std::make_unique<RLazyDS<ColTypes...>>(
3044          std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3045 
3046       RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
3047 
3048       return cachedRDF;
3049    }
3050 
3051    template <bool IsSingleColumn, typename F>
3052    RInterface<Proxied, DS_t>
3053    VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
3054             const std::vector<std::string> &variationTags, std::string_view variationName)
3055    {
3056       using F_t = std::decay_t<F>;
3057       using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
3058       using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
3059       constexpr auto nColumns = ColTypes_t::list_size;
3060 
3061       SanityChecksForVary<RetType>(colNames, variationTags, variationName);
3062 
3063       const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
3064       CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
3065 
3066       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3067       if (retTypeName.empty()) {
3068          // The type is not known to the interpreter, but we don't want to error out
3069          // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
3070          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3071          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3072       }
3073 
3074       auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>(
3075          colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
3076          validColumnNames);
3077 
3078       RDFInternal::RColumnRegister newCols(fColRegister);
3079       newCols.AddVariation(std::move(variation));
3080 
3081       RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
3082 
3083       return newInterface;
3084    }
3085 
3086    RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression,
3087                                             const std::vector<std::string> &variationTags,
3088                                             std::string_view variationName, bool isSingleColumn)
3089    {
3090       R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
3091       R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
3092       R__ASSERT(!variationName.empty() && "Must provide a variation name.");
3093 
3094       for (auto &colName : colNames) {
3095          RDFInternal::CheckValidCppVarName(colName, "Vary");
3096          RDFInternal::CheckForDefinition("Vary", colName, fColRegister, fLoopManager->GetBranchNames(),
3097                                          fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
3098       }
3099       RDFInternal::CheckValidCppVarName(variationName, "Vary");
3100 
3101       // when varying multiple columns, they must be different columns
3102       if (colNames.size() > 1) {
3103          std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
3104          if (uniqueCols.size() != colNames.size())
3105             throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
3106       }
3107 
3108       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
3109       auto jittedVariation =
3110          RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager, fDataSource,
3111                                        fColRegister, fLoopManager->GetBranchNames(), upcastNodeOnHeap, isSingleColumn);
3112 
3113       RDFInternal::RColumnRegister newColRegister(fColRegister);
3114       newColRegister.AddVariation(std::move(jittedVariation));
3115 
3116       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister));
3117 
3118       return newInterface;
3119    }
3120 
3121    template <typename Helper, typename ActionResultType>
3122    auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
3123                                               const std::shared_ptr<Helper> &hPtr,
3124                                               TTraits::TypeList<RDFDetail::RInferredType>)
3125       -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
3126    {
3127       return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u);
3128    }
3129 
3130    template <typename Helper, typename ActionResultType, typename... Others>
3131    RResultPtr<ActionResultType>
3132    CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
3133                                          const std::shared_ptr<Helper>& /*hPtr*/,
3134                                          Others...)
3135    {
3136       throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
3137                                          "columns! The action helper type was ") +
3138                              typeid(Helper).name());
3139       return {};
3140    }
3141 
3142 protected:
3143    RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
3144               const RDFInternal::RColumnRegister &colRegister)
3145       : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied)
3146    {
3147    }
3148 
3149    const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
3150 };
3151 
3152 } // namespace RDF
3153 
3154 } // namespace ROOT
3155 
3156 #endif // ROOT_RDF_TINTERFACE