Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 09:14:18

0001 // Author: Enrico Guiraud, Danilo Piparo CERN  03/2017
0002 
0003 /*************************************************************************
0004  * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010 
0011 #ifndef ROOT_RDF_TINTERFACE
0012 #define ROOT_RDF_TINTERFACE
0013 
0014 #include "ROOT/RDataSource.hxx"
0015 #include "ROOT/RDF/ActionHelpers.hxx"
0016 #include "ROOT/RDF/HistoModels.hxx"
0017 #include "ROOT/RDF/InterfaceUtils.hxx"
0018 #include "ROOT/RDF/RColumnRegister.hxx"
0019 #include "ROOT/RDF/RDefaultValueFor.hxx"
0020 #include "ROOT/RDF/RDefine.hxx"
0021 #include "ROOT/RDF/RDefinePerSample.hxx"
0022 #include "ROOT/RDF/RFilter.hxx"
0023 #include "ROOT/RDF/RInterfaceBase.hxx"
0024 #include "ROOT/RDF/RVariation.hxx"
0025 #include "ROOT/RDF/RLazyDSImpl.hxx"
0026 #include "ROOT/RDF/RLoopManager.hxx"
0027 #include "ROOT/RDF/RRange.hxx"
0028 #include "ROOT/RDF/RFilterWithMissingValues.hxx"
0029 #include "ROOT/RDF/Utils.hxx"
0030 #include "ROOT/RDF/RDFDescription.hxx"
0031 #include "ROOT/RDF/RVariationsDescription.hxx"
0032 #include "ROOT/RResultPtr.hxx"
0033 #include "ROOT/RSnapshotOptions.hxx"
0034 #include <string_view>
0035 #include "ROOT/RVec.hxx"
0036 #include "ROOT/TypeTraits.hxx"
0037 #include "RtypesCore.h" // for ULong64_t
0038 #include "TDirectory.h"
0039 #include "TH1.h" // For Histo actions
0040 #include "TH2.h" // For Histo actions
0041 #include "TH3.h" // For Histo actions
0042 #include "THn.h"
0043 #include "TProfile.h"
0044 #include "TProfile2D.h"
0045 #include "TStatistic.h"
0046 
0047 #include <algorithm>
0048 #include <cstddef>
0049 #include <initializer_list>
0050 #include <iterator> // std::back_inserter
0051 #include <limits>
0052 #include <memory>
0053 #include <set>
0054 #include <sstream>
0055 #include <stdexcept>
0056 #include <string>
0057 #include <type_traits> // is_same, enable_if
0058 #include <typeinfo>
0059 #include <unordered_set>
0060 #include <utility> // std::index_sequence
0061 #include <vector>
0062 #include <any>
0063 
0064 class TGraph;
0065 
0066 // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
0067 namespace ROOT {
0068 void DisableImplicitMT();
0069 bool IsImplicitMTEnabled();
0070 void EnableImplicitMT(UInt_t numthreads);
0071 class RDataFrame;
0072 } // namespace ROOT
0073 namespace cling {
0074 std::string printValue(ROOT::RDataFrame *tdf);
0075 }
0076 
0077 namespace ROOT {
0078 namespace RDF {
0079 namespace RDFDetail = ROOT::Detail::RDF;
0080 namespace RDFInternal = ROOT::Internal::RDF;
0081 namespace TTraits = ROOT::TypeTraits;
0082 
0083 template <typename Proxied, typename DataSource>
0084 class RInterface;
0085 
0086 using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
0087 } // namespace RDF
0088 
0089 namespace Internal {
0090 namespace RDF {
0091 class GraphCreatorHelper;
0092 void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
0093 void ChangeBeginAndEndEntries(const RNode &node, Long64_t begin, Long64_t end);
0094 void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec);
0095 void TriggerRun(ROOT::RDF::RNode node);
0096 std::string GetDataSourceLabel(const ROOT::RDF::RNode &node);
0097 void SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline);
0098 } // namespace RDF
0099 } // namespace Internal
0100 
0101 namespace RDF {
0102 
0103 // clang-format off
0104 /**
0105  * \class ROOT::RDF::RInterface
0106  * \ingroup dataframe
0107  * \brief The public interface to the RDataFrame federation of classes.
0108  * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
0109  * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
0110  *
0111  * The documentation of each method features a one liner illustrating how to use the method, for example showing how
0112  * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
0113  */
0114 // clang-format on
0115 template <typename Proxied, typename DataSource = void>
0116 class RInterface : public RInterfaceBase {
0117    using DS_t = DataSource;
0118    using RFilterBase = RDFDetail::RFilterBase;
0119    using RRangeBase = RDFDetail::RRangeBase;
0120    using RLoopManager = RDFDetail::RLoopManager;
0121    friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
0122    friend class RDFInternal::GraphDrawing::GraphCreatorHelper;
0123 
0124    template <typename T, typename W>
0125    friend class RInterface;
0126 
0127    friend void RDFInternal::TriggerRun(RNode node);
0128    friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
0129    friend void RDFInternal::ChangeBeginAndEndEntries(const RNode &node, Long64_t start, Long64_t end);
0130    friend void RDFInternal::ChangeSpec(const RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec);
0131    friend std::string ROOT::Internal::RDF::GetDataSourceLabel(const RNode &node);
0132    friend void ROOT::Internal::RDF::SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline);
0133    std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
0134 
0135 public:
0136    ////////////////////////////////////////////////////////////////////////////
0137    /// \brief Copy-assignment operator for RInterface (compiler-generated, member-wise).
0138    RInterface &operator=(const RInterface &) = default;
0139 
0140    ////////////////////////////////////////////////////////////////////////////
0141    /// \brief Copy-ctor for RInterface (compiler-generated): the copy shares ownership of the proxied node.
0142    RInterface(const RInterface &) = default;
0143 
0144    ////////////////////////////////////////////////////////////////////////////
0145    /// \brief Move-ctor for RInterface (compiler-generated, member-wise).
0146    RInterface(RInterface &&) = default;
0147 
0148    ////////////////////////////////////////////////////////////////////////////
0149    /// \brief Move-assignment operator for RInterface (compiler-generated, member-wise).
0150    RInterface &operator=(RInterface &&) = default;
0151 
0152    ////////////////////////////////////////////////////////////////////////////
0153    /// \brief Build a RInterface from a RLoopManager.
0154    /// This constructor is only available for RInterface<RLoopManager>.
0155    template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
0156    RInterface(const std::shared_ptr<RLoopManager> &proxied) : RInterfaceBase(proxied), fProxiedPtr(proxied)
0157    {
0158    }
0159 
0160    ////////////////////////////////////////////////////////////////////////////
0161    /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
0162    /// Different RDataFrame methods return different C++ types. All nodes, however,
0163    /// can be cast to this common type at the cost of a small performance penalty.
0164    /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
0165    /// around via (non-template, C++11) helper functions.
0166    /// Example usage:
0167    /// ~~~{.cpp}
0168    /// // a function that conditionally adds a Range to a RDataFrame node.
0169    /// RNode MaybeAddRange(RNode df, bool mustAddRange)
0170    /// {
0171    ///    return mustAddRange ? df.Range(1) : df;
0172    /// }
0173    /// // use as :
0174    /// ROOT::RDataFrame df(10);
0175    /// auto maybeRanged = MaybeAddRange(df, true);
0176    /// ~~~
0177    /// Note that it is not a problem to pass RNode's by value.
0178    operator RNode() const
0179    {
0180       return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fColRegister);
0181    }
0182 
0183    ////////////////////////////////////////////////////////////////////////////
0184    /// \brief Append a filter to the call graph.
0185    /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
0186    /// signalling whether the event has passed the selection (true) or not (false).
0187    /// \param[in] columns Names of the columns/branches in input to the filter function.
0188    /// \param[in] name Optional name of this filter. See `Report`.
0189    /// \return the filter node of the computation graph.
0190    ///
0191    /// Append a filter node at the point of the call graph corresponding to the
0192    /// object this method is called on.
0193    /// The callable `f` should not have side-effects (e.g. modification of an
0194    /// external or static variable) to ensure correct results when implicit
0195    /// multi-threading is active.
0196    ///
0197    /// RDataFrame only evaluates filters when necessary: if multiple filters
0198    /// are chained one after another, they are executed in order and the first
0199    /// one returning false causes the event to be discarded.
0200    /// Even if multiple actions or transformations depend on the same filter,
0201    /// it is executed once per entry. If its result is requested more than
0202    /// once, the cached result is served.
0203    ///
0204    /// ### Example usage:
0205    /// ~~~{.cpp}
0206    /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
0207    /// auto filtered = df.Filter(myCut, {"x", "y"});
0208    ///
0209    /// // String: it must contain valid C++ except that column names can be used instead of variable names
0210    /// auto filtered = df.Filter("x*y > 0");
0211    /// ~~~
0212    ///
0213    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0214    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0215    /// ~~~{.cpp}
0216    /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0217    /// ~~~
0218    /// but instead this will:
0219    /// ~~~{.cpp}
0220    /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0221    /// ~~~
0222    template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0223    RInterface<RDFDetail::RFilter<F, Proxied>, DS_t>
0224    Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
0225    {
0226       RDFInternal::CheckFilter(f);
0227       using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
0228       constexpr auto nColumns = ColTypes_t::list_size;
0229       const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
0230       CheckAndFillDSColumns(validColumnNames, ColTypes_t());
0231 
0232       using F_t = RDFDetail::RFilter<F, Proxied>;
0233 
0234       auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, fColRegister, name);
0235       return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
0236    }
0237 
0238    ////////////////////////////////////////////////////////////////////////////
0239    /// \brief Append a filter to the call graph.
0240    /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
0241    /// signalling whether the event has passed the selection (true) or not (false).
0242    /// \param[in] name Optional name of this filter. See `Report`.
0243    /// \return the filter node of the computation graph.
0244    ///
0245    /// Refer to the first overload of this method for the full documentation.
0246    template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0247    RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, std::string_view name)
0248    {
0249       // The sfinae is there in order to pick up the overloaded method which accepts two strings
0250       // rather than this template method.
0251       return Filter(f, {}, name);
0252    }
0253 
0254    ////////////////////////////////////////////////////////////////////////////
0255    /// \brief Append a filter to the call graph.
0256    /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
0257    /// signalling whether the event has passed the selection (true) or not (false).
0258    /// \param[in] columns Names of the columns/branches in input to the filter function.
0259    /// \return the filter node of the computation graph.
0260    ///
0261    /// Refer to the first overload of this method for the full documentation.
0262    template <typename F>
0263    RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
0264    {
0265       return Filter(f, ColumnNames_t{columns});
0266    }
0267 
0268    ////////////////////////////////////////////////////////////////////////////
0269    /// \brief Append a filter to the call graph.
0270    /// \param[in] expression The filter expression in C++
0271    /// \param[in] name Optional name of this filter. See `Report`.
0272    /// \return the filter node of the computation graph.
0273    ///
0274    /// The expression is just-in-time compiled and used to filter entries. It must
0275    /// be valid C++ syntax in which variable names are substituted with the names
0276    /// of branches/columns.
0277    ///
0278    /// ### Example usage:
0279    /// ~~~{.cpp}
0280    /// auto filtered_df = df.Filter("myCollection.size() > 3");
0281    /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
0282    /// ~~~
0283    ///
0284    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0285    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0286    /// ~~~{.cpp}
0287    /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0288    /// ~~~
0289    /// but instead this will:
0290    /// ~~~{.cpp}
0291    /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0292    /// ~~~
0293    RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
0294    {
0295       // deleted by the jitted call to JitFilterHelper
0296       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0297       using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
0298       RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister);
0299       const auto jittedFilter =
0300          RDFInternal::BookFilterJit(upcastNodeOnHeap, name, expression, fLoopManager->GetBranchNames(), fColRegister,
0301                                     fLoopManager->GetTree(), GetDataSource());
0302 
0303       return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), *fLoopManager, fColRegister);
0304    }
0305 
0306    ////////////////////////////////////////////////////////////////////////////
0307    /// \brief Discard entries with missing values
0308    /// \param[in] column Column name whose entries with missing values should be discarded
0309    /// \return The filter node of the computation graph
0310    ///
0311    /// This operation is useful in case an entry of the dataset is incomplete,
0312    /// i.e. if one or more of the columns do not have valid values. If the value
0313    /// of the input column is missing for an entry, the entire entry will be
0314    /// discarded from the rest of this branch of the computation graph.
0315    ///
0316    /// Use cases include:
0317    /// * When processing multiple files, one or more of them is missing a column
0318    /// * In horizontal joining with entry matching, a certain dataset has no
0319    ///   match for the current entry.
0320    ///
0321    /// ### Example usage:
0322    ///
0323    /// \code{.py}
0324    /// # Assume a dataset with columns [idx, x] matching another dataset with
0325    /// # columns [idx, y]. For idx == 42, the right-hand dataset has no match
0326    /// df = ROOT.RDataFrame(dataset)
0327    /// df_nomissing = df.FilterAvailable("idx").Define("z", "x + y")
0328    /// colz = df_nomissing.Take[int]("z")
0329    /// \endcode
0330    ///
0331    /// \code{.cpp}
0332    /// // Assume a dataset with columns [idx, x] matching another dataset with
0333    /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
0334    /// ROOT::RDataFrame df{dataset};
0335    /// auto df_nomissing = df.FilterAvailable("idx")
0336    ///                       .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
0337    /// auto colz = df_nomissing.Take<int>("z");
0338    /// \endcode
0339    ///
0340    /// \note See FilterMissing() if you want to keep only the entries with
0341    ///       missing values instead.
0342    RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterAvailable(std::string_view column)
0343    {
0344       const auto columns = ColumnNames_t{column.data()};
0345       // For now disable this functionality in case of an empty data source and
0346       // the column name was not defined previously.
0347       if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
0348          throw std::runtime_error("Unknown column: \"" + std::string(column) + "\"");
0349       using F_t = RDFDetail::RFilterWithMissingValues<Proxied>;
0350       auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ true, fProxiedPtr, fColRegister, columns);
0351       CheckAndFillDSColumns(columns, TTraits::TypeList<void>{});
0352       return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
0353    }
0354 
0355    ////////////////////////////////////////////////////////////////////////////
0356    /// \brief Keep only the entries that have missing values.
0357    /// \param[in] column Column name whose entries with missing values should be kept
0358    /// \return The filter node of the computation graph
0359    ///
0360    /// This operation is useful in case an entry of the dataset is incomplete,
0361    /// i.e. if one or more of the columns do not have valid values. It only
0362    /// keeps the entries for which the value of the input column is missing.
0363    ///
0364    /// Use cases include:
0365    /// * When processing multiple files, one or more of them is missing a column
0366    /// * In horizontal joining with entry matching, a certain dataset has no
0367    ///   match for the current entry.
0368    ///
0369    /// ### Example usage:
0370    ///
0371    /// \code{.py}
0372    /// # Assume a dataset made of two files vertically chained together, one has
0373    /// # column "x" and the other has column "y"
0374    /// df = ROOT.RDataFrame(dataset)
0375    /// df_valid_col_x = df.FilterMissing("y")
0376    /// df_valid_col_y = df.FilterMissing("x")
0377    /// display_x = df_valid_col_x.Display(("x",))
0378    /// display_y = df_valid_col_y.Display(("y",))
0379    /// \endcode
0380    ///
0381    /// \code{.cpp}
0382    /// // Assume a dataset made of two files vertically chained together, one has
0383    /// // column "x" and the other has column "y"
0384    /// ROOT::RDataFrame df{dataset};
0385    /// auto df_valid_col_x = df.FilterMissing("y");
0386    /// auto df_valid_col_y = df.FilterMissing("x");
0387    /// auto display_x = df_valid_col_x.Display<int>({"x"});
0388    /// auto display_y = df_valid_col_y.Display<int>({"y"});
0389    /// \endcode
0390    ///
0391    /// \note See FilterAvailable() if you want to discard the entries in case
0392    ///       there is a missing value instead.
0393    RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterMissing(std::string_view column)
0394    {
0395       const auto columns = ColumnNames_t{column.data()};
0396       // For now disable this functionality in case of an empty data source and
0397       // the column name was not defined previously.
0398       if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
0399          throw std::runtime_error("Unknown column: \"" + std::string(column) + "\"");
0400       using F_t = RDFDetail::RFilterWithMissingValues<Proxied>;
0401       auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ false, fProxiedPtr, fColRegister, columns);
0402       CheckAndFillDSColumns(columns, TTraits::TypeList<void>{});
0403       return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
0404    }
0405 
0406    // clang-format off
0407    ////////////////////////////////////////////////////////////////////////////
0408    /// \brief Define a new column.
0409    /// \param[in] name The name of the defined column.
0410    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0411    /// \param[in] columns Names of the columns/branches in input to the producer function.
0412    /// \return the first node of the computation graph for which the new quantity is defined.
0413    ///
0414    /// Define a column that will be visible from all subsequent nodes
0415    /// of the functional chain. The `expression` is only evaluated for entries that pass
0416    /// all the preceding filters.
0417    /// A new variable is created called `name`, accessible as if it was contained
0418    /// in the dataset from subsequent transformations/actions.
0419    ///
0420    /// Use cases include:
0421    /// * caching the results of complex calculations for easy and efficient multiple access
0422    /// * extraction of quantities of interest from complex objects
0423    ///
0424    /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
0425    ///
0426    /// ### Example usage:
0427    /// ~~~{.cpp}
0428    /// // assuming a function with signature:
0429    /// double myComplexCalculation(const RVec<float> &muon_pts);
0430    /// // we can pass it directly to Define
0431    /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
0432    /// // alternatively, we can pass the body of the function as a string, as in Filter:
0433    /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
0434    /// ~~~
0435    ///
0436    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0437    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0438    /// ~~~{.cpp}
0439    /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
0440    /// ~~~
0441    /// but instead this will:
0442    /// ~~~{.cpp}
0443    /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
0444    /// ~~~
0445    template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0446    RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
0447    {
0448       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Define");
0449    }
0450    // clang-format on
0451 
0452    // clang-format off
0453    ////////////////////////////////////////////////////////////////////////////
0454    /// \brief Define a new column with a value dependent on the processing slot.
0455    /// \param[in] name The name of the defined column.
0456    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0457    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
0458    /// \return the first node of the computation graph for which the new quantity is defined.
0459    ///
0460    /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
0461    /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
0462    /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
0463    /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
0464    /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
0465    /// Note that there is no guarantee as to how often each slot will be reached during the event loop.
0466    ///
0467    /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
0468    /// ~~~{.cpp}
0469    /// int function(unsigned int, double, double);
0470    /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
0471    /// df.DefineSlot("x", function, {"column1", "column2"})
0472    /// ~~~
0473    ///
0474    /// See Define() for more information.
0475    template <typename F>
0476    RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
0477    {
0478       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "DefineSlot");
0479    }
0480    // clang-format on
0481 
0482    // clang-format off
0483    ////////////////////////////////////////////////////////////////////////////
0484    /// \brief Define a new column with a value dependent on the processing slot and the current entry.
0485    /// \param[in] name The name of the defined column.
0486    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0487    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
0488    /// \return the first node of the computation graph for which the new quantity is defined.
0489    ///
0490    /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
0491    /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
0492    /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
0493    /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
0494    /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
0495    /// Note that there is no guarantee as to how often each slot will be reached during the event loop.
0496    /// The second parameter is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
0497    ///
0498    /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
0499    /// ~~~{.cpp}
0500    /// int function(unsigned int, ULong64_t, double, double);
0501    /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
0502    /// DefineSlotEntry("x", function, {"column1", "column2"})
0503    /// ~~~
0504    ///
0505    /// See Define() for more information.
0506    template <typename F>
0507    RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
0508    {
0509       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
0510                                                                         "DefineSlotEntry");
0511    }
0512    // clang-format on
0513 
0514    ////////////////////////////////////////////////////////////////////////////
0515    /// \brief Define a new column.
0516    /// \param[in] name The name of the defined column.
0517    /// \param[in] expression An expression in C++ which represents the defined value
0518    /// \return the first node of the computation graph for which the new quantity is defined.
0519    ///
0520    /// The expression is just-in-time compiled and used to produce the column entries.
0521    /// It must be valid C++ syntax in which variable names are substituted with the names
0522    /// of branches/columns.
0523    ///
0524    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0525    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0526    /// ~~~{.cpp}
0527    /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
0528    /// ~~~
0529    /// but instead this will:
0530    /// ~~~{.cpp}
0531    /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
0532    /// ~~~
0533    ///
0534    /// Refer to the first overload of this method for the full documentation.
0535    RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
0536    {
0537       constexpr auto where = "Define";
0538       RDFInternal::CheckValidCppVarName(name, where);
0539       // these checks must be done before jitting lest we throw exceptions in jitted code
0540       RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
0541                                         GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
0542 
0543       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0544       auto jittedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister,
0545                                                      fLoopManager->GetBranchNames(), upcastNodeOnHeap);
0546 
0547       RDFInternal::RColumnRegister newCols(fColRegister);
0548       newCols.AddDefine(std::move(jittedDefine));
0549 
0550       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
0551 
0552       return newInterface;
0553    }
0554 
0555    ////////////////////////////////////////////////////////////////////////////
0556    /// \brief Overwrite the value and/or type of an existing column.
0557    /// \param[in] name The name of the column to redefine.
0558    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0559    /// \param[in] columns Names of the columns/branches in input to the expression.
0560    /// \return the first node of the computation graph for which the quantity is redefined.
0561    ///
0562    /// The old value of the column can be used as an input for the expression.
0563    ///
0564    /// An exception is thrown in case the column to redefine does not already exist.
0565    /// See Define() for more information.
0566    template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0567    RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {})
0568    {
0569       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, "Redefine");
0570    }
0571 
0572    // clang-format off
0573    ////////////////////////////////////////////////////////////////////////////
0574    /// \brief Overwrite the value and/or type of an existing column.
0575    /// \param[in] name The name of the column to redefine.
0576    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0577    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
0578    /// \return the first node of the computation graph for which the new quantity is defined.
0579    ///
0580    /// The old value of the column can be used as an input for the expression.
0581    /// An exception is thrown in case the column to redefine does not already exist.
0582    ///
0583    /// See DefineSlot() for more information.
0584    // clang-format on
0585    template <typename F>
0586    RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
0587    {
           // Thin wrapper: everything is delegated to DefineImpl, instantiated with ExtraArgsForDefine::Slot.
           // `expression` was received by value, hence it is moved on instead of being copied a second time.
           // NOTE(review): judging by the enumerator name and by the documentation of `columns` ("excluding slot"),
           // the callable is expected to take the processing slot ahead of the column values -- confirm in DefineImpl.
0588       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot");
0589    }
0590 
0591    // clang-format off
0592    ////////////////////////////////////////////////////////////////////////////
0593    /// \brief Overwrite the value and/or type of an existing column.
0594    /// \param[in] name The name of the column to redefine.
0595    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0596    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
0597    /// \return the first node of the computation graph for which the new quantity is defined.
0598    ///
0599    /// The old value of the column can be used as an input for the expression.
0600    /// An exception is thrown in case the column to re-define does not already exist.
0601    ///
0602    /// See DefineSlotEntry() for more information.
0603    // clang-format on
0604    template <typename F>
0605    RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
0606    {
           // Thin wrapper: everything is delegated to DefineImpl, instantiated with ExtraArgsForDefine::SlotAndEntry.
           // `expression` was received by value, hence it is moved on instead of being copied a second time.
           // NOTE(review): judging by the enumerator name and by the documentation of `columns` ("excluding slot and
           // entry"), the callable is expected to take slot and entry ahead of the column values -- confirm in DefineImpl.
0607       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
0608                                                                         "RedefineSlotEntry");
0609    }
0610 
0611    ////////////////////////////////////////////////////////////////////////////
0612    /// \brief Overwrite the value and/or type of an existing column.
0613    /// \param[in] name The name of the column to redefine.
0614    /// \param[in] expression An expression in C++ which represents the defined value
0615    /// \return the first node of the computation graph for which the new quantity is defined.
0616    ///
0617    /// The expression is just-in-time compiled and used to produce the column entries.
0618    /// It must be valid C++ syntax in which variable names are substituted with the names
0619    /// of branches/columns.
0620    ///
0621    /// The old value of the column can be used as an input for the expression.
0622    /// An exception is thrown in case the column to re-define does not already exist.
0623    ///
0624    /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
0625    RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression)
0626    {
0627       constexpr auto callerName = "Redefine";
0628       // Run all the checks on `name` up front, before anything is booked.
0629       RDFInternal::CheckValidCppVarName(name, callerName);
0630       const auto dsColumns = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
0631       RDFInternal::CheckForDefinition(callerName, name, fColRegister, fLoopManager->GetBranchNames(), dsColumns);
0632       RDFInternal::CheckForNoVariations(callerName, name, fColRegister);
0633 
0634       // Hand the upcast current node to BookDefineJit, then record the resulting define in a copy of the register.
0635       auto parentNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0636       auto bookedDefine = RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister,
0637                                                      fLoopManager->GetBranchNames(), parentNodeOnHeap);
0638       RDFInternal::RColumnRegister updatedRegister(fColRegister);
0639       updatedRegister.AddDefine(std::move(bookedDefine));
0640 
0641       // The returned interface wraps the same proxied node, paired with the updated column register.
0642       return RInterface<Proxied, DS_t>(fProxiedPtr, *fLoopManager, std::move(updatedRegister));
0643    }
0644 
0645    ////////////////////////////////////////////////////////////////////////////
0646    /// \brief In case the value in the given column is missing, provide a default value
0647    /// \tparam T The type of the column
0648    /// \param[in] column Column name where missing values should be replaced by the given default value
0649    /// \param[in] defaultValue Value to provide instead of a missing value
0650    /// \return The node of the graph that will provide a default value
0651    ///
0652    /// This operation is useful in case an entry of the dataset is incomplete,
0653    /// i.e. if one or more of the columns do not have valid values. It does not
0654    /// modify the values of the column, but in case any entry is missing, it
0655    /// will provide the default value to downstream nodes instead.
0656    ///
0657    /// Use cases include:
0658    /// * When processing multiple files, one or more of them is missing a column
0659    /// * In horizontal joining with entry matching, a certain dataset has no
0660    ///   match for the current entry.
0661    ///
0662    /// ### Example usage:
0663    ///
0664    /// \code{.cpp}
0665    /// // Assume a dataset with columns [idx, x] matching another dataset with
0666    /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
0667    /// ROOT::RDataFrame df{dataset};
0668    /// auto df_default = df.DefaultValueFor("y", 33)
0669    ///                     .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
0670    /// auto colz = df_default.Take<int>("z");
0671    /// \endcode
0672    ///
0673    /// \code{.py}
0674    /// df = ROOT.RDataFrame(dataset)
0675    /// df_default = df.DefaultValueFor("y", 33).Define("z", "x + y")
0676    /// colz = df_default.Take[int]("z")
0677    /// \endcode
0678    template <typename T>
0679    RInterface<Proxied, DS_t> DefaultValueFor(std::string_view column, const T &defaultValue)
0680    {
0681       constexpr auto where{"DefaultValueFor"};
0682       RDFInternal::CheckForNoVariations(where, column, fColRegister);
0683       // For now disable this functionality in case of an empty data source and
0684       // the column name was not defined previously.
0685       if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
0686          RDFInternal::CheckForDefinition(where, column, fColRegister, fLoopManager->GetBranchNames(),
0687                                          GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
0688 
0689       // Declare return type to the interpreter, for future use by jitted actions
0690       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(T));
0691       if (retTypeName.empty()) {
0692          // The type is not known to the interpreter.
0693          // We must not error out here, but if/when this column is used in jitted code
0694          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(T));
0695          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
0696       }
0697 
0698       const auto validColumnNames = ColumnNames_t{column.data()};
0699       auto newColumn = std::make_shared<ROOT::Internal::RDF::RDefaultValueFor<T>>(
0700          column, retTypeName, defaultValue, validColumnNames, fColRegister, *fLoopManager);
0701       CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>{});
0702 
0703       RDFInternal::RColumnRegister newCols(fColRegister);
0704       newCols.AddDefine(std::move(newColumn));
0705 
0706       RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
0707 
0708       return newInterface;
0709    }
0710 
0711    // clang-format off
0712    ////////////////////////////////////////////////////////////////////////////
0713    /// \brief Define a new column that is updated when the input sample changes.
0714    /// \param[in] name The name of the defined column.
0715    /// \param[in] expression A C++ callable that computes the new value of the defined column.
0716    /// \return the first node of the computation graph for which the new quantity is defined.
0717    ///
0718    /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
0719    /// where:
0720    /// - `T` is the type of the defined column
0721    /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
0722    ///   the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
0723    /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
0724    ///   being processed (see the class docs for more information).
0725    ///
0726    /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
0727    /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
0728    /// starts rather than at every entry.
0729    ///
0730    /// The callable will be invoked once per input TTree or once per multi-thread task, whichever is more often.
0731    ///
0732    /// ### Example usage:
0733    /// ~~~{.cpp}
0734    /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
0735    /// df.DefinePerSample("weightbysample",
0736    ///                    [](unsigned int slot, const ROOT::RDF::RSampleInfo &id)
0737    ///                    { return id.Contains("sample1") ? 1.0f : 2.0f; });
0738    /// ~~~
0739    // clang-format on
0740    // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
0741    template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
0742    RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression)
0743    {
0744       RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
0745       RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(),
0746                                         GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
0747 
0748       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
0749       if (retTypeName.empty()) {
0750          // The type is not known to the interpreter. We must not error out here: a placeholder type name is
0751          // stored instead, so that a problem can only surface if/when this column is used in jitted code.
0752          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
0753          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
0754       }
0755 
0756       auto newColumn =
0757          std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
0758 
0759       RDFInternal::RColumnRegister newCols(fColRegister);
0760       newCols.AddDefine(std::move(newColumn));
0761       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols)); // matches return type
0762       return newInterface;
0763    }
0764 
0765    // clang-format off
0766    ////////////////////////////////////////////////////////////////////////////
0767    /// \brief Define a new column that is updated when the input sample changes.
0768    /// \param[in] name The name of the defined column.
0769    /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
0770    /// \return the first node of the computation graph for which the new quantity is defined.
0771    ///
0772    /// The expression is just-in-time compiled and used to produce the column entries.
0773    /// It must be valid C++ syntax and the usage of the special variable names `rdfslot_` and `rdfsampleinfo_` is
0774    /// permitted, where these variables will take the same values as the `slot` and `id` parameters described at the
0775    /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
0776    ///
0777    /// ### Example usage:
0778    /// ~~~{.py}
0779    /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
0780    /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
0781    /// ~~~
0782    ///
0783    /// \note
0784    /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
0785    /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
0786    /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
0787    /// ~~~{.py}
0788    /// ROOT.gInterpreter.Declare(
0789    /// """
0790    /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
0791    ///    return id.Contains("sample1") ? 1.0f : 2.0f;
0792    /// }
0793    /// """)
0794    /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
0795    /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
0796    /// ~~~
0797    ///
0798    /// \note
0799    /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
0800    /// column names other than those mentioned above: the expression is evaluated once before the processing of the
0801    /// sample even starts, so column values are not accessible.
0802    // clang-format on
0803    RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression)
0804    {
0805       // All validation has to happen here, before jitting, lest exceptions be thrown from jitted code.
0806       RDFInternal::CheckValidCppVarName(name, "DefinePerSample");
0807       const auto dsColumns = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
0808       RDFInternal::CheckForRedefinition("DefinePerSample", name, fColRegister, fLoopManager->GetBranchNames(),
0809                                         dsColumns);
0810 
0811       // Hand the upcast current node to the jitting helper, then record the new define in a copy of the register.
0812       auto parentNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0813       auto bookedDefine =
0814          RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, parentNodeOnHeap);
0815       RDFInternal::RColumnRegister updatedRegister(fColRegister);
0816       updatedRegister.AddDefine(std::move(bookedDefine));
0817 
0818       // The returned interface wraps the same proxied node, paired with the extended column register.
0819       return RInterface<Proxied, DS_t>(fProxiedPtr, *fLoopManager, std::move(updatedRegister));
0820    }
0821 
0822    /// \brief Register systematic variations for a single existing column using custom variation tags.
0823    /// \param[in] colName name of the column for which varied values are provided.
0824    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0825    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0826    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0827    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0828    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
0829    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0830    ///
0831    /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
0832    /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
0833    /// results that depend on any varied quantity, a map/dictionary of varied results can be produced with
0834    /// ROOT::RDF::Experimental::VariationsFor (see the example below).
0835    ///
0836    /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
0837    /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
0838    /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
0839    /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
0840    ///
0841    /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
0842    /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
0843    /// ~~~{.cpp}
0844    /// auto nominal_hx =
0845    ///     df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, {"down", "up"})
0846    ///       .Filter("pt > k")
0847    ///       .Define("x", someFunc, {"pt"})
0848    ///       .Histo1D("x");
0849    ///
0850    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0851    /// hx["nominal"].Draw();
0852    /// hx["pt:down"].Draw("SAME");
0853    /// hx["pt:up"].Draw("SAME");
0854    /// ~~~
0855    /// RDataFrame computes all variations as part of a single loop over the data.
0856    /// In particular, this means that I/O and computation of values shared
0857    /// among variations only happen once for all variations. Thus, the event loop
0858    /// run-time typically scales much better than linearly with the number of
0859    /// variations.
0860    ///
0861    /// RDataFrame lazily computes the varied values required to produce the
0862    /// outputs of \ref ROOT::RDF::Experimental::VariationsFor "VariationsFor()". If \ref
0863    /// ROOT::RDF::Experimental::VariationsFor "VariationsFor()" was not called for a result, the computations are only
0864    /// run for the nominal case.
0865    ///
0866    /// See other overloads for examples when variations are added for multiple existing columns,
0867    /// or when the tags are auto-generated instead of being directly defined.
0868    template <typename F>
0869    RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
0870                                   const std::vector<std::string> &variationTags, std::string_view variationName = "")
0871    {
0872       // Wrap the single column in a one-element list; an empty variationName falls back to the column name.
0873       std::vector<std::string> singleColumn(1, std::string(colName));
0874       const auto effectiveName = std::string(variationName.empty() ? colName : variationName);
0875       return VaryImpl<true>(std::move(singleColumn), std::forward<F>(expression), inputColumns, variationTags,
0876                             effectiveName);
0877    }
0878 
0879    /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
0880    /// \param[in] colName name of the column for which varied values are provided.
0881    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0882    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0883    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0884    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0885    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
0886    /// `"1"`, etc. 
0887    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0888    ///            colName is used if none is provided.
0889    ///
0890    /// This overload of Vary takes an nVariations parameter instead of a list of tag names.
0891    /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
0892    /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
0893    ///
0894    /// Example usage:
0895    /// ~~~{.cpp}
0896    /// auto nominal_hx =
0897    ///   df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, 2)
0898    ///     .Histo1D("pt");
0899    ///
0900    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0901    /// hx["nominal"].Draw();
0902    /// hx["pt:0"].Draw("SAME");
0903    /// hx["pt:1"].Draw("SAME");
0904    /// ~~~
0905    ///
0906    /// \note See also This Vary() overload for more information.
0907    template <typename F>
0908    RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
0909                                   std::size_t nVariations, std::string_view variationName = "")
0910    {
0911       R__ASSERT(nVariations > 0 && "Must have at least one variation.");
0912 
0913       // Generate the tags "0", "1", ...: each new tag is the number of tags produced so far.
0914       std::vector<std::string> autoTags;
0915       autoTags.reserve(nVariations);
0916       while (autoTags.size() < nVariations)
0917          autoTags.push_back(std::to_string(autoTags.size()));
0918       // An empty variationName falls back to the column name.
0919       const auto effectiveName = std::string(variationName.empty() ? colName : variationName);
0920       return Vary(colName, std::forward<F>(expression), inputColumns, std::move(autoTags), effectiveName);
0921    }
0922 
0923    /// \brief Register systematic variations for multiple existing columns using custom variation tags.
0924    /// \param[in] colNames set of names of the columns for which varied values are provided.
0925    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0926    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0927    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0928    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0929    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
0930    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`
0931    ///
0932    /// This overload of Vary takes a list of column names as first argument and
0933    /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
0934    /// affected column. The `variationTags` are defined as `{"down", "up"}`.
0935    ///
0936    /// Example usage:
0937    /// ~~~{.cpp}
0938    /// // produce variations "ptAndEta:down" and "ptAndEta:up"
0939    /// auto nominal_hx =
0940    ///   df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
0941    ///         [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
0942    ///         {"pt", "eta"},  // inputs to the Vary expression, independent of what columns are varied
0943    ///         {"down", "up"}, // variation tags
0944    ///         "ptAndEta")    // variation name
0945    ///     .Histo1D("pt", "eta");
0946    ///
0947    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0948    /// hx["nominal"].Draw();
0949    /// hx["ptAndEta:down"].Draw("SAME");
0950    /// hx["ptAndEta:up"].Draw("SAME");
0951    /// ~~~
0952    ///
0953    /// \note See also This Vary() overload for more information.
0954 
0955    template <typename F>
0956    RInterface<Proxied, DS_t>
0957    Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
0958         const std::vector<std::string> &variationTags, std::string_view variationName)
0959    {
           // Multi-column entry point: all arguments are handed to VaryImpl unchanged, with `expression` perfectly
           // forwarded. VaryImpl<false> is used here, whereas the single-column overload calls VaryImpl<true>.
           // NOTE(review): the boolean template argument presumably flags single-column variations -- confirm in
           // VaryImpl.
0960       return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName);
0961    }
0962 
0963    /// \brief Register systematic variations for multiple existing columns using custom variation tags.
0964    /// \param[in] colNames set of names of the columns for which varied values are provided.
0965    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0966    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0967    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0968    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0969    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
0970    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0971    ///            Unlike in the single-column overloads, this argument has no default and must always be provided.
0972    ///
0973    /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
0974    /// is avoided.
0975    ///
0976    /// \note See also This Vary() overload for more information.
0977    template <typename F>
0978    RInterface<Proxied, DS_t>
0979    Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
0980         const std::vector<std::string> &variationTags, std::string_view variationName)
0981    {
           // Materialise the initializer_list as a std::vector<std::string> and delegate to the Vary overload whose
           // first parameter is `const std::vector<std::string> &colNames`; nothing else is done at this level.
0982       return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName);
0983    }
0984 
0985    /// \brief Register systematic variations for multiple existing columns using auto-generated tags.
0986    /// \param[in] colNames set of names of the columns for which varied values are provided.
0987    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0988    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0989    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0990    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0991    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
0992    /// `"1"`, etc. 
0993    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0994    ///            Unlike in the single-column overloads, this argument has no default and must always be provided.
0995    ///
0996    /// This overload of Vary takes a list of column names as first argument.
0997    /// It takes an `nVariations` parameter instead of a list of tag names (`variationTags`). Tag names
0998    /// will be auto-generated as the sequence 0...``nVariations-1``.
0999    ///
1000    /// Example usage:
1001    /// ~~~{.cpp}
1002    /// auto nominal_hx =
1003    ///   df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
1004    ///         [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
1005    ///         {"pt", "eta"},  // inputs to the Vary expression, independent of what columns are varied
1006    ///         2, // auto-generated variation tags
1007    ///         "ptAndEta")    // variation name
1008    ///     .Histo1D("pt", "eta");
1009    ///
1010    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1011    /// hx["nominal"].Draw();
1012    /// hx["ptAndEta:0"].Draw("SAME");
1013    /// hx["ptAndEta:1"].Draw("SAME");
1014    /// ~~~
1015    ///
1016    /// \note See also This Vary() overload for more information.
1017    template <typename F>
1018    RInterface<Proxied, DS_t>
1019    Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
1020         std::size_t nVariations, std::string_view variationName)
1021    {
1022       R__ASSERT(nVariations > 0 && "Must have at least one variation.");
1023 
1024       // Generate the tags "0", "1", ...: each new tag is the number of tags produced so far.
1025       std::vector<std::string> autoTags;
1026       autoTags.reserve(nVariations);
1027       while (autoTags.size() < nVariations)
1028          autoTags.push_back(std::to_string(autoTags.size()));
1029       return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(autoTags), variationName);
1030    }
1031 
1032    /// \brief Register systematic variations for multiple existing columns using auto-generated tags.
1033    /// \param[in] colNames set of names of the columns for which varied values are provided.
1034    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
1035    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
1036    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
1037    /// \param[in] inputColumns the names of the columns to be passed to the callable.
1039    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1040    /// `"1"`, etc. 
1041    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1042    ///            Unlike in the single-column overloads, this argument has no default and must always be provided.
1043    ///
1044    /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
1045    /// is avoided.
1046    ///
1047    /// \note See also This Vary() overload for more information.
1048    template <typename F>
1049    RInterface<Proxied, DS_t>
1050    Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
1051         std::size_t nVariations, std::string_view variationName)
1052    {
           // Materialise the initializer_list as a std::vector<std::string> and delegate to the Vary overload taking
           // `const std::vector<std::string> &colNames` together with `nVariations`; nothing else is done at this level.
1053       return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName);
1054    }
1055 
1056    /// \brief Register systematic variations for a single existing column using custom variation tags.
1057    /// \param[in] colName name of the column for which varied values are provided.
1058    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1059    ///            values for the specified column.
1060    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1061    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1062    ///            colName is used if none is provided.
1063    ///
1064    /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1065    /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1066    /// defined as `{"down", "up"}`.
1067    /// ~~~{.cpp}
1068    /// auto nominal_hx =
1069    ///     df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
1070    ///       .Filter("pt > k")
1071    ///       .Define("x", someFunc, {"pt"})
1072    ///       .Histo1D("x");
1073    ///
1074    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1075    /// hx["nominal"].Draw();
1076    /// hx["pt:down"].Draw("SAME");
1077    /// hx["pt:up"].Draw("SAME");
1078    /// ~~~
1079    ///
1080    /// \note See also This Vary() overload for more information.
1081    RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression,
1082                                   const std::vector<std::string> &variationTags, std::string_view variationName = "")
1083    {
1084       // Wrap the single column in a one-element list; an empty variationName falls back to the column name.
1085       std::vector<std::string> singleColumn(1, std::string(colName));
1086       const auto effectiveName = std::string(variationName.empty() ? colName : variationName);
1087       return JittedVaryImpl(singleColumn, expression, variationTags, effectiveName, /*isSingleColumn=*/true);
1088    }
1089 
1090    /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
1091    /// \param[in] colName name of the column for which varied values are provided.
1092    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1093    ///            values for the specified column.
1094    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1095    /// `"1"`, etc. 
1096    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1097    ///            colName is used if none is provided.
1098    ///
1099    /// This overload adds the possibility for the expression used to evaluate the varied values to be a just-in-time
1100    /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1101    /// auto-generated.
1102    /// ~~~{.cpp}
1103    /// auto nominal_hx =
1104    ///     df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2)
1105    ///       .Histo1D("pt");
1106    ///
1107    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1108    /// hx["nominal"].Draw();
1109    /// hx["pt:0"].Draw("SAME");
1110    /// hx["pt:1"].Draw("SAME");
1111    /// ~~~
1112    ///
1113    /// \note See also This Vary() overload for more information.
1114    RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
1115                                   std::string_view variationName = "")
1116    {
1117       std::vector<std::string> variationTags;
1118       variationTags.reserve(nVariations);
1119       for (std::size_t i = 0u; i < nVariations; ++i)
1120          variationTags.emplace_back(std::to_string(i));
1121 
1122       return Vary(colName, expression, std::move(variationTags), variationName);
1123    }
1124 
1125    /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1126    /// \param[in] colNames set of names of the columns for which varied values are provided.
1127    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
1128    ///            values for the specified columns.
1129    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1130    /// `"1"`, etc. 
1131    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1132    ///
1133    /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1134    /// compiled. It takes an nVariations parameter instead of a list of tag names.
1135    /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
1136    /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
1137    /// The example below shows how Vary() is used while dealing with multiple columns.
1138    ///
1139    /// ~~~{.cpp}
1140    /// auto nominal_hx =
1141    ///     df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
1142    ///       .Histo1D("x", "y");
1143    ///
1144    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1145    /// hx["nominal"].Draw();
1146    /// hx["xy:0"].Draw("SAME");
1147    /// hx["xy:1"].Draw("SAME");
1148    /// ~~~
1149    ///
1150    /// \note See also This Vary() overload for more information.
1151    RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1152                                   std::size_t nVariations, std::string_view variationName)
1153    {
1154       std::vector<std::string> variationTags;
1155       variationTags.reserve(nVariations);
1156       for (std::size_t i = 0u; i < nVariations; ++i)
1157          variationTags.emplace_back(std::to_string(i));
1158 
1159       return Vary(colNames, expression, std::move(variationTags), variationName);
1160    }
1161 
1162    /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1163    /// \param[in] colNames set of names of the columns for which varied values are provided.
1164    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1165    ///            values for the specified column.
1166    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1167    /// `"1"`, etc. 
1168    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1169    ///            colName is used if none is provided.
1170    ///
1171    /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
1172    /// is avoided.
1173    ///
1174    /// \note See also This Vary() overload for more information.
1175    RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression,
1176                                   std::size_t nVariations, std::string_view variationName)
1177    {
1178       return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName);
1179    }
1180 
1181    /// \brief Register systematic variations for multiple existing columns using custom variation tags.
1182    /// \param[in] colNames set of names of the columns for which varied values are provided.
1183    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
1184    ///            values for the specified columns.
1185    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1186    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1187    ///
1188    /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1189    /// compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as
1190    /// `{"down", "up"}`.
1191    /// ~~~{.cpp}
1192    /// auto nominal_hx =
1193    ///     df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
1194    ///       .Histo1D("x", "y");
1195    ///
1196    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1197    /// hx["nominal"].Draw();
1198    /// hx["xy:down"].Draw("SAME");
1199    /// hx["xy:up"].Draw("SAME");
1200    /// ~~~
1201    ///
1202    /// \note See also This Vary() overload for more information.
1203    RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1204                                   const std::vector<std::string> &variationTags, std::string_view variationName)
1205    {
1206       return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false);
1207    }
1208 
1209    ////////////////////////////////////////////////////////////////////////////
1210    /// \brief Allow to refer to a column with a different name.
1211    /// \param[in] alias name of the column alias
1212    /// \param[in] columnName of the column to be aliased
1213    /// \return the first node of the computation graph for which the alias is available.
1214    ///
1215    /// Aliasing an alias is supported.
1216    ///
1217    /// ### Example usage:
1218    /// ~~~{.cpp}
1219    /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
1220    /// ~~~
1221    RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
1222    {
1223       // The symmetry with Define is clear. We want to:
1224       // - Create globally the alias and return this very node, unchanged
1225       // - Make aliases accessible based on chains and not globally
1226 
1227       // Helper to find out if a name is a column
1228       auto &dsColumnNames = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
1229 
1230       constexpr auto where = "Alias";
1231       RDFInternal::CheckValidCppVarName(alias, where);
1232       // If the alias name is a column name, there is a problem
1233       RDFInternal::CheckForRedefinition(where, alias, fColRegister, fLoopManager->GetBranchNames(), dsColumnNames);
1234 
1235       const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
1236 
1237       RDFInternal::RColumnRegister newCols(fColRegister);
1238       newCols.AddAlias(alias, validColumnName);
1239 
1240       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
1241 
1242       return newInterface;
1243    }
1244 
1245    ////////////////////////////////////////////////////////////////////////////
1246    /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1247    /// \tparam ColumnTypes variadic list of branch/column types.
1248    /// \param[in] treename The name of the output TTree.
1249    /// \param[in] filename The name of the output TFile.
1250    /// \param[in] columnList The list of names of the columns/branches to be written.
1251    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1252    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1253    ///
1254    /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
1255    /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
1256    /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
1257    /// written out and it appears before the array in the columnList.
1258    ///
1259    /// By default, in case of TTree or TChain inputs, Snapshot will try to write out all top-level branches. For other
1260    /// types of inputs, all columns returned by GetColumnNames() will be written out. If friend trees or chains are
1261    /// present, by default all friend top-level branches that have names that do not collide with
1262    /// names of branches in the main TTree/TChain will be written out. Since v6.24, Snapshot will also write out
1263    /// friend branches with the same names of branches in the main TTree/TChain with names of the form
1264    /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
1265    ///
1266    /// ### Writing to a sub-directory
1267    ///
1268    /// Snapshot supports writing the TTree in a sub-directory inside the TFile. It is sufficient to specify the path to
1269    /// the TTree as part of the TTree name, e.g. `df.Snapshot("subdir/t", "f.root")` write TTree `t` in the
1270    /// sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
1271    ///
1272    /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
1273    /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled
1274    /// with respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in
1275    /// wrong associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
1276    /// error out if such a "shuffled" TTree is used in a friendship.
1277    ///
1278    /// \note In case no events are written out (e.g. because no event passes all filters), Snapshot will still write the
1279    /// requested output TTree to the file, with all the branches requested to preserve the dataset schema.
1280    ///
1281    /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
1282    /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1283    /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1284    /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
1285    ///
1286    /// ### Example invocations:
1287    ///
1288    /// ~~~{.cpp}
1289    /// // without specifying template parameters (column types automatically deduced)
1290    /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
1291    ///
1292    /// // specifying template parameters ("x" is `int`, "y" is `float`)
1293    /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
1294    /// ~~~
1295    ///
1296    /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
1297    /// `RSnapshotOptions`:
1298    /// ~~~{.cpp}
1299    /// RSnapshotOptions opts;
1300    /// opts.fLazy = true;
1301    /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1302    /// ~~~
1303    template <typename... ColumnTypes>
1304    RResultPtr<RInterface<RLoopManager>>
1305    Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
1306             const RSnapshotOptions &options = RSnapshotOptions())
1307    {
1308       return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
1309    }
1310 
1311    ////////////////////////////////////////////////////////////////////////////
1312    /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1313    /// \param[in] treename The name of the output TTree.
1314    /// \param[in] filename The name of the output TFile.
1315    /// \param[in] columnList The list of names of the columns/branches to be written.
1316    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1317    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1318    ///
1319    /// This function returns a `RDataFrame` built with the output tree as a source.
1320    /// The types of the columns are automatically inferred and do not need to be specified.
1321    ///
1322    /// See above for a more complete description and example usages.
1323    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1324                                                  const ColumnNames_t &columnList,
1325                                                  const RSnapshotOptions &options = RSnapshotOptions())
1326    {
1327       // like columnList but with `#var` columns removed
1328       auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
1329       // like columnListWithoutSizeColumns but with aliases resolved
1330       auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes);
1331       RDFInternal::CheckForDuplicateSnapshotColumns(colListNoAliases);
1332       // like validCols but with missing size branches required by array branches added in the right positions
1333       const auto pairOfColumnLists = RDFInternal::AddSizeBranches(
1334          fLoopManager->GetBranchNames(), GetDataSource(), std::move(colListNoAliases), std::move(colListNoPoundSizes));
1335       const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first;
1336       const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second;
1337 
1338       const auto fullTreeName = treename;
1339       const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
1340       treename = parsedTreePath.fTreeName;
1341       const auto &dirname = parsedTreePath.fDirName;
1342 
1343       ::TDirectory::TContext ctxt;
1344 
1345       RResultPtr<RInterface<RLoopManager>> resPtr;
1346 
1347       if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) {
1348          if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") {
1349             throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. The current recommended "
1350                                      "way to convert TTrees to RNTuple is through the RNTupleImporter.");
1351          }
1352 
1353          // The data source of the RNTuple resulting from the Snapshot action does not exist yet here, so we create one
1354          // without a data source for now, and set it once the actual data source can be created (i.e., after
1355          // writing the RNTuple).
1356          auto newRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(colListNoPoundSizes));
1357 
1358          auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1359             std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches,
1360             options, newRDF->GetLoopManager(), GetLoopManager(), true /* fToNTuple */});
1361 
1362          // The Snapshot helper will use colListNoAliasesWithSizeBranches (with aliases resolved) as input columns, and
1363          // colListWithAliasesAndSizeBranches (still with aliases in it, passed through snapHelperArgs) as output column
1364          // names.
1365          resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1366             colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr,
1367             colListNoAliasesWithSizeBranches.size());
1368       } else {
1369          if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" &&
1370              options.fOutputFormat == ESnapshotOutputFormat::kDefault) {
1371             Warning("Snapshot",
1372                     "The default Snapshot output data format is TTree, but the input data format is RNTuple. If you "
1373                     "want to Snapshot to RNTuple or suppress this warning, set the appropriate fOutputFormat option in "
1374                     "RSnapshotOptions. Note that this current default behaviour might change in the future.");
1375          }
1376 
1377          // We create an RLoopManager without a data source. This needs to be initialised when the output TTree dataset
1378          // has actually been created and written to TFile, i.e. at the end of the Snapshot execution.
1379          auto newRDF = std::make_shared<RInterface<RLoopManager>>(
1380             std::make_shared<RLoopManager>(colListNoAliasesWithSizeBranches));
1381 
1382          auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1383             std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches,
1384             options, newRDF->GetLoopManager(), GetLoopManager(), false /* fToRNTuple */});
1385 
1386          resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, RDFDetail::RInferredType>(
1387             colListNoAliasesWithSizeBranches, newRDF, snapHelperArgs, fProxiedPtr,
1388             colListNoAliasesWithSizeBranches.size(), options.fVector2RVec);
1389       }
1390 
1391       if (!options.fLazy)
1392          *resPtr;
1393       return resPtr;
1394    }
1395 
1396    // clang-format off
1397    ////////////////////////////////////////////////////////////////////////////
1398    /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1399    /// \param[in] treename The name of the output TTree.
1400    /// \param[in] filename The name of the output TFile.
1401    /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1402    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
1403    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1404    ///
1405    /// This function returns a `RDataFrame` built with the output tree as a source.
1406    /// The types of the columns are automatically inferred and do not need to be specified.
1407    ///
1408    /// See above for a more complete description and example usages.
1409    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1410                                                  std::string_view columnNameRegexp = "",
1411                                                  const RSnapshotOptions &options = RSnapshotOptions())
1412    {
1413       const auto definedColumns = fColRegister.GenerateColumnNames();
1414       auto *tree = fLoopManager->GetTree();
1415 
1416       const auto treeBranchNames = tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1417       const auto dsColumns = GetDataSource() ? ROOT::Internal::RDF::GetTopLevelFieldNames(*GetDataSource()) : ColumnNames_t{};
1418       // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1419       ColumnNames_t dsColumnsWithoutSizeColumns;
1420       std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1421                    [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1422       ColumnNames_t columnNames;
1423       columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumnsWithoutSizeColumns.size());
1424       columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1425       columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1426       columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1427 
1428       // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd.
1429       // RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
1430       RDFInternal::RemoveDuplicates(columnNames);
1431 
1432       auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
1433 
1434       if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS") {
1435          RDFInternal::RemoveRNTupleSubFields(selectedColumns);
1436       }
1437 
1438       return Snapshot(treename, filename, selectedColumns, options);
1439    }
1440    // clang-format on
1441 
1442    // clang-format off
1443    ////////////////////////////////////////////////////////////////////////////
1444    /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
1445    /// \param[in] treename The name of the output TTree.
1446    /// \param[in] filename The name of the output TFile.
1447    /// \param[in] columnList The list of names of the columns/branches to be written.
1448    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
1449    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1450    ///
1451    /// This function returns a `RDataFrame` built with the output tree as a source.
1452    /// The types of the columns are automatically inferred and do not need to be specified.
1453    ///
1454    /// See above for a more complete description and example usages.
1455    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1456                                                  std::initializer_list<std::string> columnList,
1457                                                  const RSnapshotOptions &options = RSnapshotOptions())
1458    {
1459       ColumnNames_t selectedColumns(columnList);
1460       return Snapshot(treename, filename, selectedColumns, options);
1461    }
1462    // clang-format on
1463 
1464    ////////////////////////////////////////////////////////////////////////////
1465    /// \brief Save selected columns in memory.
1466    /// \tparam ColumnTypes variadic list of branch/column types.
1467    /// \param[in] columnList columns to be cached in memory.
1468    /// \return a `RDataFrame` that wraps the cached dataset.
1469    ///
1470    /// This action returns a new `RDataFrame` object, completely detached from
1471    /// the originating `RDataFrame`. The new dataframe only contains the cached
1472    /// columns and stores their content in memory for fast, zero-copy subsequent access.
1473    ///
1474    /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1475    /// fits in memory and that will be accessed many times.
1476    ///
1477    /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1478    /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1479    /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1480    /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>(..., {"nbar"})`.
1481    ///
1482    /// ### Example usage:
1483    ///
1484    /// **Types and columns specified:**
1485    /// ~~~{.cpp}
1486    /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1487    /// ~~~
1488    ///
1489    /// **Types inferred and columns specified (this invocation relies on jitting):**
1490    /// ~~~{.cpp}
1491    /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1492    /// ~~~
1493    ///
1494    /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1495    /// ~~~{.cpp}
1496    /// auto cache_all_cols_df = df.Cache(myRegexp);
1497    /// ~~~
1498    template <typename... ColumnTypes>
1499    RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
1500    {
1501       auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
1502       return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1503    }
1504 
1505    ////////////////////////////////////////////////////////////////////////////
1506    /// \brief Save selected columns in memory.
1507    /// \param[in] columnList columns to be cached in memory
1508    /// \return a `RDataFrame` that wraps the cached dataset.
1509    ///
1510    /// See the previous overloads for more information.
1511    RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
1512    {
1513       // Early return: if the list of columns is empty, just return an empty RDF
1514       // If we proceed, the jitted call will not compile!
1515       if (columnList.empty()) {
1516          auto nEntries = *this->Count();
1517          RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1518          return emptyRDF;
1519       }
1520 
1521       std::stringstream cacheCall;
1522       auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1523       RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
1524                                                                                       fColRegister);
1525       // build a string equivalent to
1526       // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
1527       RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
1528       cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1529                 << RDFInternal::PrettyPrintAddr(&resRDF)
1530                 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1531                 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1532 
1533       const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1534 
1535       const auto validColumnNames =
1536          GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1537       const auto colTypes = GetValidatedArgTypes(validColumnNames, fColRegister, fLoopManager->GetTree(),
1538                                                  GetDataSource(), "Cache", /*vector2RVec=*/false);
1539       for (const auto &colType : colTypes)
1540          cacheCall << colType << ", ";
1541       if (!columnListWithoutSizeColumns.empty())
1542          cacheCall.seekp(-2, cacheCall.cur);                         // remove the last ",
1543       cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1544                 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1545 
1546       // book the code to jit with the RLoopManager and trigger the event loop
1547       fLoopManager->ToJitExec(cacheCall.str());
1548       fLoopManager->Jit();
1549 
1550       return resRDF;
1551    }
1552 
1553    ////////////////////////////////////////////////////////////////////////////
1554    /// \brief Save selected columns in memory.
1555    /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1556    /// \return a `RDataFrame` that wraps the cached dataset.
1557    ///
1558    /// The existing columns are matched against the regular expression. If the string provided
1559    /// is empty, all columns are selected. See the previous overloads for more information.
1560    RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "")
1561    {
1562       const auto definedColumns = fColRegister.GenerateColumnNames();
1563       auto *tree = fLoopManager->GetTree();
1564       const auto treeBranchNames =
1565          tree != nullptr ? ROOT::Internal::TreeUtils::GetTopLevelBranchNames(*tree) : ColumnNames_t{};
1566       const auto dsColumns = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
1567       // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1568       ColumnNames_t dsColumnsWithoutSizeColumns;
1569       std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1570                    [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1571       ColumnNames_t columnNames;
1572       columnNames.reserve(definedColumns.size() + treeBranchNames.size() + dsColumns.size());
1573       columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1574       columnNames.insert(columnNames.end(), treeBranchNames.begin(), treeBranchNames.end());
1575       columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1576       const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1577       return Cache(selectedColumns);
1578    }
1579 
1580    ////////////////////////////////////////////////////////////////////////////
1581    /// \brief Save selected columns in memory.
1582    /// \param[in] columnList columns to be cached in memory.
1583    /// \return a `RDataFrame` that wraps the cached dataset.
1584    ///
1585    /// See the previous overloads for more information.
1586    RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1587    {
1588       ColumnNames_t selectedColumns(columnList);
1589       return Cache(selectedColumns);
1590    }
1591 
1592    // clang-format off
1593    ////////////////////////////////////////////////////////////////////////////
1594    /// \brief Creates a node that filters entries based on range: [begin, end).
1595    /// \param[in] begin Initial entry number considered for this range.
1596    /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1597    /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1598    /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1599    ///
1600    /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1601    /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1602    ///
1603    /// ### Example usage:
1604    /// ~~~{.cpp}
1605    /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1606    /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1607    /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1608    /// ~~~
1609    // clang-format on
1610    RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1611    {
1612       // check invariants
1613       if (stride == 0 || (end != 0 && end < begin))
1614          throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1615       CheckIMTDisabled("Range");
1616 
1617       using Range_t = RDFDetail::RRange<Proxied>;
1618       auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1619       RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister);
1620       return newInterface;
1621    }
1622 
1623    // clang-format off
1624    ////////////////////////////////////////////////////////////////////////////
1625    /// \brief Creates a node that filters entries based on range.
1626    /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1627    /// \return a node of the computation graph for which the range is defined.
1628    ///
1629    /// See the other Range overload for a detailed description.
1630    // clang-format on
1631    RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1632 
1633    // clang-format off
1634    ////////////////////////////////////////////////////////////////////////////
1635    /// \brief Execute a user-defined function on each entry (*instant action*).
1636    /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1637    /// \param[in] columns Names of the columns/branches in input to the user function.
1638    ///
1639    /// The callable `f` is invoked once per entry. This is an *instant action*:
1640    /// upon invocation, an event loop as well as execution of all scheduled actions
1641    /// is triggered.
1642    /// Users are responsible for the thread-safety of this callable when executing
1643    /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1644    ///
1645    /// ### Example usage:
1646    /// ~~~{.cpp}
1647    /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1648    /// ~~~
1649    // clang-format on
1650    template <typename F>
1651    void Foreach(F f, const ColumnNames_t &columns = {})
1652    {
1653       using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1654       using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1655       ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1656    }
1657 
1658    // clang-format off
1659    ////////////////////////////////////////////////////////////////////////////
1660    /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1661    /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1662    /// \param[in] columns Names of the columns/branches in input to the user function.
1663    ///
1664    /// Same as `Foreach`, but the user-defined function takes an extra
1665    /// `unsigned int` as its first parameter, the *processing slot index*.
1666    /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1667    /// for each thread of execution.
1668    /// This is meant as a helper in writing thread-safe `Foreach`
1669    /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1670    /// The user-defined processing callable is able to follow different
1671    /// *streams of processing* indexed by the first parameter.
1672    /// `ForeachSlot` works just as well with single-thread execution: in that
1673    /// case `slot` will always be `0`.
1674    ///
1675    /// ### Example usage:
1676    /// ~~~{.cpp}
1677    /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1678    /// ~~~
1679    // clang-format on
1680    template <typename F>
1681    void ForeachSlot(F f, const ColumnNames_t &columns = {})
1682    {
1683       using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
1684       constexpr auto nColumns = ColTypes_t::list_size;
1685 
1686       const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1687       CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1688 
1689       using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1690       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1691 
1692       auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1693 
1694       fLoopManager->Run();
1695    }
1696 
1697    // clang-format off
1698    ////////////////////////////////////////////////////////////////////////////
1699    /// \brief Execute a user-defined reduce operation on the values of a column.
1700    /// \tparam F The type of the reduce callable. Automatically deduced.
1701    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1702    /// \param[in] f A callable with signature `T(T,T)`
1703    /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1704    /// \return the reduced quantity wrapped in a ROOT::RDF::RResultPtr.
1705    ///
1706    /// A reduction takes two values of a column and merges them into one (e.g.
1707    /// by summing them, taking the maximum, etc). This action performs the
1708    /// specified reduction operation on all processed column values, returning
1709    /// a single value of the same type. The callable f must satisfy the general
1710    /// requirements of a *processing function* besides having signature `T(T,T)`
1711    /// where `T` is the type of column columnName.
1712    ///
1713    /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1714    /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1715    /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1716    /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1717    /// overload.
1718    ///
1719    /// ### Example usage:
1720    /// ~~~{.cpp}
1721    /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1722    /// ~~~
1723    ///
1724    /// This action is *lazy*: upon invocation of this method the calculation is
1725    /// booked but not executed. Also see RResultPtr.
1726    // clang-format on
1727    template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1728    RResultPtr<T> Reduce(F f, std::string_view columnName = "")
1729    {
1730       static_assert(
1731          std::is_default_constructible<T>::value,
1732          "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1733       return Reduce(std::move(f), columnName, T());
1734    }
1735 
1736    ////////////////////////////////////////////////////////////////////////////
1737    /// \brief Execute a user-defined reduce operation on the values of a column.
1738    /// \tparam F The type of the reduce callable. Automatically deduced.
1739    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1740    /// \param[in] f A callable with signature `T(T,T)`
1741    /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1742    /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1743    /// \return the reduced quantity wrapped in a RResultPtr.
1744    ///
1745    /// ### Example usage:
1746    /// ~~~{.cpp}
1747    /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1748    /// ~~~
1749    /// See the description of the first Reduce overload for more information.
1750    template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1751    RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1752    {
1753       return Aggregate(f, f, columnName, redIdentity);
1754    }
1755 
1756    ////////////////////////////////////////////////////////////////////////////
1757    /// \brief Return the number of entries processed (*lazy action*).
1758    /// \return the number of entries wrapped in a RResultPtr.
1759    ///
1760    /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1761    /// This action is *lazy*: upon invocation of this method the calculation is
1762    /// booked but not executed. Also see RResultPtr.
1763    ///
1764    /// ### Example usage:
1765    /// ~~~{.cpp}
1766    /// auto nEntriesAfterCuts = myFilteredDf.Count();
1767    /// ~~~
1768    ///
1769    RResultPtr<ULong64_t> Count()
1770    {
1771       const auto nSlots = fLoopManager->GetNSlots();
1772       auto cSPtr = std::make_shared<ULong64_t>(0);
1773       using Helper_t = RDFInternal::CountHelper;
1774       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1775       auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1776                                                RDFInternal::RColumnRegister(fColRegister));
1777       return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1778    }
1779 
1780    ////////////////////////////////////////////////////////////////////////////
1781    /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1782    /// \tparam T The type of the column.
1783    /// \tparam COLL The type of collection used to store the values.
1784    /// \param[in] column The name of the column to collect the values of.
1785    /// \return the content of the selected column wrapped in a RResultPtr.
1786    ///
1787    /// The collection type to be specified for C-style array columns is `RVec<T>`:
1788    /// in this case the returned collection is a `std::vector<RVec<T>>`.
1789    /// ### Example usage:
1790    /// ~~~{.cpp}
1791    /// // In this case intCol is a std::vector<int>
1792    /// auto intCol = rdf.Take<int>("integerColumn");
1793    /// // Same content as above but in this case taken as a RVec<int>
1794    /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1795    /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1796    /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1797    /// ~~~
1798    /// This action is *lazy*: upon invocation of this method the calculation is
1799    /// booked but not executed. Also see RResultPtr.
1800    template <typename T, typename COLL = std::vector<T>>
1801    RResultPtr<COLL> Take(std::string_view column = "")
1802    {
1803       const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1804 
1805       const auto validColumnNames = GetValidatedColumnNames(1, columns);
1806       CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1807 
1808       using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1809       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1810       auto valuesPtr = std::make_shared<COLL>();
1811       const auto nSlots = fLoopManager->GetNSlots();
1812 
1813       auto action =
1814          std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1815       return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1816    }
1817 
1818    ////////////////////////////////////////////////////////////////////////////
1819    /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1820    /// \tparam V The type of the column used to fill the histogram.
1821    /// \param[in] model The returned histogram will be constructed using this as a model.
1822    /// \param[in] vName The name of the column that will fill the histogram.
1823    /// \return the monodimensional histogram wrapped in a RResultPtr.
1824    ///
1825    /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1826    /// is filled with each one of the elements of the container. In case multiple columns of container type
1827    /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1828    /// possibly different lengths between events).
1829    /// This action is *lazy*: upon invocation of this method the calculation is
1830    /// booked but not executed. Also see RResultPtr.
1831    ///
1832    /// ### Example usage:
1833    /// ~~~{.cpp}
1834    /// // Deduce column type (this invocation needs jitting internally)
1835    /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1836    /// // Explicit column type
1837    /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1838    /// ~~~
1839    ///
1840    /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1841    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1842    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1843    template <typename V = RDFDetail::RInferredType>
1844    RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1845    {
1846       const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1847 
1848       const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1849 
1850       std::shared_ptr<::TH1D> h(nullptr);
1851       {
1852          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1853          h = model.GetHistogram();
1854          h->SetDirectory(nullptr);
1855       }
1856 
1857       if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1858          RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
1859       return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr);
1860    }
1861 
1862    ////////////////////////////////////////////////////////////////////////////
1863    /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1864    /// \tparam V The type of the column used to fill the histogram.
1865    /// \param[in] vName The name of the column that will fill the histogram.
1866    /// \return the monodimensional histogram wrapped in a RResultPtr.
1867    ///
1868    /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1869    /// The "name" and "title" strings are built starting from the input column name.
1870    /// See the description of the first Histo1D() overload for more details.
1871    ///
1872    /// ### Example usage:
1873    /// ~~~{.cpp}
1874    /// // Deduce column type (this invocation needs jitting internally)
1875    /// auto myHist1 = myDf.Histo1D("myColumn");
1876    /// // Explicit column type
1877    /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1878    /// ~~~
1879    template <typename V = RDFDetail::RInferredType>
1880    RResultPtr<::TH1D> Histo1D(std::string_view vName)
1881    {
1882       const auto h_name = std::string(vName);
1883       const auto h_title = h_name + ";" + h_name + ";count";
1884       return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1885    }
1886 
1887    ////////////////////////////////////////////////////////////////////////////
1888    /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1889    /// \tparam V The type of the column used to fill the histogram.
1890    /// \tparam W The type of the column used as weights.
1891    /// \param[in] model The returned histogram will be constructed using this as a model.
1892    /// \param[in] vName The name of the column that will fill the histogram.
1893    /// \param[in] wName The name of the column that will provide the weights.
1894    /// \return the monodimensional histogram wrapped in a RResultPtr.
1895    ///
1896    /// See the description of the first Histo1D() overload for more details.
1897    ///
1898    /// ### Example usage:
1899    /// ~~~{.cpp}
1900    /// // Deduce column type (this invocation needs jitting internally)
1901    /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1902    /// // Explicit column type
1903    /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1904    /// ~~~
1905    template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1906    RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
1907    {
1908       const std::vector<std::string_view> columnViews = {vName, wName};
1909       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1910                                   ? ColumnNames_t()
1911                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
1912       std::shared_ptr<::TH1D> h(nullptr);
1913       {
1914          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1915          h = model.GetHistogram();
1916       }
1917       return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr);
1918    }
1919 
1920    ////////////////////////////////////////////////////////////////////////////
1921    /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1922    /// \tparam V The type of the column used to fill the histogram.
1923    /// \tparam W The type of the column used as weights.
1924    /// \param[in] vName The name of the column that will fill the histogram.
1925    /// \param[in] wName The name of the column that will provide the weights.
1926    /// \return the monodimensional histogram wrapped in a RResultPtr.
1927    ///
1928    /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1929    /// The "name" and "title" strings are built starting from the input column names.
1930    /// See the description of the first Histo1D() overload for more details.
1931    ///
1932    /// ### Example usage:
1933    /// ~~~{.cpp}
1934    /// // Deduce column types (this invocation needs jitting internally)
1935    /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1936    /// // Explicit column types
1937    /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1938    /// ~~~
1939    template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1940    RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
1941    {
1942       // We build name and title based on the value and weight column names
1943       std::string str_vName{vName};
1944       std::string str_wName{wName};
1945       const auto h_name = str_vName + "_weighted_" + str_wName;
1946       const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1947       return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1948    }
1949 
1950    ////////////////////////////////////////////////////////////////////////////
1951    /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1952    /// \tparam V The type of the column used to fill the histogram.
1953    /// \tparam W The type of the column used as weights.
1954    /// \param[in] model The returned histogram will be constructed using this as a model.
1955    /// \return the monodimensional histogram wrapped in a RResultPtr.
1956    ///
1957    /// This overload will use the first two default columns as column names.
1958    /// See the description of the first Histo1D() overload for more details.
1959    template <typename V, typename W>
1960    RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1961    {
1962       return Histo1D<V, W>(model, "", "");
1963    }
1964 
1965    ////////////////////////////////////////////////////////////////////////////
1966    /// \brief Fill and return a two-dimensional histogram (*lazy action*).
1967    /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1968    /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1969    /// \param[in] model The returned histogram will be constructed using this as a model.
1970    /// \param[in] v1Name The name of the column that will fill the x axis.
1971    /// \param[in] v2Name The name of the column that will fill the y axis.
1972    /// \return the bidimensional histogram wrapped in a RResultPtr.
1973    ///
1974    /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1975    /// is filled with each one of the elements of the container. In case multiple columns of container type
1976    /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1977    /// possibly different lengths between events).
1978    /// This action is *lazy*: upon invocation of this method the calculation is
1979    /// booked but not executed. Also see RResultPtr.
1980    ///
1981    /// ### Example usage:
1982    /// ~~~{.cpp}
1983    /// // Deduce column types (this invocation needs jitting internally)
1984    /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1985    /// // Explicit column types
1986    /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1987    /// ~~~
1988    ///
1989    ///
1990    /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1991    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1992    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1993    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1994    RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1995    {
1996       std::shared_ptr<::TH2D> h(nullptr);
1997       {
1998          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1999          h = model.GetHistogram();
2000       }
2001       if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
2002          throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
2003       }
2004       const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2005       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2006                                   ? ColumnNames_t()
2007                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2008       return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr);
2009    }
2010 
2011    ////////////////////////////////////////////////////////////////////////////
2012    /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
2013    /// \tparam V1 The type of the column used to fill the x axis of the histogram.
2014    /// \tparam V2 The type of the column used to fill the y axis of the histogram.
2015    /// \tparam W The type of the column used for the weights of the histogram.
2016    /// \param[in] model The returned histogram will be constructed using this as a model.
2017    /// \param[in] v1Name The name of the column that will fill the x axis.
2018    /// \param[in] v2Name The name of the column that will fill the y axis.
2019    /// \param[in] wName The name of the column that will provide the weights.
2020    /// \return the bidimensional histogram wrapped in a RResultPtr.
2021    ///
2022    /// This action is *lazy*: upon invocation of this method the calculation is
2023    /// booked but not executed. Also see RResultPtr.
2024    ///
2025    /// ### Example usage:
2026    /// ~~~{.cpp}
2027    /// // Deduce column types (this invocation needs jitting internally)
2028    /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
2029    /// // Explicit column types
2030    /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
2031    /// ~~~
2032    ///
2033    /// See the documentation of the first Histo2D() overload for more details.
2034    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2035              typename W = RDFDetail::RInferredType>
2036    RResultPtr<::TH2D>
2037    Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2038    {
2039       std::shared_ptr<::TH2D> h(nullptr);
2040       {
2041          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2042          h = model.GetHistogram();
2043       }
2044       if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
2045          throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
2046       }
2047       const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2048       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2049                                   ? ColumnNames_t()
2050                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2051       return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2052    }
2053 
2054    template <typename V1, typename V2, typename W>
2055    RResultPtr<::TH2D> Histo2D(const TH2DModel &model)
2056    {
2057       return Histo2D<V1, V2, W>(model, "", "", "");
2058    }
2059 
2060    ////////////////////////////////////////////////////////////////////////////
2061    /// \brief Fill and return a three-dimensional histogram (*lazy action*).
2062    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2063    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2064    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2065    /// \param[in] model The returned histogram will be constructed using this as a model.
2066    /// \param[in] v1Name The name of the column that will fill the x axis.
2067    /// \param[in] v2Name The name of the column that will fill the y axis.
2068    /// \param[in] v3Name The name of the column that will fill the z axis.
2069    /// \return the tridimensional histogram wrapped in a RResultPtr.
2070    ///
2071    /// This action is *lazy*: upon invocation of this method the calculation is
2072    /// booked but not executed. Also see RResultPtr.
2073    ///
2074    /// ### Example usage:
2075    /// ~~~{.cpp}
2076    /// // Deduce column types (this invocation needs jitting internally)
2077    /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2078    ///                             "myValueX", "myValueY", "myValueZ");
2079    /// // Explicit column types
2080    /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2081    ///                                                    "myValueX", "myValueY", "myValueZ");
2082    /// ~~~
2083    ///
2084    /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
2085    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2086    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2087    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2088              typename V3 = RDFDetail::RInferredType>
2089    RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
2090                               std::string_view v3Name = "")
2091    {
2092       std::shared_ptr<::TH3D> h(nullptr);
2093       {
2094          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2095          h = model.GetHistogram();
2096       }
2097       if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2098          throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2099       }
2100       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2101       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2102                                   ? ColumnNames_t()
2103                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2104       return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2105    }
2106 
2107    ////////////////////////////////////////////////////////////////////////////
2108    /// \brief Fill and return a three-dimensional histogram (*lazy action*).
2109    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2110    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2111    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2112    /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2113    /// \param[in] model The returned histogram will be constructed using this as a model.
2114    /// \param[in] v1Name The name of the column that will fill the x axis.
2115    /// \param[in] v2Name The name of the column that will fill the y axis.
2116    /// \param[in] v3Name The name of the column that will fill the z axis.
2117    /// \param[in] wName The name of the column that will provide the weights.
2118    /// \return the tridimensional histogram wrapped in a RResultPtr.
2119    ///
2120    /// This action is *lazy*: upon invocation of this method the calculation is
2121    /// booked but not executed. Also see RResultPtr.
2122    ///
2123    /// ### Example usage:
2124    /// ~~~{.cpp}
2125    /// // Deduce column types (this invocation needs jitting internally)
2126    /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2127    ///                             "myValueX", "myValueY", "myValueZ", "myWeight");
2128    /// // Explicit column types
2129    /// using d_t = double;
2130    /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2131    ///                                                    "myValueX", "myValueY", "myValueZ", "myWeight");
2132    /// ~~~
2133    ///
2134    ///
2135    /// See the documentation of the first Histo2D() overload for more details.
2136    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2137              typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2138    RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
2139                               std::string_view v3Name, std::string_view wName)
2140    {
2141       std::shared_ptr<::TH3D> h(nullptr);
2142       {
2143          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2144          h = model.GetHistogram();
2145       }
2146       if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2147          throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2148       }
2149       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2150       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2151                                   ? ColumnNames_t()
2152                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2153       return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2154    }
2155 
2156    template <typename V1, typename V2, typename V3, typename W>
2157    RResultPtr<::TH3D> Histo3D(const TH3DModel &model)
2158    {
2159       return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
2160    }
2161 
2162    ////////////////////////////////////////////////////////////////////////////
2163    /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2164    /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
2165    /// present.
2166    /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
2167    /// object.
2168    /// \param[in] model The returned histogram will be constructed using this as a model.
2169    /// \param[in] columnList
2170    /// A list containing the names of the columns that will be passed when calling `Fill`.
2171    ///  (N columns for unweighted filling, or N+1 columns for weighted filling)
2172    /// \return the N-dimensional histogram wrapped in a RResultPtr.
2173    ///
2174    /// This action is *lazy*: upon invocation of this method the calculation is
2175    /// booked but not executed. See RResultPtr documentation.
2176    ///
2177    /// ### Example usage:
2178    /// ~~~{.cpp}
2179    /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
2180    ///                                                {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2181    ///                                               {"col0", "col1", "col2", "col3"});
2182    /// ~~~
2183    ///
2184    template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
2185    RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
2186    {
2187       std::shared_ptr<::THnD> h(nullptr);
2188       {
2189          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2190          h = model.GetHistogram();
2191 
2192          if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2193             h->Sumw2();
2194          } else if (int(columnList.size()) != h->GetNdimensions()) {
2195             throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2196          }
2197       }
2198       return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h,
2199                                                                                           fProxiedPtr);
2200    }
2201 
2202    ////////////////////////////////////////////////////////////////////////////
2203    /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2204    /// \param[in] model The returned histogram will be constructed using this as a model.
2205    /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2206    ///  (N columns for unweighted filling, or N+1 columns for weighted filling)
2207    /// \return the N-dimensional histogram wrapped in a RResultPtr.
2208    ///
2209    /// This action is *lazy*: upon invocation of this method the calculation is
2210    /// booked but not executed. Also see RResultPtr.
2211    ///
2212    /// ### Example usage:
2213    /// ~~~{.cpp}
2214    /// auto myFilledObj = myDf.HistoND({"name","title", 4,
2215    ///                                                {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2216    ///                                               {"col0", "col1", "col2", "col3"});
2217    /// ~~~
2218    ///
2219    RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
2220    {
2221       std::shared_ptr<::THnD> h(nullptr);
2222       {
2223          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2224          h = model.GetHistogram();
2225 
2226          if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2227             h->Sumw2();
2228          } else if (int(columnList.size()) != h->GetNdimensions()) {
2229             throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2230          }
2231       }
2232       return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr,
2233                                                                                       columnList.size());
2234    }
2235 
2236    ////////////////////////////////////////////////////////////////////////////
2237    /// \brief Fill and return a TGraph object (*lazy action*).
2238    /// \tparam X The type of the column used to fill the x axis.
2239    /// \tparam Y The type of the column used to fill the y axis.
2240    /// \param[in] x The name of the column that will fill the x axis.
2241    /// \param[in] y The name of the column that will fill the y axis.
2242    /// \return the TGraph wrapped in a RResultPtr.
2243    ///
2244    /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
2245    /// is filled with each one of the elements of the container.
2246    /// If Multithreading is enabled, the order in which points are inserted is undefined.
2247    /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing.
2248    /// A name and a title to the TGraph is given based on the input column names.
2249    ///
2250    /// This action is *lazy*: upon invocation of this method the calculation is
2251    /// booked but not executed. Also see RResultPtr.
2252    ///
2253    /// ### Example usage:
2254    /// ~~~{.cpp}
2255    /// // Deduce column types (this invocation needs jitting internally)
2256    /// auto myGraph1 = myDf.Graph("xValues", "yValues");
2257    /// // Explicit column types
2258    /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
2259    /// ~~~
2260    ///
2261    /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
2262    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2263    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2264    template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
2265    RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "")
2266    {
2267       auto graph = std::make_shared<::TGraph>();
2268       const std::vector<std::string_view> columnViews = {x, y};
2269       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2270                                   ? ColumnNames_t()
2271                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2272 
2273       const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
2274 
2275       // We build a default name and title based on the input columns
2276       const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2277       const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2278       graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2279       graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2280       graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2281 
2282       return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr);
2283    }
2284 
2285    ////////////////////////////////////////////////////////////////////////////
2286    /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
2287    /// \param[in] x The name of the column that will fill the x axis.
2288    /// \param[in] y The name of the column that will fill the y axis.
2289    /// \param[in] exl The name of the column of X low errors
2290    /// \param[in] exh The name of the column of X high errors
2291    /// \param[in] eyl The name of the column of Y low errors
2292    /// \param[in] eyh The name of the column of Y high errors
2293    /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
2294    ///
2295    /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
2296    /// is filled with each one of the elements of the container.
2297    /// If Multithreading is enabled, the order in which points are inserted is undefined.
2298    ///
2299    /// This action is *lazy*: upon invocation of this method the calculation is
2300    /// booked but not executed. Also see RResultPtr.
2301    ///
2302    /// ### Example usage:
2303    /// ~~~{.cpp}
2304    /// // Deduce column types (this invocation needs jitting internally)
2305    /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2306    /// // Explicit column types
2307    /// using f = float
2308    /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2309    /// ~~~
2310    ///
2311    /// `GraphAssymErrors` should also be used for the cases in which values associated only with 
2312    /// one of the axes have associated errors. For example, only `ey` exist and `ex` are equal to zero. 
2313    /// In such cases, user should do the following: 
2314    /// ~~~{.cpp}
2315    /// // Create a column of zeros in RDataFrame
2316    /// auto rdf_withzeros = rdf.Define("zero", "0"); 
2317    /// // or alternatively: 
2318    /// auto rdf_withzeros = rdf.Define("zero", []() -> double { return 0.;});
2319    /// // Create the graph with y errors only
2320    /// auto rdf_errorsOnYOnly = rdf_withzeros.GraphAsymmErrors("xValues", "yValues", "zero", "zero", "eyl", "eyh");
2321    /// ~~~
2322    ///
2323    /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
2324    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2325    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2326    template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
2327              typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType,
2328              typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
2329    RResultPtr<::TGraphAsymmErrors>
2330    GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "",
2331                     std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
2332    {
2333       auto graph = std::make_shared<::TGraphAsymmErrors>();
2334       const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
2335       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2336                                   ? ColumnNames_t()
2337                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2338 
2339       const auto validatedColumns = GetValidatedColumnNames(6, userColumns);
2340 
2341       // We build a default name and title based on the input columns
2342       const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2343       const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2344       graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2345       graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2346       graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2347 
2348       return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph,
2349                                                                                                graph, fProxiedPtr);
2350    }
2351 
2352    ////////////////////////////////////////////////////////////////////////////
2353    /// \brief Fill and return a one-dimensional profile (*lazy action*).
2354    /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2355    /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2356    /// \param[in] model The model to be considered to build the new return value.
2357    /// \param[in] v1Name The name of the column that will fill the x axis.
2358    /// \param[in] v2Name The name of the column that will fill the y axis.
2359    /// \return the monodimensional profile wrapped in a RResultPtr.
2360    ///
2361    /// This action is *lazy*: upon invocation of this method the calculation is
2362    /// booked but not executed. Also see RResultPtr.
2363    ///
2364    /// ### Example usage:
2365    /// ~~~{.cpp}
2366    /// // Deduce column types (this invocation needs jitting internally)
2367    /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2368    /// // Explicit column types
2369    /// auto myProf2 = myDf.Graph<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2370    /// ~~~
2371    ///
2372    /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2373    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2374    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2375    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2376    RResultPtr<::TProfile>
2377    Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2378    {
2379       std::shared_ptr<::TProfile> h(nullptr);
2380       {
2381          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2382          h = model.GetProfile();
2383       }
2384 
2385       if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2386          throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2387       }
2388       const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2389       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2390                                   ? ColumnNames_t()
2391                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2392       return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr);
2393    }
2394 
2395    ////////////////////////////////////////////////////////////////////////////
2396    /// \brief Fill and return a one-dimensional profile (*lazy action*).
2397    /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2398    /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2399    /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
2400    /// \param[in] model The model to be considered to build the new return value.
2401    /// \param[in] v1Name The name of the column that will fill the x axis.
2402    /// \param[in] v2Name The name of the column that will fill the y axis.
2403    /// \param[in] wName The name of the column that will provide the weights.
2404    /// \return the monodimensional profile wrapped in a RResultPtr.
2405    ///
2406    /// This action is *lazy*: upon invocation of this method the calculation is
2407    /// booked but not executed. Also see RResultPtr.
2408    ///
2409    /// ### Example usage:
2410    /// ~~~{.cpp}
2411    /// // Deduce column types (this invocation needs jitting internally)
2412    /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2413    /// // Explicit column types
2414    /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2415    ///                                                   "xValues", "yValues", "weight");
2416    /// ~~~
2417    ///
2418    /// See the first Profile1D() overload for more details.
2419    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2420              typename W = RDFDetail::RInferredType>
2421    RResultPtr<::TProfile>
2422    Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2423    {
2424       std::shared_ptr<::TProfile> h(nullptr);
2425       {
2426          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2427          h = model.GetProfile();
2428       }
2429 
2430       if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2431          throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2432       }
2433       const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2434       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2435                                   ? ColumnNames_t()
2436                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2437       return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2438    }
2439 
2440    ////////////////////////////////////////////////////////////////////////////
2441    /// \brief Fill and return a one-dimensional profile (*lazy action*).
2442    /// See the first Profile1D() overload for more details.
2443    template <typename V1, typename V2, typename W>
2444    RResultPtr<::TProfile> Profile1D(const TProfile1DModel &model)
2445    {
2446       return Profile1D<V1, V2, W>(model, "", "", "");
2447    }
2448 
2449    ////////////////////////////////////////////////////////////////////////////
2450    /// \brief Fill and return a two-dimensional profile (*lazy action*).
2451    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2452    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2453    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2454    /// \param[in] model The returned profile will be constructed using this as a model.
2455    /// \param[in] v1Name The name of the column that will fill the x axis.
2456    /// \param[in] v2Name The name of the column that will fill the y axis.
2457    /// \param[in] v3Name The name of the column that will fill the z axis.
2458    /// \return the bidimensional profile wrapped in a RResultPtr.
2459    ///
2460    /// This action is *lazy*: upon invocation of this method the calculation is
2461    /// booked but not executed. Also see RResultPtr.
2462    ///
2463    /// ### Example usage:
2464    /// ~~~{.cpp}
2465    /// // Deduce column types (this invocation needs jitting internally)
2466    /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2467    ///                               "xValues", "yValues", "zValues");
2468    /// // Explicit column types
2469    /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2470    ///                                                   "xValues", "yValues", "zValues");
2471    /// ~~~
2472    ///
2473    /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2474    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2475    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2476    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2477              typename V3 = RDFDetail::RInferredType>
2478    RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
2479                                       std::string_view v2Name = "", std::string_view v3Name = "")
2480    {
2481       std::shared_ptr<::TProfile2D> h(nullptr);
2482       {
2483          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2484          h = model.GetProfile();
2485       }
2486 
2487       if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2488          throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2489       }
2490       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2491       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2492                                   ? ColumnNames_t()
2493                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2494       return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2495    }
2496 
2497    ////////////////////////////////////////////////////////////////////////////
2498    /// \brief Fill and return a two-dimensional profile (*lazy action*).
2499    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2500    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2501    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2502    /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2503    /// \param[in] model The returned histogram will be constructed using this as a model.
2504    /// \param[in] v1Name The name of the column that will fill the x axis.
2505    /// \param[in] v2Name The name of the column that will fill the y axis.
2506    /// \param[in] v3Name The name of the column that will fill the z axis.
2507    /// \param[in] wName The name of the column that will provide the weights.
2508    /// \return the bidimensional profile wrapped in a RResultPtr.
2509    ///
2510    /// This action is *lazy*: upon invocation of this method the calculation is
2511    /// booked but not executed. Also see RResultPtr.
2512    ///
2513    /// ### Example usage:
2514    /// ~~~{.cpp}
2515    /// // Deduce column types (this invocation needs jitting internally)
2516    /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2517    ///                               "xValues", "yValues", "zValues", "weight");
2518    /// // Explicit column types
2519    /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2520    ///                                                        "xValues", "yValues", "zValues", "weight");
2521    /// ~~~
2522    ///
2523    /// See the first Profile2D() overload for more details.
2524    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2525              typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2526    RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
2527                                       std::string_view v3Name, std::string_view wName)
2528    {
2529       std::shared_ptr<::TProfile2D> h(nullptr);
2530       {
2531          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2532          h = model.GetProfile();
2533       }
2534 
2535       if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2536          throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2537       }
2538       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2539       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2540                                   ? ColumnNames_t()
2541                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2542       return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2543    }
2544 
2545    /// \brief Fill and return a two-dimensional profile (*lazy action*).
2546    /// See the first Profile2D() overload for more details.
2547    template <typename V1, typename V2, typename V3, typename W>
2548    RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model)
2549    {
2550       return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2551    }
2552 
2553    ////////////////////////////////////////////////////////////////////////////
2554    /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2555    ///
2556    /// Type T must provide at least:
2557    /// - a copy-constructor
2558    /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2559    ///   (these types can also be passed as template parameters to this method)
2560    /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2561    ///   objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that
2562    ///   if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2563    ///   the TCollection*).
2564    ///
2565    /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2566    /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2567    /// \tparam T The type of the object to fill. Automatically deduced.
2568    /// \param[in] model The model to be considered to build the new return value.
2569    /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2570    /// \return the filled object wrapped in a RResultPtr.
2571    ///
2572    /// The user gives up ownership of the model object.
2573    /// The list of column names to be used for filling must always be specified.
2574    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2575    /// Also see RResultPtr.
2576    ///
2577    /// ### Example usage:
2578    /// ~~~{.cpp}
2579    /// MyClass obj;
2580    /// // Deduce column types (this invocation needs jitting internally, and in this case
2581    /// // MyClass needs to be known to the interpreter)
2582    /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2583    /// // explicit column types
2584    /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2585    /// ~~~
2586    ///
2587    template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2588    RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2589    {
2590       auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
2591       if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2592          throw std::runtime_error("The absence of axes limits is not supported yet.");
2593       }
2594       return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr,
2595                                                                                        columnList.size());
2596    }
2597 
2598    ////////////////////////////////////////////////////////////////////////////
2599    /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2600    ///
2601    /// \tparam V The type of the value column
2602    /// \param[in] value The name of the column with the values to fill the statistics with.
2603    /// \return the filled TStatistic object wrapped in a RResultPtr.
2604    ///
2605    /// ### Example usage:
2606    /// ~~~{.cpp}
2607    /// // Deduce column type (this invocation needs jitting internally)
2608    /// auto stats0 = myDf.Stats("values");
2609    /// // Explicit column type
2610    /// auto stats1 = myDf.Stats<float>("values");
2611    /// ~~~
2612    ///
2613    template <typename V = RDFDetail::RInferredType>
2614    RResultPtr<TStatistic> Stats(std::string_view value = "")
2615    {
2616       ColumnNames_t columns;
2617       if (!value.empty()) {
2618          columns.emplace_back(std::string(value));
2619       }
2620       const auto validColumnNames = GetValidatedColumnNames(1, columns);
2621       if (std::is_same<V, RDFDetail::RInferredType>::value) {
2622          return Fill(TStatistic(), validColumnNames);
2623       } else {
2624          return Fill<V>(TStatistic(), validColumnNames);
2625       }
2626    }
2627 
2628    ////////////////////////////////////////////////////////////////////////////
2629    /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2630    ///
2631    /// \tparam V The type of the value column
2632    /// \tparam W The type of the weight column
2633    /// \param[in] value The name of the column with the values to fill the statistics with.
2634    /// \param[in] weight The name of the column with the weights to fill the statistics with.
2635    /// \return the filled TStatistic object wrapped in a RResultPtr.
2636    ///
2637    /// ### Example usage:
2638    /// ~~~{.cpp}
2639    /// // Deduce column types (this invocation needs jitting internally)
2640    /// auto stats0 = myDf.Stats("values", "weights");
2641    /// // Explicit column types
2642    /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2643    /// ~~~
2644    ///
2645    template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2646    RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
2647    {
2648       ColumnNames_t columns{std::string(value), std::string(weight)};
2649       constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2650       constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2651       const auto validColumnNames = GetValidatedColumnNames(2, columns);
2652       // We have 3 cases:
2653       // 1. Both types are inferred: we use Fill and let the jit kick in.
2654       // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2655       // 3. Both types are explicit: we invoke the fully compiled Fill method.
2656       if (vIsInferred && wIsInferred) {
2657          return Fill(TStatistic(), validColumnNames);
2658       } else if (vIsInferred != wIsInferred) {
2659          std::string error("The ");
2660          error += vIsInferred ? "value " : "weight ";
2661          error += "column type is explicit, while the ";
2662          error += vIsInferred ? "weight " : "value ";
2663          error += " is specified to be inferred. This case is not supported: please specify both types or none.";
2664          throw std::runtime_error(error);
2665       } else {
2666          return Fill<V, W>(TStatistic(), validColumnNames);
2667       }
2668    }
2669 
2670    ////////////////////////////////////////////////////////////////////////////
2671    /// \brief Return the minimum of processed column values (*lazy action*).
2672    /// \tparam T The type of the branch/column.
2673    /// \param[in] columnName The name of the branch/column to be treated.
2674    /// \return the minimum value of the selected column wrapped in a RResultPtr.
2675    ///
2676    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2677    /// template specialization of this method.
2678    /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2679    ///
2680    /// This action is *lazy*: upon invocation of this method the calculation is
2681    /// booked but not executed. Also see RResultPtr.
2682    ///
2683    /// ### Example usage:
2684    /// ~~~{.cpp}
2685    /// // Deduce column type (this invocation needs jitting internally)
2686    /// auto minVal0 = myDf.Min("values");
2687    /// // Explicit column type
2688    /// auto minVal1 = myDf.Min<double>("values");
2689    /// ~~~
2690    ///
2691    template <typename T = RDFDetail::RInferredType>
2692    RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "")
2693    {
2694       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2695       using RetType_t = RDFDetail::MinReturnType_t<T>;
2696       auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2697       return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr);
2698    }
2699 
2700    ////////////////////////////////////////////////////////////////////////////
2701    /// \brief Return the maximum of processed column values (*lazy action*).
2702    /// \tparam T The type of the branch/column.
2703    /// \param[in] columnName The name of the branch/column to be treated.
2704    /// \return the maximum value of the selected column wrapped in a RResultPtr.
2705    ///
2706    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2707    /// template specialization of this method.
2708    /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2709    ///
2710    /// This action is *lazy*: upon invocation of this method the calculation is
2711    /// booked but not executed. Also see RResultPtr.
2712    ///
2713    /// ### Example usage:
2714    /// ~~~{.cpp}
2715    /// // Deduce column type (this invocation needs jitting internally)
2716    /// auto maxVal0 = myDf.Max("values");
2717    /// // Explicit column type
2718    /// auto maxVal1 = myDf.Max<double>("values");
2719    /// ~~~
2720    ///
2721    template <typename T = RDFDetail::RInferredType>
2722    RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "")
2723    {
2724       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2725       using RetType_t = RDFDetail::MaxReturnType_t<T>;
2726       auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2727       return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr);
2728    }
2729 
2730    ////////////////////////////////////////////////////////////////////////////
2731    /// \brief Return the mean of processed column values (*lazy action*).
2732    /// \tparam T The type of the branch/column.
2733    /// \param[in] columnName The name of the branch/column to be treated.
2734    /// \return the mean value of the selected column wrapped in a RResultPtr.
2735    ///
2736    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2737    /// template specialization of this method.
2738    ///
2739    /// This action is *lazy*: upon invocation of this method the calculation is
2740    /// booked but not executed. Also see RResultPtr.
2741    ///
2742    /// ### Example usage:
2743    /// ~~~{.cpp}
2744    /// // Deduce column type (this invocation needs jitting internally)
2745    /// auto meanVal0 = myDf.Mean("values");
2746    /// // Explicit column type
2747    /// auto meanVal1 = myDf.Mean<double>("values");
2748    /// ~~~
2749    ///
2750    template <typename T = RDFDetail::RInferredType>
2751    RResultPtr<double> Mean(std::string_view columnName = "")
2752    {
2753       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2754       auto meanV = std::make_shared<double>(0);
2755       return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr);
2756    }
2757 
2758    ////////////////////////////////////////////////////////////////////////////
2759    /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2760    /// \tparam T The type of the branch/column.
2761    /// \param[in] columnName The name of the branch/column to be treated.
2762    /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2763    ///
2764    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2765    /// template specialization of this method.
2766    ///
2767    /// This action is *lazy*: upon invocation of this method the calculation is
2768    /// booked but not executed. Also see RResultPtr.
2769    ///
2770    /// ### Example usage:
2771    /// ~~~{.cpp}
2772    /// // Deduce column type (this invocation needs jitting internally)
2773    /// auto stdDev0 = myDf.StdDev("values");
2774    /// // Explicit column type
2775    /// auto stdDev1 = myDf.StdDev<double>("values");
2776    /// ~~~
2777    ///
2778    template <typename T = RDFDetail::RInferredType>
2779    RResultPtr<double> StdDev(std::string_view columnName = "")
2780    {
2781       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2782       auto stdDeviationV = std::make_shared<double>(0);
2783       return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr);
2784    }
2785 
2786    // clang-format off
2787    ////////////////////////////////////////////////////////////////////////////
2788    /// \brief Return the sum of processed column values (*lazy action*).
2789    /// \tparam T The type of the branch/column.
2790    /// \param[in] columnName The name of the branch/column.
2791    /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2792    /// \return the sum of the selected column wrapped in a RResultPtr.
2793    ///
2794    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2795    /// template specialization of this method.
2796    /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2797    ///
2798    /// This action is *lazy*: upon invocation of this method the calculation is
2799    /// booked but not executed. Also see RResultPtr.
2800    ///
2801    /// ### Example usage:
2802    /// ~~~{.cpp}
2803    /// // Deduce column type (this invocation needs jitting internally)
2804    /// auto sum0 = myDf.Sum("values");
2805    /// // Explicit column type
2806    /// auto sum1 = myDf.Sum<double>("values");
2807    /// ~~~
2808    ///
2809    template <typename T = RDFDetail::RInferredType>
2810    RResultPtr<RDFDetail::SumReturnType_t<T>>
2811    Sum(std::string_view columnName = "",
2812        const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2813    {
2814       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2815       auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2816       return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr);
2817    }
2818    // clang-format on
2819 
2820    ////////////////////////////////////////////////////////////////////////////
2821    /// \brief Gather filtering statistics.
2822    /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2823    ///
2824    /// Calling `Report` on the main `RDataFrame` object gathers stats for
2825    /// all named filters in the call graph. Calling this method on a
2826    /// stored chain state (i.e. a graph node different from the first) gathers
2827    /// the stats for all named filters in the chain section between the original
2828    /// `RDataFrame` and that node (included). Stats are gathered in the same
2829    /// order as the named filters have been added to the graph.
2830    /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
2831    /// effects cuts had.
2832    ///
2833    /// This action is *lazy*: upon invocation of
2834    /// this method the calculation is booked but not executed. See RResultPtr
2835    /// documentation.
2836    ///
2837    /// ### Example usage:
2838    /// ~~~{.cpp}
2839    /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
2840    /// auto cutReport = filtered.Report();
2841    /// cutReport->Print();
2842    /// ~~~
2843    ///
2844    RResultPtr<RCutFlowReport> Report()
2845    {
2846       bool returnEmptyReport = false;
2847       // if this is a RInterface<RLoopManager> on which `Define` has been called, users
2848       // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
2849       // certainly does not contain named filters.
2850       // The number 4 takes into account the implicit columns for entry and slot number
2851       // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
2852       if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GenerateColumnNames().size() > 4)
2853          returnEmptyReport = true;
2854 
2855       auto rep = std::make_shared<RCutFlowReport>();
2856       using Helper_t = RDFInternal::ReportHelper<Proxied>;
2857       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2858 
2859       auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}),
2860                                                fProxiedPtr, RDFInternal::RColumnRegister(fColRegister));
2861 
2862       return MakeResultPtr(rep, *fLoopManager, std::move(action));
2863    }
2864 
2865    /// \brief Returns the names of the filters created.
2866    /// \return the container of filters names.
2867    ///
2868    /// If called on a root node, all the filters in the computation graph will
2869    /// be printed. For any other node, only the filters upstream of that node.
2870    /// Filters without a name are printed as "Unnamed Filter"
2871    /// This is not an action nor a transformation, just a query to the RDataFrame object.
2872    ///
2873    /// ### Example usage:
2874    /// ~~~{.cpp}
2875    /// auto filtNames = d.GetFilterNames();
2876    /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
2877    /// ~~~
2878    ///
2879    std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
2880 
2881    // clang-format off
2882    ////////////////////////////////////////////////////////////////////////////
2883    /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2884    /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2885    /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2886    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2887    /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2888    /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2889    /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2890    /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
2891    /// \return the result of the aggregation wrapped in a RResultPtr.
2892    ///
2893    /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2894    /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2895    /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2896    /// the value of the column columnName.
2897    /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2898    /// Otherwise the signature of aggregator must be `void(U&,T)`.
2899    ///
2900    /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2901    /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2902    /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2903    ///
2904    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
2905    ///
2906    /// Example usage:
2907    /// ~~~{.cpp}
2908    /// auto aggregator = [](double acc, double x) { return acc * x; };
2909    /// ROOT::EnableImplicitMT();
2910    /// // If multithread is enabled, the aggregator function will be called by more threads
2911    /// // and will produce a vector of partial accumulators.
2912    /// // The merger function performs the final aggregation of these partial results.
2913    /// auto merger = [](std::vector<double> &accumulators) {
2914    ///    for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2915    ///       accumulators[0] *= accumulators[i];
2916    ///    }
2917    /// };
2918    ///
2919    /// // The accumulator is initialized at this value by every thread.
2920    /// double initValue = 1.;
2921    ///
2922    /// // Multiplies all elements of the column "x"
2923    /// auto result = d.Aggregate(aggregator, merger, "x", initValue);
2924    /// ~~~
2925    // clang-format on
2926    template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2927              typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2928              typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2929              typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2930              typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2931    RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2932    {
2933       RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2934       const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2935 
2936       const auto validColumnNames = GetValidatedColumnNames(1, columns);
2937       CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
2938 
2939       auto accObjPtr = std::make_shared<U>(aggIdentity);
2940       using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2941       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2942       auto action = std::make_unique<Action_t>(
2943          Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2944          fProxiedPtr, fColRegister);
2945       return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2946    }
2947 
2948    // clang-format off
2949    ////////////////////////////////////////////////////////////////////////////
2950    /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
2951    /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
2952    /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2953    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2954    /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
2955    /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2956    /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2957    /// \return the result of the aggregation wrapped in a RResultPtr.
2958    ///
2959    /// See previous Aggregate overload for more information.
2960    // clang-format on
2961    template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2962              typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2963              typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2964              typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2965    RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2966    {
2967       static_assert(
2968          std::is_default_constructible<U>::value,
2969          "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2970       return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2971    }
2972 
2973    // clang-format off
2974    ////////////////////////////////////////////////////////////////////////////
2975    /// \brief Book execution of a custom action using a user-defined helper object.
2976    /// \tparam FirstColumn The type of the first column used by this action.  Inferred together with OtherColumns if not present.
2977    /// \tparam OtherColumns A list of the types of the other columns used by this action
2978    /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2979    /// \param[in] helper The Action Helper to be scheduled.
2980    /// \param[in] columns The names of the columns on which the helper acts.
2981    /// \return the result of the helper wrapped in a RResultPtr.
2982    ///
2983    /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2984    /// Helper object provided by the caller. The required interface for the helper is described below (more
2985    /// methods than the ones required can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2986    ///
2987    /// ### Mandatory interface
2988    ///
2989    /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>`
2990    /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible.
2991    /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged.
2992    /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type
2993    ///   Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
2994    ///   _before_ Initialize(), because the RResultPtr is constructed before the event loop is started.
2995    /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations.
2996    ///   It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
2997    ///   or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
2998    /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event
2999    ///   loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader
3000    ///   that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations;
3001    ///   it should not usually serve any purpose for the Helper. This method is often a no-op for simple helpers.
3002    /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method
3003    ///   during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
3004    ///   this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
3005    ///   the requested columns for the particular entry being processed.
3006    /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
3007    /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in
3008    ///    diagnostics, SaveGraph(), etc.
3009    ///
3010    /// ### Optional methods
3011    ///
3012    /// If these methods are implemented they enable extra functionality as per the description below.
3013    ///
3014    /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'.
3015    ///   Different threads might call this method concurrently, but will do so with different 'slot' numbers.
3016    ///   RDataFrame leverages this method to implement RResultPtr::OnPartialResult().
3017    /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the
3018    ///   appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
3019    ///   of every sample, as in DefinePerSample().
3020    /// * `Helper MakeNew(void *newResult, std::string_view variation = "nominal")`: if implemented, it enables varying
3021    ///   the action's result with VariationsFor(). It takes a type-erased new result that can be safely cast to a
3022    ///   `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should be used as the action's output result.
3023    ///   The function optionally takes the name of the current variation which could be useful in customizing its behaviour.
3024    ///
3025    /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled
3026    /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter.
3027    ///
3028    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
3029    ///
3030    /// ### Examples
3031    /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.
3032    ///
3033    /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
3034    ///
3035    // clang-format on
3036    template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
3037    RResultPtr<typename std::decay_t<Helper>::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
3038    {
3039       using HelperT = std::decay_t<Helper>;
3040       // TODO add more static sanity checks on Helper
3041       using AH = RDFDetail::RActionImpl<HelperT>;
3042       static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value,
3043                     "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
3044 
3045       auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
3046       auto resPtr = hPtr->GetResultPtr();
3047 
3048       if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) {
3049          return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
3050       } else {
3051          return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
3052                                                                                           fProxiedPtr, columns.size());
3053       }
3054    }
3055 
3056    ////////////////////////////////////////////////////////////////////////////
3057    /// \brief Provides a representation of the columns in the dataset.
3058    /// \tparam ColumnTypes variadic list of branch/column types.
3059    /// \param[in] columnList Names of the columns to be displayed.
3060    /// \param[in] nRows Number of events for each column to be displayed.
3061    /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3062    /// \return the `RDisplay` instance wrapped in a RResultPtr.
3063    ///
3064    /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
3065    /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
3066    /// return a complete version through `RDisplay::AsString()`.
3067    ///
3068    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
3069    /// RResultPtr.
3070    ///
3071    /// Example usage:
3072    /// ~~~{.cpp}
3073    /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
3074    /// auto d1 = rdf.Display("");
3075    /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
3076    /// auto d2 = d.Display({"x", "y"}, 128);
3077    /// // Printing the short representations, the event loop will run
3078    /// d1->Print();
3079    /// d2->Print();
3080    /// ~~~
3081    template <typename... ColumnTypes>
3082    RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3083    {
3084       CheckIMTDisabled("Display");
3085       auto newCols = columnList;
3086       newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3087       auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3088       using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3089       // Need to add ULong64_t type corresponding to the first column rdfentry_
3090       return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(
3091          std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr);
3092    }
3093 
3094    ////////////////////////////////////////////////////////////////////////////
3095    /// \brief Provides a representation of the columns in the dataset.
3096    /// \param[in] columnList Names of the columns to be displayed.
3097    /// \param[in] nRows Number of events for each column to be displayed.
3098    /// \param[in] nMaxCollectionElements  Maximum number of collection elements to display per row.
3099    /// \return the `RDisplay` instance wrapped in a RResultPtr.
3100    ///
3101    /// This overload automatically infers the column types.
3102    /// See the previous overloads for further details.
3103    ///
3104    /// Invoked when no types are specified to Display
3105    RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3106    {
3107       CheckIMTDisabled("Display");
3108       auto newCols = columnList;
3109       newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3110       auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3111       using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3112       return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(
3113          std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr,
3114          columnList.size() + 1);
3115    }
3116 
3117    ////////////////////////////////////////////////////////////////////////////
3118    /// \brief Provides a representation of the columns in the dataset.
3119    /// \param[in] columnNameRegexp A regular expression to select the columns.
3120    /// \param[in] nRows Number of events for each column to be displayed.
3121    /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3122    /// \return the `RDisplay` instance wrapped in a RResultPtr.
3123    ///
3124    /// The existing columns are matched against the regular expression. If the string provided
3125    /// is empty, all columns are selected.
3126    /// See the previous overloads for further details.
3127    RResultPtr<RDisplay>
3128    Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10)
3129    {
3130       const auto columnNames = GetColumnNames();
3131       const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
3132       return Display(selectedColumns, nRows, nMaxCollectionElements);
3133    }
3134 
3135    ////////////////////////////////////////////////////////////////////////////
3136    /// \brief Provides a representation of the columns in the dataset.
3137    /// \param[in] columnList Names of the columns to be displayed.
3138    /// \param[in] nRows Number of events for each column to be displayed.
3139    /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3140    /// \return the `RDisplay` instance wrapped in a RResultPtr.
3141    ///
3142    /// See the previous overloads for further details.
3143    RResultPtr<RDisplay>
3144    Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3145    {
3146       ColumnNames_t selectedColumns(columnList);
3147       return Display(selectedColumns, nRows, nMaxCollectionElements);
3148    }
3149 
3150 private:
3151    template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
3152    std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>
3153    DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
3154    {
3155       if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
3156          RDFInternal::CheckValidCppVarName(name, where);
3157          RDFInternal::CheckForRedefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
3158                                            GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
3159       } else {
3160          RDFInternal::CheckForDefinition(where, name, fColRegister, fLoopManager->GetBranchNames(),
3161                                          GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
3162          RDFInternal::CheckForNoVariations(where, name, fColRegister);
3163       }
3164 
3165       using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
3166       using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
3167          std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type;
3168       using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
3169          std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type;
3170 
3171       constexpr auto nColumns = ColTypes_t::list_size;
3172 
3173       const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3174       CheckAndFillDSColumns(validColumnNames, ColTypes_t());
3175 
3176       // Declare return type to the interpreter, for future use by jitted actions
3177       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3178       if (retTypeName.empty()) {
3179          // The type is not known to the interpreter.
3180          // We must not error out here, but if/when this column is used in jitted code
3181          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3182          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3183       }
3184 
3185       using NewCol_t = RDFDetail::RDefine<F, DefineType>;
3186       auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
3187                                                   fColRegister, *fLoopManager);
3188 
3189       RDFInternal::RColumnRegister newCols(fColRegister);
3190       newCols.AddDefine(std::move(newColumn));
3191 
3192       RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
3193 
3194       return newInterface;
3195    }
3196 
3197    // This overload is chosen when the callable passed to Define or DefineSlot returns void.
3198    // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
3199    // this way compilation of `Define` has no way to continue after throwing the error.
3200    template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
3201              bool IsFStringConv = std::is_convertible<F, std::string>::value,
3202              bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
3203    std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
3204    DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
3205    {
3206       static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
3207                     "Error in `Define`: type returned by expression is not default-constructible");
3208       return *this; // never reached
3209    }
3210 
3211    template <typename... ColumnTypes>
3212    RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view fullTreeName, std::string_view filename,
3213                                                      const ColumnNames_t &columnList, const RSnapshotOptions &options)
3214    {
3215       const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3216 
3217       RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnListWithoutSizeColumns.size());
3218       // validCols has aliases resolved, while columnListWithoutSizeColumns still has aliases in it.
3219       const auto validCols = GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
3220       RDFInternal::CheckForDuplicateSnapshotColumns(validCols);
3221       CheckAndFillDSColumns(validCols, TTraits::TypeList<ColumnTypes...>());
3222 
3223       const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
3224       const auto &treename = parsedTreePath.fTreeName;
3225       const auto &dirname = parsedTreePath.fDirName;
3226 
3227       ::TDirectory::TContext ctxt;
3228 
3229       RResultPtr<RInterface<RLoopManager>> resPtr;
3230 
3231       if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) {
3232          if (RDFInternal::GetDataSourceLabel(*this) == "TTreeDS") {
3233             throw std::runtime_error("Snapshotting from TTree to RNTuple is not yet supported. The current recommended "
3234                                      "way to convert TTrees to RNTuple is through the RNTupleImporter.");
3235          }
3236 
3237          auto newRDF =
3238             std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(columnListWithoutSizeColumns));
3239 
3240          auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3241             std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options,
3242             newRDF->GetLoopManager(), GetLoopManager(), true /* fToRNTuple */});
3243 
3244          // The Snapshot helper will use validCols (with aliases resolved) as input columns, and
3245          // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column
3246          // names.
3247          resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs,
3248                                                                                   fProxiedPtr);
3249       } else {
3250          if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" &&
3251              options.fOutputFormat == ESnapshotOutputFormat::kDefault) {
3252             Warning("Snapshot",
3253                     "The default Snapshot output data format is TTree, but the input data format is RNTuple. If you "
3254                     "want to Snapshot to RNTuple or suppress this warning, set the appropriate fOutputFormat option in "
3255                     "RSnapshotOptions. Note that this current default behaviour might change in the future.");
3256          }
3257 
3258          // We create an RLoopManager without a data source. This needs to be initialised when the output TTree dataset
3259          // has actually been created and written to TFile, i.e. at the end of the Snapshot execution.
3260          auto newRDF =
3261             std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(columnListWithoutSizeColumns));
3262 
3263          auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
3264             std::string(filename), std::string(dirname), std::string(treename), columnListWithoutSizeColumns, options,
3265             newRDF->GetLoopManager(), GetLoopManager(), false /* fToRNTuple */});
3266 
3267          // The Snapshot helper will use validCols (with aliases resolved) as input columns, and
3268          // columnListWithoutSizeColumns (still with aliases in it, passed through snapHelperArgs) as output column
3269          // names.
3270          resPtr = CreateAction<RDFInternal::ActionTags::Snapshot, ColumnTypes...>(validCols, newRDF, snapHelperArgs,
3271                                                                                   fProxiedPtr);
3272       }
3273 
3274       if (!options.fLazy)
3275          *resPtr;
3276       return resPtr;
3277    }
3278 
3279    ////////////////////////////////////////////////////////////////////////////
3280    /// \brief Implementation of cache.
3281    template <typename... ColTypes, std::size_t... S>
3282    RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
3283    {
3284       const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3285 
3286       // Check at compile time that the columns types are copy constructible
3287       constexpr bool areCopyConstructible =
3288          RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
3289       static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
3290 
3291       RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
3292 
3293       auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3294       auto ds = std::make_unique<RLazyDS<ColTypes...>>(
3295          std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3296 
3297       RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
3298 
3299       return cachedRDF;
3300    }
3301 
3302    template <bool IsSingleColumn, typename F>
3303    RInterface<Proxied, DS_t>
3304    VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
3305             const std::vector<std::string> &variationTags, std::string_view variationName)
3306    {
3307       using F_t = std::decay_t<F>;
3308       using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
3309       using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
3310       constexpr auto nColumns = ColTypes_t::list_size;
3311 
3312       SanityChecksForVary<RetType>(colNames, variationTags, variationName);
3313 
3314       const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
3315       CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
3316 
3317       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3318       if (retTypeName.empty()) {
3319          // The type is not known to the interpreter, but we don't want to error out
3320          // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
3321          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3322          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3323       }
3324 
3325       auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>(
3326          colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
3327          validColumnNames);
3328 
3329       RDFInternal::RColumnRegister newCols(fColRegister);
3330       newCols.AddVariation(std::move(variation));
3331 
3332       RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
3333 
3334       return newInterface;
3335    }
3336 
3337    RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression,
3338                                             const std::vector<std::string> &variationTags,
3339                                             std::string_view variationName, bool isSingleColumn)
3340    {
3341       R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
3342       R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
3343       R__ASSERT(!variationName.empty() && "Must provide a variation name.");
3344 
3345       for (auto &colName : colNames) {
3346          RDFInternal::CheckValidCppVarName(colName, "Vary");
3347          RDFInternal::CheckForDefinition("Vary", colName, fColRegister, fLoopManager->GetBranchNames(),
3348                                          GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
3349       }
3350       RDFInternal::CheckValidCppVarName(variationName, "Vary");
3351 
3352       // when varying multiple columns, they must be different columns
3353       if (colNames.size() > 1) {
3354          std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
3355          if (uniqueCols.size() != colNames.size())
3356             throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
3357       }
3358 
3359       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
3360       auto jittedVariation = RDFInternal::BookVariationJit(
3361          colNames, variationName, variationTags, expression, *fLoopManager, GetDataSource(), fColRegister,
3362          fLoopManager->GetBranchNames(), upcastNodeOnHeap, isSingleColumn);
3363 
3364       RDFInternal::RColumnRegister newColRegister(fColRegister);
3365       newColRegister.AddVariation(std::move(jittedVariation));
3366 
3367       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister));
3368 
3369       return newInterface;
3370    }
3371 
3372    template <typename Helper, typename ActionResultType>
3373    auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
3374                                               const std::shared_ptr<Helper> &hPtr,
3375                                               TTraits::TypeList<RDFDetail::RInferredType>)
3376       -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
3377    {
3378       return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u);
3379    }
3380 
3381    template <typename Helper, typename ActionResultType, typename... Others>
3382    RResultPtr<ActionResultType>
3383    CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
3384                                          const std::shared_ptr<Helper>& /*hPtr*/,
3385                                          Others...)
3386    {
3387       throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
3388                                          "columns! The action helper type was ") +
3389                              typeid(Helper).name());
3390       return {};
3391    }
3392 
3393 protected:
3394    RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
3395               const RDFInternal::RColumnRegister &colRegister)
3396       : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied)
3397    {
3398    }
3399 
3400    const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
3401 };
3402 
3403 } // namespace RDF
3404 
3405 } // namespace ROOT
3406 
3407 #endif // ROOT_RDF_TINTERFACE