Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-27 11:36:24

0001 // Author: Enrico Guiraud, Danilo Piparo CERN  03/2017
0002 
0003 /*************************************************************************
0004  * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010 
0011 #ifndef ROOT_RDF_TINTERFACE
0012 #define ROOT_RDF_TINTERFACE
0013 
0014 #include "ROOT/RDataSource.hxx"
0015 #include "ROOT/RDF/ActionHelpers.hxx"
0016 #include "ROOT/RDF/HistoModels.hxx"
0017 #include "ROOT/RDF/InterfaceUtils.hxx"
0018 #include "ROOT/RDF/RColumnRegister.hxx"
0019 #include "ROOT/RDF/RDefaultValueFor.hxx"
0020 #include "ROOT/RDF/RDefine.hxx"
0021 #include "ROOT/RDF/RDefinePerSample.hxx"
0022 #include "ROOT/RDF/RFilter.hxx"
0023 #include "ROOT/RDF/RInterfaceBase.hxx"
0024 #include "ROOT/RDF/RVariation.hxx"
0025 #include "ROOT/RDF/RLazyDSImpl.hxx"
0026 #include "ROOT/RDF/RLoopManager.hxx"
0027 #include "ROOT/RDF/RRange.hxx"
0028 #include "ROOT/RDF/RFilterWithMissingValues.hxx"
0029 #include "ROOT/RDF/Utils.hxx"
0030 #include "ROOT/RDF/RDFDescription.hxx"
0031 #include "ROOT/RDF/RVariationsDescription.hxx"
0032 #include "ROOT/RResultPtr.hxx"
0033 #include "ROOT/RSnapshotOptions.hxx"
0034 #include <string_view>
0035 #include "ROOT/RVec.hxx"
0036 #include "ROOT/TypeTraits.hxx"
0037 #include "RtypesCore.h" // for ULong64_t
0038 #include "TDirectory.h"
0039 #include "TH1.h" // For Histo actions
0040 #include "TH2.h" // For Histo actions
0041 #include "TH3.h" // For Histo actions
0042 #include "THn.h"
0043 #include "THnSparse.h"
0044 #include "TProfile.h"
0045 #include "TProfile2D.h"
0046 #include "TStatistic.h"
0047 
0048 // TODO: Needed to show the info message in Snapshot, remove in 6.40
0049 #include "ROOT/RLogger.hxx"
0050 #include "ROOT/RVersion.hxx"
0051 #include "TEnv.h"
0052 #include <cstdlib>
0053 #include <cstring>
0054 
0055 #include <algorithm>
0056 #include <cstddef>
0057 #include <initializer_list>
0058 #include <iterator> // std::back_inserter
0059 #include <limits>
0060 #include <memory>
0061 #include <set>
0062 #include <sstream>
0063 #include <stdexcept>
0064 #include <string>
0065 #include <type_traits> // is_same, enable_if
0066 #include <typeinfo>
0067 #include <unordered_set>
0068 #include <utility> // std::index_sequence
0069 #include <vector>
0070 #include <any>
0071 
0072 class TGraph;
0073 
0074 // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
0075 namespace ROOT {
     // Declarations only: these functions and the RDataFrame class are declared here
     // without a body, so that code further down can name them.
0076 void DisableImplicitMT();
0077 bool IsImplicitMTEnabled();
0078 void EnableImplicitMT(UInt_t numthreads);
0079 class RDataFrame; // used as the parameter type of cling::printValue declared right below
0080 } // namespace ROOT
0081 namespace cling {
     // Forward declaration of the printValue overload for RDataFrame pointers, so that
     // RInterface can refer to it in a friend declaration.
0082 std::string printValue(ROOT::RDataFrame *tdf);
0083 }
0084 
0085 namespace ROOT {
0086 namespace RDF {
     // Short namespace aliases used by the declarations that follow in this header.
0087 namespace RDFDetail = ROOT::Detail::RDF;
0088 namespace RDFInternal = ROOT::Internal::RDF;
0089 namespace TTraits = ROOT::TypeTraits;
0090 
     // Forward declaration of the class template, needed because RNode below names
     // an RInterface instantiation before the template itself is defined.
0091 template <typename Proxied, typename DataSource>
0092 class RInterface;
0093 
     // RNode is RInterface instantiated on the RNodeBase node type with a `void` data source.
0094 using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
0095 } // namespace RDF
0096 
0097 namespace Internal {
0098 namespace RDF {
     // Declarations of internal helpers. The free functions below are named in qualified friend
     // declarations inside RInterface, which requires them to be declared beforehand.
     // NOTE(review): RInterface befriends RDFInternal::GraphDrawing::GraphCreatorHelper, which is a
     // different qualified name from the class declared on the next line -- confirm which is intended.
0099 class GraphCreatorHelper;
0100 void ChangeEmptyEntryRange(const ROOT::RDF::RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
     // RNode is spelled with its full qualification, like every other declaration in this block,
     // so the declaration does not depend on how an unqualified `RNode` is looked up from here.
0101 void ChangeBeginAndEndEntries(const ROOT::RDF::RNode &node, Long64_t begin, Long64_t end);
0102 void ChangeSpec(const ROOT::RDF::RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec);
0103 void TriggerRun(ROOT::RDF::RNode node);
0104 std::string GetDataSourceLabel(const ROOT::RDF::RNode &node);
0105 void SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline);
0106 } // namespace RDF
0107 } // namespace Internal
0108 
0109 namespace RDF {
0110 
0111 // clang-format off
0112 /**
0113  * \class ROOT::RDF::RInterface
0114  * \ingroup dataframe
0115  * \brief The public interface to the RDataFrame federation of classes.
0116  * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
0117  * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
0118  *
0119  * The documentation of each method features a one liner illustrating how to use the method, for example showing how
0120  * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
0121  */
0122 // clang-format on
0123 template <typename Proxied, typename DataSource = void>
0124 class RInterface : public RInterfaceBase {
0125    using DS_t = DataSource;
0126    using RFilterBase = RDFDetail::RFilterBase;
0127    using RRangeBase = RDFDetail::RRangeBase;
0128    using RLoopManager = RDFDetail::RLoopManager;
0129    friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
0130    friend class RDFInternal::GraphDrawing::GraphCreatorHelper;
0131 
0132    template <typename T, typename W>
0133    friend class RInterface;
0134 
0135    friend void RDFInternal::TriggerRun(RNode node);
0136    friend void RDFInternal::ChangeEmptyEntryRange(const RNode &node, std::pair<ULong64_t, ULong64_t> &&newRange);
0137    friend void RDFInternal::ChangeBeginAndEndEntries(const RNode &node, Long64_t start, Long64_t end);
0138    friend void RDFInternal::ChangeSpec(const RNode &node, ROOT::RDF::Experimental::RDatasetSpec &&spec);
0139    friend std::string ROOT::Internal::RDF::GetDataSourceLabel(const RNode &node);
0140    friend void ROOT::Internal::RDF::SetTTreeLifeline(ROOT::RDF::RNode &node, std::any lifeline);
0141    std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
0142 
0143 public:
0144    ////////////////////////////////////////////////////////////////////////////
0145    /// \brief Copy-assignment operator for RInterface.
        /// Defaulted (member-wise). Since `fProxiedPtr` is a std::shared_ptr, the assigned-to object ends up
        /// referring to the same graph node as the source; the node itself is not copied.
0146    RInterface &operator=(const RInterface &) = default;
0147 
0148    ////////////////////////////////////////////////////////////////////////////
0149    /// \brief Copy-ctor for RInterface.
        /// Defaulted (member-wise): the new object shares the proxied node with the object it is copied from.
0150    RInterface(const RInterface &) = default;
0151 
0152    ////////////////////////////////////////////////////////////////////////////
0153    /// \brief Move-ctor for RInterface.
        /// Defaulted (member-wise move construction).
0154    RInterface(RInterface &&) = default;
0155 
0156    ////////////////////////////////////////////////////////////////////////////
0157    /// \brief Move-assignment operator for RInterface.
        /// Defaulted (member-wise move assignment).
0158    RInterface &operator=(RInterface &&) = default;
0159 
0160    ////////////////////////////////////////////////////////////////////////////
0161    /// \brief Build a RInterface from a RLoopManager.
0162    /// This constructor is only available for RInterface<RLoopManager>.
0163    template <typename T = Proxied, typename = std::enable_if_t<std::is_same<T, RLoopManager>::value, int>>
0164    RInterface(const std::shared_ptr<RLoopManager> &proxied)
           : RInterfaceBase(proxied), // the base class is initialised from the same loop manager pointer
             fProxiedPtr(proxied)     // and that loop manager is also stored as the proxied node
0165    {
           // Intentionally empty: everything is done in the member-initializer list.
0166    }
0167 
0168    ////////////////////////////////////////////////////////////////////////////
0169    /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
0170    /// Different RDataFrame methods return different C++ types. All nodes, however,
0171    /// can be cast to this common type at the cost of a small performance penalty.
0172    /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
0173    /// around via (non-template, C++11) helper functions.
0174    /// Example usage:
0175    /// ~~~{.cpp}
0176    /// // a function that conditionally adds a Range to a RDataFrame node.
0177    /// RNode MaybeAddRange(RNode df, bool mustAddRange)
0178    /// {
0179    ///    return mustAddRange ? df.Range(1) : df;
0180    /// }
0181    /// // use as :
0182    /// ROOT::RDataFrame df(10);
0183    /// auto maybeRanged = MaybeAddRange(df, true);
0184    /// ~~~
0185    /// Note that it is not a problem to pass RNode objects by value.
0186    operator RNode() const
0187    {
           // Re-type the stored node pointer as a pointer to the common RNodeBase type, then wrap it
           // in an RNode that carries the same loop manager and column register as this object.
           auto baseNodePtr = std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr);
0188       return RNode(std::move(baseNodePtr), *fLoopManager, fColRegister);
0189    }
0190 
0191    ////////////////////////////////////////////////////////////////////////////
0192    /// \brief Append a filter to the call graph.
0193    /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
0194    /// signalling whether the event has passed the selection (true) or not (false).
0195    /// \param[in] columns Names of the columns/branches in input to the filter function.
0196    /// \param[in] name Optional name of this filter. See `Report`.
0197    /// \return the filter node of the computation graph.
0198    ///
0199    /// Append a filter node at the point of the call graph corresponding to the
0200    /// object this method is called on.
0201    /// The callable `f` should not have side-effects (e.g. modification of an
0202    /// external or static variable) to ensure correct results when implicit
0203    /// multi-threading is active.
0204    ///
0205    /// RDataFrame only evaluates filters when necessary: if multiple filters
0206    /// are chained one after another, they are executed in order and the first
0207    /// one returning false causes the event to be discarded.
0208    /// Even if multiple actions or transformations depend on the same filter,
0209    /// it is executed once per entry. If its result is requested more than
0210    /// once, the cached result is served.
0211    ///
0212    /// ### Example usage:
0213    /// ~~~{.cpp}
0214    /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
0215    /// auto filtered = df.Filter(myCut, {"x", "y"});
0216    ///
0217    /// // String: it must contain valid C++ except that column names can be used instead of variable names
0218    /// auto filtered = df.Filter("x*y > 0");
0219    /// ~~~
0220    ///
0221    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0222    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0223    /// ~~~{.cpp}
0224    /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0225    /// ~~~
0226    /// but instead this will:
0227    /// ~~~{.cpp}
0228    /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0229    /// ~~~
0230    template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0231    RInterface<RDFDetail::RFilter<F, Proxied>, DS_t>
0232    Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
0233    {
           // Type of the graph node that this call creates and returns an interface to.
           using FilterNode_t = RDFDetail::RFilter<F, Proxied>;

0234       RDFInternal::CheckFilter(f);

           // The argument type list of the callable determines how many column names are requested
           // from GetValidatedColumnNames and which types are handed to CheckAndFillDSColumns.
0235       using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
0236       constexpr auto nInputs = ArgTypes_t::list_size;
0237       const auto inputColumns = GetValidatedColumnNames(nInputs, columns);
0238       CheckAndFillDSColumns(inputColumns, ArgTypes_t());
0239 
           // The new node takes ownership of the callable and hangs off the node proxied by this object.
0242       auto node = std::make_shared<FilterNode_t>(std::move(f), inputColumns, fProxiedPtr, fColRegister, name);
0243       return RInterface<FilterNode_t, DS_t>(std::move(node), *fLoopManager, fColRegister);
0244    }
0245 
0246    ////////////////////////////////////////////////////////////////////////////
0247    /// \brief Append a filter to the call graph.
0248    /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
0249    /// signalling whether the event has passed the selection (true) or not (false).
0250    /// \param[in] name Optional name of this filter. See `Report`.
0251    /// \return the filter node of the computation graph.
0252    ///
0253    /// Refer to the first overload of this method for the full documentation.
0254    template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0255    RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, std::string_view name)
0256    {
0257       // The sfinae is there in order to pick up the overloaded method which accepts two strings
0258       // rather than this template method.
           // Delegates to the (callable, columns, name) overload with an empty column list.
           // `f` is a by-value parameter that is not used again here, so it is moved into the callee
           // instead of being copied a second time.
0259       return Filter(std::move(f), {}, name);
0260    }
0261 
0262    ////////////////////////////////////////////////////////////////////////////
0263    /// \brief Append a filter to the call graph.
0264    /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
0265    /// signalling whether the event has passed the selection (true) or not (false).
0266    /// \param[in] columns Names of the columns/branches in input to the filter function.
0267    /// \return the filter node of the computation graph.
0268    ///
0269    /// Refer to the first overload of this method for the full documentation.
0270    template <typename F>
0271    RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
0272    {
           // Turn the braced list into a ColumnNames_t and delegate to the overload taking one.
           // `f` is a by-value parameter that is not used again here, so it is moved into the callee
           // instead of being copied a second time.
0273       return Filter(std::move(f), ColumnNames_t{columns});
0274    }
0275 
0276    ////////////////////////////////////////////////////////////////////////////
0277    /// \brief Append a filter to the call graph.
0278    /// \param[in] expression The filter expression in C++
0279    /// \param[in] name Optional name of this filter. See `Report`.
0280    /// \return the filter node of the computation graph.
0281    ///
0282    /// The expression is just-in-time compiled and used to filter entries. It must
0283    /// be valid C++ syntax in which variable names are substituted with the names
0284    /// of branches/columns.
0285    ///
0286    /// ### Example usage:
0287    /// ~~~{.cpp}
0288    /// auto filtered_df = df.Filter("myCollection.size() > 3");
0289    /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minimum collection size");
0290    /// ~~~
0291    ///
0292    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0293    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0294    /// ~~~{.cpp}
0295    /// df.Filter("Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0296    /// ~~~
0297    /// but instead this will:
0298    /// ~~~{.cpp}
0299    /// df.Filter("return Sum(Map(vec, [](float e) { return e*e > 0.5; }))")
0300    /// ~~~
0301    RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
0302    {
0303       // deleted by the jitted call to JitFilterHelper
0304       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0305       using BaseNodeType_t = typename std::remove_pointer_t<decltype(upcastNodeOnHeap)>::element_type;
           // NOTE(review): this object is not referenced again in this function. It is kept because its
           // construction may be relied upon (for instance to instantiate RInterface<BaseNodeType_t>);
           // confirm before removing it.
0306       RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fColRegister);
           // Deliberately not `const`: the pointer is handed over with std::move below, and std::move
           // applied to a const object cannot select a move overload (it silently copies instead).
0307       auto jittedFilter =
0308          RDFInternal::BookFilterJit(upcastNodeOnHeap, name, expression, fColRegister, nullptr, GetDataSource());
0309 
0310       return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), *fLoopManager, fColRegister);
0311    }
0312 
0313    ////////////////////////////////////////////////////////////////////////////
0314    /// \brief Discard entries with missing values
0315    /// \param[in] column Column name whose entries with missing values should be discarded
0316    /// \return The filter node of the computation graph
0317    ///
0318    /// This operation is useful in case an entry of the dataset is incomplete,
0319    /// i.e. if one or more of the columns do not have valid values. If the value
0320    /// of the input column is missing for an entry, the entire entry will be
0321    /// discarded from the rest of this branch of the computation graph.
0322    ///
0323    /// Use cases include:
0324    /// * When processing multiple files, one or more of them is missing a column
0325    /// * In horizontal joining with entry matching, a certain dataset has no
0326    ///   match for the current entry.
0327    ///
0328    /// ### Example usage:
0329    ///
0330    /// \code{.py}
0331    /// # Assume a dataset with columns [idx, x] matching another dataset with
0332    /// # columns [idx, y]. For idx == 42, the right-hand dataset has no match
0333    /// df = ROOT.RDataFrame(dataset)
0334    /// df_nomissing = df.FilterAvailable("idx").Define("z", "x + y")
0335    /// colz = df_nomissing.Take[int]("z")
0336    /// \endcode
0337    ///
0338    /// \code{.cpp}
0339    /// // Assume a dataset with columns [idx, x] matching another dataset with
0340    /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
0341    /// ROOT::RDataFrame df{dataset};
0342    /// auto df_nomissing = df.FilterAvailable("idx")
0343    ///                       .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
0344    /// auto colz = df_nomissing.Take<int>("z");
0345    /// \endcode
0346    ///
0347    /// \note See FilterMissing() if you want to keep only the entries with
0348    ///       missing values instead.
0349    RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterAvailable(std::string_view column)
0350    {
           // Build the column name from the full extent of the view. A std::string_view is not
           // guaranteed to be null-terminated, so converting `column.data()` as a C string could read
           // past the end of the view or pick up characters that are not part of the requested name.
0351       const auto columns = ColumnNames_t{std::string(column)};
0352       // For now disable this functionality in case of an empty data source and
0353       // the column name was not defined previously.
0354       if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
0355          throw std::runtime_error("Unknown column: \"" + std::string(column) + "\"");
0356       using F_t = RDFDetail::RFilterWithMissingValues<Proxied>;
           // `true` is the discardEntry flag; FilterMissing() passes `false` for the same argument.
0357       auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ true, fProxiedPtr, fColRegister, columns);
0358       CheckAndFillDSColumns(columns, TTraits::TypeList<void>{});
0359       return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
0360    }
0361 
0362    ////////////////////////////////////////////////////////////////////////////
0363    /// \brief Keep only the entries that have missing values.
0364    /// \param[in] column Column name whose entries with missing values should be kept
0365    /// \return The filter node of the computation graph
0366    ///
0367    /// This operation is useful in case an entry of the dataset is incomplete,
0368    /// i.e. if one or more of the columns do not have valid values. It only
0369    /// keeps the entries for which the value of the input column is missing.
0370    ///
0371    /// Use cases include:
0372    /// * When processing multiple files, one or more of them is missing a column
0373    /// * In horizontal joining with entry matching, a certain dataset has no
0374    ///   match for the current entry.
0375    ///
0376    /// ### Example usage:
0377    ///
0378    /// \code{.py}
0379    /// # Assume a dataset made of two files vertically chained together, one has
0380    /// # column "x" and the other has column "y"
0381    /// df = ROOT.RDataFrame(dataset)
0382    /// df_valid_col_x = df.FilterMissing("y")
0383    /// df_valid_col_y = df.FilterMissing("x")
0384    /// display_x = df_valid_col_x.Display(("x",))
0385    /// display_y = df_valid_col_y.Display(("y",))
0386    /// \endcode
0387    ///
0388    /// \code{.cpp}
0389    /// // Assume a dataset made of two files vertically chained together, one has
0390    /// // column "x" and the other has column "y"
0391    /// ROOT::RDataFrame df{dataset};
0392    /// auto df_valid_col_x = df.FilterMissing("y");
0393    /// auto df_valid_col_y = df.FilterMissing("x");
0394    /// auto display_x = df_valid_col_x.Display<int>({"x"});
0395    /// auto display_y = df_valid_col_y.Display<int>({"y"});
0396    /// \endcode
0397    ///
0398    /// \note See FilterAvailable() if you want to discard the entries in case
0399    ///       there is a missing value instead.
0400    RInterface<RDFDetail::RFilterWithMissingValues<Proxied>, DS_t> FilterMissing(std::string_view column)
0401    {
           // Build the column name from the full extent of the view. A std::string_view is not
           // guaranteed to be null-terminated, so converting `column.data()` as a C string could read
           // past the end of the view or pick up characters that are not part of the requested name.
0402       const auto columns = ColumnNames_t{std::string(column)};
0403       // For now disable this functionality in case of an empty data source and
0404       // the column name was not defined previously.
0405       if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
0406          throw std::runtime_error("Unknown column: \"" + std::string(column) + "\"");
0407       using F_t = RDFDetail::RFilterWithMissingValues<Proxied>;
           // `false` is the discardEntry flag; FilterAvailable() passes `true` for the same argument.
0408       auto filterPtr = std::make_shared<F_t>(/*discardEntry*/ false, fProxiedPtr, fColRegister, columns);
0409       CheckAndFillDSColumns(columns, TTraits::TypeList<void>{});
0410       return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, fColRegister);
0411    }
0412 
0413    // clang-format off
0414    ////////////////////////////////////////////////////////////////////////////
0415    /// \brief Define a new column.
0416    /// \param[in] name The name of the defined column.
0417    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0418    /// \param[in] columns Names of the columns/branches in input to the producer function.
0419    /// \return the first node of the computation graph for which the new quantity is defined.
0420    ///
0421    /// Define a column that will be visible from all subsequent nodes
0422    /// of the functional chain. The `expression` is only evaluated for entries that pass
0423    /// all the preceding filters.
0424    /// A new variable is created called `name`, accessible as if it was contained
0425    /// in the dataset from subsequent transformations/actions.
0426    ///
0427    /// Use cases include:
0428    /// * caching the results of complex calculations for easy and efficient multiple access
0429    /// * extraction of quantities of interest from complex objects
0430    ///
0431    /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
0432    ///
0433    /// ### Example usage:
0434    /// ~~~{.cpp}
0435    /// // assuming a function with signature:
0436    /// double myComplexCalculation(const RVec<float> &muon_pts);
0437    /// // we can pass it directly to Define
0438    /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
0439    /// // alternatively, we can pass the body of the function as a string, as in Filter:
0440    /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
0441    /// ~~~
0442    ///
0443    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0444    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0445    /// ~~~{.cpp}
0446    /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
0447    /// ~~~
0448    /// but instead this will:
0449    /// ~~~{.cpp}
0450    /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
0451    /// ~~~
0452    template <typename F, typename std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0453    RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
0454    {
           // Name of this entry point, handed to DefineImpl as its last argument.
           constexpr auto where = "Define";
           // All the work happens in DefineImpl; the `None` tag selects the flavour used by plain Define.
0455       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, where);
0456    }
0457    // clang-format on
0458 
0459    // clang-format off
0460    ////////////////////////////////////////////////////////////////////////////
0461    /// \brief Define a new column with a value dependent on the processing slot.
0462    /// \param[in] name The name of the defined column.
0463    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0464    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
0465    /// \return the first node of the computation graph for which the new quantity is defined.
0466    ///
0467    /// This alternative implementation of `Define` is meant as a helper to evaluate new column values in a thread-safe manner.
0468    /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
0469    /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
0470    /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
0471    /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
0472    /// Note that there is no guarantee as to how often each slot will be reached during the event loop.
0473    ///
0474    /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
0475    /// ~~~{.cpp}
0476    /// int function(unsigned int, double, double);
0477    /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
0478    /// df.DefineSlot("x", function, {"column1", "column2"})
0479    /// ~~~
0480    ///
0481    /// See Define() for more information.
0482    template <typename F>
0483    RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
0484    {
           // Name of this entry point, handed to DefineImpl as its last argument.
           constexpr auto where = "DefineSlot";
           // All the work happens in DefineImpl; the `Slot` tag selects the flavour used by DefineSlot.
0485       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, where);
0486    }
0487    // clang-format on
0488 
0489    // clang-format off
0490    ////////////////////////////////////////////////////////////////////////////
0491    /// \brief Define a new column with a value dependent on the processing slot and the current entry.
0492    /// \param[in] name The name of the defined column.
0493    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0494    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
0495    /// \return the first node of the computation graph for which the new quantity is defined.
0496    ///
0497    /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
0498    /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
0499    /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
0500    /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
0501    /// different slot numbers - slot numbers will range from zero to ROOT::GetThreadPoolSize()-1.
0502    /// Note that there is no guarantee as to how often each slot will be reached during the event loop.
0503    /// The second parameter is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
0504    ///
0505    /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
0506    /// ~~~{.cpp}
0507    /// int function(unsigned int, ULong64_t, double, double);
0508    /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
0509    /// DefineSlotEntry("x", function, {"column1", "column2"})
0510    /// ~~~
0511    ///
0512    /// See Define() for more information.
0513    template <typename F>
0514    RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
0515    {
           // Name of this entry point, handed to DefineImpl as its last argument.
           constexpr auto where = "DefineSlotEntry";
           // All the work happens in DefineImpl; the `SlotAndEntry` tag selects the flavour used by DefineSlotEntry.
0516       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns, where);
0518    }
0519    // clang-format on
0520 
0521    ////////////////////////////////////////////////////////////////////////////
0522    /// \brief Define a new column.
0523    /// \param[in] name The name of the defined column.
0524    /// \param[in] expression An expression in C++ which represents the defined value
0525    /// \return the first node of the computation graph for which the new quantity is defined.
0526    ///
0527    /// The expression is just-in-time compiled and used to produce the column entries.
0528    /// It must be valid C++ syntax in which variable names are substituted with the names
0529    /// of branches/columns.
0530    ///
0531    /// \note If the body of the string expression contains an explicit `return` statement (even if it is in a nested
0532    /// scope), RDataFrame _will not_ add another one in front of the expression. So this will not work:
0533    /// ~~~{.cpp}
0534    /// df.Define("x2", "Map(v, [](float e) { return e*e; })")
0535    /// ~~~
0536    /// but instead this will:
0537    /// ~~~{.cpp}
0538    /// df.Define("x2", "return Map(v, [](float e) { return e*e; })")
0539    /// ~~~
0540    ///
0541    /// Refer to the first overload of this method for the full documentation.
0542    RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
0543    {
           // Name of this entry point, passed to both checks below.
0544       constexpr auto callerName = "Define";

           // Both checks have to run before anything is jitted, so that a failure is reported from
           // here rather than as an exception thrown in jitted code.
0545       RDFInternal::CheckValidCppVarName(name, callerName);
0547       RDFInternal::CheckForRedefinition(callerName, name, fColRegister,
0548                                         GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
0549 
           // Book the jitted define first, then register it in a copy of the current column register.
0550       auto nodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0551       auto bookedDefine =
0552          RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister, nodeOnHeap);
0553 
0554       RDFInternal::RColumnRegister extendedRegister(fColRegister);
0555       extendedRegister.AddDefine(std::move(bookedDefine));
0556 
           // The returned interface proxies the same node as this one, but sees the extended register.
0557       return RInterface<Proxied, DS_t>(fProxiedPtr, *fLoopManager, std::move(extendedRegister));
0560    }
0561 
0562    ////////////////////////////////////////////////////////////////////////////
0563    /// \brief Overwrite the value and/or type of an existing column.
0564    /// \param[in] name The name of the column to redefine.
0565    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the defined value. Returns the value that will be assigned to the defined column.
0566    /// \param[in] columns Names of the columns/branches in input to the expression.
0567    /// \return the first node of the computation graph for which the quantity is redefined.
0568    ///
0569    /// The old value of the column can be used as an input for the expression.
0570    ///
0571    /// An exception is thrown in case the column to redefine does not already exist.
0572    /// See Define() for more information.
0573    template <typename F, std::enable_if_t<!std::is_convertible<F, std::string>::value, int> = 0>
0574    RInterface<Proxied, DS_t> Redefine(std::string_view name, F expression, const ColumnNames_t &columns = {})
0575    {
           // Name of this entry point, handed to DefineImpl as its last argument.
           constexpr auto where = "Redefine";
           // Same DefineImpl instantiation as plain Define; only the entry-point name passed along differs.
0576       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::None>(name, std::move(expression), columns, where);
0577    }
0578 
0579    // clang-format off
0580    ////////////////////////////////////////////////////////////////////////////
0581    /// \brief Overwrite the value and/or type of an existing column, using a producer function that also takes the slot.
0582    /// \param[in] name The name of the column to redefine.
0583    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the redefined value. Returns the value that will be assigned to the redefined column.
0584    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot).
0585    /// \return the first node of the computation graph for which the quantity is redefined.
0586    ///
0587    /// The old value of the column can be used as an input for the expression.
0588    /// An exception is thrown in case the column to redefine does not already exist.
0589    ///
0590    /// See DefineSlot() for more information.
0591    // clang-format on
0592    template <typename F>
0593    RInterface<Proxied, DS_t> RedefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
0594    {
0595       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::Slot>(name, std::move(expression), columns, "RedefineSlot");
0596    }
0597 
0598    // clang-format off
0599    ////////////////////////////////////////////////////////////////////////////
0600    /// \brief Overwrite the value and/or type of an existing column, using a producer function that also takes slot and entry.
0601    /// \param[in] name The name of the column to redefine.
0602    /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the redefined value. Returns the value that will be assigned to the redefined column.
0603    /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
0604    /// \return the first node of the computation graph for which the quantity is redefined.
0605    ///
0606    /// The old value of the column can be used as an input for the expression.
0607    /// An exception is thrown in case the column to redefine does not already exist.
0608    ///
0609    /// See DefineSlotEntry() for more information.
0610    // clang-format on
0611    template <typename F>
0612    RInterface<Proxied, DS_t> RedefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
0613    {
0614       return DefineImpl<F, RDFDetail::ExtraArgsForDefine::SlotAndEntry>(name, std::move(expression), columns,
0615                                                                         "RedefineSlotEntry");
0616    }
0617 
0618    ////////////////////////////////////////////////////////////////////////////
0619    /// \brief Overwrite the value and/or type of an existing column using a just-in-time compiled expression.
0620    /// \param[in] name The name of the column to redefine.
0621    /// \param[in] expression An expression in C++, passed as a string, which represents the redefined value.
0622    /// \return the first node of the computation graph for which the quantity is redefined.
0623    ///
0624    /// The expression is just-in-time compiled and used to produce the column entries.
0625    /// It must be valid C++ syntax in which variable names are substituted with the names
0626    /// of branches/columns. The old value of the column can be used as an input for the expression.
0627    ///
0628    /// An exception is thrown in case the column to redefine does not already exist.
0629    ///
0630    /// Aliases cannot be overridden. See the corresponding Define() overload for more information.
0631    RInterface<Proxied, DS_t> Redefine(std::string_view name, std::string_view expression)
0632    {
0633       constexpr auto callerName = "Redefine";
0634       RDFInternal::CheckValidCppVarName(name, callerName);
0635
0636       // Run the remaining checks up front, before the jitted define is booked.
0637       const ColumnNames_t dsColumnNames =
0638          GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
0639       RDFInternal::CheckForDefinition(callerName, name, fColRegister, dsColumnNames);
0640       RDFInternal::CheckForNoVariations(callerName, name, fColRegister);
0641
0642       auto nodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0643       auto jittedColumn =
0644          RDFInternal::BookDefineJit(name, expression, *fLoopManager, GetDataSource(), fColRegister, nodeOnHeap);
0645
0646       // The new define goes into a copy of the column register; the one held by this node is left as is.
0647       RDFInternal::RColumnRegister updatedRegister(fColRegister);
0648       updatedRegister.AddDefine(std::move(jittedColumn));
0649       return RInterface<Proxied, DS_t>(fProxiedPtr, *fLoopManager, std::move(updatedRegister));
0650    }
0651 
0652    ////////////////////////////////////////////////////////////////////////////
0653    /// \brief In case the value in the given column is missing, provide a default value.
0654    /// \tparam T The type of the column
0655    /// \param[in] column Column name where missing values should be replaced by the given default value
0656    /// \param[in] defaultValue Value to provide instead of a missing value
0657    /// \return The node of the graph that will provide a default value
0658    ///
0659    /// This operation is useful in case an entry of the dataset is incomplete,
0660    /// i.e. if one or more of the columns do not have valid values. It does not
0661    /// modify the values of the column, but in case any entry is missing, it
0662    /// will provide the default value to downstream nodes instead.
0663    ///
0664    /// Use cases include:
0665    /// * When processing multiple files, one or more of them is missing a column
0666    /// * In horizontal joining with entry matching, a certain dataset has no
0667    ///   match for the current entry.
0668    ///
0669    /// ### Example usage:
0670    ///
0671    /// \code{.cpp}
0672    /// // Assume a dataset with columns [idx, x] matching another dataset with
0673    /// // columns [idx, y]. For idx == 42, the right-hand dataset has no match
0674    /// ROOT::RDataFrame df{dataset};
0675    /// auto df_default = df.DefaultValueFor("y", 33)
0676    ///                     .Define("z", [](int x, int y) { return x + y; }, {"x", "y"});
0677    /// auto colz = df_default.Take<int>("z");
0678    /// \endcode
0679    ///
0680    /// \code{.py}
0681    /// df = ROOT.RDataFrame(dataset)
0682    /// df_default = df.DefaultValueFor("y", 33).Define("z", "x + y")
0683    /// colz = df_default.Take[int]("z")
0684    /// \endcode
0685    template <typename T>
0686    RInterface<Proxied, DS_t> DefaultValueFor(std::string_view column, const T &defaultValue)
0687    {
0688       constexpr auto where{"DefaultValueFor"};
0689       RDFInternal::CheckForNoVariations(where, column, fColRegister);
0690       // For now disable this functionality in case of an empty data source and
0691       // the column name was not defined previously.
0692       if (ROOT::Internal::RDF::GetDataSourceLabel(*this) == "EmptyDS")
0693          RDFInternal::CheckForDefinition(where, column, fColRegister,
0694                                          GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
0695
0696       // Declare return type to the interpreter, for future use by jitted actions
0697       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(T));
0698       if (retTypeName.empty()) {
0699          // The type is not known to the interpreter.
0700          // We must not error out here, but if/when this column is used in jitted code
0701          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(T));
0702          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
0703       }
0704
0705       // A string_view is not guaranteed to be null-terminated: build the name from the whole view
0706       // (exactly column.size() characters) rather than from column.data(), which could read past its end.
0707       const auto validColumnNames = ColumnNames_t{std::string(column)};
0708       auto newColumn = std::make_shared<ROOT::Internal::RDF::RDefaultValueFor<T>>(
0709          column, retTypeName, defaultValue, validColumnNames, fColRegister, *fLoopManager);
0710       CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>{});
0711
0712       RDFInternal::RColumnRegister newCols(fColRegister);
0713       newCols.AddDefine(std::move(newColumn));
0714       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
0715       return newInterface;
0716    }
0717 
0718    // clang-format off
0719    ////////////////////////////////////////////////////////////////////////////
0720    /// \brief Define a new column that is updated when the input sample changes.
0721    /// \param[in] name The name of the defined column.
0722    /// \param[in] expression A C++ callable that computes the new value of the defined column.
0723    /// \return the first node of the computation graph for which the new quantity is defined.
0724    ///
0725    /// The signature of the callable passed as second argument should be `T(unsigned int slot, const ROOT::RDF::RSampleInfo &id)`
0726    /// where:
0727    /// - `T` is the type of the defined column
0728    /// - `slot` is a number in the range [0, nThreads) that is different for each processing thread. This can simplify
0729    ///   the definition of thread-safe callables if you are interested in using parallel capabilities of RDataFrame.
0730    /// - `id` is an instance of a ROOT::RDF::RSampleInfo object which contains information about the sample which is
0731    ///   being processed (see the class docs for more information).
0732    ///
0733    /// DefinePerSample() is useful to e.g. define a quantity that depends on which TTree in which TFile is being
0734    /// processed or to inject a callback into the event loop that is only called when the processing of a new sample
0735    /// starts rather than at every entry. The callable will be invoked once per input TTree or once per multi-thread
0736    /// task, whichever is more often.
0737    ///
0738    /// ### Example usage:
0739    /// ~~~{.cpp}
0740    /// ROOT::RDataFrame df{"mytree", {"sample1.root","sample2.root"}};
0741    /// df.DefinePerSample("weightbysample",
0742    ///                    [](unsigned int slot, const ROOT::RDF::RSampleInfo &id) { return id.Contains("sample1") ? 1.0f : 2.0f; });
0743    /// ~~~
0744    // clang-format on
0745    // TODO we could SFINAE on F's signature to provide friendlier compilation errors in case of signature mismatch
0746    template <typename F, typename RetType_t = typename TTraits::CallableTraits<F>::ret_type>
0747    RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, F expression)
0748    {
0749       constexpr auto where = "DefinePerSample";
0750       RDFInternal::CheckValidCppVarName(name, where);
0751       RDFInternal::CheckForRedefinition(where, name, fColRegister,
0752                                         GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
0753
0754       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType_t));
0755       if (retTypeName.empty()) {
0756          // The type is not known to the interpreter.
0757          // We must not error out here, but if/when this column is used in jitted code
0758          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType_t));
0759          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
0760       }
0761
0762       auto newColumn =
0763          std::make_shared<RDFDetail::RDefinePerSample<F>>(name, retTypeName, std::move(expression), *fLoopManager);
0764
0765       RDFInternal::RColumnRegister newCols(fColRegister);
0766       newCols.AddDefine(std::move(newColumn));
0767       // Build exactly the declared return type, so no conversion between RInterface specializations is needed.
0768       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
0769       return newInterface;
0770    }
0771 
0772    // clang-format off
0773    ////////////////////////////////////////////////////////////////////////////
0774    /// \brief Define a new column that is updated when the input sample changes, using a just-in-time compiled expression.
0775    /// \param[in] name The name of the defined column.
0776    /// \param[in] expression A valid C++ expression as a string, which will be used to compute the defined value.
0777    /// \return the first node of the computation graph for which the new quantity is defined.
0778    ///
0779    /// The expression is just-in-time compiled and used to produce the column entries.
0780    /// It must be valid C++ syntax. The special variable names `rdfslot_` and `rdfsampleinfo_` may be used in it:
0781    /// they take the same values as the `slot` and `id` parameters described at the
0782    /// DefinePerSample(std::string_view name, F expression) overload. See the documentation of that overload for more information.
0783    ///
0784    /// ### Example usage:
0785    /// ~~~{.py}
0786    /// df = ROOT.RDataFrame('mytree', ['sample1.root','sample2.root'])
0787    /// df.DefinePerSample('weightbysample', 'rdfsampleinfo_.Contains("sample1") ? 1.0f : 2.0f')
0788    /// ~~~
0789    ///
0790    /// \note
0791    /// If you have declared some C++ function to the interpreter, the correct syntax to call that function with this
0792    /// overload of DefinePerSample is by calling it explicitly with the special names `rdfslot_` and `rdfsampleinfo_` as
0793    /// input parameters. This is for example the correct way to call this overload when working in PyROOT:
0794    /// ~~~{.py}
0795    /// ROOT.gInterpreter.Declare(
0796    /// """
0797    /// float weights(unsigned int slot, const ROOT::RDF::RSampleInfo &id){
0798    ///    return id.Contains("sample1") ? 1.0f : 2.0f;
0799    /// }
0800    /// """)
0801    /// df = ROOT.RDataFrame("mytree", ["sample1.root","sample2.root"])
0802    /// df.DefinePerSample("weightsbysample", "weights(rdfslot_, rdfsampleinfo_)")
0803    /// ~~~
0804    ///
0805    /// \note
0806    /// Differently from what happens in Define(), the string expression passed to DefinePerSample cannot contain
0807    /// column names other than those mentioned above: the expression is evaluated once before the processing of the
0808    /// sample even starts, so column values are not accessible.
0809    // clang-format on
0810    RInterface<Proxied, DS_t> DefinePerSample(std::string_view name, std::string_view expression)
0811    {
0812       constexpr auto where = "DefinePerSample";
0813       RDFInternal::CheckValidCppVarName(name, where);
0814       // Check for redefinition here, ahead of jitting, lest exceptions are thrown from jitted code.
0815       const ColumnNames_t dsColumnNames =
0816          GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
0817       RDFInternal::CheckForRedefinition(where, name, fColRegister, dsColumnNames);
0818
0819       auto nodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
0820       auto jittedColumn =
0821          RDFInternal::BookDefinePerSampleJit(name, expression, *fLoopManager, fColRegister, nodeOnHeap);
0822
0823       RDFInternal::RColumnRegister updatedRegister(fColRegister);
0824       updatedRegister.AddDefine(std::move(jittedColumn));
0825
0826       return RInterface<Proxied, DS_t>(fProxiedPtr, *fLoopManager, std::move(updatedRegister));
0827    }
0828 
0829    /// \brief Register systematic variations for a single existing column using custom variation tags.
0830    /// \param[in] colName name of the column for which varied values are provided.
0831    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0832    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0833    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0834    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0835    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
0836    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0837    ///            colName is used if none is provided.
0838    ///
0839    /// Vary provides a natural and flexible syntax to define systematic variations that automatically propagate to
0840    /// Filters, Defines and results. RDataFrame usage of columns with attached variations does not change, but for
0841    /// results that depend on any varied quantity, a map/dictionary of varied results can be produced with
0842    /// ROOT::RDF::Experimental::VariationsFor (see the example below).
0843    ///
0844    /// The dictionary will contain a "nominal" value (accessed with the "nominal" key) for the unchanged result, and
0845    /// values for each of the systematic variations that affected the result (via upstream Filters or via direct or
0846    /// indirect dependencies of the column values on some registered variations). The keys will be a composition of
0847    /// variation names and tags, e.g. "pt:up" and "pt:down" for the example below.
0848    ///
0849    /// In the following example we add up/down variations of pt and fill a histogram with a quantity that depends on pt.
0850    /// We automatically obtain three histograms in output ("nominal", "pt:up" and "pt:down"):
0851    /// ~~~{.cpp}
0852    /// auto nominal_hx =
0853    ///     df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, {"down", "up"})
0854    ///       .Filter("pt > k")
0855    ///       .Define("x", someFunc, {"pt"})
0856    ///       .Histo1D("x");
0857    ///
0858    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0859    /// hx["nominal"].Draw();
0860    /// hx["pt:down"].Draw("SAME");
0861    /// hx["pt:up"].Draw("SAME");
0862    /// ~~~
0863    /// RDataFrame computes all variations as part of a single loop over the data. In particular, this means that
0864    /// I/O and computation of values shared among variations only happen once for all variations.
0865    /// Thus, the event loop run-time typically scales much better than linearly with the number of
0866    /// variations.
0867    ///
0868    /// RDataFrame lazily computes the varied values required to produce the
0869    /// outputs of \ref ROOT::RDF::Experimental::VariationsFor "VariationsFor()". If \ref
0870    /// ROOT::RDF::Experimental::VariationsFor "VariationsFor()" was not called for a result, the computations are only
0871    /// run for the nominal case.
0872    ///
0873    /// See other overloads for examples when variations are added for multiple existing columns,
0874    /// or when the tags are auto-generated instead of being directly defined.
0875    template <typename F>
0876    RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
0877                                   const std::vector<std::string> &variationTags, std::string_view variationName = "")
0878    {
0879       // An empty variation name means "use the name of the varied column".
0880       const std::string resolvedName(variationName.empty() ? colName : variationName);
0881       std::vector<std::string> variedColumns(1, std::string(colName));
0882       return VaryImpl<true>(std::move(variedColumns), std::forward<F>(expression), inputColumns, variationTags,
0883                             resolvedName);
0884    }
0885 
0886    /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
0887    /// \param[in] colName name of the column for which varied values are provided.
0888    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0889    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0890    ///            return an RVec of varied values, one for each variation tag, in the same order as the tags.
0891    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0892    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
0893    /// `"1"`, etc. Must be greater than zero.
0894    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0895    ///            colName is used if none is provided.
0896    ///
0897    /// This overload of Vary takes an nVariations parameter instead of a list of tag names.
0898    /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
0899    /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
0900    ///
0901    /// Example usage:
0902    /// ~~~{.cpp}
0903    /// auto nominal_hx =
0904    ///   df.Vary("pt", [] (double pt) { return RVecD{pt*0.9, pt*1.1}; }, {"pt"}, 2)
0905    ///     .Histo1D("pt");
0906    ///
0907    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0908    /// hx["nominal"].Draw();
0909    /// hx["pt:0"].Draw("SAME");
0910    /// hx["pt:1"].Draw("SAME");
0911    /// ~~~
0912    ///
0913    /// \note See also the other Vary() overloads for more information.
0914    template <typename F>
0915    RInterface<Proxied, DS_t> Vary(std::string_view colName, F &&expression, const ColumnNames_t &inputColumns,
0916                                   std::size_t nVariations, std::string_view variationName = "")
0917    {
0918       R__ASSERT(nVariations > 0 && "Must have at least one variation.");
0919
0920       // The auto-generated tags are the decimal indices "0", "1", ... up to nVariations - 1.
0921       std::vector<std::string> autoTags(nVariations);
0922       for (std::size_t idx = 0u; idx < nVariations; ++idx)
0923          autoTags[idx] = std::to_string(idx);
0924
0925       const std::string resolvedName(variationName.empty() ? colName : variationName);
0926
0927       return Vary(colName, std::forward<F>(expression), inputColumns, std::move(autoTags), resolvedName);
0928    }
0929 
0930    /// \brief Register systematic variations for multiple existing columns using custom variation tags.
0931    /// \param[in] colNames set of names of the columns for which varied values are provided.
0932    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0933    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0934    ///            return an RVec of RVecs: one inner RVec per varied column, holding that column's varied values.
0935    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0936    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
0937    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"` (no default here).
0938    ///
0939    /// This overload of Vary takes a list of column names as first argument and
0940    /// requires that the expression returns an RVec of RVecs of values: one inner RVec for the variations of each
0941    /// affected column. In the example below the `variationTags` are defined as `{"down", "up"}`.
0942    ///
0943    /// Example usage:
0944    /// ~~~{.cpp}
0945    /// // produce variations "ptAndEta:down" and "ptAndEta:up"
0946    /// auto nominal_hx =
0947    ///   df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
0948    ///         [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
0949    ///         {"pt", "eta"},  // inputs to the Vary expression, independent of what columns are varied
0950    ///         {"down", "up"}, // variation tags
0951    ///         "ptAndEta")    // variation name
0952    ///     .Histo1D("pt", "eta");
0953    ///
0954    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
0955    /// hx["nominal"].Draw();
0956    /// hx["ptAndEta:down"].Draw("SAME");
0957    /// hx["ptAndEta:up"].Draw("SAME");
0958    /// ~~~
0959    ///
0960    /// \note See also the other Vary() overloads for more information.
0961
0962    template <typename F>
0963    RInterface<Proxied, DS_t>
0964    Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
0965         const std::vector<std::string> &variationTags, std::string_view variationName)
0966    {
0967       return VaryImpl<false>(colNames, std::forward<F>(expression), inputColumns, variationTags, variationName);
0968    }
0969 
0970    /// \brief Register systematic variations for multiple existing columns using custom variation tags.
0971    /// \param[in] colNames set of names of the columns for which varied values are provided.
0972    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0973    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0974    ///            return an RVec of RVecs: one inner RVec per varied column, holding that column's varied values.
0975    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0976    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
0977    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
0978    ///            It has no default value in this overload and must be provided.
0979    ///
0980    /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
0981    /// is avoided. It copies `colNames` into a `std::vector<std::string>` and forwards to the corresponding overload.
0982    ///
0983    /// \note See also the other Vary() overloads for more information.
0984    template <typename F>
0985    RInterface<Proxied, DS_t>
0986    Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
0987         const std::vector<std::string> &variationTags, std::string_view variationName)
0988    {
0989       return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, variationTags, variationName);
0990    }
0991 
0992    /// \brief Register systematic variations for multiple existing columns using auto-generated tags.
0993    /// \param[in] colNames set of names of the columns for which varied values are provided.
0994    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
0995    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
0996    ///            return an RVec of RVecs: one inner RVec per varied column, holding that column's varied values.
0997    /// \param[in] inputColumns the names of the columns to be passed to the callable.
0998    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
0999    /// `"1"`, etc. Must be greater than zero.
1000    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1001    ///            It has no default value in this overload and must be provided.
1002    ///
1003    /// This overload of Vary takes a list of column names as first argument.
1004    /// It takes an `nVariations` parameter instead of a list of tag names (`variationTags`). Tag names
1005    /// will be auto-generated as the sequence 0...``nVariations-1``.
1006    ///
1007    /// Example usage:
1008    /// ~~~{.cpp}
1009    /// auto nominal_hx =
1010    ///   df.Vary({"pt", "eta"}, // the columns that will vary simultaneously
1011    ///         [](double pt, double eta) { return RVec<RVecF>{{pt*0.9, pt*1.1}, {eta*0.9, eta*1.1}}; },
1012    ///         {"pt", "eta"},  // inputs to the Vary expression, independent of what columns are varied
1013    ///         2, // auto-generated variation tags
1014    ///         "ptAndEta")    // variation name
1015    ///     .Histo1D("pt", "eta");
1016    ///
1017    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1018    /// hx["nominal"].Draw();
1019    /// hx["ptAndEta:0"].Draw("SAME");
1020    /// hx["ptAndEta:1"].Draw("SAME");
1021    /// ~~~
1022    ///
1023    /// \note See also the other Vary() overloads for more information.
1024    template <typename F>
1025    RInterface<Proxied, DS_t>
1026    Vary(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
1027         std::size_t nVariations, std::string_view variationName)
1028    {
1029       R__ASSERT(nVariations > 0 && "Must have at least one variation.");
1030
1031       // The auto-generated tags are the decimal indices "0", "1", ... up to nVariations - 1.
1032       std::vector<std::string> autoTags(nVariations);
1033       for (std::size_t idx = 0u; idx < nVariations; ++idx)
1034          autoTags[idx] = std::to_string(idx);
1035
1036       return Vary(colNames, std::forward<F>(expression), inputColumns, std::move(autoTags), variationName);
1037    }
1038 
1039    /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1040    /// \param[in] colNames set of names of the columns for which varied values are provided.
1041    /// \param[in] expression a callable that evaluates the varied values for the specified columns. The callable can
1042    ///            take any column values as input, similarly to what happens during Filter and Define calls. It must
1043    ///            return an RVec of RVecs: one inner RVec per varied column, holding that column's varied values.
1044    /// \param[in] inputColumns the names of the columns to be passed to the callable.
1045    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1046    /// `"1"`, etc.
1047    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1048    ///            It has no default value in this overload and must be provided.
1049    ///
1050    /// \note This overload ensures that the ambiguity between C++20 string, vector<string> construction from init list
1051    /// is avoided. It copies `colNames` into a `std::vector<std::string>` and forwards to the corresponding overload.
1052    ///
1053    /// \note See also the other Vary() overloads for more information, in particular the one taking
1054    /// `const std::vector<std::string> &colNames` and `nVariations`.
1055    template <typename F>
1056    RInterface<Proxied, DS_t>
1057    Vary(std::initializer_list<std::string> colNames, F &&expression, const ColumnNames_t &inputColumns,
1058         std::size_t nVariations, std::string_view variationName)
1059    {
1060       return Vary(std::vector<std::string>(colNames), std::forward<F>(expression), inputColumns, nVariations, variationName);
1061    }
1062 
1063    /// \brief Register systematic variations for a single existing column using custom variation tags.
1064    /// \param[in] colName name of the column for which varied values are provided.
1065    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1066    ///            values for the specified column.
1067    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1068    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1069    ///            colName is used if none is provided.
1070    ///
1071    /// With this overload the expression used to evaluate the varied values is a string that is just-in-time
1072    /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1073    /// defined as `{"down", "up"}`.
1074    /// ~~~{.cpp}
1075    /// auto nominal_hx =
1076    ///     df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", {"down", "up"})
1077    ///       .Filter("pt > k")
1078    ///       .Define("x", someFunc, {"pt"})
1079    ///       .Histo1D("x");
1080    ///
1081    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1082    /// hx["nominal"].Draw();
1083    /// hx["pt:down"].Draw("SAME");
1084    /// hx["pt:up"].Draw("SAME");
1085    /// ~~~
1086    ///
1087    /// \note See also the other Vary() overloads for more information.
1088    RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression,
1089                                   const std::vector<std::string> &variationTags, std::string_view variationName = "")
1090    {
1091       // An empty variation name means "use the name of the varied column".
1092       const std::string resolvedName(variationName.empty() ? colName : variationName);
1093       std::vector<std::string> variedColumns(1, std::string(colName));
1094       return JittedVaryImpl(variedColumns, expression, variationTags, resolvedName, /*isSingleColumn=*/true);
1095    }
1096 
1097    /// \brief Register systematic variations for a single existing column using auto-generated variation tags.
1098    /// \param[in] colName name of the column for which varied values are provided.
1099    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec containing the varied
1100    ///            values for the specified column.
1101    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1102    /// `"1"`, etc.
1103    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1104    ///            colName is used if none is provided.
1105    ///
   /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
   /// compiled. The example below shows how Vary() is used while dealing with a single column. The variation tags are
1108    /// auto-generated.
1109    /// ~~~{.cpp}
1110    /// auto nominal_hx =
1111    ///     df.Vary("pt", "ROOT::RVecD{pt*0.9, pt*1.1}", 2)
1112    ///       .Histo1D("pt");
1113    ///
1114    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1115    /// hx["nominal"].Draw();
1116    /// hx["pt:0"].Draw("SAME");
1117    /// hx["pt:1"].Draw("SAME");
1118    /// ~~~
1119    ///
   /// \note See also the other Vary() overloads for more information.
1121    RInterface<Proxied, DS_t> Vary(std::string_view colName, std::string_view expression, std::size_t nVariations,
1122                                   std::string_view variationName = "")
1123    {
1124       std::vector<std::string> variationTags;
1125       variationTags.reserve(nVariations);
1126       for (std::size_t i = 0u; i < nVariations; ++i)
1127          variationTags.emplace_back(std::to_string(i));
1128 
1129       return Vary(colName, expression, std::move(variationTags), variationName);
1130    }
1131 
1132    /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1133    /// \param[in] colNames set of names of the columns for which varied values are provided.
1134    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
1135    ///            values for the specified columns.
1136    /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
1137    /// `"1"`, etc.
1138    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1139    ///
1140    /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1141    /// compiled. It takes an nVariations parameter instead of a list of tag names.
1142    /// The varied results will be accessible via the keys of the dictionary with the form `variationName:N` where `N`
1143    /// is the corresponding sequential tag starting at 0 and going up to `nVariations - 1`.
1144    /// The example below shows how Vary() is used while dealing with multiple columns.
1145    ///
1146    /// ~~~{.cpp}
1147    /// auto nominal_hx =
1148    ///     df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", 2, "xy")
1149    ///       .Histo1D("x", "y");
1150    ///
1151    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1152    /// hx["nominal"].Draw();
1153    /// hx["xy:0"].Draw("SAME");
1154    /// hx["xy:1"].Draw("SAME");
1155    /// ~~~
1156    ///
   /// \note See also the other Vary() overloads for more information.
1158    RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1159                                   std::size_t nVariations, std::string_view variationName)
1160    {
1161       std::vector<std::string> variationTags;
1162       variationTags.reserve(nVariations);
1163       for (std::size_t i = 0u; i < nVariations; ++i)
1164          variationTags.emplace_back(std::to_string(i));
1165 
1166       return Vary(colNames, expression, std::move(variationTags), variationName);
1167    }
1168 
1169    /// \brief Register systematic variations for multiple existing columns using auto-generated variation tags.
1170    /// \param[in] colNames set of names of the columns for which varied values are provided.
   /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
   ///            values for the specified columns.
   /// \param[in] nVariations number of variations returned by the expression. The corresponding tags will be `"0"`,
   /// `"1"`, etc.
   /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
   ///            Unlike in the single-column overloads, it has no default value and must always be provided.
1177    ///
   /// \note This overload exists to avoid an ambiguity that arises in C++20, where a braced list of column names
   /// could be used to construct either a string or a vector<string>.
   ///
   /// \note See also the other Vary() overloads for more information.
1182    RInterface<Proxied, DS_t> Vary(std::initializer_list<std::string> colNames, std::string_view expression,
1183                                   std::size_t nVariations, std::string_view variationName)
1184    {
1185       return Vary(std::vector<std::string>(colNames), expression, nVariations, variationName);
1186    }
1187 
1188    /// \brief Register systematic variations for multiple existing columns using custom variation tags.
1189    /// \param[in] colNames set of names of the columns for which varied values are provided.
1190    /// \param[in] expression a string containing valid C++ code that evaluates to an RVec or RVecs containing the varied
1191    ///            values for the specified columns.
1192    /// \param[in] variationTags names for each of the varied values, e.g. `"up"` and `"down"`.
1193    /// \param[in] variationName a generic name for this set of varied values, e.g. `"ptvariation"`.
1194    ///
1195    /// This overload adds the possibility for the expression used to evaluate the varied values to be just-in-time
1196    /// compiled. The example below shows how Vary() is used while dealing with multiple columns. The tags are defined as
1197    /// `{"down", "up"}`.
1198    /// ~~~{.cpp}
1199    /// auto nominal_hx =
1200    ///     df.Vary({"x", "y"}, "ROOT::RVec<ROOT::RVecD>{{x*0.9, x*1.1}, {y*0.9, y*1.1}}", {"down", "up"}, "xy")
1201    ///       .Histo1D("x", "y");
1202    ///
1203    /// auto hx = ROOT::RDF::Experimental::VariationsFor(nominal_hx);
1204    /// hx["nominal"].Draw();
1205    /// hx["xy:down"].Draw("SAME");
1206    /// hx["xy:up"].Draw("SAME");
1207    /// ~~~
1208    ///
   /// \note See also the other Vary() overloads for more information.
1210    RInterface<Proxied, DS_t> Vary(const std::vector<std::string> &colNames, std::string_view expression,
1211                                   const std::vector<std::string> &variationTags, std::string_view variationName)
1212    {
1213       return JittedVaryImpl(colNames, expression, variationTags, variationName, /*isSingleColumn=*/false);
1214    }
1215 
1216    ////////////////////////////////////////////////////////////////////////////
1217    /// \brief Allow to refer to a column with a different name.
1218    /// \param[in] alias name of the column alias
   /// \param[in] columnName name of the column to be aliased
1220    /// \return the first node of the computation graph for which the alias is available.
1221    ///
1222    /// Aliasing an alias is supported.
1223    ///
1224    /// ### Example usage:
1225    /// ~~~{.cpp}
1226    /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
1227    /// ~~~
1228    RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
1229    {
1230       // The symmetry with Define is clear. We want to:
1231       // - Create globally the alias and return this very node, unchanged
1232       // - Make aliases accessible based on chains and not globally
1233 
1234       // Helper to find out if a name is a column
1235       auto &dsColumnNames = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
1236 
1237       constexpr auto where = "Alias";
1238       RDFInternal::CheckValidCppVarName(alias, where);
1239       // If the alias name is a column name, there is a problem
1240       RDFInternal::CheckForRedefinition(where, alias, fColRegister, dsColumnNames);
1241 
1242       const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
1243 
1244       RDFInternal::RColumnRegister newCols(fColRegister);
1245       newCols.AddAlias(alias, validColumnName);
1246 
1247       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
1248 
1249       return newInterface;
1250    }
1251 
1252    ////////////////////////////////////////////////////////////////////////////
1253    /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
1254    /// \deprecated Use other overloads that do not require template arguments.
1255    /// \tparam ColumnTypes variadic list of branch/column types.
1256    /// \param[in] treename The name of the output TTree or RNTuple.
1257    /// \param[in] filename The name of the output TFile.
1258    /// \param[in] columnList The list of names of the columns/branches/fields to be written.
1259    /// \param[in] options RSnapshotOptions struct with extra options to pass to the output TFile and TTree/RNTuple.
1260    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1261    ///
1262    template <typename... ColumnTypes>
1263    R__DEPRECATED(
1264       6, 40, "Snapshot does not need template arguments anymore, you can safely remove them from this function call.")
1265    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1266                                                  const ColumnNames_t &columnList,
1267                                                  const RSnapshotOptions &options = RSnapshotOptions())
1268    {
1269       return Snapshot(treename, filename, columnList, options);
1270    }
1271 
1272    ////////////////////////////////////////////////////////////////////////////
1273    /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
1274    /// \param[in] treename The name of the output TTree or RNTuple.
1275    /// \param[in] filename The name of the output TFile.
1276    /// \param[in] columnList The list of names of the columns/branches/fields to be written.
1277    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple.
1278    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1279    ///
1280    /// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
1281    /// The types of the columns are automatically inferred and do not need to be specified.
1282    ///
1283    /// Support for writing of nested branches/fields is limited (although RDataFrame is able to read them) and dot ('.')
1284    /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
1285    /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
1286    /// written out and it appears before the array in the columnList.
1287    ///
1288    /// By default, in case of TTree, TChain or RNTuple inputs, Snapshot will try to write out all top-level branches.
1289    /// For other types of inputs, all columns returned by GetColumnNames() will be written out. Systematic variations of
1290    /// columns will be included if the corresponding flag is set in RSnapshotOptions. See \ref snapshot-with-variations
1291    /// "Snapshot with Variations" for more details. If friend trees or chains are present, by default all friend
1292    /// top-level branches that have names that do not collide with names of branches in the main TTree/TChain will be
1293    /// written out. Since v6.24, Snapshot will also write out friend branches with the same names of branches in the
1294    /// main TTree/TChain with names of the form
1295    /// `<friendname>_<branchname>` in order to differentiate them from the branches in the main tree/chain.
1296    ///
1297    /// ### Writing to a sub-directory
1298    ///
1299    /// Snapshot supports writing the TTree or RNTuple in a sub-directory inside the TFile. It is sufficient to specify
1300    /// the directory path as part of the TTree or RNTuple name, e.g. `df.Snapshot("subdir/t", "f.root")` writes TTree
1301    /// `t` in the sub-directory `subdir` of file `f.root` (creating file and sub-directory as needed).
1302    ///
1303    /// \attention In multi-thread runs (i.e. when EnableImplicitMT() has been called) threads will loop over clusters of
1304    /// entries in an undefined order, so Snapshot will produce outputs in which (clusters of) entries will be shuffled
1305    /// with respect to the input TTree. Using such "shuffled" TTrees as friends of the original trees would result in
1306    /// wrong associations between entries in the main TTree and entries in the "shuffled" friend. Since v6.22, ROOT will
1307    /// error out if such a "shuffled" TTree is used in a friendship.
1308    ///
1309    /// \note In case no events are written out (e.g. because no event passes all filters), Snapshot will still write the
1310    /// requested output TTree or RNTuple to the file, with all the branches requested to preserve the dataset schema.
1311    ///
1312    /// \note Snapshot will refuse to process columns with names of the form `#columnname`. These are special columns
1313    /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1314    /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
1315    /// Alias(): `df.Alias("nbar", "#bar").Snapshot(..., {"nbar"})`.
1316    ///
1317    /// ### Example invocations:
1318    ///
1319    /// ~~~{.cpp}
1320    /// // No need to specify column types, they are automatically deduced thanks
1321    /// // to information coming from the data source
1322    /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
1323    /// ~~~
1324    ///
1325    /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
1326    /// `RSnapshotOptions`:
1327    /// ~~~{.cpp}
1328    /// RSnapshotOptions opts;
1329    /// opts.fLazy = true;
1330    /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1331    /// ~~~
1332    ///
1333    /// To snapshot to the RNTuple data format, the `fOutputFormat` option in `RSnapshotOptions` needs to be set
1334    /// accordingly:
1335    /// ~~~{.cpp}
1336    /// RSnapshotOptions opts;
1337    /// opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
1338    /// df.Snapshot("outputNTuple", "outputFile.root", {"x"}, opts);
1339    /// ~~~
1340    ///
1341    /// Snapshot systematic variations resulting from a Vary() call (see details \ref snapshot-with-variations "here"):
1342    /// ~~~{.cpp}
1343    /// RSnapshotOptions opts;
1344    /// opts.fIncludeVariations = true;
1345    /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
1346    /// ~~~
1347    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1348                                                  const ColumnNames_t &columnList,
1349                                                  const RSnapshotOptions &options = RSnapshotOptions())
1350    {
1351       // TODO: Remove before releasing 6.40.00
1352 #if ROOT_VERSION_CODE >= ROOT_VERSION(6, 40, 0)
1353       static_assert(false && "Remove information about change of Snapshot defaut compression settings.");
1354 #endif
1355       [[maybe_unused]] static bool once = []() {
1356          if (const char *suppress = std::getenv("ROOT_RDF_SNAPSHOT_INFO"))
1357             if (std::strcmp(suppress, "0") == 0)
1358                return true;
1359          if (const char *suppress = gEnv->GetValue("ROOT.RDF.Snapshot.Info", "1"))
1360             if (std::strcmp(suppress, "0") == 0)
1361                return true;
1362          RLogScopedVerbosity showInfo{ROOT::Detail::RDF::RDFLogChannel(), ROOT::ELogLevel::kInfo};
1363          R__LOG_INFO(ROOT::Detail::RDF::RDFLogChannel())
1364             << "\n\tIn ROOT 6.38, the default compression settings of Snapshot have been changed from 101 (ZLIB with "
1365                "compression level 1, the TTree default) to 505 (ZSTD with compression level 5). This change may result "
1366                "in smaller Snapshot output dataset size by default. In order to suppress this message, set "
1367                "'ROOT_RDF_SNAPSHOT_INFO=0' in your environment or set 'ROOT.RDF.Snapshot.Info: 0' in your .rootrc "
1368                "file.";
1369          return true;
1370       }();
1371       // like columnList but with `#var` columns removed
1372       auto colListNoPoundSizes = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
1373       // like columnListWithoutSizeColumns but with aliases resolved
1374       auto colListNoAliases = GetValidatedColumnNames(colListNoPoundSizes.size(), colListNoPoundSizes);
1375       RDFInternal::CheckForDuplicateSnapshotColumns(colListNoAliases);
1376       // like validCols but with missing size branches required by array branches added in the right positions
1377       const auto pairOfColumnLists =
1378          RDFInternal::AddSizeBranches(GetDataSource(), std::move(colListNoAliases), std::move(colListNoPoundSizes));
1379       const auto &colListNoAliasesWithSizeBranches = pairOfColumnLists.first;
1380       const auto &colListWithAliasesAndSizeBranches = pairOfColumnLists.second;
1381 
1382       const auto fullTreeName = treename;
1383       const auto parsedTreePath = RDFInternal::ParseTreePath(fullTreeName);
1384       treename = parsedTreePath.fTreeName;
1385       const auto &dirname = parsedTreePath.fDirName;
1386 
1387       ::TDirectory::TContext ctxt;
1388 
1389       RResultPtr<RInterface<RLoopManager>> resPtr;
1390 
1391       auto retrieveTypeID = [](const std::string &colName, const std::string &colTypeName,
1392                                bool isRNTuple = false) -> const std::type_info * {
1393          try {
1394             return &ROOT::Internal::RDF::TypeName2TypeID(colTypeName);
1395          } catch (const std::runtime_error &err) {
1396             if (isRNTuple)
1397                return &typeid(ROOT::Internal::RDF::UseNativeDataType);
1398 
1399             if (std::string(err.what()).find("Cannot extract type_info of type") != std::string::npos) {
1400                // We could not find RTTI for this column, thus we cannot write it out at the moment.
1401                std::string trueTypeName{colTypeName};
1402                if (colTypeName.rfind("CLING_UNKNOWN_TYPE", 0) == 0)
1403                   trueTypeName = colTypeName.substr(19);
1404                std::string msg{"No runtime type information is available for column \"" + colName +
1405                                "\" with type name \"" + trueTypeName +
1406                                "\". Thus, it cannot be written to disk with Snapshot. Make sure to generate and load "
1407                                "ROOT dictionaries for the type of this column."};
1408 
1409                throw std::runtime_error(msg);
1410             } else {
1411                throw;
1412             }
1413          }
1414       };
1415 
1416       RDFInternal::CheckSnapshotOptionsFormatCompatibility(options);
1417 
1418       if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) {
1419          // The data source of the RNTuple resulting from the Snapshot action does not exist yet here, so we create one
1420          // without a data source for now, and set it once the actual data source can be created (i.e., after
1421          // writing the RNTuple).
1422          auto newRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(colListNoPoundSizes));
1423 
1424          auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1425             std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches,
1426             options, newRDF->GetLoopManager(), GetLoopManager(), true /* fToNTuple */, /*fIncludeVariations=*/false});
1427 
1428          auto &&nColumns = colListNoAliasesWithSizeBranches.size();
1429          const auto validColumnNames = GetValidatedColumnNames(nColumns, colListNoAliasesWithSizeBranches);
1430 
1431          const auto nSlots = fLoopManager->GetNSlots();
1432          std::vector<const std::type_info *> colTypeIDs;
1433          colTypeIDs.reserve(nColumns);
1434          for (decltype(nColumns) i{}; i < nColumns; i++) {
1435             const auto &colName = validColumnNames[i];
1436             const auto colTypeName = ROOT::Internal::RDF::ColumnName2ColumnTypeName(
1437                colName, /*tree*/ nullptr, GetDataSource(), fColRegister.GetDefine(colName), options.fVector2RVec);
1438             const std::type_info *colTypeID = retrieveTypeID(colName, colTypeName, /*isRNTuple*/ true);
1439             colTypeIDs.push_back(colTypeID);
1440          }
1441          // Crucial e.g. if the column names do not correspond to already-available column readers created by the data
1442          // source
1443          CheckAndFillDSColumns(validColumnNames, colTypeIDs);
1444 
1445          auto action =
1446             RDFInternal::BuildAction(validColumnNames, snapHelperArgs, nSlots, fProxiedPtr, fColRegister, colTypeIDs);
1447          resPtr = MakeResultPtr(newRDF, *GetLoopManager(), std::move(action));
1448       } else {
1449          if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS" &&
1450              options.fOutputFormat == ESnapshotOutputFormat::kDefault) {
1451             Warning("Snapshot",
1452                     "The default Snapshot output data format is TTree, but the input data format is RNTuple. If you "
1453                     "want to Snapshot to RNTuple or suppress this warning, set the appropriate fOutputFormat option in "
1454                     "RSnapshotOptions. Note that this current default behaviour might change in the future.");
1455          }
1456 
1457          // We create an RLoopManager without a data source. This needs to be initialised when the output TTree dataset
1458          // has actually been created and written to TFile, i.e. at the end of the Snapshot execution.
1459          auto newRDF = std::make_shared<RInterface<RLoopManager>>(
1460             std::make_shared<RLoopManager>(colListNoAliasesWithSizeBranches));
1461 
1462          auto snapHelperArgs = std::make_shared<RDFInternal::SnapshotHelperArgs>(RDFInternal::SnapshotHelperArgs{
1463             std::string(filename), std::string(dirname), std::string(treename), colListWithAliasesAndSizeBranches,
1464             options, newRDF->GetLoopManager(), GetLoopManager(), false /* fToRNTuple */, options.fIncludeVariations});
1465 
1466          auto &&nColumns = colListNoAliasesWithSizeBranches.size();
1467          const auto validColumnNames = GetValidatedColumnNames(nColumns, colListNoAliasesWithSizeBranches);
1468 
1469          const auto nSlots = fLoopManager->GetNSlots();
1470          std::vector<const std::type_info *> colTypeIDs;
1471          colTypeIDs.reserve(nColumns);
1472          for (decltype(nColumns) i{}; i < nColumns; i++) {
1473             const auto &colName = validColumnNames[i];
1474             const auto colTypeName = ROOT::Internal::RDF::ColumnName2ColumnTypeName(
1475                colName, /*tree*/ nullptr, GetDataSource(), fColRegister.GetDefine(colName), options.fVector2RVec);
1476             const std::type_info *colTypeID = retrieveTypeID(colName, colTypeName);
1477             colTypeIDs.push_back(colTypeID);
1478          }
1479          // Crucial e.g. if the column names do not correspond to already-available column readers created by the data
1480          // source
1481          CheckAndFillDSColumns(validColumnNames, colTypeIDs);
1482 
1483          auto action =
1484             RDFInternal::BuildAction(validColumnNames, snapHelperArgs, nSlots, fProxiedPtr, fColRegister, colTypeIDs);
1485          resPtr = MakeResultPtr(newRDF, *GetLoopManager(), std::move(action));
1486       }
1487 
1488       if (!options.fLazy)
1489          *resPtr;
1490       return resPtr;
1491    }
1492 
1493    // clang-format off
1494    ////////////////////////////////////////////////////////////////////////////
1495    /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
1496    /// \param[in] treename The name of the output TTree or RNTuple.
1497    /// \param[in] filename The name of the output TFile.
   /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. A leading '^' and a trailing '$' are implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1499    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple
1500    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1501    ///
1502    /// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
1503    /// The types of the columns are automatically inferred and do not need to be specified.
1504    ///
1505    /// See Snapshot(std::string_view, std::string_view, const ColumnNames_t&, const RSnapshotOptions &) for a more complete description and example usages.
1506    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1507                                                  std::string_view columnNameRegexp = "",
1508                                                  const RSnapshotOptions &options = RSnapshotOptions())
1509    {
1510       const auto definedColumns = fColRegister.GenerateColumnNames();
1511 
1512       const auto dsColumns = GetDataSource() ? ROOT::Internal::RDF::GetTopLevelFieldNames(*GetDataSource()) : ColumnNames_t{};
1513       // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1514       ColumnNames_t dsColumnsWithoutSizeColumns;
1515       std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1516                    [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1517       ColumnNames_t columnNames;
1518       columnNames.reserve(definedColumns.size() + dsColumnsWithoutSizeColumns.size());
1519       columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1520       columnNames.insert(columnNames.end(), dsColumnsWithoutSizeColumns.begin(), dsColumnsWithoutSizeColumns.end());
1521 
1522       // The only way we can get duplicate entries is if a column coming from a tree or data-source is Redefine'd.
1523       // RemoveDuplicates should preserve ordering of the columns: it might be meaningful.
1524       RDFInternal::RemoveDuplicates(columnNames);
1525 
1526       auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Snapshot");
1527 
1528       if (RDFInternal::GetDataSourceLabel(*this) == "RNTupleDS") {
1529          RDFInternal::RemoveRNTupleSubFields(selectedColumns);
1530       }
1531 
1532       return Snapshot(treename, filename, selectedColumns, options);
1533    }
1534    // clang-format on
1535 
1536    // clang-format off
1537    ////////////////////////////////////////////////////////////////////////////
1538    /// \brief Save selected columns to disk, in a new TTree or RNTuple `treename` in file `filename`.
1539    /// \param[in] treename The name of the output TTree or RNTuple.
1540    /// \param[in] filename The name of the output TFile.
1541    /// \param[in] columnList The list of names of the columns/branches to be written.
1542    /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree/RNTuple.
1543    /// \return a `RDataFrame` that wraps the snapshotted dataset.
1544    ///
1545    /// This function returns a `RDataFrame` built with the output TTree or RNTuple as a source.
1546    /// The types of the columns are automatically inferred and do not need to be specified.
1547    ///
1548    /// See Snapshot(std::string_view, std::string_view, const ColumnNames_t&, const RSnapshotOptions &) for a more complete description and example usages.
1549    RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
1550                                                  std::initializer_list<std::string> columnList,
1551                                                  const RSnapshotOptions &options = RSnapshotOptions())
1552    {
1553       ColumnNames_t selectedColumns(columnList);
1554       return Snapshot(treename, filename, selectedColumns, options);
1555    }
1556    // clang-format on
1557 
1558    ////////////////////////////////////////////////////////////////////////////
1559    /// \brief Save selected columns in memory.
1560    /// \tparam ColumnTypes variadic list of branch/column types.
1561    /// \param[in] columnList columns to be cached in memory.
1562    /// \return a `RDataFrame` that wraps the cached dataset.
1563    ///
1564    /// This action returns a new `RDataFrame` object, completely detached from
1565    /// the originating `RDataFrame`. The new dataframe only contains the cached
1566    /// columns and stores their content in memory for fast, zero-copy subsequent access.
1567    ///
1568    /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
1569    /// fits in memory and that will be accessed many times.
1570    ///
1571    /// \note Cache will refuse to process columns with names of the form `#columnname`. These are special columns
1572    /// made available by some data sources (e.g. RNTupleDS) that represent the size of column `columnname`, and are
1573    /// not meant to be written out with that name (which is not a valid C++ variable name). Instead, go through an
   /// Alias(): `df.Alias("nbar", "#bar").Cache<std::size_t>({"nbar"})`.
1575    ///
1576    /// ### Example usage:
1577    ///
1578    /// **Types and columns specified:**
1579    /// ~~~{.cpp}
1580    /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
1581    /// ~~~
1582    ///
1583    /// **Types inferred and columns specified (this invocation relies on jitting):**
1584    /// ~~~{.cpp}
1585    /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
1586    /// ~~~
1587    ///
1588    /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
1589    /// ~~~{.cpp}
1590    /// auto cache_all_cols_df = df.Cache(myRegexp);
1591    /// ~~~
1592    template <typename... ColumnTypes>
1593    RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
1594    {
1595       auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
1596       return CacheImpl<ColumnTypes...>(columnList, staticSeq);
1597    }
1598 
1599    ////////////////////////////////////////////////////////////////////////////
1600    /// \brief Save selected columns in memory.
1601    /// \param[in] columnList columns to be cached in memory
1602    /// \return a `RDataFrame` that wraps the cached dataset.
1603    ///
1604    /// See the previous overloads for more information.
1605    RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
1606    {
1607       // Early return: if the list of columns is empty, just return an empty RDF
1608       // If we proceed, the jitted call will not compile!
1609       if (columnList.empty()) {
1610          auto nEntries = *this->Count();
1611          RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
1612          return emptyRDF;
1613       }
1614 
1615       std::stringstream cacheCall;
1616       auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
1617       RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
1618                                                                                       fColRegister);
1619       // build a string equivalent to
1620       // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
1621       RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
1622       cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
1623                 << RDFInternal::PrettyPrintAddr(&resRDF)
1624                 << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
1625                 << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
1626 
1627       const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Cache");
1628 
1629       const auto validColumnNames =
1630          GetValidatedColumnNames(columnListWithoutSizeColumns.size(), columnListWithoutSizeColumns);
1631       const auto colTypes =
1632          GetValidatedArgTypes(validColumnNames, fColRegister, nullptr, GetDataSource(), "Cache", /*vector2RVec=*/false);
1633       for (const auto &colType : colTypes)
1634          cacheCall << colType << ", ";
1635       if (!columnListWithoutSizeColumns.empty())
1636          cacheCall.seekp(-2, cacheCall.cur);                         // remove the last ",
1637       cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
1638                 << RDFInternal::PrettyPrintAddr(&columnListWithoutSizeColumns) << "));";
1639 
1640       // book the code to jit with the RLoopManager and trigger the event loop
1641       fLoopManager->ToJitExec(cacheCall.str());
1642       fLoopManager->Jit();
1643 
1644       return resRDF;
1645    }
1646 
1647    ////////////////////////////////////////////////////////////////////////////
1648    /// \brief Save selected columns in memory.
1649    /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
1650    /// \return a `RDataFrame` that wraps the cached dataset.
1651    ///
1652    /// The existing columns are matched against the regular expression. If the string provided
1653    /// is empty, all columns are selected. See the previous overloads for more information.
1654    RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "")
1655    {
1656       const auto definedColumns = fColRegister.GenerateColumnNames();
1657       const auto dsColumns = GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{};
1658       // Ignore R_rdf_sizeof_* columns coming from datasources: we don't want to Snapshot those
1659       ColumnNames_t dsColumnsWithoutSizeColumns;
1660       std::copy_if(dsColumns.begin(), dsColumns.end(), std::back_inserter(dsColumnsWithoutSizeColumns),
1661                    [](const std::string &name) { return name.size() < 13 || name.substr(0, 13) != "R_rdf_sizeof_"; });
1662       ColumnNames_t columnNames;
1663       columnNames.reserve(definedColumns.size() + dsColumns.size());
1664       columnNames.insert(columnNames.end(), definedColumns.begin(), definedColumns.end());
1665       columnNames.insert(columnNames.end(), dsColumns.begin(), dsColumns.end());
1666       const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Cache");
1667       return Cache(selectedColumns);
1668    }
1669 
1670    ////////////////////////////////////////////////////////////////////////////
1671    /// \brief Save selected columns in memory.
1672    /// \param[in] columnList columns to be cached in memory.
1673    /// \return a `RDataFrame` that wraps the cached dataset.
1674    ///
1675    /// See the previous overloads for more information.
1676    RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
1677    {
1678       ColumnNames_t selectedColumns(columnList);
1679       return Cache(selectedColumns);
1680    }
1681 
1682    // clang-format off
1683    ////////////////////////////////////////////////////////////////////////////
1684    /// \brief Creates a node that filters entries based on range: [begin, end).
1685    /// \param[in] begin Initial entry number considered for this range.
1686    /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1687    /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
1688    /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
1689    ///
1690    /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
1691    /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
1692    ///
1693    /// ### Example usage:
1694    /// ~~~{.cpp}
1695    /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
1696    /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
1697    /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
1698    /// ~~~
1699    // clang-format on
1700    RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
1701    {
1702       // check invariants
1703       if (stride == 0 || (end != 0 && end < begin))
1704          throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
1705       CheckIMTDisabled("Range");
1706 
1707       using Range_t = RDFDetail::RRange<Proxied>;
1708       auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
1709       RInterface<RDFDetail::RRange<Proxied>, DS_t> newInterface(std::move(rangePtr), *fLoopManager, fColRegister);
1710       return newInterface;
1711    }
1712 
1713    // clang-format off
1714    ////////////////////////////////////////////////////////////////////////////
1715    /// \brief Creates a node that filters entries based on range.
1716    /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
1717    /// \return a node of the computation graph for which the range is defined.
1718    ///
1719    /// See the other Range overload for a detailed description.
1720    // clang-format on
1721    RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
1722 
1723    // clang-format off
1724    ////////////////////////////////////////////////////////////////////////////
1725    /// \brief Execute a user-defined function on each entry (*instant action*).
1726    /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1727    /// \param[in] columns Names of the columns/branches in input to the user function.
1728    ///
1729    /// The callable `f` is invoked once per entry. This is an *instant action*:
1730    /// upon invocation, an event loop as well as execution of all scheduled actions
1731    /// is triggered.
1732    /// Users are responsible for the thread-safety of this callable when executing
1733    /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
1734    ///
1735    /// ### Example usage:
1736    /// ~~~{.cpp}
1737    /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
1738    /// ~~~
1739    // clang-format on
1740    template <typename F>
1741    void Foreach(F f, const ColumnNames_t &columns = {})
1742    {
1743       using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
1744       using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
1745       ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
1746    }
1747 
1748    // clang-format off
1749    ////////////////////////////////////////////////////////////////////////////
1750    /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*).
1751    /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
1752    /// \param[in] columns Names of the columns/branches in input to the user function.
1753    ///
1754    /// Same as `Foreach`, but the user-defined function takes an extra
1755    /// `unsigned int` as its first parameter, the *processing slot index*.
1756    /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
1757    /// for each thread of execution.
1758    /// This is meant as a helper in writing thread-safe `Foreach`
1759    /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
1760    /// The user-defined processing callable is able to follow different
1761    /// *streams of processing* indexed by the first parameter.
1762    /// `ForeachSlot` works just as well with single-thread execution: in that
1763    /// case `slot` will always be `0`.
1764    ///
1765    /// ### Example usage:
1766    /// ~~~{.cpp}
1767    /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
1768    /// ~~~
1769    // clang-format on
1770    template <typename F>
1771    void ForeachSlot(F f, const ColumnNames_t &columns = {})
1772    {
1773       using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
1774       constexpr auto nColumns = ColTypes_t::list_size;
1775 
1776       const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
1777       CheckAndFillDSColumns(validColumnNames, ColTypes_t());
1778 
1779       using Helper_t = RDFInternal::ForeachSlotHelper<F>;
1780       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1781 
1782       auto action = std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, fColRegister);
1783 
1784       fLoopManager->Run();
1785    }
1786 
1787    // clang-format off
1788    ////////////////////////////////////////////////////////////////////////////
1789    /// \brief Execute a user-defined reduce operation on the values of a column.
1790    /// \tparam F The type of the reduce callable. Automatically deduced.
1791    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1792    /// \param[in] f A callable with signature `T(T,T)`
1793    /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1794    /// \return the reduced quantity wrapped in a ROOT::RDF:RResultPtr.
1795    ///
1796    /// A reduction takes two values of a column and merges them into one (e.g.
1797    /// by summing them, taking the maximum, etc). This action performs the
1798    /// specified reduction operation on all processed column values, returning
1799    /// a single value of the same type. The callable f must satisfy the general
1800    /// requirements of a *processing function* besides having signature `T(T,T)`
1801    /// where `T` is the type of column columnName.
1802    ///
1803    /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
1804    /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
1805    /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
1806    /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
1807    /// overload.
1808    ///
1809    /// ### Example usage:
1810    /// ~~~{.cpp}
1811    /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
1812    /// ~~~
1813    ///
1814    /// This action is *lazy*: upon invocation of this method the calculation is
1815    /// booked but not executed. Also see RResultPtr.
1816    // clang-format on
1817    template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1818    RResultPtr<T> Reduce(F f, std::string_view columnName = "")
1819    {
1820       static_assert(
1821          std::is_default_constructible<T>::value,
1822          "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
1823       return Reduce(std::move(f), columnName, T());
1824    }
1825 
1826    ////////////////////////////////////////////////////////////////////////////
1827    /// \brief Execute a user-defined reduce operation on the values of a column.
1828    /// \tparam F The type of the reduce callable. Automatically deduced.
1829    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1830    /// \param[in] f A callable with signature `T(T,T)`
1831    /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
1832    /// \param[in] redIdentity The reduced object of each thread is initialized to this value.
1833    /// \return the reduced quantity wrapped in a RResultPtr.
1834    ///
1835    /// ### Example usage:
1836    /// ~~~{.cpp}
1837    /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
1838    /// ~~~
1839    /// See the description of the first Reduce overload for more information.
1840    template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
1841    RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
1842    {
1843       return Aggregate(f, f, columnName, redIdentity);
1844    }
1845 
1846    ////////////////////////////////////////////////////////////////////////////
1847    /// \brief Return the number of entries processed (*lazy action*).
1848    /// \return the number of entries wrapped in a RResultPtr.
1849    ///
1850    /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
1851    /// This action is *lazy*: upon invocation of this method the calculation is
1852    /// booked but not executed. Also see RResultPtr.
1853    ///
1854    /// ### Example usage:
1855    /// ~~~{.cpp}
1856    /// auto nEntriesAfterCuts = myFilteredDf.Count();
1857    /// ~~~
1858    ///
1859    RResultPtr<ULong64_t> Count()
1860    {
1861       const auto nSlots = fLoopManager->GetNSlots();
1862       auto cSPtr = std::make_shared<ULong64_t>(0);
1863       using Helper_t = RDFInternal::CountHelper;
1864       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1865       auto action = std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr,
1866                                                RDFInternal::RColumnRegister(fColRegister));
1867       return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
1868    }
1869 
1870    ////////////////////////////////////////////////////////////////////////////
1871    /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default).
1872    /// \tparam T The type of the column.
1873    /// \tparam COLL The type of collection used to store the values.
1874    /// \param[in] column The name of the column to collect the values of.
1875    /// \return the content of the selected column wrapped in a RResultPtr.
1876    ///
1877    /// The collection type to be specified for C-style array columns is `RVec<T>`:
1878    /// in this case the returned collection is a `std::vector<RVec<T>>`.
1879    /// ### Example usage:
1880    /// ~~~{.cpp}
1881    /// // In this case intCol is a std::vector<int>
1882    /// auto intCol = rdf.Take<int>("integerColumn");
1883    /// // Same content as above but in this case taken as a RVec<int>
1884    /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
1885    /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
1886    /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
1887    /// ~~~
1888    /// This action is *lazy*: upon invocation of this method the calculation is
1889    /// booked but not executed. Also see RResultPtr.
1890    template <typename T, typename COLL = std::vector<T>>
1891    RResultPtr<COLL> Take(std::string_view column = "")
1892    {
1893       const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
1894 
1895       const auto validColumnNames = GetValidatedColumnNames(1, columns);
1896       CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
1897 
1898       using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
1899       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1900       auto valuesPtr = std::make_shared<COLL>();
1901       const auto nSlots = fLoopManager->GetNSlots();
1902 
1903       auto action =
1904          std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, fColRegister);
1905       return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
1906    }
1907 
1908    ////////////////////////////////////////////////////////////////////////////
1909    /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1910    /// \tparam V The type of the column used to fill the histogram.
1911    /// \param[in] model The returned histogram will be constructed using this as a model.
1912    /// \param[in] vName The name of the column that will fill the histogram.
1913    /// \return the monodimensional histogram wrapped in a RResultPtr.
1914    ///
1915    /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
1916    /// is filled with each one of the elements of the container. In case multiple columns of container type
1917    /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1918    /// possibly different lengths between events).
1919    /// This action is *lazy*: upon invocation of this method the calculation is
1920    /// booked but not executed. Also see RResultPtr.
1921    ///
1922    /// ### Example usage:
1923    /// ~~~{.cpp}
1924    /// // Deduce column type (this invocation needs jitting internally)
1925    /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1926    /// // Explicit column type
1927    /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
1928    /// ~~~
1929    ///
1930    /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
1931    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
1932    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
1933    template <typename V = RDFDetail::RInferredType>
1934    RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
1935    {
1936       const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
1937 
1938       const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
1939 
1940       std::shared_ptr<::TH1D> h(nullptr);
1941       {
1942          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1943          h = model.GetHistogram();
1944       }
1945 
1946       if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
1947          h->SetCanExtend(::TH1::kAllAxes);
1948       return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h, h, fProxiedPtr);
1949    }
1950 
1951    ////////////////////////////////////////////////////////////////////////////
1952    /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*).
1953    /// \tparam V The type of the column used to fill the histogram.
1954    /// \param[in] vName The name of the column that will fill the histogram.
1955    /// \return the monodimensional histogram wrapped in a RResultPtr.
1956    ///
1957    /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1958    /// The "name" and "title" strings are built starting from the input column name.
1959    /// See the description of the first Histo1D() overload for more details.
1960    ///
1961    /// ### Example usage:
1962    /// ~~~{.cpp}
1963    /// // Deduce column type (this invocation needs jitting internally)
1964    /// auto myHist1 = myDf.Histo1D("myColumn");
1965    /// // Explicit column type
1966    /// auto myHist2 = myDf.Histo1D<float>("myColumn");
1967    /// ~~~
1968    template <typename V = RDFDetail::RInferredType>
1969    RResultPtr<::TH1D> Histo1D(std::string_view vName)
1970    {
1971       const auto h_name = std::string(vName);
1972       const auto h_title = h_name + ";" + h_name + ";count";
1973       return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
1974    }
1975 
1976    ////////////////////////////////////////////////////////////////////////////
1977    /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
1978    /// \tparam V The type of the column used to fill the histogram.
1979    /// \tparam W The type of the column used as weights.
1980    /// \param[in] model The returned histogram will be constructed using this as a model.
1981    /// \param[in] vName The name of the column that will fill the histogram.
1982    /// \param[in] wName The name of the column that will provide the weights.
1983    /// \return the monodimensional histogram wrapped in a RResultPtr.
1984    ///
1985    /// See the description of the first Histo1D() overload for more details.
1986    ///
1987    /// ### Example usage:
1988    /// ~~~{.cpp}
1989    /// // Deduce column type (this invocation needs jitting internally)
1990    /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1991    /// // Explicit column type
1992    /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1993    /// ~~~
1994    template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1995    RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
1996    {
1997       const std::vector<std::string_view> columnViews = {vName, wName};
1998       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1999                                   ? ColumnNames_t()
2000                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2001       std::shared_ptr<::TH1D> h(nullptr);
2002       {
2003          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2004          h = model.GetHistogram();
2005       }
2006 
2007       if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
2008          h->SetCanExtend(::TH1::kAllAxes);
2009       return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h, h, fProxiedPtr);
2010    }
2011 
2012    ////////////////////////////////////////////////////////////////////////////
2013    /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
2014    /// \tparam V The type of the column used to fill the histogram.
2015    /// \tparam W The type of the column used as weights.
2016    /// \param[in] vName The name of the column that will fill the histogram.
2017    /// \param[in] wName The name of the column that will provide the weights.
2018    /// \return the monodimensional histogram wrapped in a RResultPtr.
2019    ///
2020    /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
2021    /// The "name" and "title" strings are built starting from the input column names.
2022    /// See the description of the first Histo1D() overload for more details.
2023    ///
2024    /// ### Example usage:
2025    /// ~~~{.cpp}
2026    /// // Deduce column types (this invocation needs jitting internally)
2027    /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
2028    /// // Explicit column types
2029    /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
2030    /// ~~~
2031    template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2032    RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
2033    {
2034       // We build name and title based on the value and weight column names
2035       std::string str_vName{vName};
2036       std::string str_wName{wName};
2037       const auto h_name = str_vName + "_weighted_" + str_wName;
2038       const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
2039       return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
2040    }
2041 
2042    ////////////////////////////////////////////////////////////////////////////
2043    /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*).
2044    /// \tparam V The type of the column used to fill the histogram.
2045    /// \tparam W The type of the column used as weights.
2046    /// \param[in] model The returned histogram will be constructed using this as a model.
2047    /// \return the monodimensional histogram wrapped in a RResultPtr.
2048    ///
2049    /// This overload will use the first two default columns as column names.
2050    /// See the description of the first Histo1D() overload for more details.
2051    template <typename V, typename W>
2052    RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
2053    {
2054       return Histo1D<V, W>(model, "", "");
2055    }
2056 
2057    ////////////////////////////////////////////////////////////////////////////
2058    /// \brief Fill and return a two-dimensional histogram (*lazy action*).
2059    /// \tparam V1 The type of the column used to fill the x axis of the histogram.
2060    /// \tparam V2 The type of the column used to fill the y axis of the histogram.
2061    /// \param[in] model The returned histogram will be constructed using this as a model.
2062    /// \param[in] v1Name The name of the column that will fill the x axis.
2063    /// \param[in] v2Name The name of the column that will fill the y axis.
2064    /// \return the bidimensional histogram wrapped in a RResultPtr.
2065    ///
2066    /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
2067    /// is filled with each one of the elements of the container. In case multiple columns of container type
2068    /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
2069    /// possibly different lengths between events).
2070    /// This action is *lazy*: upon invocation of this method the calculation is
2071    /// booked but not executed. Also see RResultPtr.
2072    ///
2073    /// ### Example usage:
2074    /// ~~~{.cpp}
2075    /// // Deduce column types (this invocation needs jitting internally)
2076    /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
2077    /// // Explicit column types
2078    /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
2079    /// ~~~
2080    ///
2081    ///
2082    /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
2083    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2084    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2085    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2086    RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2087    {
2088       std::shared_ptr<::TH2D> h(nullptr);
2089       {
2090          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2091          h = model.GetHistogram();
2092       }
2093       if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
2094          throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
2095       }
2096       const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2097       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2098                                   ? ColumnNames_t()
2099                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2100       return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h, h, fProxiedPtr);
2101    }
2102 
2103    ////////////////////////////////////////////////////////////////////////////
2104    /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*).
2105    /// \tparam V1 The type of the column used to fill the x axis of the histogram.
2106    /// \tparam V2 The type of the column used to fill the y axis of the histogram.
2107    /// \tparam W The type of the column used for the weights of the histogram.
2108    /// \param[in] model The returned histogram will be constructed using this as a model.
2109    /// \param[in] v1Name The name of the column that will fill the x axis.
2110    /// \param[in] v2Name The name of the column that will fill the y axis.
2111    /// \param[in] wName The name of the column that will provide the weights.
2112    /// \return the bidimensional histogram wrapped in a RResultPtr.
2113    ///
2114    /// This action is *lazy*: upon invocation of this method the calculation is
2115    /// booked but not executed. Also see RResultPtr.
2116    ///
2117    /// ### Example usage:
2118    /// ~~~{.cpp}
2119    /// // Deduce column types (this invocation needs jitting internally)
2120    /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
2121    /// // Explicit column types
2122    /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
2123    /// ~~~
2124    ///
2125    /// See the documentation of the first Histo2D() overload for more details.
2126    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2127              typename W = RDFDetail::RInferredType>
2128    RResultPtr<::TH2D>
2129    Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2130    {
2131       std::shared_ptr<::TH2D> h(nullptr);
2132       {
2133          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2134          h = model.GetHistogram();
2135       }
2136       if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
2137          throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
2138       }
2139       const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2140       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2141                                   ? ColumnNames_t()
2142                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2143       return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2144    }
2145 
2146    template <typename V1, typename V2, typename W>
2147    RResultPtr<::TH2D> Histo2D(const TH2DModel &model)
2148    {
2149       return Histo2D<V1, V2, W>(model, "", "", "");
2150    }
2151 
2152    ////////////////////////////////////////////////////////////////////////////
2153    /// \brief Fill and return a three-dimensional histogram (*lazy action*).
2154    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2155    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2156    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2157    /// \param[in] model The returned histogram will be constructed using this as a model.
2158    /// \param[in] v1Name The name of the column that will fill the x axis.
2159    /// \param[in] v2Name The name of the column that will fill the y axis.
2160    /// \param[in] v3Name The name of the column that will fill the z axis.
2161    /// \return the tridimensional histogram wrapped in a RResultPtr.
2162    ///
2163    /// This action is *lazy*: upon invocation of this method the calculation is
2164    /// booked but not executed. Also see RResultPtr.
2165    ///
2166    /// ### Example usage:
2167    /// ~~~{.cpp}
2168    /// // Deduce column types (this invocation needs jitting internally)
2169    /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2170    ///                             "myValueX", "myValueY", "myValueZ");
2171    /// // Explicit column types
2172    /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2173    ///                                                    "myValueX", "myValueY", "myValueZ");
2174    /// ~~~
2175    /// \note If three-dimensional histograms consume too much memory in multithreaded runs, the cloning of TH3D
2176    /// per thread can be reduced using ROOT::RDF::Experimental::ThreadsPerTH3(). See the section "Memory Usage" in
2177    /// the RDataFrame description.
2178    /// \note Differently from other ROOT interfaces, the returned histogram is not associated to gDirectory
2179    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2180    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2181    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2182              typename V3 = RDFDetail::RInferredType>
2183    RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
2184                               std::string_view v3Name = "")
2185    {
2186       std::shared_ptr<::TH3D> h(nullptr);
2187       {
2188          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2189          h = model.GetHistogram();
2190       }
2191       if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2192          throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2193       }
2194       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2195       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2196                                   ? ColumnNames_t()
2197                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2198       return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2199    }
2200 
2201    ////////////////////////////////////////////////////////////////////////////
2202    /// \brief Fill and return a three-dimensional histogram (*lazy action*).
2203    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2204    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2205    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2206    /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2207    /// \param[in] model The returned histogram will be constructed using this as a model.
2208    /// \param[in] v1Name The name of the column that will fill the x axis.
2209    /// \param[in] v2Name The name of the column that will fill the y axis.
2210    /// \param[in] v3Name The name of the column that will fill the z axis.
2211    /// \param[in] wName The name of the column that will provide the weights.
2212    /// \return the tridimensional histogram wrapped in a RResultPtr.
2213    ///
2214    /// This action is *lazy*: upon invocation of this method the calculation is
2215    /// booked but not executed. Also see RResultPtr.
2216    ///
2217    /// ### Example usage:
2218    /// ~~~{.cpp}
2219    /// // Deduce column types (this invocation needs jitting internally)
2220    /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2221    ///                             "myValueX", "myValueY", "myValueZ", "myWeight");
2222    /// // Explicit column types
2223    /// using d_t = double;
2224    /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
2225    ///                                                    "myValueX", "myValueY", "myValueZ", "myWeight");
2226    /// ~~~
2227    ///
2228    ///
2229    /// See the documentation of the first Histo3D() overload for more details.
2230    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2231              typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2232    RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
2233                               std::string_view v3Name, std::string_view wName)
2234    {
2235       std::shared_ptr<::TH3D> h(nullptr);
2236       {
2237          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2238          h = model.GetHistogram();
2239       }
2240       if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
2241          throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
2242       }
2243       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2244       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2245                                   ? ColumnNames_t()
2246                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2247       return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2248    }
2249 
2250    template <typename V1, typename V2, typename V3, typename W>
2251    RResultPtr<::TH3D> Histo3D(const TH3DModel &model)
2252    {
2253       return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
2254    }
2255 
2256    ////////////////////////////////////////////////////////////////////////////
2257    /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2258    /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
2259    /// present.
2260    /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
2261    /// object.
2262    /// \param[in] model The returned histogram will be constructed using this as a model.
2263    /// \param[in] columnList
2264    /// A list containing the names of the columns that will be passed when calling `Fill`.
2265    ///  (N columns for unweighted filling, or N+1 columns for weighted filling)
2266    /// \return the N-dimensional histogram wrapped in a RResultPtr.
2267    ///
2268    /// This action is *lazy*: upon invocation of this method the calculation is
2269    /// booked but not executed. See RResultPtr documentation.
2270    ///
2271    /// ### Example usage:
2272    /// ~~~{.cpp}
2273    /// auto myFilledObj = myDf.HistoND<float, float, float, float>({"name","title", 4,
2274    ///                                                {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2275    ///                                               {"col0", "col1", "col2", "col3"});
2276    /// ~~~
2277    ///
2278    template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
2279    RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
2280    {
2281       std::shared_ptr<::THnD> h(nullptr);
2282       {
2283          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2284          h = model.GetHistogram();
2285 
2286          if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2287             h->Sumw2();
2288          } else if (int(columnList.size()) != h->GetNdimensions()) {
2289             throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2290          }
2291       }
2292       return CreateAction<RDFInternal::ActionTags::HistoND, FirstColumn, OtherColumns...>(columnList, h, h,
2293                                                                                           fProxiedPtr);
2294    }
2295 
2296    ////////////////////////////////////////////////////////////////////////////
2297    /// \brief Fill and return an N-dimensional histogram (*lazy action*).
2298    /// \param[in] model The returned histogram will be constructed using this as a model.
2299    /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2300    ///  (N columns for unweighted filling, or N+1 columns for weighted filling)
2301    /// \return the N-dimensional histogram wrapped in a RResultPtr.
2302    ///
2303    /// This action is *lazy*: upon invocation of this method the calculation is
2304    /// booked but not executed. Also see RResultPtr.
2305    ///
2306    /// ### Example usage:
2307    /// ~~~{.cpp}
2308    /// auto myFilledObj = myDf.HistoND({"name","title", 4,
2309    ///                                                {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2310    ///                                               {"col0", "col1", "col2", "col3"});
2311    /// ~~~
2312    ///
2313    RResultPtr<::THnD> HistoND(const THnDModel &model, const ColumnNames_t &columnList)
2314    {
2315       std::shared_ptr<::THnD> h(nullptr);
2316       {
2317          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2318          h = model.GetHistogram();
2319 
2320          if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2321             h->Sumw2();
2322          } else if (int(columnList.size()) != h->GetNdimensions()) {
2323             throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2324          }
2325       }
2326       return CreateAction<RDFInternal::ActionTags::HistoND, RDFDetail::RInferredType>(columnList, h, h, fProxiedPtr,
2327                                                                                       columnList.size());
2328    }
2329 
2330    ////////////////////////////////////////////////////////////////////////////
2331    /// \brief Fill and return a sparse N-dimensional histogram (*lazy action*).
2332    /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred if not
2333    /// present.
2334    /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the
2335    /// object.
2336    /// \param[in] model The returned histogram will be constructed using this as a model.
2337    /// \param[in] columnList
2338    /// A list containing the names of the columns that will be passed when calling `Fill`.
2339    ///  (N columns for unweighted filling, or N+1 columns for weighted filling)
2340    /// \return the N-dimensional histogram wrapped in a RResultPtr.
2341    ///
2342    /// This action is *lazy*: upon invocation of this method the calculation is
2343    /// booked but not executed. See RResultPtr documentation.
2344    ///
2345    /// ### Example usage:
2346    /// ~~~{.cpp}
2347    /// auto myFilledObj = myDf.HistoNSparseD<float, float, float, float>({"name","title", 4,
2348    ///                                                {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2349    ///                                               {"col0", "col1", "col2", "col3"});
2350    /// ~~~
2351    ///
2352    template <typename FirstColumn, typename... OtherColumns> // need FirstColumn to disambiguate overloads
2353    RResultPtr<::THnSparseD> HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList)
2354    {
2355       std::shared_ptr<::THnSparseD> h(nullptr);
2356       {
2357          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2358          h = model.GetHistogram();
2359 
2360          if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2361             h->Sumw2();
2362          } else if (int(columnList.size()) != h->GetNdimensions()) {
2363             throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2364          }
2365       }
2366       return CreateAction<RDFInternal::ActionTags::HistoNSparseD, FirstColumn, OtherColumns...>(columnList, h, h,
2367                                                                                                 fProxiedPtr);
2368    }
2369 
2370    ////////////////////////////////////////////////////////////////////////////
2371    /// \brief Fill and return a sparse N-dimensional histogram (*lazy action*).
2372    /// \param[in] model The returned histogram will be constructed using this as a model.
2373    /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2374    ///  (N columns for unweighted filling, or N+1 columns for weighted filling)
2375    /// \return the N-dimensional histogram wrapped in a RResultPtr.
2376    ///
2377    /// This action is *lazy*: upon invocation of this method the calculation is
2378    /// booked but not executed. Also see RResultPtr.
2379    ///
2380    /// ### Example usage:
2381    /// ~~~{.cpp}
2382    /// auto myFilledObj = myDf.HistoNSparseD({"name","title", 4,
2383    ///                                                {40,40,40,40}, {20.,20.,20.,20.}, {60.,60.,60.,60.}},
2384    ///                                               {"col0", "col1", "col2", "col3"});
2385    /// ~~~
2386    ///
2387    RResultPtr<::THnSparseD> HistoNSparseD(const THnSparseDModel &model, const ColumnNames_t &columnList)
2388    {
2389       std::shared_ptr<::THnSparseD> h(nullptr);
2390       {
2391          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2392          h = model.GetHistogram();
2393 
2394          if (int(columnList.size()) == (h->GetNdimensions() + 1)) {
2395             h->Sumw2();
2396          } else if (int(columnList.size()) != h->GetNdimensions()) {
2397             throw std::runtime_error("Wrong number of columns for the specified number of histogram axes.");
2398          }
2399       }
2400       return CreateAction<RDFInternal::ActionTags::HistoNSparseD, RDFDetail::RInferredType>(
2401          columnList, h, h, fProxiedPtr, columnList.size());
2402    }
2403 
2404    ////////////////////////////////////////////////////////////////////////////
2405    /// \brief Fill and return a TGraph object (*lazy action*).
2406    /// \tparam X The type of the column used to fill the x axis.
2407    /// \tparam Y The type of the column used to fill the y axis.
2408    /// \param[in] x The name of the column that will fill the x axis.
2409    /// \param[in] y The name of the column that will fill the y axis.
2410    /// \return the TGraph wrapped in a RResultPtr.
2411    ///
2412    /// Columns can be of a container type (e.g. std::vector<double>), in which case the TGraph
2413    /// is filled with each one of the elements of the container.
2414    /// If multithreading is enabled, the order in which points are inserted is undefined.
2415    /// If the graph has to be drawn, it is recommended to sort it on the x axis beforehand.
2416    /// The TGraph is given a name and a title based on the input column names.
2417    ///
2418    /// This action is *lazy*: upon invocation of this method the calculation is
2419    /// booked but not executed. Also see RResultPtr.
2420    ///
2421    /// ### Example usage:
2422    /// ~~~{.cpp}
2423    /// // Deduce column types (this invocation needs jitting internally)
2424    /// auto myGraph1 = myDf.Graph("xValues", "yValues");
2425    /// // Explicit column types
2426    /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
2427    /// ~~~
2428    ///
2429    /// \note Differently from other ROOT interfaces, the returned TGraph is not associated to gDirectory
2430    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2431    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2432    template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType>
2433    RResultPtr<::TGraph> Graph(std::string_view x = "", std::string_view y = "")
2434    {
2435       auto graph = std::make_shared<::TGraph>();
2436       const std::vector<std::string_view> columnViews = {x, y};
2437       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2438                                   ? ColumnNames_t()
2439                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2440 
2441       const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
2442 
2443       // We build a default name and title based on the input columns
2444       const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2445       const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2446       graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2447       graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2448       graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2449 
2450       return CreateAction<RDFInternal::ActionTags::Graph, X, Y>(validatedColumns, graph, graph, fProxiedPtr);
2451    }
2452 
2453    ////////////////////////////////////////////////////////////////////////////
2454    /// \brief Fill and return a TGraphAsymmErrors object (*lazy action*).
2455    /// \param[in] x The name of the column that will fill the x axis.
2456    /// \param[in] y The name of the column that will fill the y axis.
2457    /// \param[in] exl The name of the column of X low errors
2458    /// \param[in] exh The name of the column of X high errors
2459    /// \param[in] eyl The name of the column of Y low errors
2460    /// \param[in] eyh The name of the column of Y high errors
2461    /// \return the TGraphAsymmErrors wrapped in a RResultPtr.
2462    ///
2463    /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
2464    /// is filled with each one of the elements of the container.
2465    /// If Multithreading is enabled, the order in which points are inserted is undefined.
2466    ///
2467    /// This action is *lazy*: upon invocation of this method the calculation is
2468    /// booked but not executed. Also see RResultPtr.
2469    ///
2470    /// ### Example usage:
2471    /// ~~~{.cpp}
2472    /// // Deduce column types (this invocation needs jitting internally)
2473    /// auto myGAE1 = myDf.GraphAsymmErrors("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2474    /// // Explicit column types
2475    /// using f = float;
2476    /// auto myGAE2 = myDf.GraphAsymmErrors<f, f, f, f, f, f>("xValues", "yValues", "exl", "exh", "eyl", "eyh");
2477    /// ~~~
2478    ///
2479    /// `GraphAsymmErrors` should also be used for the cases in which values associated only with
2480    /// one of the axes have associated errors. For example, only `ey` exist and `ex` are equal to zero.
2481    /// In such cases, user should do the following:
2482    /// ~~~{.cpp}
2483    /// // Create a column of zeros in RDataFrame
2484    /// auto rdf_withzeros = rdf.Define("zero", "0");
2485    /// // or alternatively:
2486    /// auto rdf_withzeros = rdf.Define("zero", []() -> double { return 0.;});
2487    /// // Create the graph with y errors only
2488    /// auto rdf_errorsOnYOnly = rdf_withzeros.GraphAsymmErrors("xValues", "yValues", "zero", "zero", "eyl", "eyh");
2489    /// ~~~
2490    ///
2491    /// \note Differently from other ROOT interfaces, the returned TGraphAsymmErrors is not associated to gDirectory
2492    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2493    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2494    template <typename X = RDFDetail::RInferredType, typename Y = RDFDetail::RInferredType,
2495              typename EXL = RDFDetail::RInferredType, typename EXH = RDFDetail::RInferredType,
2496              typename EYL = RDFDetail::RInferredType, typename EYH = RDFDetail::RInferredType>
2497    RResultPtr<::TGraphAsymmErrors>
2498    GraphAsymmErrors(std::string_view x = "", std::string_view y = "", std::string_view exl = "",
2499                     std::string_view exh = "", std::string_view eyl = "", std::string_view eyh = "")
2500    {
2501       auto graph = std::make_shared<::TGraphAsymmErrors>();
2502       const std::vector<std::string_view> columnViews = {x, y, exl, exh, eyl, eyh};
2503       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2504                                   ? ColumnNames_t()
2505                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2506 
2507       const auto validatedColumns = GetValidatedColumnNames(6, userColumns);
2508 
2509       // We build a default name and title based on the input columns
2510       const auto g_name = validatedColumns[1] + "_vs_" + validatedColumns[0];
2511       const auto g_title = validatedColumns[1] + " vs " + validatedColumns[0];
2512       graph->SetNameTitle(g_name.c_str(), g_title.c_str());
2513       graph->GetXaxis()->SetTitle(validatedColumns[0].c_str());
2514       graph->GetYaxis()->SetTitle(validatedColumns[1].c_str());
2515 
2516       return CreateAction<RDFInternal::ActionTags::GraphAsymmErrors, X, Y, EXL, EXH, EYL, EYH>(validatedColumns, graph,
2517                                                                                                graph, fProxiedPtr);
2518    }
2519 
2520    ////////////////////////////////////////////////////////////////////////////
2521    /// \brief Fill and return a one-dimensional profile (*lazy action*).
2522    /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2523    /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2524    /// \param[in] model The model to be considered to build the new return value.
2525    /// \param[in] v1Name The name of the column that will fill the x axis.
2526    /// \param[in] v2Name The name of the column that will fill the y axis.
2527    /// \return the monodimensional profile wrapped in a RResultPtr.
2528    ///
2529    /// This action is *lazy*: upon invocation of this method the calculation is
2530    /// booked but not executed. Also see RResultPtr.
2531    ///
2532    /// ### Example usage:
2533    /// ~~~{.cpp}
2534    /// // Deduce column types (this invocation needs jitting internally)
2535    /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2536    /// // Explicit column types
2537    /// auto myProf2 = myDf.Profile1D<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
2538    /// ~~~
2539    ///
2540    /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2541    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2542    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2543    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
2544    RResultPtr<::TProfile>
2545    Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
2546    {
2547       std::shared_ptr<::TProfile> h(nullptr);
2548       {
2549          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2550          h = model.GetProfile();
2551       }
2552 
2553       if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2554          throw std::runtime_error("Profiles with no axes limits are not supported yet.");
2555       }
2556       const std::vector<std::string_view> columnViews = {v1Name, v2Name};
2557       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2558                                   ? ColumnNames_t()
2559                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2560       return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h, h, fProxiedPtr);
2561    }
2562 
2563    ////////////////////////////////////////////////////////////////////////////
2564    /// \brief Fill and return a one-dimensional profile (*lazy action*).
2565    /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
2566    /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
2567    /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
2568    /// \param[in] model The model to be considered to build the new return value.
2569    /// \param[in] v1Name The name of the column that will fill the x axis.
2570    /// \param[in] v2Name The name of the column that will fill the y axis.
2571    /// \param[in] wName The name of the column that will provide the weights.
2572    /// \return the monodimensional profile wrapped in a RResultPtr.
2573    ///
2574    /// This action is *lazy*: upon invocation of this method the calculation is
2575    /// booked but not executed. Also see RResultPtr.
2576    ///
2577    /// ### Example usage:
2578    /// ~~~{.cpp}
2579    /// // Deduce column types (this invocation needs jitting internally)
2580    /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
2581    /// // Explicit column types
2582    /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
2583    ///                                                   "xValues", "yValues", "weight");
2584    /// ~~~
2585    ///
2586    /// See the first Profile1D() overload for more details.
2587    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2588              typename W = RDFDetail::RInferredType>
2589    RResultPtr<::TProfile>
2590    Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
2591    {
2592       std::shared_ptr<::TProfile> h(nullptr);
2593       {
2594          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2595          h = model.GetProfile();
2596       }
2597 
2598       if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
2599          throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
2600       }
2601       const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
2602       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2603                                   ? ColumnNames_t()
2604                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2605       return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h, h, fProxiedPtr);
2606    }
2607 
2608    ////////////////////////////////////////////////////////////////////////////
2609    /// \brief Fill and return a one-dimensional profile (*lazy action*).
2610    /// See the first Profile1D() overload for more details.
2611    template <typename V1, typename V2, typename W>
2612    RResultPtr<::TProfile> Profile1D(const TProfile1DModel &model)
2613    {
2614       return Profile1D<V1, V2, W>(model, "", "", "");
2615    }
2616 
2617    ////////////////////////////////////////////////////////////////////////////
2618    /// \brief Fill and return a two-dimensional profile (*lazy action*).
2619    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2620    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2621    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2622    /// \param[in] model The returned profile will be constructed using this as a model.
2623    /// \param[in] v1Name The name of the column that will fill the x axis.
2624    /// \param[in] v2Name The name of the column that will fill the y axis.
2625    /// \param[in] v3Name The name of the column that will fill the z axis.
2626    /// \return the bidimensional profile wrapped in a RResultPtr.
2627    ///
2628    /// This action is *lazy*: upon invocation of this method the calculation is
2629    /// booked but not executed. Also see RResultPtr.
2630    ///
2631    /// ### Example usage:
2632    /// ~~~{.cpp}
2633    /// // Deduce column types (this invocation needs jitting internally)
2634    /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2635    ///                               "xValues", "yValues", "zValues");
2636    /// // Explicit column types
2637    /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2638    ///                                                   "xValues", "yValues", "zValues");
2639    /// ~~~
2640    ///
2641    /// \note Differently from other ROOT interfaces, the returned profile is not associated to gDirectory
2642    /// and the caller is responsible for its lifetime (in particular, a typical source of confusion is that
2643    /// if result histograms go out of scope before the end of the program, ROOT might display a blank canvas).
2644    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2645              typename V3 = RDFDetail::RInferredType>
2646    RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
2647                                       std::string_view v2Name = "", std::string_view v3Name = "")
2648    {
2649       std::shared_ptr<::TProfile2D> h(nullptr);
2650       {
2651          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2652          h = model.GetProfile();
2653       }
2654 
2655       if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2656          throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2657       }
2658       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
2659       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2660                                   ? ColumnNames_t()
2661                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2662       return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h, h, fProxiedPtr);
2663    }
2664 
2665    ////////////////////////////////////////////////////////////////////////////
2666    /// \brief Fill and return a two-dimensional profile (*lazy action*).
2667    /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
2668    /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
2669    /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
2670    /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
2671    /// \param[in] model The returned profile will be constructed using this as a model.
2672    /// \param[in] v1Name The name of the column that will fill the x axis.
2673    /// \param[in] v2Name The name of the column that will fill the y axis.
2674    /// \param[in] v3Name The name of the column that will fill the z axis.
2675    /// \param[in] wName The name of the column that will provide the weights.
2676    /// \return the bidimensional profile wrapped in a RResultPtr.
2677    ///
2678    /// This action is *lazy*: upon invocation of this method the calculation is
2679    /// booked but not executed. Also see RResultPtr.
2680    ///
2681    /// ### Example usage:
2682    /// ~~~{.cpp}
2683    /// // Deduce column types (this invocation needs jitting internally)
2684    /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2685    ///                               "xValues", "yValues", "zValues", "weight");
2686    /// // Explicit column types
2687    /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
2688    ///                                                        "xValues", "yValues", "zValues", "weight");
2689    /// ~~~
2690    ///
2691    /// See the first Profile2D() overload for more details.
2692    template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
2693              typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2694    RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
2695                                       std::string_view v3Name, std::string_view wName)
2696    {
2697       std::shared_ptr<::TProfile2D> h(nullptr);
2698       {
2699          ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
2700          h = model.GetProfile();
2701       }
2702 
2703       if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
2704          throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
2705       }
2706       const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
2707       const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
2708                                   ? ColumnNames_t()
2709                                   : ColumnNames_t(columnViews.begin(), columnViews.end());
2710       return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h, h, fProxiedPtr);
2711    }
2712 
2713    /// \brief Fill and return a two-dimensional profile (*lazy action*).
2714    /// See the first Profile2D() overload for more details.
2715    template <typename V1, typename V2, typename V3, typename W>
2716    RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model)
2717    {
2718       return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
2719    }
2720 
2721    ////////////////////////////////////////////////////////////////////////////
2722    /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*).
2723    ///
2724    /// Type T must provide at least:
2725    /// - a copy-constructor
2726    /// - a `Fill` method that accepts as many arguments and with same types as the column names passed as columnList
2727    ///   (these types can also be passed as template parameters to this method)
2728    /// - a `Merge` method with signature `Merge(TCollection *)` or `Merge(const std::vector<T *>&)` that merges the
2729    ///   objects passed as argument into the object on which `Merge` was called (an analogous of TH1::Merge). Note that
2730    ///   if the signature that takes a `TCollection*` is used, then T must inherit from TObject (to allow insertion in
2731    ///   the TCollection*).
2732    ///
2733    /// \tparam FirstColumn The first type of the column the values of which are used to fill the object. Inferred together with OtherColumns if not present.
2734    /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
2735    /// \tparam T The type of the object to fill. Automatically deduced.
2736    /// \param[in] model The model to be considered to build the new return value.
2737    /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
2738    /// \return the filled object wrapped in a RResultPtr.
2739    ///
2740    /// The user gives up ownership of the model object.
2741    /// The list of column names to be used for filling must always be specified.
2742    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
2743    /// Also see RResultPtr.
2744    ///
2745    /// ### Example usage:
2746    /// ~~~{.cpp}
2747    /// MyClass obj;
2748    /// // Deduce column types (this invocation needs jitting internally, and in this case
2749    /// // MyClass needs to be known to the interpreter)
2750    /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
2751    /// // explicit column types
2752    /// auto myFilledObj = myDf.Fill<float, float>(obj, {"col0", "col1"});
2753    /// ~~~
2754    ///
2755    template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename T>
2756    RResultPtr<std::decay_t<T>> Fill(T &&model, const ColumnNames_t &columnList)
2757    {
2758       auto h = std::make_shared<std::decay_t<T>>(std::forward<T>(model));
2759       if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
2760          throw std::runtime_error("The absence of axes limits is not supported yet.");
2761       }
2762       return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h, h, fProxiedPtr,
2763                                                                                        columnList.size());
2764    }
2765 
2766    ////////////////////////////////////////////////////////////////////////////
2767    /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2768    ///
2769    /// \tparam V The type of the value column
2770    /// \param[in] value The name of the column with the values to fill the statistics with.
2771    /// \return the filled TStatistic object wrapped in a RResultPtr.
2772    ///
2773    /// ### Example usage:
2774    /// ~~~{.cpp}
2775    /// // Deduce column type (this invocation needs jitting internally)
2776    /// auto stats0 = myDf.Stats("values");
2777    /// // Explicit column type
2778    /// auto stats1 = myDf.Stats<float>("values");
2779    /// ~~~
2780    ///
2781    template <typename V = RDFDetail::RInferredType>
2782    RResultPtr<TStatistic> Stats(std::string_view value = "")
2783    {
2784       ColumnNames_t columns;
2785       if (!value.empty()) {
2786          columns.emplace_back(std::string(value));
2787       }
2788       const auto validColumnNames = GetValidatedColumnNames(1, columns);
2789       if (std::is_same<V, RDFDetail::RInferredType>::value) {
2790          return Fill(TStatistic(), validColumnNames);
2791       } else {
2792          return Fill<V>(TStatistic(), validColumnNames);
2793       }
2794    }
2795 
2796    ////////////////////////////////////////////////////////////////////////////
2797    /// \brief Return a TStatistic object, filled once per event (*lazy action*).
2798    ///
2799    /// \tparam V The type of the value column
2800    /// \tparam W The type of the weight column
2801    /// \param[in] value The name of the column with the values to fill the statistics with.
2802    /// \param[in] weight The name of the column with the weights to fill the statistics with.
2803    /// \return the filled TStatistic object wrapped in a RResultPtr.
2804    ///
2805    /// ### Example usage:
2806    /// ~~~{.cpp}
2807    /// // Deduce column types (this invocation needs jitting internally)
2808    /// auto stats0 = myDf.Stats("values", "weights");
2809    /// // Explicit column types
2810    /// auto stats1 = myDf.Stats<int, float>("values", "weights");
2811    /// ~~~
2812    ///
2813    template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
2814    RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
2815    {
2816       ColumnNames_t columns{std::string(value), std::string(weight)};
2817       constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
2818       constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
2819       const auto validColumnNames = GetValidatedColumnNames(2, columns);
2820       // We have 3 cases:
2821       // 1. Both types are inferred: we use Fill and let the jit kick in.
2822       // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
2823       // 3. Both types are explicit: we invoke the fully compiled Fill method.
2824       if (vIsInferred && wIsInferred) {
2825          return Fill(TStatistic(), validColumnNames);
2826       } else if (vIsInferred != wIsInferred) {
2827          std::string error("The ");
2828          error += vIsInferred ? "value " : "weight ";
2829          error += "column type is explicit, while the ";
2830          error += vIsInferred ? "weight " : "value ";
2831          error += " is specified to be inferred. This case is not supported: please specify both types or none.";
2832          throw std::runtime_error(error);
2833       } else {
2834          return Fill<V, W>(TStatistic(), validColumnNames);
2835       }
2836    }
2837 
2838    ////////////////////////////////////////////////////////////////////////////
2839    /// \brief Return the minimum of processed column values (*lazy action*).
2840    /// \tparam T The type of the branch/column.
2841    /// \param[in] columnName The name of the branch/column to be treated.
2842    /// \return the minimum value of the selected column wrapped in a RResultPtr.
2843    ///
2844    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2845    /// template specialization of this method.
2846    /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2847    ///
2848    /// This action is *lazy*: upon invocation of this method the calculation is
2849    /// booked but not executed. Also see RResultPtr.
2850    ///
2851    /// ### Example usage:
2852    /// ~~~{.cpp}
2853    /// // Deduce column type (this invocation needs jitting internally)
2854    /// auto minVal0 = myDf.Min("values");
2855    /// // Explicit column type
2856    /// auto minVal1 = myDf.Min<double>("values");
2857    /// ~~~
2858    ///
2859    template <typename T = RDFDetail::RInferredType>
2860    RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "")
2861    {
2862       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2863       using RetType_t = RDFDetail::MinReturnType_t<T>;
2864       auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
2865       return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV, minV, fProxiedPtr);
2866    }
2867 
2868    ////////////////////////////////////////////////////////////////////////////
2869    /// \brief Return the maximum of processed column values (*lazy action*).
2870    /// \tparam T The type of the branch/column.
2871    /// \param[in] columnName The name of the branch/column to be treated.
2872    /// \return the maximum value of the selected column wrapped in a RResultPtr.
2873    ///
2874    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2875    /// template specialization of this method.
2876    /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2877    ///
2878    /// This action is *lazy*: upon invocation of this method the calculation is
2879    /// booked but not executed. Also see RResultPtr.
2880    ///
2881    /// ### Example usage:
2882    /// ~~~{.cpp}
2883    /// // Deduce column type (this invocation needs jitting internally)
2884    /// auto maxVal0 = myDf.Max("values");
2885    /// // Explicit column type
2886    /// auto maxVal1 = myDf.Max<double>("values");
2887    /// ~~~
2888    ///
2889    template <typename T = RDFDetail::RInferredType>
2890    RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "")
2891    {
2892       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2893       using RetType_t = RDFDetail::MaxReturnType_t<T>;
2894       auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
2895       return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV, maxV, fProxiedPtr);
2896    }
2897 
2898    ////////////////////////////////////////////////////////////////////////////
2899    /// \brief Return the mean of processed column values (*lazy action*).
2900    /// \tparam T The type of the branch/column.
2901    /// \param[in] columnName The name of the branch/column to be treated.
2902    /// \return the mean value of the selected column wrapped in a RResultPtr.
2903    ///
2904    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2905    /// template specialization of this method.
2906    /// Note that internally, the summations are executed with Kahan sums in double precision, irrespective
2907    /// of the type of column that is read.
2908    ///
2909    /// This action is *lazy*: upon invocation of this method the calculation is
2910    /// booked but not executed. Also see RResultPtr.
2911    ///
2912    /// ### Example usage:
2913    /// ~~~{.cpp}
2914    /// // Deduce column type (this invocation needs jitting internally)
2915    /// auto meanVal0 = myDf.Mean("values");
2916    /// // Explicit column type
2917    /// auto meanVal1 = myDf.Mean<double>("values");
2918    /// ~~~
2919    ///
2920    template <typename T = RDFDetail::RInferredType>
2921    RResultPtr<double> Mean(std::string_view columnName = "")
2922    {
2923       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2924       auto meanV = std::make_shared<double>(0);
2925       return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV, meanV, fProxiedPtr);
2926    }
2927 
2928    ////////////////////////////////////////////////////////////////////////////
2929    /// \brief Return the unbiased standard deviation of processed column values (*lazy action*).
2930    /// \tparam T The type of the branch/column.
2931    /// \param[in] columnName The name of the branch/column to be treated.
2932    /// \return the standard deviation value of the selected column wrapped in a RResultPtr.
2933    ///
2934    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2935    /// template specialization of this method.
2936    ///
2937    /// This action is *lazy*: upon invocation of this method the calculation is
2938    /// booked but not executed. Also see RResultPtr.
2939    ///
2940    /// ### Example usage:
2941    /// ~~~{.cpp}
2942    /// // Deduce column type (this invocation needs jitting internally)
2943    /// auto stdDev0 = myDf.StdDev("values");
2944    /// // Explicit column type
2945    /// auto stdDev1 = myDf.StdDev<double>("values");
2946    /// ~~~
2947    ///
2948    template <typename T = RDFDetail::RInferredType>
2949    RResultPtr<double> StdDev(std::string_view columnName = "")
2950    {
2951       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2952       auto stdDeviationV = std::make_shared<double>(0);
2953       return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV, stdDeviationV, fProxiedPtr);
2954    }
2955 
2956    // clang-format off
2957    ////////////////////////////////////////////////////////////////////////////
2958    /// \brief Return the sum of processed column values (*lazy action*).
2959    /// \tparam T The type of the branch/column.
2960    /// \param[in] columnName The name of the branch/column.
2961    /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
2962    /// \return the sum of the selected column wrapped in a RResultPtr.
2963    ///
2964    /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
2965    /// template specialization of this method.
2966    /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
2967    ///
2968    /// This action is *lazy*: upon invocation of this method the calculation is
2969    /// booked but not executed. Also see RResultPtr.
2970    ///
2971    /// ### Example usage:
2972    /// ~~~{.cpp}
2973    /// // Deduce column type (this invocation needs jitting internally)
2974    /// auto sum0 = myDf.Sum("values");
2975    /// // Explicit column type
2976    /// auto sum1 = myDf.Sum<double>("values");
2977    /// ~~~
2978    ///
2979    template <typename T = RDFDetail::RInferredType>
2980    RResultPtr<RDFDetail::SumReturnType_t<T>>
2981    Sum(std::string_view columnName = "",
2982        const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
2983    {
2984       const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2985       auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
2986       return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV, sumV, fProxiedPtr);
2987    }
2988    // clang-format on
2989 
2990    ////////////////////////////////////////////////////////////////////////////
2991    /// \brief Gather filtering statistics.
2992    /// \return the resulting `RCutFlowReport` instance wrapped in a RResultPtr.
2993    ///
2994    /// Calling `Report` on the main `RDataFrame` object gathers stats for
2995    /// all named filters in the call graph. Calling this method on a
2996    /// stored chain state (i.e. a graph node different from the first) gathers
2997    /// the stats for all named filters in the chain section between the original
2998    /// `RDataFrame` and that node (included). Stats are gathered in the same
2999    /// order as the named filters have been added to the graph.
3000    /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
3001    /// effects cuts had.
3002    ///
3003    /// This action is *lazy*: upon invocation of
3004    /// this method the calculation is booked but not executed. See RResultPtr
3005    /// documentation.
3006    ///
3007    /// ### Example usage:
3008    /// ~~~{.cpp}
3009    /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
3010    /// auto cutReport = filtered.Report();
3011    /// cutReport->Print();
3012    /// ~~~
3013    ///
3014    RResultPtr<RCutFlowReport> Report()
3015    {
3016       bool returnEmptyReport = false;
3017       // if this is a RInterface<RLoopManager> on which `Define` has been called, users
3018       // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
3019       // certainly does not contain named filters.
3020       // The number 4 takes into account the implicit columns for entry and slot number
3021       // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
3022       if (std::is_same<Proxied, RLoopManager>::value && fColRegister.GenerateColumnNames().size() > 4)
3023          returnEmptyReport = true;
3024 
3025       auto rep = std::make_shared<RCutFlowReport>();
3026       using Helper_t = RDFInternal::ReportHelper<Proxied>;
3027       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
3028 
3029       auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr.get(), returnEmptyReport), ColumnNames_t({}),
3030                                                fProxiedPtr, RDFInternal::RColumnRegister(fColRegister));
3031 
3032       return MakeResultPtr(rep, *fLoopManager, std::move(action));
3033    }
3034 
3035    /// \brief Returns the names of the filters created.
3036    /// \return the container of filters names.
3037    ///
3038    /// If called on a root node, all the filters in the computation graph will
3039    /// be printed. For any other node, only the filters upstream of that node.
3040    /// Filters without a name are printed as "Unnamed Filter"
3041    /// This is not an action nor a transformation, just a query to the RDataFrame object.
3042    ///
3043    /// ### Example usage:
3044    /// ~~~{.cpp}
3045    /// auto filtNames = d.GetFilterNames();
3046    /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
3047    /// ~~~
3048    ///
3049    std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
3050 
3051    // clang-format off
3052    ////////////////////////////////////////////////////////////////////////////
3053    /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
3054    /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
3055    /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
3056    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
3057    /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
3058    /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
3059    /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
3060    /// \param[in] aggIdentity The aggregator variable of each thread is initialized to this value (or is default-constructed if the parameter is omitted)
3061    /// \return the result of the aggregation wrapped in a RResultPtr.
3062    ///
3063    /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
3064    /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
3065    /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
3066    /// the value of the column columnName.
3067    /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
3068    /// Otherwise the signature of aggregator must be `void(U&,T)`.
3069    ///
3070    /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
3071    /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
3072    /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
3073    ///
3074    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
3075    ///
3076    /// Example usage:
3077    /// ~~~{.cpp}
3078    /// auto aggregator = [](double acc, double x) { return acc * x; };
3079    /// ROOT::EnableImplicitMT();
3080    /// // If multithread is enabled, the aggregator function will be called by more threads
3081    /// // and will produce a vector of partial accumulators.
3082    /// // The merger function performs the final aggregation of these partial results.
3083    /// auto merger = [](std::vector<double> &accumulators) {
3084    ///    for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
3085    ///       accumulators[0] *= accumulators[i];
3086    ///    }
3087    /// };
3088    ///
3089    /// // The accumulator is initialized at this value by every thread.
3090    /// double initValue = 1.;
3091    ///
3092    /// // Multiplies all elements of the column "x"
3093    /// auto result = d.Aggregate(aggregator, merger, "x", initValue);
3094    /// ~~~
3095    // clang-format on
3096    template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
3097              typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
3098              typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
3099              typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
3100              typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
3101    RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
3102    {
3103       RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
3104       const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
3105 
3106       const auto validColumnNames = GetValidatedColumnNames(1, columns);
3107       CheckAndFillDSColumns(validColumnNames, TTraits::TypeList<T>());
3108 
3109       auto accObjPtr = std::make_shared<U>(aggIdentity);
3110       using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
3111       using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
3112       auto action = std::make_unique<Action_t>(
3113          Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
3114          fProxiedPtr, fColRegister);
3115       return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
3116    }
3117 
3118    // clang-format off
3119    ////////////////////////////////////////////////////////////////////////////
3120    /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot.
3121    /// \tparam AccFun The type of the aggregator callable. Automatically deduced.
3122    /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
3123    /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
3124    /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
3125    /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
3126    /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
3127    /// \return the result of the aggregation wrapped in a RResultPtr.
3128    ///
3129    /// See previous Aggregate overload for more information.
3130    // clang-format on
3131    template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
3132              typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
3133              typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
3134              typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
3135    RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
3136    {
3137       static_assert(
3138          std::is_default_constructible<U>::value,
3139          "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
3140       return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
3141    }
3142 
3143    // clang-format off
3144    ////////////////////////////////////////////////////////////////////////////
3145    /// \brief Book execution of a custom action using a user-defined helper object.
3146    /// \tparam FirstColumn The type of the first column used by this action.  Inferred together with OtherColumns if not present.
3147    /// \tparam OtherColumns A list of the types of the other columns used by this action
3148    /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
3149    /// \param[in] helper The Action Helper to be scheduled.
3150    /// \param[in] columns The names of the columns on which the helper acts.
3151    /// \return the result of the helper wrapped in a RResultPtr.
3152    ///
3153    /// This method books a custom action for execution. The behavior of the action is completely dependent on the
3154    /// Helper object provided by the caller. The required interface for the helper is described below (more
3155    /// methods than the ones required can be present, e.g. a constructor that takes the number of worker threads is usually useful):
3156    ///
3157    /// ### Mandatory interface
3158    ///
3159    /// * `Helper` must publicly inherit from `ROOT::Detail::RDF::RActionImpl<Helper>`
3160    /// * `Helper::Result_t`: public alias for the type of the result of this action helper. `Result_t` must be default-constructible.
3161    /// * `Helper(Helper &&)`: a move-constructor is required. Copy-constructors are discouraged.
3162    /// * `std::shared_ptr<Result_t> GetResultPtr() const`: return a shared_ptr to the result of this action (of type
3163    ///   Result_t). The RResultPtr returned by Book will point to this object. Note that this method can be called
3164    ///   _before_ Initialize(), because the RResultPtr is constructed before the event loop is started.
3165    /// * `void Initialize()`: this method is called once before starting the event-loop. Useful for setup operations.
3166    ///   It must reset the state of the helper to the expected state at the beginning of the event loop: the same helper,
3167    ///   or copies of it, might be used for multiple event loops (e.g. in the presence of systematic variations).
3168    /// * `void InitTask(TTreeReader *, unsigned int slot)`: each working thread shall call this method during the event
3169    ///   loop, before processing a batch of entries. The pointer passed as argument, if not null, will point to the TTreeReader
3170    ///   that RDataFrame has set up to read the task's batch of entries. It is passed to the helper to allow certain advanced optimizations;
3171    ///   it should not usually serve any purpose for the Helper. This method is often no-op for simple helpers.
3172    /// * `void Exec(unsigned int slot, ColumnTypes...columnValues)`: each working thread shall call this method
3173    ///   during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
3174    ///   this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
3175    ///   the requested columns for the particular entry being processed.
3176    /// * `void Finalize()`: this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
3177    /// * `std::string GetActionName()`: it returns a string identifier for this type of action that RDataFrame will use in
3178    ///    diagnostics, SaveGraph(), etc.
3179    ///
3180    /// ### Optional methods
3181    ///
3182    /// If these methods are implemented they enable extra functionality as per the description below.
3183    ///
3184    /// * `Result_t &PartialUpdate(unsigned int slot)`: if present, it must return the value of the partial result of this action for the given 'slot'.
3185    ///   Different threads might call this method concurrently, but will do so with different 'slot' numbers.
3186    ///   RDataFrame leverages this method to implement RResultPtr::OnPartialResult().
3187    /// * `ROOT::RDF::SampleCallback_t GetSampleCallback()`: if present, it must return a callable with the
3188    ///   appropriate signature (see ROOT::RDF::SampleCallback_t) that will be invoked at the beginning of the processing
3189    ///   of every sample, as in DefinePerSample().
3190    /// * `Helper MakeNew(void *newResult, std::string_view variation = "nominal")`: if implemented, it enables varying
3191    ///   the action's result with VariationsFor(). It takes a type-erased new result that can be safely cast to a
3192    ///   `std::shared_ptr<Result_t> *` (a pointer to shared pointer) and should be used as the action's output result.
3193    ///   The function optionally takes the name of the current variation which could be useful in customizing its behaviour.
3194    ///
3195    /// In case Book is called without specifying column types as template arguments, corresponding typed code will be just-in-time compiled
3196    /// by RDataFrame. In that case the Helper class needs to be known to the ROOT interpreter.
3197    ///
3198    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see RResultPtr.
3199    ///
3200    /// ### Examples
3201    /// See [this tutorial](https://root.cern/doc/master/df018__customActions_8C.html) for an example implementation of an action helper.
3202    ///
3203    /// It is also possible to inspect the code used by built-in RDataFrame actions at ActionHelpers.hxx.
3204    ///
3205    // clang-format on
3206    template <typename FirstColumn = RDFDetail::RInferredType, typename... OtherColumns, typename Helper>
3207    RResultPtr<typename std::decay_t<Helper>::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
3208    {
3209       using HelperT = std::decay_t<Helper>;
3210       // TODO add more static sanity checks on Helper
3211       using AH = RDFDetail::RActionImpl<HelperT>;
3212       static_assert(std::is_base_of<AH, HelperT>::value && std::is_convertible<HelperT *, AH *>::value,
3213                     "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
3214 
3215       auto hPtr = std::make_shared<HelperT>(std::forward<Helper>(helper));
3216       auto resPtr = hPtr->GetResultPtr();
3217 
3218       if (std::is_same<FirstColumn, RDFDetail::RInferredType>::value && columns.empty()) {
3219          return CallCreateActionWithoutColsIfPossible<HelperT>(resPtr, hPtr, TTraits::TypeList<FirstColumn>{});
3220       } else {
3221          return CreateAction<RDFInternal::ActionTags::Book, FirstColumn, OtherColumns...>(columns, resPtr, hPtr,
3222                                                                                           fProxiedPtr, columns.size());
3223       }
3224    }
3225 
3226    ////////////////////////////////////////////////////////////////////////////
3227    /// \brief Provides a representation of the columns in the dataset.
3228    /// \tparam ColumnTypes variadic list of branch/column types.
3229    /// \param[in] columnList Names of the columns to be displayed.
3230    /// \param[in] nRows Number of events for each column to be displayed.
3231    /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3232    /// \return the `RDisplay` instance wrapped in a RResultPtr.
3233    ///
3234    /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
3235    /// form. RDisplay will either print on the standard output a summarized version through `RDisplay::Print()` or will
3236    /// return a complete version through `RDisplay::AsString()`.
3237    ///
3238    /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. Also see
3239    /// RResultPtr.
3240    ///
3241    /// Example usage:
3242    /// ~~~{.cpp}
3243    /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
3244    /// auto d1 = rdf.Display("");
3245    /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
3246    /// auto d2 = d.Display({"x", "y"}, 128);
3247    /// // Printing the short representations, the event loop will run
3248    /// d1->Print();
3249    /// d2->Print();
3250    /// ~~~
3251    template <typename... ColumnTypes>
3252    RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3253    {
3254       CheckIMTDisabled("Display");
3255       auto newCols = columnList;
3256       newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3257       auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3258       using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3259       // Need to add ULong64_t type corresponding to the first column rdfentry_
3260       return CreateAction<RDFInternal::ActionTags::Display, ULong64_t, ColumnTypes...>(
3261          std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr);
3262    }
3263 
3264    ////////////////////////////////////////////////////////////////////////////
3265    /// \brief Provides a representation of the columns in the dataset.
3266    /// \param[in] columnList Names of the columns to be displayed.
3267    /// \param[in] nRows Number of events for each column to be displayed.
3268    /// \param[in] nMaxCollectionElements  Maximum number of collection elements to display per row.
3269    /// \return the `RDisplay` instance wrapped in a RResultPtr.
3270    ///
3271    /// This overload automatically infers the column types.
3272    /// See the previous overloads for further details.
3273    ///
3274    /// Invoked when no types are specified to Display
3275    RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3276    {
3277       CheckIMTDisabled("Display");
3278       auto newCols = columnList;
3279       newCols.insert(newCols.begin(), "rdfentry_"); // Artificially insert first column
3280       auto displayer = std::make_shared<RDisplay>(newCols, GetColumnTypeNamesList(newCols), nMaxCollectionElements);
3281       using displayHelperArgs_t = std::pair<size_t, std::shared_ptr<RDisplay>>;
3282       return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(
3283          std::move(newCols), displayer, std::make_shared<displayHelperArgs_t>(nRows, displayer), fProxiedPtr,
3284          columnList.size() + 1);
3285    }
3286 
3287    ////////////////////////////////////////////////////////////////////////////
3288    /// \brief Provides a representation of the columns in the dataset.
3289    /// \param[in] columnNameRegexp A regular expression to select the columns.
3290    /// \param[in] nRows Number of events for each column to be displayed.
3291    /// \param[in] nMaxCollectionElements Maximum number of collection elements to display per row.
3292    /// \return the `RDisplay` instance wrapped in a RResultPtr.
3293    ///
3294    /// The existing columns are matched against the regular expression. If the string provided
3295    /// is empty, all columns are selected.
3296    /// See the previous overloads for further details.
3297    RResultPtr<RDisplay>
3298    Display(std::string_view columnNameRegexp = "", size_t nRows = 5, size_t nMaxCollectionElements = 10)
3299    {
3300       const auto columnNames = GetColumnNames();
3301       const auto selectedColumns = RDFInternal::ConvertRegexToColumns(columnNames, columnNameRegexp, "Display");
3302       return Display(selectedColumns, nRows, nMaxCollectionElements);
3303    }
3304 
3305    ////////////////////////////////////////////////////////////////////////////
3306    /// \brief Provides a representation of the columns in the dataset.
3307    /// \param[in] columnList Names of the columns to be displayed.
3308    /// \param[in] nRows Number of events for each column to be displayed.
3309    /// \param[in] nMaxCollectionElements Number of maximum elements in collection.
3310    /// \return the `RDisplay` instance wrapped in a RResultPtr.
3311    ///
3312    /// See the previous overloads for further details.
3313    RResultPtr<RDisplay>
3314    Display(std::initializer_list<std::string> columnList, size_t nRows = 5, size_t nMaxCollectionElements = 10)
3315    {
3316       ColumnNames_t selectedColumns(columnList);
3317       return Display(selectedColumns, nRows, nMaxCollectionElements);
3318    }
3319 
3320 private:
3321    template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
3322    std::enable_if_t<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>
3323    DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns, const std::string &where)
3324    {
3325       if (where.compare(0, 8, "Redefine") != 0) { // not a Redefine
3326          RDFInternal::CheckValidCppVarName(name, where);
3327          RDFInternal::CheckForRedefinition(where, name, fColRegister,
3328                                            GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
3329       } else {
3330          RDFInternal::CheckForDefinition(where, name, fColRegister,
3331                                          GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
3332          RDFInternal::CheckForNoVariations(where, name, fColRegister);
3333       }
3334 
3335       using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
3336       using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
3337          std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::Slot>::value, ArgTypes_t>::type;
3338       using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
3339          std::is_same<DefineType, RDFDetail::ExtraArgsForDefine::SlotAndEntry>::value, ColTypesTmp_t>::type;
3340 
3341       constexpr auto nColumns = ColTypes_t::list_size;
3342 
3343       const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
3344       CheckAndFillDSColumns(validColumnNames, ColTypes_t());
3345 
3346       // Declare return type to the interpreter, for future use by jitted actions
3347       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3348       if (retTypeName.empty()) {
3349          // The type is not known to the interpreter.
3350          // We must not error out here, but if/when this column is used in jitted code
3351          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3352          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3353       }
3354 
3355       using NewCol_t = RDFDetail::RDefine<F, DefineType>;
3356       auto newColumn = std::make_shared<NewCol_t>(name, retTypeName, std::forward<F>(expression), validColumnNames,
3357                                                   fColRegister, *fLoopManager);
3358 
3359       RDFInternal::RColumnRegister newCols(fColRegister);
3360       newCols.AddDefine(std::move(newColumn));
3361 
3362       RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
3363 
3364       return newInterface;
3365    }
3366 
3367    // This overload is chosen when the callable passed to Define or DefineSlot returns void.
3368    // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
3369    // this way compilation of `Define` has no way to continue after throwing the error.
3370    template <typename F, typename DefineType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
3371              bool IsFStringConv = std::is_convertible<F, std::string>::value,
3372              bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
3373    std::enable_if_t<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>
3374    DefineImpl(std::string_view, F, const ColumnNames_t &, const std::string &)
3375    {
3376       static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
3377                     "Error in `Define`: type returned by expression is not default-constructible");
3378       return *this; // never reached
3379    }
3380 
3381    ////////////////////////////////////////////////////////////////////////////
3382    /// \brief Implementation of cache.
3383    template <typename... ColTypes, std::size_t... S>
3384    RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...>)
3385    {
3386       const auto columnListWithoutSizeColumns = RDFInternal::FilterArraySizeColNames(columnList, "Snapshot");
3387 
3388       // Check at compile time that the columns types are copy constructible
3389       constexpr bool areCopyConstructible =
3390          RDFInternal::TEvalAnd<std::is_copy_constructible<ColTypes>::value...>::value;
3391       static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
3392 
3393       RDFInternal::CheckTypesAndPars(sizeof...(ColTypes), columnListWithoutSizeColumns.size());
3394 
3395       auto colHolders = std::make_tuple(Take<ColTypes>(columnListWithoutSizeColumns[S])...);
3396       auto ds = std::make_unique<RLazyDS<ColTypes...>>(
3397          std::make_pair(columnListWithoutSizeColumns[S], std::get<S>(colHolders))...);
3398 
3399       RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnListWithoutSizeColumns));
3400 
3401       return cachedRDF;
3402    }
3403 
3404    template <bool IsSingleColumn, typename F>
3405    RInterface<Proxied, DS_t>
3406    VaryImpl(const std::vector<std::string> &colNames, F &&expression, const ColumnNames_t &inputColumns,
3407             const std::vector<std::string> &variationTags, std::string_view variationName)
3408    {
3409       using F_t = std::decay_t<F>;
3410       using ColTypes_t = typename TTraits::CallableTraits<F_t>::arg_types;
3411       using RetType = typename TTraits::CallableTraits<F_t>::ret_type;
3412       constexpr auto nColumns = ColTypes_t::list_size;
3413 
3414       SanityChecksForVary<RetType>(colNames, variationTags, variationName);
3415 
3416       const auto validColumnNames = GetValidatedColumnNames(nColumns, inputColumns);
3417       CheckAndFillDSColumns(validColumnNames, ColTypes_t{});
3418 
3419       auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
3420       if (retTypeName.empty()) {
3421          // The type is not known to the interpreter, but we don't want to error out
3422          // here, rather if/when this column is used in jitted code, so we inject a broken but telling type name.
3423          const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
3424          retTypeName = "CLING_UNKNOWN_TYPE_" + demangledType;
3425       }
3426 
3427       auto variation = std::make_shared<RDFInternal::RVariation<F_t, IsSingleColumn>>(
3428          colNames, variationName, std::forward<F>(expression), variationTags, retTypeName, fColRegister, *fLoopManager,
3429          validColumnNames);
3430 
3431       RDFInternal::RColumnRegister newCols(fColRegister);
3432       newCols.AddVariation(std::move(variation));
3433 
3434       RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols));
3435 
3436       return newInterface;
3437    }
3438 
3439    RInterface<Proxied, DS_t> JittedVaryImpl(const std::vector<std::string> &colNames, std::string_view expression,
3440                                             const std::vector<std::string> &variationTags,
3441                                             std::string_view variationName, bool isSingleColumn)
3442    {
3443       R__ASSERT(!variationTags.empty() && "Must have at least one variation.");
3444       R__ASSERT(!colNames.empty() && "Must have at least one varied column.");
3445       R__ASSERT(!variationName.empty() && "Must provide a variation name.");
3446 
3447       for (auto &colName : colNames) {
3448          RDFInternal::CheckValidCppVarName(colName, "Vary");
3449          RDFInternal::CheckForDefinition("Vary", colName, fColRegister,
3450                                          GetDataSource() ? GetDataSource()->GetColumnNames() : ColumnNames_t{});
3451       }
3452       RDFInternal::CheckValidCppVarName(variationName, "Vary");
3453 
3454       // when varying multiple columns, they must be different columns
3455       if (colNames.size() > 1) {
3456          std::set<std::string> uniqueCols(colNames.begin(), colNames.end());
3457          if (uniqueCols.size() != colNames.size())
3458             throw std::logic_error("A column name was passed to the same Vary invocation multiple times.");
3459       }
3460 
3461       auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
3462       auto jittedVariation =
3463          RDFInternal::BookVariationJit(colNames, variationName, variationTags, expression, *fLoopManager,
3464                                        GetDataSource(), fColRegister, upcastNodeOnHeap, isSingleColumn);
3465 
3466       RDFInternal::RColumnRegister newColRegister(fColRegister);
3467       newColRegister.AddVariation(std::move(jittedVariation));
3468 
3469       RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newColRegister));
3470 
3471       return newInterface;
3472    }
3473 
3474    template <typename Helper, typename ActionResultType>
3475    auto CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &resPtr,
3476                                               const std::shared_ptr<Helper> &hPtr,
3477                                               TTraits::TypeList<RDFDetail::RInferredType>)
3478       -> decltype(hPtr->Exec(0u), RResultPtr<ActionResultType>{})
3479    {
3480       return CreateAction<RDFInternal::ActionTags::Book>(/*columns=*/{}, resPtr, hPtr, fProxiedPtr, 0u);
3481    }
3482 
3483    template <typename Helper, typename ActionResultType, typename... Others>
3484    RResultPtr<ActionResultType>
3485    CallCreateActionWithoutColsIfPossible(const std::shared_ptr<ActionResultType> &,
3486                                          const std::shared_ptr<Helper>& /*hPtr*/,
3487                                          Others...)
3488    {
3489       throw std::logic_error(std::string("An action was booked with no input columns, but the action requires "
3490                                          "columns! The action helper type was ") +
3491                              typeid(Helper).name());
3492       return {};
3493    }
3494 
3495 protected:
3496    RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
3497               const RDFInternal::RColumnRegister &colRegister)
3498       : RInterfaceBase(lm, colRegister), fProxiedPtr(proxied)
3499    {
3500    }
3501 
3502    const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
3503 };
3504 
3505 } // namespace RDF
3506 
3507 } // namespace ROOT
3508 
#endif // ROOT_RDF_TINTERFACE