Back to home page

EIC code displayed by LXR

 
 

    


Warning, file /include/root/ROOT/RNTupleProcessor.hxx was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).

0001 /// \file ROOT/RNTupleProcessor.hxx
0002 /// \ingroup NTuple
0003 /// \author Florine de Geus <florine.de.geus@cern.ch>
0004 /// \date 2024-03-26
0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0006 /// is welcome!
0007 
0008 /*************************************************************************
0009  * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers.               *
0010  * All rights reserved.                                                  *
0011  *                                                                       *
0012  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0013  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0014  *************************************************************************/
0015 
0016 #ifndef ROOT_RNTupleProcessor
0017 #define ROOT_RNTupleProcessor
0018 
0019 #include <ROOT/REntry.hxx>
0020 #include <ROOT/RError.hxx>
0021 #include <ROOT/RFieldToken.hxx>
0022 #include <ROOT/RNTupleDescriptor.hxx>
0023 #include <ROOT/RNTupleJoinTable.hxx>
0024 #include <ROOT/RNTupleModel.hxx>
0025 #include <ROOT/RNTupleUtil.hxx>
0026 #include <ROOT/RPageStorage.hxx>
0027 
0028 #include <memory>
0029 #include <string>
0030 #include <string_view>
0031 #include <vector>
0032 
0033 namespace ROOT {
0034 namespace Experimental {
0035 
0036 namespace Internal {
0037 struct RNTupleProcessorEntryLoader;
0038 } // namespace Internal
0039 
0040 // clang-format off
0041 /**
0042 \class ROOT::Experimental::RNTupleOpenSpec
0043 \ingroup NTuple
0044 \brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
0045 
0046 An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the
0047 TDirectory (or any of its subclasses) that contains the RNTuple.
0048 
0049 Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python.
0050 */
0051 // clang-format on
0052 class RNTupleOpenSpec {
0053    friend class RNTupleProcessor;
0054    friend class RNTupleSingleProcessor;
0055    friend class RNTupleJoinProcessor;
0056 
0057 private:
0058    std::string fNTupleName;
0059    std::variant<std::string, TDirectory *> fStorage;
0060 
0061 public:
0062    RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {}
0063    RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {}
0064 
0065    std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const;
0066 };
0067 
0068 // clang-format off
0069 /**
0070 \class ROOT::Experimental::RNTupleProcessor
0071 \ingroup NTuple
0072 \brief Interface for iterating over entries of RNTuples and vertically concatenated RNTuples (chains).
0073 
0074 Example usage (see ntpl012_processor.C for a full example):
0075 
0076 ~~~{.cpp}
0077 #include <ROOT/RNTupleProcessor.hxx>
0078 using ROOT::Experimental::RNTupleProcessor;
0079 using ROOT::Experimental::RNTupleOpenSpec;
0080 
0081 std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
0082 auto processor = RNTupleProcessor::CreateChain(ntuples);
0083 
0084 for (const auto &entry : processor) {
0085    std::cout << "pt = " << *entry.GetPtr<float>("pt") << std::endl;
0086 }
0087 ~~~
0088 
0089 An RNTupleProcessor is created by providing one or more RNTupleOpenSpecs, each of which contains the name and storage
0090 location of a single RNTuple. The RNTuples are processed in the order in which they were provided.
0091 
0092 The RNTupleProcessor constructor also (optionally) accepts an RNTupleModel, which determines which fields should be
0093 read. If no model is provided, a default model based on the descriptor of the first specified RNTuple will be used.
0094 If a field that was present in the first RNTuple is not found in a subsequent one, an error will be thrown.
0095 
0096 The RNTupleProcessor provides an iterator which gives access to the REntry containing the field data for the current
0097 entry. Additional bookkeeping information can be obtained through the RNTupleProcessor itself.
0098 */
0099 // clang-format on
0100 class RNTupleProcessor {
0101    friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader; // for unit tests
0102    friend class RNTupleSingleProcessor;
0103    friend class RNTupleChainProcessor;
0104    friend class RNTupleJoinProcessor;
0105 
0106 protected:
0107    // clang-format off
0108    /**
0109    \class ROOT::Experimental::RNTupleProcessor::RFieldContext
0110    \ingroup NTuple
0111    \brief Manager for a field as part of the RNTupleProcessor.
0112 
0113    An RFieldContext contains two fields: a proto-field which is not connected to any page source but serves as the
0114    blueprint for this particular field, and a concrete field that is connected to the page source currently connected
0115    to the RNTupleProcessor for reading. When a new page source is connected, the current concrete field gets reset. A
0116    new concrete field that is connected to this new page source is subsequently created from the proto-field.
0117    */
0118    // clang-format on
0119    class RFieldContext {
0120       friend class RNTupleProcessor;
0121       friend class RNTupleSingleProcessor;
0122       friend class RNTupleChainProcessor;
0123       friend class RNTupleJoinProcessor;
0124 
0125    private:
0126       std::unique_ptr<ROOT::RFieldBase> fProtoField;
0127       std::unique_ptr<ROOT::RFieldBase> fConcreteField;
0128       ROOT::RFieldToken fToken;
0129       // Which RNTuple the field belongs to, in case the field belongs to an auxiliary RNTuple, according to the order
0130       // in which it was specified. For chained RNTuples, this value will always be 0.
0131       std::size_t fNTupleIdx;
0132 
0133    public:
0134       RFieldContext(std::unique_ptr<ROOT::RFieldBase> protoField, ROOT::RFieldToken token, std::size_t ntupleIdx = 0)
0135          : fProtoField(std::move(protoField)), fToken(token), fNTupleIdx(ntupleIdx)
0136       {
0137       }
0138 
0139       const ROOT::RFieldBase &GetProtoField() const { return *fProtoField; }
0140       /// Concrete pages need to be reset explicitly before the page source they belong to is destroyed.
0141       void ResetConcreteField() { fConcreteField.reset(); }
0142       void SetConcreteField() { fConcreteField = fProtoField->Clone(fProtoField->GetFieldName()); }
0143       bool IsAuxiliary() const { return fNTupleIdx > 0; }
0144    };
0145 
0146    std::string fProcessorName;
0147    std::vector<RNTupleOpenSpec> fNTuples;
0148    std::unique_ptr<ROOT::REntry> fEntry;
0149    std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
0150    /// Maps the (qualified) field name to its corresponding field context.
0151    std::unordered_map<std::string, RFieldContext> fFieldContexts;
0152 
0153    std::unique_ptr<ROOT::RNTupleModel> fModel;
0154 
0155    /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public
0156    /// interface.
0157    ROOT::NTupleSize_t fNEntries = kInvalidNTupleIndex;
0158 
0159    ROOT::NTupleSize_t fNEntriesProcessed = 0;  //< Total number of entries processed so far
0160    ROOT::NTupleSize_t fCurrentEntryNumber = 0; //< Current processor entry number
0161    std::size_t fCurrentProcessorNumber = 0;    //< Number of the currently open inner processor
0162 
0163    /////////////////////////////////////////////////////////////////////////////
0164    /// \brief Create and connect a concrete field to the current page source, based on its proto field.
0165    void ConnectField(RFieldContext &fieldContext, ROOT::Internal::RPageSource &pageSource, ROOT::REntry &entry);
0166 
0167    /////////////////////////////////////////////////////////////////////////////
0168    /// \brief Load the entry identified by the provided entry number.
0169    ///
0170    /// \param[in] entryNumber Entry number to load
0171    ///
0172    /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise.
0173    virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) = 0;
0174 
0175    /////////////////////////////////////////////////////////////////////////////
0176    /// \brief Point the entry's field values of the processor to the pointers from the provided entry.
0177    ///
0178    /// \param[in] entry The entry whose field values to use.
0179    virtual void SetEntryPointers(const ROOT::REntry &entry) = 0;
0180 
0181    /////////////////////////////////////////////////////////////////////////////
0182    /// \brief Get the total number of entries in this processor
0183    virtual ROOT::NTupleSize_t GetNEntries() = 0;
0184 
0185    /////////////////////////////////////////////////////////////////////////////
0186    /// \brief Create a new base RNTupleProcessor.
0187    ///
0188    /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for
0189    /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary
0190    /// RNTuple for RNTupleJoinProcessor.
0191    /// \param[in] model The RNTupleModel representing the entries returned by the processor.
0192    ///
0193    /// \note Before processing, a model *must* exist. However, this is handled downstream by the RNTupleProcessor's
0194    /// factory functions (CreateSingle, CreateChain and CreateJoin) and constructors.
0195    RNTupleProcessor(std::string_view processorName, std::unique_ptr<ROOT::RNTupleModel> model)
0196       : fProcessorName(processorName), fModel(std::move(model))
0197    {
0198    }
0199 
0200 public:
0201    RNTupleProcessor(const RNTupleProcessor &) = delete;
0202    RNTupleProcessor(RNTupleProcessor &&) = delete;
0203    RNTupleProcessor &operator=(const RNTupleProcessor &) = delete;
0204    RNTupleProcessor &operator=(RNTupleProcessor &&) = delete;
0205    virtual ~RNTupleProcessor() = default;
0206 
0207    /////////////////////////////////////////////////////////////////////////////
0208    /// \brief Get the total number of entries processed so far.
0209    ROOT::NTupleSize_t GetNEntriesProcessed() const { return fNEntriesProcessed; }
0210 
0211    /////////////////////////////////////////////////////////////////////////////
0212    /// \brief Get the entry number that is currently being processed.
0213    ROOT::NTupleSize_t GetCurrentEntryNumber() const { return fCurrentEntryNumber; }
0214 
0215    /////////////////////////////////////////////////////////////////////////////
0216    /// \brief Get the number of the inner processor currently being read.
0217    ///
0218    /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned.
0219    std::size_t GetCurrentProcessorNumber() const { return fCurrentProcessorNumber; }
0220 
0221    /////////////////////////////////////////////////////////////////////////////
0222    /// \brief Get the name of the processor.
0223    ///
0224    /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying
0225    /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the
0226    /// primary RNTuple for RNTupleJoinProcessor.
0227    const std::string &GetProcessorName() const { return fProcessorName; }
0228 
0229    /////////////////////////////////////////////////////////////////////////////
0230    /// \brief Get the model used by the processor.
0231    const ROOT::RNTupleModel &GetModel() const { return *fModel; }
0232 
0233    /////////////////////////////////////////////////////////////////////////////
0234    /// \brief Get a reference to the entry used by the processor.
0235    ///
0236    /// \return A reference to the entry used by the processor.
0237    const ROOT::REntry &GetEntry() const { return *fEntry; }
0238 
0239    // clang-format off
0240    /**
0241    \class ROOT::Experimental::RNTupleProcessor::RIterator
0242    \ingroup NTuple
0243    \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
0244    */
0245    // clang-format on
0246    class RIterator {
0247    private:
0248       RNTupleProcessor &fProcessor;
0249       ROOT::NTupleSize_t fCurrentEntryNumber;
0250 
0251    public:
0252       using iterator_category = std::forward_iterator_tag;
0253       using iterator = RIterator;
0254       using value_type = ROOT::REntry;
0255       using difference_type = std::ptrdiff_t;
0256       using pointer = ROOT::REntry *;
0257       using reference = const ROOT::REntry &;
0258 
0259       RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber)
0260          : fProcessor(processor), fCurrentEntryNumber(entryNumber)
0261       {
0262          // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already
0263          // know there is nothing to load.
0264          if (fCurrentEntryNumber != ROOT::kInvalidNTupleIndex) {
0265             fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber);
0266          }
0267       }
0268 
0269       iterator operator++()
0270       {
0271          fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber + 1);
0272          return *this;
0273       }
0274 
0275       iterator operator++(int)
0276       {
0277          auto obj = *this;
0278          ++(*this);
0279          return obj;
0280       }
0281 
0282       reference operator*() { return fProcessor.GetEntry(); }
0283 
0284       friend bool operator!=(const iterator &lh, const iterator &rh)
0285       {
0286          return lh.fCurrentEntryNumber != rh.fCurrentEntryNumber;
0287       }
0288       friend bool operator==(const iterator &lh, const iterator &rh)
0289       {
0290          return lh.fCurrentEntryNumber == rh.fCurrentEntryNumber;
0291       }
0292    };
0293 
0294    RIterator begin() { return RIterator(*this, 0); }
0295    RIterator end() { return RIterator(*this, ROOT::kInvalidNTupleIndex); }
0296 
0297    /////////////////////////////////////////////////////////////////////////////
0298    /// \brief Create an RNTupleProcessor for a single RNTuple.
0299    ///
0300    /// \param[in] ntuple The name and storage location of the RNTuple to process.
0301    /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
0302    /// one will be created based on the descriptor of the first ntuple specified.
0303    ///
0304    /// \return A pointer to the newly created RNTupleProcessor.
0305    static std::unique_ptr<RNTupleProcessor>
0306    Create(RNTupleOpenSpec ntuple, std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
0307 
0308    /////////////////////////////////////////////////////////////////////////////
0309    /// \brief Create an RNTupleProcessor for a single RNTuple.
0310    ///
0311    /// \param[in] ntuple The name and storage location of the RNTuple to process.
0312    /// \param[in] processorName The name to give to the processor. Use
0313    /// Create(const RNTupleOpenSpec &, std::unique_ptr<RNTupleModel>) to automatically use the name of the input RNTuple
0314    /// instead.
0315    /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
0316    /// one will be created based on the descriptor of the first ntuple specified.
0317    ///
0318    /// \return A pointer to the newly created RNTupleProcessor.
0319    static std::unique_ptr<RNTupleProcessor>
0320    Create(RNTupleOpenSpec ntuple, std::string_view processorName, std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
0321 
0322    /////////////////////////////////////////////////////////////////////////////
0323    /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
0324    ///
0325    /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
0326    /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
0327    /// one will be created based on the descriptor of the first RNTuple specified.
0328    ///
0329    /// \return A pointer to the newly created RNTupleProcessor.
0330    static std::unique_ptr<RNTupleProcessor>
0331    CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
0332 
0333    /////////////////////////////////////////////////////////////////////////////
0334    /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
0335    ///
0336    /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
0337    /// \param[in] processorName The name to give to the processor. Use
0338    /// CreateChain(const RNTupleOpenSpec &, std::unique_ptr<RNTupleModel>) to automatically use the name of the first
0339    /// input RNTuple instead.
0340    /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
0341    /// one will be created based on the descriptor of the first RNTuple specified.
0342    ///
0343    /// \return A pointer to the newly created RNTupleProcessor.
0344    static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<RNTupleOpenSpec> ntuples,
0345                                                         std::string_view processorName,
0346                                                         std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
0347 
0348    /////////////////////////////////////////////////////////////////////////////
0349    /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
0350    ///
0351    /// \param[in] innerProcessors A list with the processors to chain.
0352    /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
0353    /// one will be created based on the model used by the first inner processor.
0354    ///
0355    /// \return A pointer to the newly created RNTupleProcessor.
0356    static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors,
0357                                                         std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
0358 
0359    /////////////////////////////////////////////////////////////////////////////
0360    /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
0361    ///
0362    /// \param[in] innerProcessors A list with the processors to chain.
0363    /// \param[in] processorName The name to give to the processor. Use
0364    /// CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>>, std::unique_ptr<RNTupleModel>) to automatically use
0365    /// the name of the first inner processor instead.
0366    /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided,
0367    /// one will be created based on the model used by the first inner processor.
0368    ///
0369    /// \return A pointer to the newly created RNTupleProcessor.
0370    static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors,
0371                                                         std::string_view processorName,
0372                                                         std::unique_ptr<ROOT::RNTupleModel> model = nullptr);
0373 
0374    /////////////////////////////////////////////////////////////////////////////
0375    /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
0376    ///
0377    /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
0378    /// order.
0379    /// \param[in] auxNTuples The names and locations of the RNTuples to join the primary RNTuple with. The order in
0380    /// which their entries are processed are determined by the primary RNTuple and doesn't necessarily have to be
0381    /// sequential.
0382    /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
0383    /// The join is made based on the combined join field values, and therefore each field has to be present in each
0384    /// specified RNTuple. If an empty list is provided, it is assumed that the specified ntuple are fully aligned.
0385    /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple can be read by the
0386    /// processor. If no model is provided, one will be created based on the descriptor of the primary RNTuple.
0387    /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary RNTuple
0388    /// (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the models will
0389    /// be created based on the descriptors of their corresponding RNTuples. This also applies to individual auxiliary
0390    /// RNTuples for which the provided model is a `nullptr`.
0391    ///
0392    /// \return A pointer to the newly created RNTupleProcessor.
0393    static std::unique_ptr<RNTupleProcessor>
0394    CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples,
0395               const std::vector<std::string> &joinFields, std::unique_ptr<ROOT::RNTupleModel> primaryModel = nullptr,
0396               std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels = {});
0397 
0398    /////////////////////////////////////////////////////////////////////////////
0399    /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
0400    ///
0401    /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
0402    /// order.
0403    /// \param[in] auxNTuples The names and locations of the RNTuples to join the primary RNTuple with. The order in
0404    /// which their entries are processed are determined by the primary RNTuple and doesn't necessarily have to be
0405    /// sequential.
0406    /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
0407    /// The join is made based on the combined join field values, and therefore each field has to be present in each
0408    /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuple are fully aligned.
0409    /// \param[in] processorName The name to give to the processor. Use
0410    /// CreateJoin(const RNTupleOpenSpec &, const std::vector<RNTupleOpenSpec> &, const std::vector<std::string> &,
0411    /// std::unique_ptr<RNTupleModel>, std::vector<std::unique_ptr<RNTupleModel>>) to automatically use the name of the
0412    /// input RNTuple instead.
0413    /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple
0414    /// can be read by the processor. If no model is provided, one will be created based on the descriptor of the primary
0415    /// RNTuple.
0416    /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary
0417    /// RNTuple (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the
0418    /// models will be created based on the descriptors of their corresponding RNTuples. This also applies to individual
0419    /// auxiliary RNTuples for which the provided model is a `nullptr`.
0420    ///
0421    /// \return A pointer to the newly created RNTupleProcessor.
0422    static std::unique_ptr<RNTupleProcessor>
0423    CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples,
0424               const std::vector<std::string> &joinFields, std::string_view processorName,
0425               std::unique_ptr<ROOT::RNTupleModel> primaryModel = nullptr,
0426               std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels = {});
0427 };
0428 
0429 // clang-format off
0430 /**
0431 \class ROOT::Experimental::RNTupleSingleProcessor
0432 \ingroup NTuple
0433 \brief Processor specialization for processing a single RNTuple.
0434 */
0435 // clang-format on
0436 class RNTupleSingleProcessor : public RNTupleProcessor {
0437    friend class RNTupleProcessor;
0438 
0439 private:
0440    RNTupleOpenSpec fNTupleSpec;
0441 
0442    /////////////////////////////////////////////////////////////////////////////
0443    /// \brief Connect the page source of the underlying RNTuple.
0444    void Connect();
0445 
0446    /////////////////////////////////////////////////////////////////////////////
0447    /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
0448    /// processor).
0449    ///
0450    /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
0451    ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
0452 
0453    /////////////////////////////////////////////////////////////////////////////
0454    /// \sa ROOT::Experimental::RNTupleProcessor::SetEntryPointers.
0455    void SetEntryPointers(const ROOT::REntry &entry) final;
0456 
0457    /////////////////////////////////////////////////////////////////////////////
0458    /// \brief Get the total number of entries in this processor.
0459    ROOT::NTupleSize_t GetNEntries() final
0460    {
0461       Connect();
0462       return fNEntries;
0463    }
0464 
0465    /////////////////////////////////////////////////////////////////////////////
0466    /// \brief Construct a new RNTupleProcessor for processing a single RNTuple.
0467    ///
0468    /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
0469    /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
0470    /// the name of the underlying RNTuple.
0471    /// \param[in] model The model that specifies which fields should be read by the processor.
0472    RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName,
0473                           std::unique_ptr<ROOT::RNTupleModel> model);
0474 };
0475 
0476 // clang-format off
0477 /**
0478 \class ROOT::Experimental::RNTupleChainProcessor
0479 \ingroup NTuple
0480 \brief Processor specialization for vertically combined (*chained*) RNTupleProcessors.
0481 */
0482 // clang-format on
0483 class RNTupleChainProcessor : public RNTupleProcessor {
0484    friend class RNTupleProcessor;
0485 
0486 private:
0487    std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors;
0488    std::vector<ROOT::NTupleSize_t> fInnerNEntries;
0489 
0490    /////////////////////////////////////////////////////////////////////////////
0491    /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
0492    /// processor).
0493    ///
0494    /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
0495    ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
0496 
0497    /////////////////////////////////////////////////////////////////////////////
0498    /// \sa ROOT::Experimental::RNTupleProcessor::SetEntryPointers.
0499    void SetEntryPointers(const ROOT::REntry &) final;
0500 
0501    /////////////////////////////////////////////////////////////////////////////
0502    /// \brief Get the total number of entries in this processor.
0503    ///
0504    /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly!
0505    ROOT::NTupleSize_t GetNEntries() final;
0506 
0507    /////////////////////////////////////////////////////////////////////////////
0508    /// \brief Construct a new RNTupleChainProcessor.
0509    ///
0510    /// \param[in] ntuples The source specification (name and storage location) for each RNTuple to process.
0511    /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
0512    /// is the name of the first inner processor.
0513    /// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by
0514    /// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is
0515    /// specified, it is created from the descriptor of the first RNTuple specified in `ntuples`.
0516    ///
0517    /// RNTuples are processed in the order in which they are specified.
0518    RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName,
0519                          std::unique_ptr<ROOT::RNTupleModel> model);
0520 };
0521 
0522 // clang-format off
0523 /**
0524 \class ROOT::Experimental::RNTupleJoinProcessor
0525 \ingroup NTuple
0526 \brief Processor specialization for horizontally combined (*joined*) RNTuples.
0527 */
0528 // clang-format on
0529 class RNTupleJoinProcessor : public RNTupleProcessor {
0530    friend class RNTupleProcessor;
0531 
0532 private:
0533    std::vector<std::unique_ptr<ROOT::Internal::RPageSource>> fAuxiliaryPageSources;
0534    /// Tokens representing the join fields present in the main RNTuple
0535    std::vector<ROOT::RFieldToken> fJoinFieldTokens;
0536    std::vector<std::unique_ptr<Internal::RNTupleJoinTable>> fJoinTables;
0537    bool fJoinTablesAreBuilt = false;
0538 
0539    bool HasJoinTable() const { return fJoinTables.size() > 0; }
0540 
0541    /////////////////////////////////////////////////////////////////////////////
0542    /// \brief Load the entry identified by the provided entry number of the primary RNTuple.
0543    ///
0544    /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
0545    ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
0546 
0547    /////////////////////////////////////////////////////////////////////////////
0548    /// \sa ROOT::Experimental::RNTupleProcessor::SetEntryPointers.
0549    void SetEntryPointers(const ROOT::REntry &) final;
0550 
0551    /////////////////////////////////////////////////////////////////////////////
0552    /// \brief Get the total number of entries in this processor.
0553    ROOT::NTupleSize_t GetNEntries() final { return fNEntries; }
0554 
0555    /////////////////////////////////////////////////////////////////////////////
0556    /// \brief Set fModel by combining the primary and auxiliary models.
0557    ///
0558    /// \param[in] primaryModel The model of the primary RNTuple.
0559    /// \param[in] auxModels Models of the auxiliary RNTuples.
0560    ///
0561    /// To prevent field name clashes when one or more models have fields with duplicate names, fields from each
0562    /// auxiliary model are stored as a anonymous record, and subsequently registered as subfields in the join model.
0563    /// This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`.
0564    void SetModel(std::unique_ptr<ROOT::RNTupleModel> primaryModel,
0565                  std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels);
0566 
0567    /////////////////////////////////////////////////////////////////////////////
0568    /// \brief Connect all fields, once the primary and all auxiliary RNTuples have been added.
0569    void ConnectFields();
0570 
0571    /////////////////////////////////////////////////////////////////////////////
0572    /// \brief Populate fJoinFieldTokens with tokens for join fields belonging to the main RNTuple in the join model.
0573    ///
0574    /// \param[in] joinFields The names of the fields used in the join.
0575    void SetJoinFieldTokens(const std::vector<std::string> &joinFields)
0576    {
0577       fJoinFieldTokens.reserve(joinFields.size());
0578       for (const auto &fieldName : joinFields) {
0579          fJoinFieldTokens.emplace_back(fEntry->GetToken(fieldName));
0580       }
0581    }
0582 
0583    /////////////////////////////////////////////////////////////////////////////
0584    /// \brief Construct a new RNTupleJoinProcessor.
0585    ///
0586    /// \param[in] mainNTuple The source specification (name and storage location) of the primary RNTuple.
0587    /// \param[in] auxNTUples The source specifications (name and storage location) of the auxiliary RNTuples.
0588    /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
0589    /// The join is made based on the combined join field values, and therefore each field has to be present in each
0590    /// specified RNTuple. If an empty list is provided, it is assumed that the RNTuples are fully aligned.
0591    /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this
0592    /// is the name of the main RNTuple.
0593    /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple can be read by the
0594    /// processor. If no model is provided, one will be created based on the descriptor of the primary RNTuple.
0595    /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary RNTuple
0596    /// (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the models will
0597    /// be created based on the descriptors of their corresponding RNTuples. This also applies to individual auxiliary
0598    /// RNTuples for which the provided model is a `nullptr`.
0599    RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples,
0600                         const std::vector<std::string> &joinFields, std::string_view processorName,
0601                         std::unique_ptr<ROOT::RNTupleModel> primaryModel = nullptr,
0602                         std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels = {});
0603 
0604 public:
0605    RNTupleJoinProcessor(const RNTupleJoinProcessor &) = delete;
0606    RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &) = delete;
0607    RNTupleJoinProcessor(RNTupleJoinProcessor &&) = delete;
0608    RNTupleJoinProcessor operator=(RNTupleJoinProcessor &&) = delete;
0609    ~RNTupleJoinProcessor() override
0610    {
0611       for (auto &[_, fieldContext] : fFieldContexts) {
0612          fieldContext.ResetConcreteField();
0613       }
0614    }
0615 };
0616 
0617 } // namespace Experimental
0618 } // namespace ROOT
0619 
0620 #endif // ROOT_RNTupleProcessor