Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:30:01

0001 /// \file ROOT/RNTupleProcessor.hxx
0002 /// \ingroup NTuple
0003 /// \author Florine de Geus <florine.de.geus@cern.ch>
0004 /// \date 2024-03-26
0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0006 /// is welcome!
0007 
0008 /*************************************************************************
0009  * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers.               *
0010  * All rights reserved.                                                  *
0011  *                                                                       *
0012  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0013  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0014  *************************************************************************/
0015 
0016 #ifndef ROOT_RNTupleProcessor
0017 #define ROOT_RNTupleProcessor
0018 
#include <ROOT/REntry.hxx>
#include <ROOT/RError.hxx>
#include <ROOT/RNTupleDescriptor.hxx>
#include <ROOT/RNTupleJoinTable.hxx>
#include <ROOT/RNTupleModel.hxx>
#include <ROOT/RNTupleTypes.hxx>
#include <ROOT/RNTupleProcessorEntry.hxx>
#include <ROOT/RPageStorage.hxx>

#include <cassert>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <string_view>
#include <unordered_set>
#include <variant>
#include <vector>
0032 
0033 namespace ROOT {
0034 namespace Experimental {
0035 
0036 namespace Internal {
0037 struct RNTupleProcessorEntryLoader;
0038 } // namespace Internal
0039 
0040 // clang-format off
0041 /**
0042 \class ROOT::Experimental::RNTupleOpenSpec
0043 \ingroup NTuple
0044 \brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor.
0045 
0046 An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the
0047 TDirectory (or any of its subclasses) that contains the RNTuple.
0048 
0049 Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python.
0050 */
0051 // clang-format on
0052 class RNTupleOpenSpec {
0053    friend class RNTupleProcessor;
0054    friend class RNTupleSingleProcessor;
0055    friend class RNTupleJoinProcessor;
0056 
0057 private:
0058    std::string fNTupleName;
0059    std::variant<std::string, TDirectory *> fStorage;
0060 
0061 public:
0062    RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {}
0063    RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {}
0064 
0065    std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const;
0066 };
0067 
0068 // clang-format off
0069 /**
0070 \class ROOT::Experimental::RNTupleProcessorOptionalPtr<T>
0071 \ingroup NTuple
0072 \brief The RNTupleProcessorOptionalPtr provides access to values from fields present in an RNTupleProcessor, with support
0073 and checks for missing values.
0074 */
0075 // clang-format on
0076 template <typename T>
0077 class RNTupleProcessorOptionalPtr {
0078    friend class RNTupleProcessor;
0079 
0080 private:
0081    Internal::RNTupleProcessorEntry *fProcessorEntry;
0082    Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex;
0083 
0084    RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry,
0085                                Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
0086       : fProcessorEntry(processorEntry), fFieldIndex(fieldIdx)
0087    {
0088    }
0089 
0090 public:
0091    /////////////////////////////////////////////////////////////////////////////
0092    /// \brief Check if the pointer currently holds a valid value.
0093    bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); }
0094 
0095    /////////////////////////////////////////////////////////////////////////////
0096    /// \brief Get a shared pointer to the field value managed by the processor's entry.
0097    ///
0098    /// \return A `std::shared_ptr<T>` if the field is valid in the current entry, or a `nullptr` otherwise.
0099    std::shared_ptr<T> GetPtr() const
0100    {
0101       if (fProcessorEntry->IsValidField(fFieldIndex))
0102          return fProcessorEntry->GetPtr<T>(fFieldIndex);
0103 
0104       return nullptr;
0105    }
0106 
0107    /////////////////////////////////////////////////////////////////////////////
0108    /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
0109    ///
0110    /// \return A `T*` if the field is valid in the current entry, or a `nullptr` otherwise.
0111    T *GetRawPtr() const { return GetPtr().get(); }
0112 
0113    /////////////////////////////////////////////////////////////////////////////
0114    /// \brief Bind the value to `valuePtr`.
0115    ///
0116    /// \param[in] valuePtr Pointer to bind the value to.
0117    ///
0118    /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
0119    /// example when a field is not present in one of the chained processors or when during a join operation, no matching
0120    /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
0121    /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
0122    /// its data through this interface, to ensure that only valid data can be read.
0123    void BindRawPtr(T *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); }
0124 
0125    /////////////////////////////////////////////////////////////////////////////
0126    /// \brief Get a reference to the field value managed by the processor's entry.
0127    ///
0128    /// Throws an exception if the field is invalid in the processor's current entry.
0129    const T &operator*() const
0130    {
0131       if (auto ptr = GetPtr())
0132          return *ptr;
0133       else
0134          throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
0135                                   "\" because it has no value for the current entry"));
0136    }
0137 
0138    /////////////////////////////////////////////////////////////////////////////
0139    /// \brief Access the field value managed by the processor's entry.
0140    ///
0141    /// Throws an exception if the field is invalid in the processor's current entry.
0142    const T *operator->() const
0143    {
0144       if (auto ptr = GetPtr())
0145          return ptr.get();
0146       else
0147          throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) +
0148                                   "\" because it has no value for the current entry"));
0149    }
0150 };
0151 
0152 // clang-format off
0153 /**
0154 \class ROOT::Experimental::RNTupleProcessorOptionalPtr<void>
0155 \ingroup NTuple
0156 \brief Specialization of RNTupleProcessorOptionalPtr<T> for `void`-type pointers.
0157 */
0158 // clang-format on
0159 template <>
0160 class RNTupleProcessorOptionalPtr<void> {
0161    friend class RNTupleProcessor;
0162 
0163 private:
0164    Internal::RNTupleProcessorEntry *fProcessorEntry;
0165    Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex;
0166 
0167    RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry,
0168                                Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx)
0169       : fProcessorEntry(processorEntry), fFieldIndex(fieldIdx)
0170    {
0171    }
0172 
0173 public:
0174    /////////////////////////////////////////////////////////////////////////////
0175    /// \brief Check if the pointer currently holds a valid value.
0176    bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); }
0177 
0178    /////////////////////////////////////////////////////////////////////////////
0179    /// \brief Get the pointer to the field value managed by the processor's entry.
0180    ///
0181    /// \return A `std::shared_ptr<void>` if the field is valid in the current entry, or a `nullptr` otherwise.
0182    std::shared_ptr<void> GetPtr() const
0183    {
0184       if (fProcessorEntry->IsValidField(fFieldIndex))
0185          return fProcessorEntry->GetPtr<void>(fFieldIndex);
0186 
0187       return nullptr;
0188    }
0189 
0190    /////////////////////////////////////////////////////////////////////////////
0191    /// \brief Get a non-owning pointer to the field value managed by the processor's entry.
0192    ///
0193    /// \return A `void*` if the field is valid in the current entry, or a `nullptr` otherwise.
0194    void *GetRawPtr() const { return GetPtr().get(); }
0195 
0196    /////////////////////////////////////////////////////////////////////////////
0197    /// \brief Bind the value to `valuePtr`.
0198    ///
0199    /// \param[in] valuePtr Pointer to bind the value to.
0200    ///
0201    /// \warning Use this function with care! Values may not always be valid for every entry during processing, for
0202    /// example when a field is not present in one of the chained processors or when during a join operation, no matching
0203    /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
0204    /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing
0205    /// its data through this interface, to ensure that only valid data can be read.
0206    void BindRawPtr(void *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); }
0207 };
0208 
0209 // clang-format off
0210 /**
0211 \class ROOT::Experimental::RNTupleProcessor
0212 \ingroup NTuple
0213 \brief Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combined RNTuples.
0214 
0215 Example usage (see ntpl012_processor_chain.C and ntpl015_processor_join.C for bigger examples):
0216 
0217 ~~~{.cpp}
0218 #include <ROOT/RNTupleProcessor.hxx>
0219 using ROOT::Experimental::RNTupleProcessor;
0220 using ROOT::Experimental::RNTupleOpenSpec;
0221 
0222 std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}};
0223 auto processor = RNTupleProcessor::CreateChain(ntuples);
0224 
0225 auto pt = processor->RequestField<float>("pt");
0226 
0227 for (const auto idx : *processor) {
0228    std::cout << "event = " << idx << ", pt = " << *pt << std::endl;
0229 }
0230 ~~~
0231 
0232 An RNTupleProcessor is created either:
0233 1. By providing one or more RNTupleOpenSpecs, each of which contains the name and storage location of a single RNTuple;
0234 2. By providing a previously created RNTupleProcessor.
0235 
0236 The RNTupleProcessor provides an iterator which gives access to the index of the current *global* entry of the
0237 processor, i.e. taking into account previously processed RNTuples.
0238 
0239 Because the schemas of each RNTuple that are part of an RNTupleProcessor may not necessarily be identical, or because
0240 it can occur that entries are only partially complete in a join-based processor, field values may be marked as
0241 "invalid", at which point their data should not be read. This is handled by the RNTupleProcessorOptionalPtr
0242 that is returned by RequestField().
0243 */
0244 // clang-format on
0245 class RNTupleProcessor {
0246    friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader; // for unit tests
0247    friend class RNTupleSingleProcessor;
0248    friend class RNTupleChainProcessor;
0249    friend class RNTupleJoinProcessor;
0250 
0251 protected:
0252    std::string fProcessorName;
0253    std::unique_ptr<ROOT::RNTupleModel> fProtoModel = nullptr;
0254    std::shared_ptr<Internal::RNTupleProcessorEntry> fEntry = nullptr;
0255    std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fFieldIdxs;
0256 
0257    /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public
0258    /// interface.
0259    ROOT::NTupleSize_t fNEntries = kInvalidNTupleIndex;
0260 
0261    ROOT::NTupleSize_t fNEntriesProcessed = 0;  //< Total number of entries processed so far
0262    ROOT::NTupleSize_t fCurrentEntryNumber = 0; //< Current processor entry number
0263    std::size_t fCurrentProcessorNumber = 0;    //< Number of the currently open inner processor
0264 
0265    /////////////////////////////////////////////////////////////////////////////
0266    /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
0267    /// an existing one.
0268    virtual void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry) = 0;
0269 
0270    /////////////////////////////////////////////////////////////////////////////
0271    /// \brief Check if the processor already has been initialized.
0272    bool IsInitialized() const { return fProtoModel && fEntry; }
0273 
0274    /////////////////////////////////////////////////////////////////////////////
0275    /// \brief Connect fields to the page source of the processor's underlying RNTuple(s).
0276    ///
0277    /// \param[in] fieldIdxs Indices of the fields to connect.
0278    /// \param[in] provenance Provenance of the processor.
0279    /// \param[in] updateFields Whether the fields in the entry need to be updated, because the current underlying
0280    /// RNTuple source changed.
0281    virtual void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
0282                         const Internal::RNTupleProcessorProvenance &provenance, bool updateFields) = 0;
0283 
0284    /////////////////////////////////////////////////////////////////////////////
0285    /// \brief Load the entry identified by the provided entry number.
0286    ///
0287    /// \param[in] entryNumber Entry number to load
0288    ///
0289    /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise.
0290    virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) = 0;
0291 
0292    /////////////////////////////////////////////////////////////////////////////
0293    /// \brief Get the proto model used by the processor.
0294    ///
0295    /// A processor's proto model contains all fields that can be accessed and is inferred from the descriptors of the
0296    /// underlying RNTuples. It is used in RequestField() to check that the requested field is actually valid.
0297    const ROOT::RNTupleModel &GetProtoModel() const
0298    {
0299       assert(fProtoModel);
0300       return *fProtoModel;
0301    }
0302 
0303    /////////////////////////////////////////////////////////////////////////////
0304    /// \brief Get the total number of entries in this processor
0305    virtual ROOT::NTupleSize_t GetNEntries() = 0;
0306 
0307    /////////////////////////////////////////////////////////////////////////////
0308    /// \brief Check if a field exists on-disk and can be read by the processor.
0309    ///
0310    /// \param[in] fieldName Name of the field to check.
0311    virtual bool CanReadFieldFromDisk(std::string_view fieldName) = 0;
0312 
0313    /////////////////////////////////////////////////////////////////////////////
0314    /// \brief Add a field to the entry.
0315    ///
0316    ///
0317    /// \param[in] fieldName Name of the field to add.
0318    /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be
0319    /// created.
0320    /// \param[in] provenance Provenance of the processor.
0321    ///
0322    /// \return The index of the newly added field in the entry.
0323    ///
0324    /// In case the field was already present in the entry, the index of the existing field is returned.
0325    virtual ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t>
0326    AddFieldToEntry(std::string_view fieldName, void *valuePtr,
0327                    const Internal::RNTupleProcessorProvenance &provenance) = 0;
0328 
0329    /////////////////////////////////////////////////////////////////////////////
0330    /// \brief Add the entry mappings for this processor to the provided join table.
0331    ///
0332    /// \param[in] joinTable the join table to map the entries to.
0333    /// \param[in] entryOffset In case the entry mapping is added from a chain, the offset of the entry indexes to use
0334    /// with respect to the processor's position in the chain.
0335    virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) = 0;
0336 
0337    /////////////////////////////////////////////////////////////////////////////
0338    /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
0339    ///
0340    /// \param[in,out] output Output stream to print to.
0341    virtual void PrintStructureImpl(std::ostream &output) const = 0;
0342 
0343    /////////////////////////////////////////////////////////////////////////////
0344    /// \brief Create a new base RNTupleProcessor.
0345    ///
0346    /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for
0347    /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary
0348    /// RNTuple for RNTupleJoinProcessor.
0349    RNTupleProcessor(std::string_view processorName) : fProcessorName(processorName) {}
0350 
0351 public:
0352    RNTupleProcessor(const RNTupleProcessor &) = delete;
0353    RNTupleProcessor(RNTupleProcessor &&) = delete;
0354    RNTupleProcessor &operator=(const RNTupleProcessor &) = delete;
0355    RNTupleProcessor &operator=(RNTupleProcessor &&) = delete;
0356    virtual ~RNTupleProcessor() = default;
0357 
0358    /////////////////////////////////////////////////////////////////////////////
0359    /// \brief Get the total number of entries processed so far.
0360    ROOT::NTupleSize_t GetNEntriesProcessed() const { return fNEntriesProcessed; }
0361 
0362    /////////////////////////////////////////////////////////////////////////////
0363    /// \brief Get the entry number that is currently being processed.
0364    ROOT::NTupleSize_t GetCurrentEntryNumber() const { return fCurrentEntryNumber; }
0365 
0366    /////////////////////////////////////////////////////////////////////////////
0367    /// \brief Get the number of the inner processor currently being read.
0368    ///
0369    /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned.
0370    std::size_t GetCurrentProcessorNumber() const { return fCurrentProcessorNumber; }
0371 
0372    /////////////////////////////////////////////////////////////////////////////
0373    /// \brief Get the name of the processor.
0374    ///
0375    /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying
0376    /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the
0377    /// primary processor for RNTupleJoinProcessor.
0378    const std::string &GetProcessorName() const { return fProcessorName; }
0379 
0380    /////////////////////////////////////////////////////////////////////////////
0381    /// \brief Request access to a field for reading during processing.
0382    ///
0383    /// \tparam T Type of the requested field.
0384    ///
0385    /// \param[in] fieldName Name of the requested field.
0386    ///
0387    /// \return An RNTupleProcessorOptionalPtr, which provides access to the field's value.
0388    ///
0389    /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for
0390    /// example when a field is not present in one of the chained processors or when during a join operation, no matching
0391    /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading
0392    /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through
0393    /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read.
0394    template <typename T>
0395    RNTupleProcessorOptionalPtr<T> RequestField(std::string_view fieldName, void *valuePtr = nullptr)
0396    {
0397       Initialize(fEntry);
0398       // TODO handle alternative (compatible field types)
0399       auto fieldIdx = AddFieldToEntry(fieldName, valuePtr, Internal::RNTupleProcessorProvenance()).Unwrap();
0400       return RNTupleProcessorOptionalPtr<T>(fEntry.get(), fieldIdx);
0401    }
0402 
0403    /////////////////////////////////////////////////////////////////////////////
0404    /// \brief Print a graphical representation of the processor composition.
0405    ///
0406    /// \param[in,out] output Stream to print to (default is stdout).
0407    ///
0408    /// ### Example:
0409    /// The structure of a processor representing a join between a single primary RNTuple and a chain of two auxiliary
0410    /// RNTuples will be printed as follows:
0411    /// ~~~
0412    /// +-----------------------------+ +-----------------------------+
0413    /// | ntuple                      | | ntuple_aux                  |
0414    /// | ntuple.root                 | | ntuple_aux1.root            |
0415    /// +-----------------------------+ +-----------------------------+
0416    ///                                 +-----------------------------+
0417    ///                                 | ntuple_aux                  |
0418    ///                                 | ntuple_aux2.root            |
0419    ///                                 +-----------------------------+
0420    /// ~~~
0421    void PrintStructure(std::ostream &output = std::cout) { PrintStructureImpl(output); }
0422 
0423    // clang-format off
0424    /**
0425    \class ROOT::Experimental::RNTupleProcessor::RIterator
0426    \ingroup NTuple
0427    \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof.
0428    */
0429    // clang-format on
0430    class RIterator {
0431    private:
0432       RNTupleProcessor &fProcessor;
0433       ROOT::NTupleSize_t fCurrentEntryNumber;
0434 
0435    public:
0436       using iterator_category = std::input_iterator_tag;
0437       using iterator = RIterator;
0438       using value_type = ROOT::NTupleSize_t;
0439       using difference_type = std::ptrdiff_t;
0440       using pointer = ROOT::NTupleSize_t *;
0441       using reference = ROOT::NTupleSize_t &;
0442 
0443       RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber)
0444          : fProcessor(processor), fCurrentEntryNumber(entryNumber)
0445       {
0446          if (!fProcessor.fEntry) {
0447             fCurrentEntryNumber = ROOT::kInvalidNTupleIndex;
0448          }
0449          // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already
0450          // know there is nothing to load.
0451          if (fCurrentEntryNumber != ROOT::kInvalidNTupleIndex) {
0452             fProcessor.Connect(fProcessor.fEntry->GetFieldIndices(), Internal::RNTupleProcessorProvenance(),
0453                                /*updateFields=*/false);
0454             fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber);
0455          }
0456       }
0457 
0458       iterator operator++()
0459       {
0460          fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber + 1);
0461          return *this;
0462       }
0463 
0464       iterator operator++(int)
0465       {
0466          auto obj = *this;
0467          ++(*this);
0468          return obj;
0469       }
0470 
0471       reference operator*() { return fCurrentEntryNumber; }
0472 
0473       friend bool operator!=(const iterator &lh, const iterator &rh)
0474       {
0475          return lh.fCurrentEntryNumber != rh.fCurrentEntryNumber;
0476       }
0477       friend bool operator==(const iterator &lh, const iterator &rh)
0478       {
0479          return lh.fCurrentEntryNumber == rh.fCurrentEntryNumber;
0480       }
0481    };
0482 
0483    RIterator begin() { return RIterator(*this, 0); }
0484    RIterator end() { return RIterator(*this, ROOT::kInvalidNTupleIndex); }
0485 
0486    /////////////////////////////////////////////////////////////////////////////
0487    /// \brief Create an RNTupleProcessor for a single RNTuple.
0488    ///
0489    /// \param[in] ntuple The name and storage location of the RNTuple to process.
0490    /// \param[in] processorName The name to give to the processor. If empty, the name of the input RNTuple is used.
0491    ///
0492    /// \return A pointer to the newly created RNTupleProcessor.
0493    static std::unique_ptr<RNTupleProcessor> Create(RNTupleOpenSpec ntuple, std::string_view processorName = "");
0494 
0495    /////////////////////////////////////////////////////////////////////////////
0496    /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples.
0497    ///
0498    /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process.
0499    /// \param[in] processorName The name to give to the processor. If empty, the name of the first RNTuple is used.
0500    ///
0501    /// \return A pointer to the newly created RNTupleProcessor.
0502    static std::unique_ptr<RNTupleProcessor>
0503    CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::string_view processorName = "");
0504 
0505    /////////////////////////////////////////////////////////////////////////////
0506    /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors.
0507    ///
0508    /// \param[in] innerProcessors A list with the processors to chain.
0509    /// \param[in] processorName The name to give to the processor. If empty, the name of the first inner processor is
0510    /// used.
0511    ///
0512    /// \return A pointer to the newly created RNTupleProcessor.
0513    static std::unique_ptr<RNTupleProcessor>
0514    CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors, std::string_view processorName = "");
0515 
0516    /////////////////////////////////////////////////////////////////////////////
0517    /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
0518    ///
0519    /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential
0520    /// order.
0521    /// \param[in] auxNTuple The name and location of the RNTuple to join the primary RNTuple with. The order in which
0522    /// its entries are processed is determined by the primary RNTuple and doesn't necessarily have to be sequential.
0523    /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned.
0524    /// The join is made based on the combined join field values, and therefore each field has to be present in each
0525    /// specified RNTuple. If an empty list is provided, it is assumed that the specified ntuple are fully aligned.
0526    /// \param[in] processorName The name to give to the processor. If empty, the name of the primary RNTuple is used.
0527    ///
0528    /// \return A pointer to the newly created RNTupleProcessor.
0529    static std::unique_ptr<RNTupleProcessor> CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple,
0530                                                        const std::vector<std::string> &joinFields,
0531                                                        std::string_view processorName = "");
0532 
0533    /////////////////////////////////////////////////////////////////////////////
0534    /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples.
0535    ///
0536    /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
0537    /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
0538    /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
0539    /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
0540    /// The join is made based on the combined join field values, and therefore each field has to be present in each
0541    /// specified processors. If an empty list is provided, it is assumed that the specified processors are fully
0542    /// aligned.
0543    /// \param[in] processorName The name to give to the processor. If empty, the name of the primary processor is used.
0544    ///
0545    /// \return A pointer to the newly created RNTupleProcessor.
0546    static std::unique_ptr<RNTupleProcessor>
0547    CreateJoin(std::unique_ptr<RNTupleProcessor> primaryProcessor, std::unique_ptr<RNTupleProcessor> auxProcessor,
0548               const std::vector<std::string> &joinFields, std::string_view processorName = "");
0549 };
0550 
0551 // clang-format off
0552 /**
0553 \class ROOT::Experimental::RNTupleSingleProcessor
0554 \ingroup NTuple
0555 \brief Processor specialization for processing a single RNTuple.
0556 */
0557 // clang-format on
0558 class RNTupleSingleProcessor : public RNTupleProcessor {
0559    friend class RNTupleProcessor;
0560 
0561 private:
0562    RNTupleOpenSpec fNTupleSpec;
0563    std::unique_ptr<ROOT::Internal::RPageSource> fPageSource;
0564 
0565    /////////////////////////////////////////////////////////////////////////////
0566    /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
0567    /// an existing one.
0568    ///
0569    /// At this point, the page source for the underlying RNTuple of the processor will be created and opened.
0570    void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
0571 
0572    /////////////////////////////////////////////////////////////////////////////
0573    /// \brief Connect the provided fields indices in the entry to their on-disk fields.
0574    void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
0575                 const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance(),
0576                 bool updateFields = false) final;
0577 
0578    /////////////////////////////////////////////////////////////////////////////
0579    /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
0580    /// processor).
0581    ///
0582    /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
0583    ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
0584 
0585    /////////////////////////////////////////////////////////////////////////////
0586    /// \brief Get the total number of entries in this processor.
0587    ROOT::NTupleSize_t GetNEntries() final
0588    {
0589       Initialize();
0590       if (fNEntries == ROOT::kInvalidNTupleIndex)
0591          Connect(fFieldIdxs);
0592       return fNEntries;
0593    }
0594 
0595    /////////////////////////////////////////////////////////////////////////////
0596    /// \brief Check if a field exists on-disk and can be read by the processor.
0597    ///
0598    /// \sa RNTupleProcessor::CanReadFieldFromDisk()
0599    bool CanReadFieldFromDisk(std::string_view fieldName) final;
0600 
0601    /////////////////////////////////////////////////////////////////////////////
0602    /// \brief Add a field to the entry.
0603    ///
0604    /// \sa RNTupleProcessor::AddFieldToEntry()
0605    ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> AddFieldToEntry(
0606       std::string_view fieldName, void *valuePtr = nullptr,
0607       const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final;
0608 
0609    /////////////////////////////////////////////////////////////////////////////
0610    /// \brief Add the entry mappings for this processor to the provided join table.
0611    ///
0612    /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
0613    void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
0614 
0615    /////////////////////////////////////////////////////////////////////////////
0616    /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
0617    ///
0618    /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
0619    void PrintStructureImpl(std::ostream &output) const final;
0620 
0621    /////////////////////////////////////////////////////////////////////////////
0622    /// \brief Construct a new RNTupleProcessor for processing a single RNTuple.
0623    ///
0624    /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process.
0625    /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is
0626    /// the name of the underlying RNTuple.
0627    RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName);
0628 
0629 public:
0630    RNTupleSingleProcessor(const RNTupleSingleProcessor &) = delete;
0631    RNTupleSingleProcessor(RNTupleSingleProcessor &&) = delete;
0632    RNTupleSingleProcessor &operator=(const RNTupleSingleProcessor &) = delete;
0633    RNTupleSingleProcessor &operator=(RNTupleSingleProcessor &&) = delete;
0634    ~RNTupleSingleProcessor() override
0635    {
0636       // The proto model needs to be deleted before fPageSource.
0637       fProtoModel.release();
0638    };
0639 };
0640 
0641 // clang-format off
0642 /**
0643 \class ROOT::Experimental::RNTupleChainProcessor
0644 \ingroup NTuple
0645 \brief Processor specialization for vertically combined (*chained*) RNTupleProcessors.
0646 */
0647 // clang-format on
0648 class RNTupleChainProcessor : public RNTupleProcessor {
0649    friend class RNTupleProcessor;
0650 
0651 private:
0652    std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors;
0653    std::vector<ROOT::NTupleSize_t> fInnerNEntries;
0654 
0655    Internal::RNTupleProcessorProvenance fProvenance;
0656 
0657    /////////////////////////////////////////////////////////////////////////////
0658    /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
0659    /// an existing one.
0660    void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
0661 
0662    /////////////////////////////////////////////////////////////////////////////
0663    /// \brief Connect the provided fields indices in the entry to their on-disk fields.
0664    ///
0665    /// \sa RNTupleProcessor::Connect()
0666    void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
0667                 const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance(),
0668                 bool updateFields = false) final;
0669 
0670    /////////////////////////////////////////////////////////////////////////////
0671    /// \brief Update the entry to reflect any missing fields in the current inner processor.
0672    void ConnectInnerProcessor(std::size_t processorNumber);
0673 
0674    /////////////////////////////////////////////////////////////////////////////
0675    /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this
0676    /// processor).
0677    ///
0678    /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
0679    ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
0680 
0681    /////////////////////////////////////////////////////////////////////////////
0682    /// \brief Get the total number of entries in this processor.
0683    ///
0684    /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly!
0685    ROOT::NTupleSize_t GetNEntries() final;
0686 
0687    /////////////////////////////////////////////////////////////////////////////
0688    /// \brief Check if a field exists on-disk and can be read by the processor.
0689    ///
0690    /// \sa RNTupleProcessor::CanReadFieldFromDisk()
0691    bool CanReadFieldFromDisk(std::string_view fieldName) final
0692    {
0693       return fInnerProcessors[fCurrentProcessorNumber]->CanReadFieldFromDisk(fieldName);
0694    }
0695 
0696    /////////////////////////////////////////////////////////////////////////////
0697    /// \brief Add a field to the entry.
0698    ///
0699    /// \sa RNTupleProcessor::AddFieldToEntry()
0700    ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> AddFieldToEntry(
0701       std::string_view fieldName, void *valuePtr = nullptr,
0702       const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final;
0703 
0704    /////////////////////////////////////////////////////////////////////////////
0705    /// \brief Add the entry mappings for this processor to the provided join table.
0706    ///
0707    /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
0708    void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
0709 
0710    /////////////////////////////////////////////////////////////////////////////
0711    /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
0712    ///
0713    /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
0714    void PrintStructureImpl(std::ostream &output) const final;
0715 
0716    /////////////////////////////////////////////////////////////////////////////
0717    /// \brief Construct a new RNTupleChainProcessor.
0718    ///
0719    /// \param[in] ntuples The source specification (name and storage location) for each RNTuple to process.
0720    /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
0721    /// is the name of the first inner processor.
0722    ///
0723    /// RNTuples are processed in the order in which they are specified.
0724    RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName);
0725 
0726 public:
0727    RNTupleChainProcessor(const RNTupleChainProcessor &) = delete;
0728    RNTupleChainProcessor(RNTupleChainProcessor &&) = delete;
0729    RNTupleChainProcessor &operator=(const RNTupleChainProcessor &) = delete;
0730    RNTupleChainProcessor &operator=(RNTupleChainProcessor &&) = delete;
0731    ~RNTupleChainProcessor() override = default;
0732 };
0733 
0734 // clang-format off
0735 /**
0736 \class ROOT::Experimental::RNTupleJoinProcessor
0737 \ingroup NTuple
0738 \brief Processor specialization for horizontally combined (*joined*) RNTupleProcessors.
0739 */
0740 // clang-format on
0741 class RNTupleJoinProcessor : public RNTupleProcessor {
0742    friend class RNTupleProcessor;
0743 
0744 private:
0745    std::unique_ptr<RNTupleProcessor> fPrimaryProcessor;
0746    std::unique_ptr<RNTupleProcessor> fAuxiliaryProcessor;
0747 
0748    std::vector<std::string> fJoinFieldNames;
0749    std::set<Internal::RNTupleProcessorEntry::FieldIndex_t> fJoinFieldIdxs;
0750 
0751    std::unique_ptr<Internal::RNTupleJoinTable> fJoinTable;
0752    bool fJoinTableIsBuilt = false;
0753 
0754    std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fAuxiliaryFieldIdxs;
0755 
0756    /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
0757    /// an existing one.
0758    void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;
0759 
0760    /////////////////////////////////////////////////////////////////////////////
0761    /// \brief Connect the provided fields indices in the entry to their on-disk fields.
0762    ///
0763    /// \sa RNTupleProcessor::Connect()
0764    void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
0765                 const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance(),
0766                 bool updateFields = false) final;
0767 
0768    /////////////////////////////////////////////////////////////////////////////
0769    /// \brief Load the entry identified by the provided entry number of the primary processor.
0770    ///
0771    /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
0772    ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;
0773 
0774    /////////////////////////////////////////////////////////////////////////////
0775    /// \brief Get the total number of entries in this processor.
0776    ROOT::NTupleSize_t GetNEntries() final;
0777 
0778    /////////////////////////////////////////////////////////////////////////////
0779    /// \brief Set the processor's proto model by combining the primary and auxiliary models.
0780    ///
0781    /// \param[in] primaryModel The proto model of the primary processor.
0782    /// \param[in] auxModel The proto model of the auxiliary processors.
0783    ///
0784    /// To prevent field name clashes when one or more models have fields with duplicate names, fields from each
0785    /// auxiliary model are stored as a anonymous record, and subsequently registered as subfields in the join model.
0786    /// This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`.
0787    void SetProtoModel(std::unique_ptr<ROOT::RNTupleModel> primaryModel, std::unique_ptr<ROOT::RNTupleModel> auxModel);
0788 
0789    /////////////////////////////////////////////////////////////////////////////
0790    /// \brief Set the validity for all fields in the auxiliary processor at once.
0791    void SetAuxiliaryFieldValidity(bool validity);
0792 
0793    /////////////////////////////////////////////////////////////////////////////
0794    /// \brief Check if a field exists on-disk and can be read by the processor.
0795    ///
0796    /// \sa RNTupleProcessor::CanReadFieldFromDisk()
0797    bool CanReadFieldFromDisk(std::string_view fieldName) final
0798    {
0799       if (!fPrimaryProcessor->CanReadFieldFromDisk(fieldName)) {
0800          if (fieldName.find(fAuxiliaryProcessor->GetProcessorName()) == 0)
0801             fieldName = fieldName.substr(fAuxiliaryProcessor->GetProcessorName().size() + 1);
0802          return fAuxiliaryProcessor->CanReadFieldFromDisk(fieldName);
0803       }
0804 
0805       return true;
0806    }
0807 
0808    /////////////////////////////////////////////////////////////////////////////
0809    /// \brief Add a field to the entry.
0810    ///
0811    /// \sa RNTupleProcessor::AddFieldToEntry()
0812    ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> AddFieldToEntry(
0813       std::string_view fieldName, void *valuePtr = nullptr,
0814       const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final;
0815 
0816    /////////////////////////////////////////////////////////////////////////////
0817    /// \brief Add the entry mappings for this processor to the provided join table.
0818    ///
0819    /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
0820    void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;
0821 
0822    /////////////////////////////////////////////////////////////////////////////
0823    /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
0824    ///
0825    /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
0826    void PrintStructureImpl(std::ostream &output) const final;
0827 
0828    /////////////////////////////////////////////////////////////////////////////
0829    /// \brief Construct a new RNTupleJoinProcessor.
0830    /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order.
0831    /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are
0832    /// processed is determined by the primary processor and doesn't necessarily have to be sequential.
0833    /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned.
0834    /// The join is made based on the combined join field values, and therefore each field has to be present in each
0835    /// specified processor. If an empty list is provided, it is assumed that the processors are fully aligned.
0836    /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this
0837    /// is the name of the primary processor.
0838    RNTupleJoinProcessor(std::unique_ptr<RNTupleProcessor> primaryProcessor,
0839                         std::unique_ptr<RNTupleProcessor> auxProcessor, const std::vector<std::string> &joinFields,
0840                         std::string_view processorName);
0841 
0842 public:
0843    RNTupleJoinProcessor(const RNTupleJoinProcessor &) = delete;
0844    RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &) = delete;
0845    RNTupleJoinProcessor(RNTupleJoinProcessor &&) = delete;
0846    RNTupleJoinProcessor operator=(RNTupleJoinProcessor &&) = delete;
0847    ~RNTupleJoinProcessor() override = default;
0848 };
0849 
0850 } // namespace Experimental
0851 } // namespace ROOT
0852 
0853 #endif // ROOT_RNTupleProcessor