Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:30:01

0001 /// \file ROOT/RNTupleReader.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2024-02-20
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RNTupleReader
0015 #define ROOT_RNTupleReader
0016 
0017 #include <ROOT/RConfig.hxx> // for R__unlikely
0018 #include <ROOT/REntry.hxx>
0019 #include <ROOT/RError.hxx>
0020 #include <ROOT/RNTupleDescriptor.hxx>
0021 #include <ROOT/RNTupleMetrics.hxx>
0022 #include <ROOT/RNTupleModel.hxx>
0023 #include <ROOT/RNTupleReadOptions.hxx>
0024 #include <ROOT/RNTupleTypes.hxx>
0025 #include <ROOT/RNTupleView.hxx>
0026 #include <ROOT/RPageStorage.hxx>
0027 #include <ROOT/RSpan.hxx>
0028 
#include <cstddef>
#include <iostream>
#include <iterator>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <typeinfo>
0034 
0035 namespace ROOT {
0036 class RNTuple;
0037 
/// Selects which kind of information RNTupleReader::PrintInfo() writes out.
enum class ENTupleInfo {
   /// The RNTuple name, description, number of entries
   kSummary,
   /// Size on storage, page sizes, compression factor, etc.
   kStorageDetails,
   /// Internal performance counters; requires that EnableMetrics() was called
   kMetrics,
};
0044 
0045 // clang-format off
0046 /**
0047 \class ROOT::RNTupleReader
0048 \ingroup NTuple
0049 \brief Reads RNTuple data from storage
0050 
0051 The RNTupleReader provides access to data stored in the RNTuple binary format as C++ objects, using an RNTupleModel.
0052 It infers this model from the RNTuple's on-disk metadata, or uses a model imposed by the user.
0053 The latter case allows users to read into a specialized RNTuple model that covers
0054 only a subset of the fields in the RNTuple. The RNTuple model is used when reading complete entries through LoadEntry().
0055 Individual fields can be read as well by instantiating a tree view.
0056 
0057 ~~~ {.cpp}
0058 #include <ROOT/RNTupleReader.hxx>
0059 #include <iostream>
0060 
0061 auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0062 std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n";
0063 ~~~
0064 */
0065 // clang-format on
0066 class RNTupleReader {
0067 private:
0068    /// Set as the page source's scheduler for parallel page decompression if implicit multi-threading (IMT) is on.
0069    /// Needs to be destructed after the page source is destructed (and thus be declared before)
0070    std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks;
0071 
0072    std::unique_ptr<Internal::RPageSource> fSource;
0073    /// Needs to be destructed before fSource
0074    std::unique_ptr<ROOT::RNTupleModel> fModel;
0075    /// We use a dedicated on-demand reader for Show(). Printing data uses all the fields
0076    /// from the full model even if the analysis code uses only a subset of fields. The display reader
0077    /// is a clone of the original reader.
0078    std::unique_ptr<RNTupleReader> fDisplayReader;
0079    /// The RNTuple descriptor in the page source is protected by a read-write lock. We don't expose that to the
0080    /// users of RNTupleReader::GetDescriptor().  Instead, if descriptor information is needed, we clone the
0081    /// descriptor.  Using the descriptor's generation number, we know if the cached descriptor is stale.
0082    /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes,
0083    /// not on a hot code path.
0084    std::optional<ROOT::RNTupleDescriptor> fCachedDescriptor;
0085    /// We know that the RNTupleReader is always reading a single RNTuple, so the number of entries is fixed.
0086    ROOT::NTupleSize_t fNEntries = 0;
0087    Experimental::Detail::RNTupleMetrics fMetrics;
0088    /// If not nullopt, these will be used when creating the model
0089    std::optional<ROOT::RNTupleDescriptor::RCreateModelOptions> fCreateModelOptions;
0090 
0091    RNTupleReader(std::unique_ptr<ROOT::RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source,
0092                  const ROOT::RNTupleReadOptions &options);
0093    /// The model is generated from the RNTuple metadata on storage.
0094    explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source, const ROOT::RNTupleReadOptions &options);
0095 
0096    void ConnectModel(ROOT::RNTupleModel &model, bool allowFieldSubstitutions);
0097    RNTupleReader *GetDisplayReader();
0098    void InitPageSource(bool enableMetrics);
0099 
0100    ROOT::DescriptorId_t RetrieveFieldId(std::string_view fieldName) const;
0101 
0102 public:
0103    // Browse through the entries
0104    class RIterator {
0105    private:
0106       ROOT::NTupleSize_t fIndex = ROOT::kInvalidNTupleIndex;
0107 
0108    public:
0109       using iterator = RIterator;
0110       using iterator_category = std::input_iterator_tag;
0111       using value_type = ROOT::NTupleSize_t;
0112       using difference_type = std::ptrdiff_t;
0113       using pointer = const ROOT::NTupleSize_t *;
0114       using reference = const ROOT::NTupleSize_t &;
0115 
0116       RIterator() = default;
0117       explicit RIterator(ROOT::NTupleSize_t index) : fIndex(index) {}
0118       ~RIterator() = default;
0119 
0120       iterator operator++(int) /* postfix */
0121       {
0122          auto r = *this;
0123          fIndex++;
0124          return r;
0125       }
0126       iterator &operator++() /* prefix */
0127       {
0128          ++fIndex;
0129          return *this;
0130       }
0131       reference operator*() const { return fIndex; }
0132       pointer operator->() const { return &fIndex; }
0133       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0134       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0135    };
0136 
0137    /// Open an RNTuple for reading.
0138    ///
0139    /// Throws an RException if there is no RNTuple with the given name.
0140    ///
0141    /// **Example: open an RNTuple and print the number of entries**
0142    /// ~~~ {.cpp}
0143    /// #include <ROOT/RNTupleReader.hxx>
0144    /// #include <iostream>
0145    ///
0146    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0147    /// std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n";
0148    /// ~~~
0149    static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage,
0150                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0151    static std::unique_ptr<RNTupleReader>
0152    Open(const RNTuple &ntuple, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0153 
0154    /// The caller imposes a model, which must be compatible with the model found in the data on storage.
0155    static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntupleName,
0156                                               std::string_view storage,
0157                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0158    static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, const RNTuple &ntuple,
0159                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0160 
0161    /// The caller imposes the way the model is reconstructed
0162    static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts,
0163                                               std::string_view ntupleName, std::string_view storage,
0164                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0165    static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts,
0166                                               const RNTuple &ntuple,
0167                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0168    std::unique_ptr<RNTupleReader> Clone()
0169    {
0170       auto options = ROOT::RNTupleReadOptions{};
0171       options.SetEnableMetrics(fMetrics.IsEnabled());
0172       return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone(), options));
0173    }
0174    ~RNTupleReader();
0175 
0176    /// Returns the number of entries in this RNTuple.
0177    /// Note that the recommended way to iterate the RNTuple is using
0178    /// ~~~ {.cpp}
0179    /// // RECOMMENDED way to iterate an ntuple
0180    /// for (auto i : reader->GetEntryRange()) { ... }
0181    /// ~~~
0182    /// instead of
0183    /// ~~~ {.cpp}
0184    /// // DISCOURAGED way to iterate an ntuple
0185    /// for (auto i = 0u; i < reader->GetNEntries(); ++i) { ... }
0186    /// ~~~
0187    /// The reason is that determining the number of entries, while currently cheap, may in the future be
0188    /// an expensive operation.
0189    ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
0190    const ROOT::RNTupleModel &GetModel();
0191    std::unique_ptr<ROOT::REntry> CreateEntry();
0192 
0193    /// Returns a cached copy of the page source descriptor. The returned pointer remains valid until the next call
0194    /// to LoadEntry() or to any of the views returned from the reader.
0195    const ROOT::RNTupleDescriptor &GetDescriptor();
0196 
0197    /// Prints a detailed summary of the RNTuple, including a list of fields.
0198    ///
0199    /// **Example: print summary information to stdout**
0200    /// ~~~ {.cpp}
0201    /// #include <ROOT/RNTupleReader.hxx>
0202    /// #include <iostream>
0203    ///
0204    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0205    /// reader->PrintInfo();
0206    /// // or, equivalently:
0207    /// reader->PrintInfo(ROOT::ENTupleInfo::kSummary, std::cout);
0208    /// ~~~
0209    /// **Example: print detailed column storage data to stderr**
0210    /// ~~~ {.cpp}
0211    /// #include <ROOT/RNTupleReader.hxx>
0212    /// #include <iostream>
0213    ///
0214    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0215    /// reader->PrintInfo(ROOT::ENTupleInfo::kStorageDetails, std::cerr);
0216    /// ~~~
0217    ///
0218    /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
0219    void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout) const;
0220 
0221    /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
0222    /// prints the output in JSON format.
0223    /// Uses the visitor pattern to traverse through each field of the given entry.
0224    void Show(ROOT::NTupleSize_t index, std::ostream &output = std::cout);
0225 
0226    /// Fills the default entry of the model.
0227    /// Raises an exception when `index` is greater than the number of entries present in the RNTuple
0228    void LoadEntry(ROOT::NTupleSize_t index)
0229    {
0230       // TODO(jblomer): can be templated depending on the factory method / constructor
0231       if (R__unlikely(!fModel)) {
0232          // Will create the fModel.
0233          GetModel();
0234       }
0235       LoadEntry(index, fModel->GetDefaultEntry());
0236    }
0237    /// Fills a user provided entry after checking that the entry has been instantiated from the RNTuple model
0238    void LoadEntry(ROOT::NTupleSize_t index, ROOT::REntry &entry)
0239    {
0240       if (R__unlikely(entry.GetModelId() != fModel->GetModelId()))
0241          throw RException(R__FAIL("mismatch between entry and model"));
0242 
0243       entry.Read(index);
0244    }
0245 
0246    /// Returns an iterator over the entry indices of the RNTuple.
0247    ///
0248    /// **Example: iterate over all entries and print each entry in JSON format**
0249    /// ~~~ {.cpp}
0250    /// #include <ROOT/RNTupleReader.hxx>
0251    /// #include <iostream>
0252    ///
0253    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0254    /// for (auto i : ntuple->GetEntryRange()) {
0255    ///    reader->Show(i);
0256    /// }
0257    /// ~~~
0258    ROOT::RNTupleGlobalRange GetEntryRange() { return ROOT::RNTupleGlobalRange(0, GetNEntries()); }
0259 
0260    /// Provides access to an individual (sub)field,
0261    /// e.g. `GetView<Particle>("particle")`, `GetView<double>("particle.pt")` or
0262    /// `GetView<std::vector<Particle>>("particles")`. It is possible to directly get the size of a collection (without
0263    /// reading the collection itself) using RNTupleCardinality:
0264    /// `GetView<ROOT::RNTupleCardinality<std::uint64_t>>("particles")`.
0265    ///
0266    /// Raises an exception if there is no field with the given name.
0267    ///
0268    /// **Example: iterate over a field named "pt" of type `float`**
0269    /// ~~~ {.cpp}
0270    /// #include <ROOT/RNTupleReader.hxx>
0271    /// #include <iostream>
0272    ///
0273    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0274    /// auto pt = reader->GetView<float>("pt");
0275    ///
0276    /// for (auto i : reader->GetEntryRange()) {
0277    ///    std::cout << i << ": " << pt(i) << "\n";
0278    /// }
0279    /// ~~~
0280    ///
0281    /// **Note**: if `T = void`, type checks are disabled. This is not really useful for this overload because
0282    /// RNTupleView<void> does not give access to the pointer. If required, it is possible to provide an `objPtr` of a
0283    /// dynamic type, for example via GetView(std::string_view, void *, std::string_view).
0284    template <typename T>
0285    ROOT::RNTupleView<T> GetView(std::string_view fieldName)
0286    {
0287       return GetView<T>(RetrieveFieldId(fieldName));
0288    }
0289 
0290    /// Provides access to an individual (sub)field, reading its values into `objPtr`.
0291    ///
0292    /// Raises an exception if there is no field with the given name.
0293    ///
0294    /// **Example: iterate over a field named "pt" of type `float`**
0295    /// ~~~ {.cpp}
0296    /// #include <ROOT/RNTupleReader.hxx>
0297    /// #include <iostream>
0298    ///
0299    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0300    /// auto pt = std::make_shared<float>();
0301    /// auto ptView = reader->GetView("pt", pt);
0302    ///
0303    /// for (auto i : reader->GetEntryRange()) {
0304    ///    ptView(i);
0305    ///    std::cout << i << ": " << *pt << "\n";
0306    /// }
0307    /// ~~~
0308    ///
0309    /// **Note**: if `T = void`, type checks are disabled. It is the caller's responsibility to match the field and
0310    /// object types. It is strongly recommended to use an overload that allows passing the `typeName`, such as
0311    /// GetView(std::string_view, void *, std::string_view). This allows type checks with the on-disk metadata and
0312    /// enables automatic schema evolution and conversion rules.
0313    template <typename T>
0314    ROOT::RNTupleView<T> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr)
0315    {
0316       return GetView<T>(RetrieveFieldId(fieldName), objPtr);
0317    }
0318 
0319    /// Provides access to an individual (sub)field, reading its values into `rawPtr`.
0320    ///
0321    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0322    template <typename T>
0323    ROOT::RNTupleView<T> GetView(std::string_view fieldName, T *rawPtr)
0324    {
0325       return GetView<T>(RetrieveFieldId(fieldName), rawPtr);
0326    }
0327 
0328    /// Provides access to an individual (sub)field, reading its values into `rawPtr` as the type provided by `typeName`.
0329    ///
0330    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0331    ROOT::RNTupleView<void> GetView(std::string_view fieldName, void *rawPtr, std::string_view typeName)
0332    {
0333       return GetView(RetrieveFieldId(fieldName), rawPtr, typeName);
0334    }
0335 
0336    /// Provides access to an individual (sub)field, reading its values into `rawPtr` as the type provided by `ti`.
0337    ///
0338    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0339    ROOT::RNTupleView<void> GetView(std::string_view fieldName, void *rawPtr, const std::type_info &ti)
0340    {
0341       return GetView(RetrieveFieldId(fieldName), rawPtr, ROOT::Internal::GetRenormalizedTypeName(ti));
0342    }
0343 
0344    /// Provides access to an individual (sub)field from its on-disk ID.
0345    ///
0346    /// \sa GetView(std::string_view)
0347    template <typename T>
0348    ROOT::RNTupleView<T> GetView(ROOT::DescriptorId_t fieldId)
0349    {
0350       auto field = ROOT::RNTupleView<T>::CreateField(fieldId, *fSource);
0351       auto range = ROOT::Internal::GetFieldRange(*field, *fSource);
0352       return ROOT::RNTupleView<T>(std::move(field), range);
0353    }
0354 
0355    /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `objPtr`.
0356    ///
0357    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0358    template <typename T>
0359    ROOT::RNTupleView<T> GetView(ROOT::DescriptorId_t fieldId, std::shared_ptr<T> objPtr)
0360    {
0361       auto field = ROOT::RNTupleView<T>::CreateField(fieldId, *fSource);
0362       auto range = ROOT::Internal::GetFieldRange(*field, *fSource);
0363       return ROOT::RNTupleView<T>(std::move(field), range, objPtr);
0364    }
0365 
0366    /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `rawPtr`.
0367    ///
0368    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0369    template <typename T>
0370    ROOT::RNTupleView<T> GetView(ROOT::DescriptorId_t fieldId, T *rawPtr)
0371    {
0372       auto field = ROOT::RNTupleView<T>::CreateField(fieldId, *fSource);
0373       auto range = ROOT::Internal::GetFieldRange(*field, *fSource);
0374       return ROOT::RNTupleView<T>(std::move(field), range, rawPtr);
0375    }
0376 
0377    /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `rawPtr` as the type
0378    /// provided by `typeName`.
0379    ///
0380    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0381    ROOT::RNTupleView<void> GetView(ROOT::DescriptorId_t fieldId, void *rawPtr, std::string_view typeName)
0382    {
0383       auto field = RNTupleView<void>::CreateField(fieldId, *fSource, typeName);
0384       auto range = ROOT::Internal::GetFieldRange(*field, *fSource);
0385       return RNTupleView<void>(std::move(field), range, rawPtr);
0386    }
0387 
0388    /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `objPtr` as the type
0389    /// provided by `ti`.
0390    ///
0391    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0392    ROOT::RNTupleView<void> GetView(ROOT::DescriptorId_t fieldId, void *rawPtr, const std::type_info &ti)
0393    {
0394       return GetView(fieldId, rawPtr, ROOT::Internal::GetRenormalizedTypeName(ti));
0395    }
0396 
0397    /// Provides direct access to the I/O buffers of a **mappable** (sub)field.
0398    ///
0399    /// Raises an exception if there is no field with the given name.
0400    /// Attempting to access the values of a direct-access view for non-mappable fields will yield compilation errors.
0401    ///
0402    /// \sa GetView(std::string_view)
0403    template <typename T>
0404    ROOT::RNTupleDirectAccessView<T> GetDirectAccessView(std::string_view fieldName)
0405    {
0406       return GetDirectAccessView<T>(RetrieveFieldId(fieldName));
0407    }
0408 
0409    /// Provides direct access to the I/O buffers of a **mappable** (sub)field from its on-disk ID.
0410    ///
0411    /// \sa GetDirectAccessView(std::string_view)
0412    template <typename T>
0413    ROOT::RNTupleDirectAccessView<T> GetDirectAccessView(ROOT::DescriptorId_t fieldId)
0414    {
0415       auto field = ROOT::RNTupleDirectAccessView<T>::CreateField(fieldId, *fSource);
0416       auto range = ROOT::Internal::GetFieldRange(field, *fSource);
0417       return ROOT::RNTupleDirectAccessView<T>(std::move(field), range);
0418    }
0419 
0420    /// Provides access to a collection field, that can itself generate new RNTupleViews for its nested fields.
0421    ///
0422    /// Raises an exception if:
0423    /// * there is no field with the given name or,
0424    /// * the field is not a collection
0425    ///
0426    /// \sa GetView(std::string_view)
0427    ROOT::RNTupleCollectionView GetCollectionView(std::string_view fieldName)
0428    {
0429       auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName);
0430       if (fieldId == ROOT::kInvalidDescriptorId) {
0431          throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" +
0432                                   fSource->GetSharedDescriptorGuard()->GetName() + "'"));
0433       }
0434       return GetCollectionView(fieldId);
0435    }
0436 
0437    /// Provides access to a collection field from its on-disk ID, that can itself generate new RNTupleViews for its
0438    /// nested fields.
0439    ///
0440    /// \sa GetCollectionView(std::string_view)
0441    ROOT::RNTupleCollectionView GetCollectionView(ROOT::DescriptorId_t fieldId)
0442    {
0443       return ROOT::RNTupleCollectionView::Create(fieldId, fSource.get());
0444    }
0445 
0446    RIterator begin() { return RIterator(0); }
0447    RIterator end() { return RIterator(GetNEntries()); }
0448 
0449    /// Enable performance measurements (decompression time, bytes read from storage, etc.)
0450    ///
0451    /// **Example: inspect the reader metrics after loading every entry**
0452    /// ~~~ {.cpp}
0453    /// #include <ROOT/RNTupleReader.hxx>
0454    /// #include <iostream>
0455    ///
0456    /// auto ntuple = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0457    /// // metrics must be turned on beforehand
0458    /// reader->EnableMetrics();
0459    ///
0460    /// for (auto i : ntuple->GetEntryRange()) {
0461    ///    reader->LoadEntry(i);
0462    /// }
0463    /// reader->PrintInfo(ROOT::ENTupleInfo::kMetrics);
0464    /// ~~~
0465    void EnableMetrics() { fMetrics.Enable(); }
0466    const Experimental::Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
0467 }; // class RNTupleReader
0468 
0469 } // namespace ROOT
0470 
0471 #endif // ROOT_RNTupleReader