Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 09:14:31

0001 /// \file ROOT/RNTupleReader.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2024-02-20
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RNTupleReader
0015 #define ROOT_RNTupleReader
0016 
#include <ROOT/RConfig.hxx> // for R__unlikely
#include <ROOT/REntry.hxx>
#include <ROOT/RError.hxx>
#include <ROOT/RNTupleDescriptor.hxx>
#include <ROOT/RNTupleMetrics.hxx>
#include <ROOT/RNTupleModel.hxx>
#include <ROOT/RNTupleReadOptions.hxx>
#include <ROOT/RNTupleUtil.hxx>
#include <ROOT/RNTupleView.hxx>
#include <ROOT/RPageStorage.hxx>
#include <ROOT/RSpan.hxx>

#include <iostream>
#include <iterator>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
#include <typeinfo>
0034 
0035 namespace ROOT {
0036 class RNTuple;
0037 
/// Listing of the different options that can be printed by RNTupleReader::PrintInfo()
enum class ENTupleInfo {
   /// The RNTuple name, description, number of entries
   kSummary,
   /// Size on storage, page sizes, compression factor, etc.
   kStorageDetails,
   /// Internal performance counters, requires that EnableMetrics() was called
   kMetrics,
};
0044 
0045 // clang-format off
0046 /**
0047 \class ROOT::RNTupleReader
0048 \ingroup NTuple
0049 \brief Reads RNTuple data from storage
0050 
0051 The RNTupleReader provides access to data stored in the RNTuple binary format as C++ objects, using an RNTupleModel.
0052 It infers this model from the RNTuple's on-disk metadata, or uses a model imposed by the user.
0053 The latter case allows users to read into a specialized RNTuple model that covers
0054 only a subset of the fields in the RNTuple. The RNTuple model is used when reading complete entries through LoadEntry().
0055 Individual fields can be read as well by instantiating a tree view.
0056 
0057 ~~~ {.cpp}
0058 #include <ROOT/RNTupleReader.hxx>
0059 #include <iostream>
0060 
0061 auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0062 std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n";
0063 ~~~
0064 */
0065 // clang-format on
0066 class RNTupleReader {
0067 private:
0068    /// Set as the page source's scheduler for parallel page decompression if implicit multi-threading (IMT) is on.
0069    /// Needs to be destructed after the page source is destructed (and thus be declared before)
0070    std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks;
0071 
0072    std::unique_ptr<Internal::RPageSource> fSource;
0073    /// Needs to be destructed before fSource
0074    std::unique_ptr<ROOT::RNTupleModel> fModel;
0075    /// We use a dedicated on-demand reader for Show(). Printing data uses all the fields
0076    /// from the full model even if the analysis code uses only a subset of fields. The display reader
0077    /// is a clone of the original reader.
0078    std::unique_ptr<RNTupleReader> fDisplayReader;
0079    /// The RNTuple descriptor in the page source is protected by a read-write lock. We don't expose that to the
0080    /// users of RNTupleReader::GetDescriptor().  Instead, if descriptor information is needed, we clone the
0081    /// descriptor.  Using the descriptor's generation number, we know if the cached descriptor is stale.
0082    /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes,
0083    /// not on a hot code path.
0084    std::optional<ROOT::RNTupleDescriptor> fCachedDescriptor;
0085    Experimental::Detail::RNTupleMetrics fMetrics;
0086    /// If not nullopt, these will be used when creating the model
0087    std::optional<ROOT::RNTupleDescriptor::RCreateModelOptions> fCreateModelOptions;
0088 
0089    RNTupleReader(std::unique_ptr<ROOT::RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source,
0090                  const ROOT::RNTupleReadOptions &options);
0091    /// The model is generated from the RNTuple metadata on storage.
0092    explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source, const ROOT::RNTupleReadOptions &options);
0093 
0094    void ConnectModel(ROOT::RNTupleModel &model);
0095    RNTupleReader *GetDisplayReader();
0096    void InitPageSource(bool enableMetrics);
0097 
0098    ROOT::DescriptorId_t RetrieveFieldId(std::string_view fieldName) const;
0099 
0100 public:
0101    // Browse through the entries
0102    class RIterator {
0103    private:
0104       ROOT::NTupleSize_t fIndex = ROOT::kInvalidNTupleIndex;
0105 
0106    public:
0107       using iterator = RIterator;
0108       using iterator_category = std::forward_iterator_tag;
0109       using value_type = ROOT::NTupleSize_t;
0110       using difference_type = ROOT::NTupleSize_t;
0111       using pointer = ROOT::NTupleSize_t *;
0112       using reference = ROOT::NTupleSize_t &;
0113 
0114       RIterator() = default;
0115       explicit RIterator(ROOT::NTupleSize_t index) : fIndex(index) {}
0116       ~RIterator() = default;
0117 
0118       iterator operator++(int) /* postfix */
0119       {
0120          auto r = *this;
0121          fIndex++;
0122          return r;
0123       }
0124       iterator &operator++() /* prefix */
0125       {
0126          ++fIndex;
0127          return *this;
0128       }
0129       reference operator*() { return fIndex; }
0130       pointer operator->() { return &fIndex; }
0131       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0132       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0133    };
0134 
0135    /// Open an RNTuple for reading.
0136    ///
0137    /// Throws an RException if there is no RNTuple with the given name.
0138    ///
0139    /// **Example: open an RNTuple and print the number of entries**
0140    /// ~~~ {.cpp}
0141    /// #include <ROOT/RNTupleReader.hxx>
0142    /// #include <iostream>
0143    ///
0144    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0145    /// std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n";
0146    /// ~~~
0147    static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage,
0148                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0149    static std::unique_ptr<RNTupleReader>
0150    Open(const RNTuple &ntuple, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0151 
0152    /// The caller imposes a model, which must be compatible with the model found in the data on storage.
0153    static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntupleName,
0154                                               std::string_view storage,
0155                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0156    static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, const RNTuple &ntuple,
0157                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0158 
0159    /// The caller imposes the way the model is reconstructed
0160    static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts,
0161                                               std::string_view ntupleName, std::string_view storage,
0162                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0163    static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts,
0164                                               const RNTuple &ntuple,
0165                                               const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0166    std::unique_ptr<RNTupleReader> Clone()
0167    {
0168       auto options = ROOT::RNTupleReadOptions{};
0169       options.SetEnableMetrics(fMetrics.IsEnabled());
0170       return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone(), options));
0171    }
0172    ~RNTupleReader();
0173 
0174    ROOT::NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); }
0175    const ROOT::RNTupleModel &GetModel();
0176    std::unique_ptr<ROOT::REntry> CreateEntry();
0177 
0178    /// Returns a cached copy of the page source descriptor. The returned pointer remains valid until the next call
0179    /// to LoadEntry() or to any of the views returned from the reader.
0180    const ROOT::RNTupleDescriptor &GetDescriptor();
0181 
0182    /// Prints a detailed summary of the RNTuple, including a list of fields.
0183    ///
0184    /// **Example: print summary information to stdout**
0185    /// ~~~ {.cpp}
0186    /// #include <ROOT/RNTupleReader.hxx>
0187    /// #include <iostream>
0188    ///
0189    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0190    /// reader->PrintInfo();
0191    /// // or, equivalently:
0192    /// reader->PrintInfo(ROOT::ENTupleInfo::kSummary, std::cout);
0193    /// ~~~
0194    /// **Example: print detailed column storage data to stderr**
0195    /// ~~~ {.cpp}
0196    /// #include <ROOT/RNTupleReader.hxx>
0197    /// #include <iostream>
0198    ///
0199    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0200    /// reader->PrintInfo(ROOT::ENTupleInfo::kStorageDetails, std::cerr);
0201    /// ~~~
0202    ///
0203    /// For use of ENTupleInfo::kMetrics, see #EnableMetrics.
0204    void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout) const;
0205 
0206    /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default,
0207    /// prints the output in JSON format.
0208    /// Uses the visitor pattern to traverse through each field of the given entry.
0209    void Show(ROOT::NTupleSize_t index, std::ostream &output = std::cout);
0210 
0211    /// Fills the default entry of the model.
0212    /// Raises an exception when `index` is greater than the number of entries present in the RNTuple
0213    void LoadEntry(ROOT::NTupleSize_t index)
0214    {
0215       // TODO(jblomer): can be templated depending on the factory method / constructor
0216       if (R__unlikely(!fModel)) {
0217          fModel = fSource->GetSharedDescriptorGuard()->CreateModel(
0218             fCreateModelOptions.value_or(ROOT::RNTupleDescriptor::RCreateModelOptions{}));
0219          ConnectModel(*fModel);
0220       }
0221       LoadEntry(index, fModel->GetDefaultEntry());
0222    }
0223    /// Fills a user provided entry after checking that the entry has been instantiated from the RNTuple model
0224    void LoadEntry(ROOT::NTupleSize_t index, ROOT::REntry &entry)
0225    {
0226       if (R__unlikely(entry.GetModelId() != fModel->GetModelId()))
0227          throw RException(R__FAIL("mismatch between entry and model"));
0228 
0229       entry.Read(index);
0230    }
0231 
0232    /// Returns an iterator over the entry indices of the RNTuple.
0233    ///
0234    /// **Example: iterate over all entries and print each entry in JSON format**
0235    /// ~~~ {.cpp}
0236    /// #include <ROOT/RNTupleReader.hxx>
0237    /// #include <iostream>
0238    ///
0239    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0240    /// for (auto i : ntuple->GetEntryRange()) {
0241    ///    reader->Show(i);
0242    /// }
0243    /// ~~~
0244    ROOT::RNTupleGlobalRange GetEntryRange() { return ROOT::RNTupleGlobalRange(0, GetNEntries()); }
0245 
0246    /// Provides access to an individual (sub)field,
0247    /// e.g. `GetView<Particle>("particle")`, `GetView<double>("particle.pt")` or
0248    /// `GetView<std::vector<Particle>>("particles")`. It is possible to directly get the size of a collection (without
0249    /// reading the collection itself) using RNTupleCardinality:
0250    /// `GetView<ROOT::RNTupleCardinality<std::uint64_t>>("particles")`.
0251    ///
0252    /// Raises an exception if there is no field with the given name.
0253    ///
0254    /// **Example: iterate over a field named "pt" of type `float`**
0255    /// ~~~ {.cpp}
0256    /// #include <ROOT/RNTupleReader.hxx>
0257    /// #include <iostream>
0258    ///
0259    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0260    /// auto pt = reader->GetView<float>("pt");
0261    ///
0262    /// for (auto i : reader->GetEntryRange()) {
0263    ///    std::cout << i << ": " << pt(i) << "\n";
0264    /// }
0265    /// ~~~
0266    ///
0267    /// **Note**: if `T = void`, type checks are disabled. This is not really useful for this overload because
0268    /// RNTupleView<void> does not give access to the pointer. If required, it is possible to provide an `objPtr` of a
0269    /// dynamic type, for example via GetView(std::string_view, void *, std::string_view).
0270    template <typename T>
0271    ROOT::RNTupleView<T> GetView(std::string_view fieldName)
0272    {
0273       return GetView<T>(RetrieveFieldId(fieldName));
0274    }
0275 
0276    /// Provides access to an individual (sub)field, reading its values into `objPtr`.
0277    ///
0278    /// Raises an exception if there is no field with the given name.
0279    ///
0280    /// **Example: iterate over a field named "pt" of type `float`**
0281    /// ~~~ {.cpp}
0282    /// #include <ROOT/RNTupleReader.hxx>
0283    /// #include <iostream>
0284    ///
0285    /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0286    /// auto pt = std::make_shared<float>();
0287    /// auto ptView = reader->GetView("pt", pt);
0288    ///
0289    /// for (auto i : reader->GetEntryRange()) {
0290    ///    ptView(i);
0291    ///    std::cout << i << ": " << *pt << "\n";
0292    /// }
0293    /// ~~~
0294    ///
0295    /// **Note**: if `T = void`, type checks are disabled. It is the caller's responsibility to match the field and
0296    /// object types. It is strongly recommended to use an overload that allows passing the `typeName`, such as
0297    /// GetView(std::string_view, void *, std::string_view). This allows type checks with the on-disk metadata and
0298    /// enables automatic schema evolution and conversion rules.
0299    template <typename T>
0300    ROOT::RNTupleView<T> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr)
0301    {
0302       return GetView<T>(RetrieveFieldId(fieldName), objPtr);
0303    }
0304 
0305    /// Provides access to an individual (sub)field, reading its values into `rawPtr`.
0306    ///
0307    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0308    template <typename T>
0309    ROOT::RNTupleView<T> GetView(std::string_view fieldName, T *rawPtr)
0310    {
0311       return GetView<T>(RetrieveFieldId(fieldName), rawPtr);
0312    }
0313 
0314    /// Provides access to an individual (sub)field, reading its values into `rawPtr` as the type provided by `typeName`.
0315    ///
0316    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0317    ROOT::RNTupleView<void> GetView(std::string_view fieldName, void *rawPtr, std::string_view typeName)
0318    {
0319       return GetView(RetrieveFieldId(fieldName), rawPtr, typeName);
0320    }
0321 
0322    /// Provides access to an individual (sub)field, reading its values into `rawPtr` as the type provided by `ti`.
0323    ///
0324    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0325    ROOT::RNTupleView<void> GetView(std::string_view fieldName, void *rawPtr, const std::type_info &ti)
0326    {
0327       return GetView(RetrieveFieldId(fieldName), rawPtr, ROOT::Internal::GetRenormalizedDemangledTypeName(ti));
0328    }
0329 
0330    /// Provides access to an individual (sub)field from its on-disk ID.
0331    ///
0332    /// \sa GetView(std::string_view)
0333    template <typename T>
0334    ROOT::RNTupleView<T> GetView(ROOT::DescriptorId_t fieldId)
0335    {
0336       auto field = ROOT::RNTupleView<T>::CreateField(fieldId, *fSource);
0337       auto range = ROOT::Internal::GetFieldRange(*field, *fSource);
0338       return ROOT::RNTupleView<T>(std::move(field), range);
0339    }
0340 
0341    /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `objPtr`.
0342    ///
0343    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0344    template <typename T>
0345    ROOT::RNTupleView<T> GetView(ROOT::DescriptorId_t fieldId, std::shared_ptr<T> objPtr)
0346    {
0347       auto field = ROOT::RNTupleView<T>::CreateField(fieldId, *fSource);
0348       auto range = ROOT::Internal::GetFieldRange(*field, *fSource);
0349       return ROOT::RNTupleView<T>(std::move(field), range, objPtr);
0350    }
0351 
0352    /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `rawPtr`.
0353    ///
0354    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0355    template <typename T>
0356    ROOT::RNTupleView<T> GetView(ROOT::DescriptorId_t fieldId, T *rawPtr)
0357    {
0358       auto field = ROOT::RNTupleView<T>::CreateField(fieldId, *fSource);
0359       auto range = ROOT::Internal::GetFieldRange(*field, *fSource);
0360       return ROOT::RNTupleView<T>(std::move(field), range, rawPtr);
0361    }
0362 
0363    /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `rawPtr` as the type
0364    /// provided by `typeName`.
0365    ///
0366    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0367    ROOT::RNTupleView<void> GetView(ROOT::DescriptorId_t fieldId, void *rawPtr, std::string_view typeName)
0368    {
0369       auto field = RNTupleView<void>::CreateField(fieldId, *fSource, typeName);
0370       auto range = ROOT::Internal::GetFieldRange(*field, *fSource);
0371       return RNTupleView<void>(std::move(field), range, rawPtr);
0372    }
0373 
0374    /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `objPtr` as the type
0375    /// provided by `ti`.
0376    ///
0377    /// \sa GetView(std::string_view, std::shared_ptr<T>)
0378    ROOT::RNTupleView<void> GetView(ROOT::DescriptorId_t fieldId, void *rawPtr, const std::type_info &ti)
0379    {
0380       return GetView(fieldId, rawPtr, ROOT::Internal::GetRenormalizedDemangledTypeName(ti));
0381    }
0382 
0383    /// Provides direct access to the I/O buffers of a **mappable** (sub)field.
0384    ///
0385    /// Raises an exception if there is no field with the given name.
0386    /// Attempting to access the values of a direct-access view for non-mappable fields will yield compilation errors.
0387    ///
0388    /// \sa GetView(std::string_view)
0389    template <typename T>
0390    ROOT::RNTupleDirectAccessView<T> GetDirectAccessView(std::string_view fieldName)
0391    {
0392       return GetDirectAccessView<T>(RetrieveFieldId(fieldName));
0393    }
0394 
0395    /// Provides direct access to the I/O buffers of a **mappable** (sub)field from its on-disk ID.
0396    ///
0397    /// \sa GetDirectAccessView(std::string_view)
0398    template <typename T>
0399    ROOT::RNTupleDirectAccessView<T> GetDirectAccessView(ROOT::DescriptorId_t fieldId)
0400    {
0401       auto field = ROOT::RNTupleDirectAccessView<T>::CreateField(fieldId, *fSource);
0402       auto range = ROOT::Internal::GetFieldRange(field, *fSource);
0403       return ROOT::RNTupleDirectAccessView<T>(std::move(field), range);
0404    }
0405 
0406    /// Provides access to a collection field, that can itself generate new RNTupleViews for its nested fields.
0407    ///
0408    /// Raises an exception if:
0409    /// * there is no field with the given name or,
0410    /// * the field is not a collection
0411    ///
0412    /// \sa GetView(std::string_view)
0413    ROOT::RNTupleCollectionView GetCollectionView(std::string_view fieldName)
0414    {
0415       auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName);
0416       if (fieldId == ROOT::kInvalidDescriptorId) {
0417          throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" +
0418                                   fSource->GetSharedDescriptorGuard()->GetName() + "'"));
0419       }
0420       return GetCollectionView(fieldId);
0421    }
0422 
0423    /// Provides access to a collection field from its on-disk ID, that can itself generate new RNTupleViews for its
0424    /// nested fields.
0425    ///
0426    /// \sa GetCollectionView(std::string_view)
0427    ROOT::RNTupleCollectionView GetCollectionView(ROOT::DescriptorId_t fieldId)
0428    {
0429       return ROOT::RNTupleCollectionView::Create(fieldId, fSource.get());
0430    }
0431 
0432    RIterator begin() { return RIterator(0); }
0433    RIterator end() { return RIterator(GetNEntries()); }
0434 
0435    /// Enable performance measurements (decompression time, bytes read from storage, etc.)
0436    ///
0437    /// **Example: inspect the reader metrics after loading every entry**
0438    /// ~~~ {.cpp}
0439    /// #include <ROOT/RNTupleReader.hxx>
0440    /// #include <iostream>
0441    ///
0442    /// auto ntuple = ROOT::RNTupleReader::Open("myNTuple", "some/file.root");
0443    /// // metrics must be turned on beforehand
0444    /// reader->EnableMetrics();
0445    ///
0446    /// for (auto i : ntuple->GetEntryRange()) {
0447    ///    reader->LoadEntry(i);
0448    /// }
0449    /// reader->PrintInfo(ROOT::ENTupleInfo::kMetrics);
0450    /// ~~~
0451    void EnableMetrics() { fMetrics.Enable(); }
0452    const Experimental::Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; }
0453 }; // class RNTupleReader
0454 
0455 } // namespace ROOT
0456 
0457 #endif // ROOT_RNTupleReader