|
|
|||
File indexing completed on 2025-12-16 10:30:01
0001 /// \file ROOT/RNTupleReader.hxx 0002 /// \ingroup NTuple 0003 /// \author Jakob Blomer <jblomer@cern.ch> 0004 /// \date 2024-02-20 0005 0006 /************************************************************************* 0007 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. * 0008 * All rights reserved. * 0009 * * 0010 * For the licensing terms see $ROOTSYS/LICENSE. * 0011 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0012 *************************************************************************/ 0013 0014 #ifndef ROOT_RNTupleReader 0015 #define ROOT_RNTupleReader 0016 0017 #include <ROOT/RConfig.hxx> // for R__unlikely 0018 #include <ROOT/REntry.hxx> 0019 #include <ROOT/RError.hxx> 0020 #include <ROOT/RNTupleDescriptor.hxx> 0021 #include <ROOT/RNTupleMetrics.hxx> 0022 #include <ROOT/RNTupleModel.hxx> 0023 #include <ROOT/RNTupleReadOptions.hxx> 0024 #include <ROOT/RNTupleTypes.hxx> 0025 #include <ROOT/RNTupleView.hxx> 0026 #include <ROOT/RPageStorage.hxx> 0027 #include <ROOT/RSpan.hxx> 0028 0029 #include <iostream> 0030 #include <iterator> 0031 #include <memory> 0032 #include <string> 0033 #include <string_view> 0034 0035 namespace ROOT { 0036 class RNTuple; 0037 0038 /// Listing of the different options that can be printed by RNTupleReader::GetInfo() 0039 enum class ENTupleInfo { 0040 kSummary, // The RNTuple name, description, number of entries 0041 kStorageDetails, // size on storage, page sizes, compression factor, etc. 0042 kMetrics, // internals performance counters, requires that EnableMetrics() was called 0043 }; 0044 0045 // clang-format off 0046 /** 0047 \class ROOT::RNTupleReader 0048 \ingroup NTuple 0049 \brief Reads RNTuple data from storage 0050 0051 The RNTupleReader provides access to data stored in the RNTuple binary format as C++ objects, using an RNTupleModel. 0052 It infers this model from the RNTuple's on-disk metadata, or uses a model imposed by the user. 0053 The latter case allows users to read into a specialized RNTuple model that covers 0054 only a subset of the fields in the RNTuple. The RNTuple model is used when reading complete entries through LoadEntry(). 0055 Individual fields can be read as well by instantiating a tree view. 0056 0057 ~~~ {.cpp} 0058 #include <ROOT/RNTupleReader.hxx> 0059 #include <iostream> 0060 0061 auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root"); 0062 std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n"; 0063 ~~~ 0064 */ 0065 // clang-format on 0066 class RNTupleReader { 0067 private: 0068 /// Set as the page source's scheduler for parallel page decompression if implicit multi-threading (IMT) is on. 0069 /// Needs to be destructed after the page source is destructed (and thus be declared before) 0070 std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks; 0071 0072 std::unique_ptr<Internal::RPageSource> fSource; 0073 /// Needs to be destructed before fSource 0074 std::unique_ptr<ROOT::RNTupleModel> fModel; 0075 /// We use a dedicated on-demand reader for Show(). Printing data uses all the fields 0076 /// from the full model even if the analysis code uses only a subset of fields. The display reader 0077 /// is a clone of the original reader. 0078 std::unique_ptr<RNTupleReader> fDisplayReader; 0079 /// The RNTuple descriptor in the page source is protected by a read-write lock. We don't expose that to the 0080 /// users of RNTupleReader::GetDescriptor(). Instead, if descriptor information is needed, we clone the 0081 /// descriptor. Using the descriptor's generation number, we know if the cached descriptor is stale. 0082 /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes, 0083 /// not on a hot code path. 0084 std::optional<ROOT::RNTupleDescriptor> fCachedDescriptor; 0085 /// We know that the RNTupleReader is always reading a single RNTuple, so the number of entries is fixed. 0086 ROOT::NTupleSize_t fNEntries = 0; 0087 Experimental::Detail::RNTupleMetrics fMetrics; 0088 /// If not nullopt, these will be used when creating the model 0089 std::optional<ROOT::RNTupleDescriptor::RCreateModelOptions> fCreateModelOptions; 0090 0091 RNTupleReader(std::unique_ptr<ROOT::RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source, 0092 const ROOT::RNTupleReadOptions &options); 0093 /// The model is generated from the RNTuple metadata on storage. 0094 explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source, const ROOT::RNTupleReadOptions &options); 0095 0096 void ConnectModel(ROOT::RNTupleModel &model, bool allowFieldSubstitutions); 0097 RNTupleReader *GetDisplayReader(); 0098 void InitPageSource(bool enableMetrics); 0099 0100 ROOT::DescriptorId_t RetrieveFieldId(std::string_view fieldName) const; 0101 0102 public: 0103 // Browse through the entries 0104 class RIterator { 0105 private: 0106 ROOT::NTupleSize_t fIndex = ROOT::kInvalidNTupleIndex; 0107 0108 public: 0109 using iterator = RIterator; 0110 using iterator_category = std::input_iterator_tag; 0111 using value_type = ROOT::NTupleSize_t; 0112 using difference_type = std::ptrdiff_t; 0113 using pointer = const ROOT::NTupleSize_t *; 0114 using reference = const ROOT::NTupleSize_t &; 0115 0116 RIterator() = default; 0117 explicit RIterator(ROOT::NTupleSize_t index) : fIndex(index) {} 0118 ~RIterator() = default; 0119 0120 iterator operator++(int) /* postfix */ 0121 { 0122 auto r = *this; 0123 fIndex++; 0124 return r; 0125 } 0126 iterator &operator++() /* prefix */ 0127 { 0128 ++fIndex; 0129 return *this; 0130 } 0131 reference operator*() const { return fIndex; } 0132 pointer operator->() const { return &fIndex; } 0133 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } 0134 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } 0135 }; 0136 0137 /// Open an RNTuple for reading. 0138 /// 0139 /// Throws an RException if there is no RNTuple with the given name. 0140 /// 0141 /// **Example: open an RNTuple and print the number of entries** 0142 /// ~~~ {.cpp} 0143 /// #include <ROOT/RNTupleReader.hxx> 0144 /// #include <iostream> 0145 /// 0146 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root"); 0147 /// std::cout << "myNTuple has " << reader->GetNEntries() << " entries\n"; 0148 /// ~~~ 0149 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage, 0150 const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions()); 0151 static std::unique_ptr<RNTupleReader> 0152 Open(const RNTuple &ntuple, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions()); 0153 0154 /// The caller imposes a model, which must be compatible with the model found in the data on storage. 0155 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntupleName, 0156 std::string_view storage, 0157 const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions()); 0158 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<ROOT::RNTupleModel> model, const RNTuple &ntuple, 0159 const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions()); 0160 0161 /// The caller imposes the way the model is reconstructed 0162 static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts, 0163 std::string_view ntupleName, std::string_view storage, 0164 const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions()); 0165 static std::unique_ptr<RNTupleReader> Open(const ROOT::RNTupleDescriptor::RCreateModelOptions &createModelOpts, 0166 const RNTuple &ntuple, 0167 const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions()); 0168 std::unique_ptr<RNTupleReader> Clone() 0169 { 0170 auto options = ROOT::RNTupleReadOptions{}; 0171 options.SetEnableMetrics(fMetrics.IsEnabled()); 0172 return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone(), options)); 0173 } 0174 ~RNTupleReader(); 0175 0176 /// Returns the number of entries in this RNTuple. 0177 /// Note that the recommended way to iterate the RNTuple is using 0178 /// ~~~ {.cpp} 0179 /// // RECOMMENDED way to iterate an ntuple 0180 /// for (auto i : reader->GetEntryRange()) { ... } 0181 /// ~~~ 0182 /// instead of 0183 /// ~~~ {.cpp} 0184 /// // DISCOURAGED way to iterate an ntuple 0185 /// for (auto i = 0u; i < reader->GetNEntries(); ++i) { ... } 0186 /// ~~~ 0187 /// The reason is that determining the number of entries, while currently cheap, may in the future be 0188 /// an expensive operation. 0189 ROOT::NTupleSize_t GetNEntries() const { return fNEntries; } 0190 const ROOT::RNTupleModel &GetModel(); 0191 std::unique_ptr<ROOT::REntry> CreateEntry(); 0192 0193 /// Returns a cached copy of the page source descriptor. The returned pointer remains valid until the next call 0194 /// to LoadEntry() or to any of the views returned from the reader. 0195 const ROOT::RNTupleDescriptor &GetDescriptor(); 0196 0197 /// Prints a detailed summary of the RNTuple, including a list of fields. 0198 /// 0199 /// **Example: print summary information to stdout** 0200 /// ~~~ {.cpp} 0201 /// #include <ROOT/RNTupleReader.hxx> 0202 /// #include <iostream> 0203 /// 0204 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root"); 0205 /// reader->PrintInfo(); 0206 /// // or, equivalently: 0207 /// reader->PrintInfo(ROOT::ENTupleInfo::kSummary, std::cout); 0208 /// ~~~ 0209 /// **Example: print detailed column storage data to stderr** 0210 /// ~~~ {.cpp} 0211 /// #include <ROOT/RNTupleReader.hxx> 0212 /// #include <iostream> 0213 /// 0214 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root"); 0215 /// reader->PrintInfo(ROOT::ENTupleInfo::kStorageDetails, std::cerr); 0216 /// ~~~ 0217 /// 0218 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics. 0219 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout) const; 0220 0221 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default, 0222 /// prints the output in JSON format. 0223 /// Uses the visitor pattern to traverse through each field of the given entry. 0224 void Show(ROOT::NTupleSize_t index, std::ostream &output = std::cout); 0225 0226 /// Fills the default entry of the model. 0227 /// Raises an exception when `index` is greater than the number of entries present in the RNTuple 0228 void LoadEntry(ROOT::NTupleSize_t index) 0229 { 0230 // TODO(jblomer): can be templated depending on the factory method / constructor 0231 if (R__unlikely(!fModel)) { 0232 // Will create the fModel. 0233 GetModel(); 0234 } 0235 LoadEntry(index, fModel->GetDefaultEntry()); 0236 } 0237 /// Fills a user provided entry after checking that the entry has been instantiated from the RNTuple model 0238 void LoadEntry(ROOT::NTupleSize_t index, ROOT::REntry &entry) 0239 { 0240 if (R__unlikely(entry.GetModelId() != fModel->GetModelId())) 0241 throw RException(R__FAIL("mismatch between entry and model")); 0242 0243 entry.Read(index); 0244 } 0245 0246 /// Returns an iterator over the entry indices of the RNTuple. 0247 /// 0248 /// **Example: iterate over all entries and print each entry in JSON format** 0249 /// ~~~ {.cpp} 0250 /// #include <ROOT/RNTupleReader.hxx> 0251 /// #include <iostream> 0252 /// 0253 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root"); 0254 /// for (auto i : ntuple->GetEntryRange()) { 0255 /// reader->Show(i); 0256 /// } 0257 /// ~~~ 0258 ROOT::RNTupleGlobalRange GetEntryRange() { return ROOT::RNTupleGlobalRange(0, GetNEntries()); } 0259 0260 /// Provides access to an individual (sub)field, 0261 /// e.g. `GetView<Particle>("particle")`, `GetView<double>("particle.pt")` or 0262 /// `GetView<std::vector<Particle>>("particles")`. It is possible to directly get the size of a collection (without 0263 /// reading the collection itself) using RNTupleCardinality: 0264 /// `GetView<ROOT::RNTupleCardinality<std::uint64_t>>("particles")`. 0265 /// 0266 /// Raises an exception if there is no field with the given name. 0267 /// 0268 /// **Example: iterate over a field named "pt" of type `float`** 0269 /// ~~~ {.cpp} 0270 /// #include <ROOT/RNTupleReader.hxx> 0271 /// #include <iostream> 0272 /// 0273 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root"); 0274 /// auto pt = reader->GetView<float>("pt"); 0275 /// 0276 /// for (auto i : reader->GetEntryRange()) { 0277 /// std::cout << i << ": " << pt(i) << "\n"; 0278 /// } 0279 /// ~~~ 0280 /// 0281 /// **Note**: if `T = void`, type checks are disabled. This is not really useful for this overload because 0282 /// RNTupleView<void> does not give access to the pointer. If required, it is possible to provide an `objPtr` of a 0283 /// dynamic type, for example via GetView(std::string_view, void *, std::string_view). 0284 template <typename T> 0285 ROOT::RNTupleView<T> GetView(std::string_view fieldName) 0286 { 0287 return GetView<T>(RetrieveFieldId(fieldName)); 0288 } 0289 0290 /// Provides access to an individual (sub)field, reading its values into `objPtr`. 0291 /// 0292 /// Raises an exception if there is no field with the given name. 0293 /// 0294 /// **Example: iterate over a field named "pt" of type `float`** 0295 /// ~~~ {.cpp} 0296 /// #include <ROOT/RNTupleReader.hxx> 0297 /// #include <iostream> 0298 /// 0299 /// auto reader = ROOT::RNTupleReader::Open("myNTuple", "some/file.root"); 0300 /// auto pt = std::make_shared<float>(); 0301 /// auto ptView = reader->GetView("pt", pt); 0302 /// 0303 /// for (auto i : reader->GetEntryRange()) { 0304 /// ptView(i); 0305 /// std::cout << i << ": " << *pt << "\n"; 0306 /// } 0307 /// ~~~ 0308 /// 0309 /// **Note**: if `T = void`, type checks are disabled. It is the caller's responsibility to match the field and 0310 /// object types. It is strongly recommended to use an overload that allows passing the `typeName`, such as 0311 /// GetView(std::string_view, void *, std::string_view). This allows type checks with the on-disk metadata and 0312 /// enables automatic schema evolution and conversion rules. 0313 template <typename T> 0314 ROOT::RNTupleView<T> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr) 0315 { 0316 return GetView<T>(RetrieveFieldId(fieldName), objPtr); 0317 } 0318 0319 /// Provides access to an individual (sub)field, reading its values into `rawPtr`. 0320 /// 0321 /// \sa GetView(std::string_view, std::shared_ptr<T>) 0322 template <typename T> 0323 ROOT::RNTupleView<T> GetView(std::string_view fieldName, T *rawPtr) 0324 { 0325 return GetView<T>(RetrieveFieldId(fieldName), rawPtr); 0326 } 0327 0328 /// Provides access to an individual (sub)field, reading its values into `rawPtr` as the type provided by `typeName`. 0329 /// 0330 /// \sa GetView(std::string_view, std::shared_ptr<T>) 0331 ROOT::RNTupleView<void> GetView(std::string_view fieldName, void *rawPtr, std::string_view typeName) 0332 { 0333 return GetView(RetrieveFieldId(fieldName), rawPtr, typeName); 0334 } 0335 0336 /// Provides access to an individual (sub)field, reading its values into `rawPtr` as the type provided by `ti`. 0337 /// 0338 /// \sa GetView(std::string_view, std::shared_ptr<T>) 0339 ROOT::RNTupleView<void> GetView(std::string_view fieldName, void *rawPtr, const std::type_info &ti) 0340 { 0341 return GetView(RetrieveFieldId(fieldName), rawPtr, ROOT::Internal::GetRenormalizedTypeName(ti)); 0342 } 0343 0344 /// Provides access to an individual (sub)field from its on-disk ID. 0345 /// 0346 /// \sa GetView(std::string_view) 0347 template <typename T> 0348 ROOT::RNTupleView<T> GetView(ROOT::DescriptorId_t fieldId) 0349 { 0350 auto field = ROOT::RNTupleView<T>::CreateField(fieldId, *fSource); 0351 auto range = ROOT::Internal::GetFieldRange(*field, *fSource); 0352 return ROOT::RNTupleView<T>(std::move(field), range); 0353 } 0354 0355 /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `objPtr`. 0356 /// 0357 /// \sa GetView(std::string_view, std::shared_ptr<T>) 0358 template <typename T> 0359 ROOT::RNTupleView<T> GetView(ROOT::DescriptorId_t fieldId, std::shared_ptr<T> objPtr) 0360 { 0361 auto field = ROOT::RNTupleView<T>::CreateField(fieldId, *fSource); 0362 auto range = ROOT::Internal::GetFieldRange(*field, *fSource); 0363 return ROOT::RNTupleView<T>(std::move(field), range, objPtr); 0364 } 0365 0366 /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `rawPtr`. 0367 /// 0368 /// \sa GetView(std::string_view, std::shared_ptr<T>) 0369 template <typename T> 0370 ROOT::RNTupleView<T> GetView(ROOT::DescriptorId_t fieldId, T *rawPtr) 0371 { 0372 auto field = ROOT::RNTupleView<T>::CreateField(fieldId, *fSource); 0373 auto range = ROOT::Internal::GetFieldRange(*field, *fSource); 0374 return ROOT::RNTupleView<T>(std::move(field), range, rawPtr); 0375 } 0376 0377 /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `rawPtr` as the type 0378 /// provided by `typeName`. 0379 /// 0380 /// \sa GetView(std::string_view, std::shared_ptr<T>) 0381 ROOT::RNTupleView<void> GetView(ROOT::DescriptorId_t fieldId, void *rawPtr, std::string_view typeName) 0382 { 0383 auto field = RNTupleView<void>::CreateField(fieldId, *fSource, typeName); 0384 auto range = ROOT::Internal::GetFieldRange(*field, *fSource); 0385 return RNTupleView<void>(std::move(field), range, rawPtr); 0386 } 0387 0388 /// Provides access to an individual (sub)field from its on-disk ID, reading its values into `objPtr` as the type 0389 /// provided by `ti`. 0390 /// 0391 /// \sa GetView(std::string_view, std::shared_ptr<T>) 0392 ROOT::RNTupleView<void> GetView(ROOT::DescriptorId_t fieldId, void *rawPtr, const std::type_info &ti) 0393 { 0394 return GetView(fieldId, rawPtr, ROOT::Internal::GetRenormalizedTypeName(ti)); 0395 } 0396 0397 /// Provides direct access to the I/O buffers of a **mappable** (sub)field. 0398 /// 0399 /// Raises an exception if there is no field with the given name. 0400 /// Attempting to access the values of a direct-access view for non-mappable fields will yield compilation errors. 0401 /// 0402 /// \sa GetView(std::string_view) 0403 template <typename T> 0404 ROOT::RNTupleDirectAccessView<T> GetDirectAccessView(std::string_view fieldName) 0405 { 0406 return GetDirectAccessView<T>(RetrieveFieldId(fieldName)); 0407 } 0408 0409 /// Provides direct access to the I/O buffers of a **mappable** (sub)field from its on-disk ID. 0410 /// 0411 /// \sa GetDirectAccessView(std::string_view) 0412 template <typename T> 0413 ROOT::RNTupleDirectAccessView<T> GetDirectAccessView(ROOT::DescriptorId_t fieldId) 0414 { 0415 auto field = ROOT::RNTupleDirectAccessView<T>::CreateField(fieldId, *fSource); 0416 auto range = ROOT::Internal::GetFieldRange(field, *fSource); 0417 return ROOT::RNTupleDirectAccessView<T>(std::move(field), range); 0418 } 0419 0420 /// Provides access to a collection field, that can itself generate new RNTupleViews for its nested fields. 0421 /// 0422 /// Raises an exception if: 0423 /// * there is no field with the given name or, 0424 /// * the field is not a collection 0425 /// 0426 /// \sa GetView(std::string_view) 0427 ROOT::RNTupleCollectionView GetCollectionView(std::string_view fieldName) 0428 { 0429 auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName); 0430 if (fieldId == ROOT::kInvalidDescriptorId) { 0431 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" + 0432 fSource->GetSharedDescriptorGuard()->GetName() + "'")); 0433 } 0434 return GetCollectionView(fieldId); 0435 } 0436 0437 /// Provides access to a collection field from its on-disk ID, that can itself generate new RNTupleViews for its 0438 /// nested fields. 0439 /// 0440 /// \sa GetCollectionView(std::string_view) 0441 ROOT::RNTupleCollectionView GetCollectionView(ROOT::DescriptorId_t fieldId) 0442 { 0443 return ROOT::RNTupleCollectionView::Create(fieldId, fSource.get()); 0444 } 0445 0446 RIterator begin() { return RIterator(0); } 0447 RIterator end() { return RIterator(GetNEntries()); } 0448 0449 /// Enable performance measurements (decompression time, bytes read from storage, etc.) 0450 /// 0451 /// **Example: inspect the reader metrics after loading every entry** 0452 /// ~~~ {.cpp} 0453 /// #include <ROOT/RNTupleReader.hxx> 0454 /// #include <iostream> 0455 /// 0456 /// auto ntuple = ROOT::RNTupleReader::Open("myNTuple", "some/file.root"); 0457 /// // metrics must be turned on beforehand 0458 /// reader->EnableMetrics(); 0459 /// 0460 /// for (auto i : ntuple->GetEntryRange()) { 0461 /// reader->LoadEntry(i); 0462 /// } 0463 /// reader->PrintInfo(ROOT::ENTupleInfo::kMetrics); 0464 /// ~~~ 0465 void EnableMetrics() { fMetrics.Enable(); } 0466 const Experimental::Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; } 0467 }; // class RNTupleReader 0468 0469 } // namespace ROOT 0470 0471 #endif // ROOT_RNTupleReader
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|