|
||||
File indexing completed on 2025-01-18 10:10:46
0001 /// \file ROOT/RNTupleReader.hxx 0002 /// \ingroup NTuple ROOT7 0003 /// \author Jakob Blomer <jblomer@cern.ch> 0004 /// \date 2024-02-20 0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback 0006 /// is welcome! 0007 0008 /************************************************************************* 0009 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. * 0010 * All rights reserved. * 0011 * * 0012 * For the licensing terms see $ROOTSYS/LICENSE. * 0013 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0014 *************************************************************************/ 0015 0016 #ifndef ROOT7_RNTupleReader 0017 #define ROOT7_RNTupleReader 0018 0019 #include <ROOT/RConfig.hxx> // for R__unlikely 0020 #include <ROOT/RError.hxx> 0021 #include <ROOT/RNTupleDescriptor.hxx> 0022 #include <ROOT/RNTupleMetrics.hxx> 0023 #include <ROOT/RNTupleModel.hxx> 0024 #include <ROOT/RNTupleReadOptions.hxx> 0025 #include <ROOT/RNTupleUtil.hxx> 0026 #include <ROOT/RNTupleView.hxx> 0027 #include <ROOT/RPageStorage.hxx> 0028 #include <ROOT/RSpan.hxx> 0029 0030 #include <iostream> 0031 #include <iterator> 0032 #include <memory> 0033 #include <string> 0034 #include <string_view> 0035 0036 namespace ROOT { 0037 namespace Experimental { 0038 0039 class REntry; 0040 class RNTuple; 0041 0042 /// Listing of the different options that can be printed by RNTupleReader::GetInfo() 0043 enum class ENTupleInfo { 0044 kSummary, // The ntuple name, description, number of entries 0045 kStorageDetails, // size on storage, page sizes, compression factor, etc. 0046 kMetrics, // internals performance counters, requires that EnableMetrics() was called 0047 }; 0048 0049 // clang-format off 0050 /** 0051 \class ROOT::Experimental::RNTupleReader 0052 \ingroup NTuple 0053 \brief An RNTuple that is used to read data from storage 0054 0055 An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage 0056 or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers 0057 only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries. 0058 Individual fields can be read as well by instantiating a tree view. 0059 0060 ~~~ {.cpp} 0061 #include <ROOT/RNTupleReader.hxx> 0062 using ROOT::Experimental::RNTupleReader; 0063 0064 #include <iostream> 0065 0066 auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root"); 0067 std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n"; 0068 ~~~ 0069 */ 0070 // clang-format on 0071 class RNTupleReader { 0072 private: 0073 /// Set as the page source's scheduler for parallel page decompression if IMT is on 0074 /// Needs to be destructed after the pages source is destructed (an thus be declared before) 0075 std::unique_ptr<Internal::RPageStorage::RTaskScheduler> fUnzipTasks; 0076 0077 std::unique_ptr<Internal::RPageSource> fSource; 0078 /// Needs to be destructed before fSource 0079 std::unique_ptr<RNTupleModel> fModel; 0080 /// We use a dedicated on-demand reader for Show() and Scan(). Printing data uses all the fields 0081 /// from the full model even if the analysis code uses only a subset of fields. The display reader 0082 /// is a clone of the original reader. 0083 std::unique_ptr<RNTupleReader> fDisplayReader; 0084 /// The ntuple descriptor in the page source is protected by a read-write lock. We don't expose that to the 0085 /// users of RNTupleReader::GetDescriptor(). Instead, if descriptor information is needed, we clone the 0086 /// descriptor. Using the descriptor's generation number, we know if the cached descriptor is stale. 0087 /// Retrieving descriptor data from an RNTupleReader is supposed to be for testing and information purposes, 0088 /// not on a hot code path. 0089 std::unique_ptr<RNTupleDescriptor> fCachedDescriptor; 0090 Detail::RNTupleMetrics fMetrics; 0091 0092 RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Internal::RPageSource> source); 0093 /// The model is generated from the ntuple metadata on storage. 0094 explicit RNTupleReader(std::unique_ptr<Internal::RPageSource> source); 0095 0096 void ConnectModel(RNTupleModel &model); 0097 RNTupleReader *GetDisplayReader(); 0098 void InitPageSource(); 0099 0100 DescriptorId_t RetrieveFieldId(std::string_view fieldName) const; 0101 0102 public: 0103 // Browse through the entries 0104 class RIterator { 0105 private: 0106 NTupleSize_t fIndex = kInvalidNTupleIndex; 0107 0108 public: 0109 using iterator = RIterator; 0110 using iterator_category = std::forward_iterator_tag; 0111 using value_type = NTupleSize_t; 0112 using difference_type = NTupleSize_t; 0113 using pointer = NTupleSize_t *; 0114 using reference = NTupleSize_t &; 0115 0116 RIterator() = default; 0117 explicit RIterator(NTupleSize_t index) : fIndex(index) {} 0118 ~RIterator() = default; 0119 0120 iterator operator++(int) /* postfix */ 0121 { 0122 auto r = *this; 0123 fIndex++; 0124 return r; 0125 } 0126 iterator &operator++() /* prefix */ 0127 { 0128 ++fIndex; 0129 return *this; 0130 } 0131 reference operator*() { return fIndex; } 0132 pointer operator->() { return &fIndex; } 0133 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } 0134 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } 0135 }; 0136 0137 /// Used to specify the underlying RNTuples in OpenFriends() 0138 struct ROpenSpec { 0139 std::string fNTupleName; 0140 std::string fStorage; 0141 RNTupleReadOptions fOptions; 0142 0143 ROpenSpec() = default; 0144 ROpenSpec(std::string_view n, std::string_view s) : fNTupleName(n), fStorage(s) {} 0145 }; 0146 0147 /// Open an RNTuple for reading. 0148 /// 0149 /// Throws an RException if there is no RNTuple with the given name. 0150 /// 0151 /// **Example: open an RNTuple and print the number of entries** 0152 /// ~~~ {.cpp} 0153 /// #include <ROOT/RNTupleReader.hxx> 0154 /// using ROOT::Experimental::RNTupleReader; 0155 /// 0156 /// #include <iostream> 0157 /// 0158 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root"); 0159 /// std::cout << "myNTuple has " << ntuple->GetNEntries() << " entries\n"; 0160 /// ~~~ 0161 static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage, 0162 const RNTupleReadOptions &options = RNTupleReadOptions()); 0163 static std::unique_ptr<RNTupleReader> 0164 Open(RNTuple *ntuple, const RNTupleReadOptions &options = RNTupleReadOptions()); 0165 /// The caller imposes a model, which must be compatible with the model found in the data on storage. 0166 static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model, std::string_view ntupleName, 0167 std::string_view storage, 0168 const RNTupleReadOptions &options = RNTupleReadOptions()); 0169 static std::unique_ptr<RNTupleReader> 0170 Open(std::unique_ptr<RNTupleModel> model, RNTuple *ntuple, const RNTupleReadOptions &options = RNTupleReadOptions()); 0171 /// Open RNTuples as one virtual, horizontally combined ntuple. The underlying RNTuples must 0172 /// have an identical number of entries. Fields in the combined RNTuple are named with the ntuple name 0173 /// as a prefix, e.g. myNTuple1.px and myNTuple2.pt (see tutorial ntpl006_friends) 0174 static std::unique_ptr<RNTupleReader> OpenFriends(std::span<ROpenSpec> ntuples); 0175 std::unique_ptr<RNTupleReader> Clone() 0176 { 0177 return std::unique_ptr<RNTupleReader>(new RNTupleReader(fSource->Clone())); 0178 } 0179 ~RNTupleReader(); 0180 0181 NTupleSize_t GetNEntries() const { return fSource->GetNEntries(); } 0182 const RNTupleModel &GetModel(); 0183 0184 /// Returns a cached copy of the page source descriptor. The returned pointer remains valid until the next call 0185 /// to LoadEntry or to any of the views returned from the reader. 0186 const RNTupleDescriptor &GetDescriptor(); 0187 0188 /// Prints a detailed summary of the ntuple, including a list of fields. 0189 /// 0190 /// **Example: print summary information to stdout** 0191 /// ~~~ {.cpp} 0192 /// #include <ROOT/RNTupleReader.hxx> 0193 /// using ROOT::Experimental::ENTupleInfo; 0194 /// using ROOT::Experimental::RNTupleReader; 0195 /// 0196 /// #include <iostream> 0197 /// 0198 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root"); 0199 /// ntuple->PrintInfo(); 0200 /// // or, equivalently: 0201 /// ntuple->PrintInfo(ENTupleInfo::kSummary, std::cout); 0202 /// ~~~ 0203 /// **Example: print detailed column storage data to stderr** 0204 /// ~~~ {.cpp} 0205 /// #include <ROOT/RNTupleReader.hxx> 0206 /// using ROOT::Experimental::ENTupleInfo; 0207 /// using ROOT::Experimental::RNTupleReader; 0208 /// 0209 /// #include <iostream> 0210 /// 0211 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root"); 0212 /// ntuple->PrintInfo(ENTupleInfo::kStorageDetails, std::cerr); 0213 /// ~~~ 0214 /// 0215 /// For use of ENTupleInfo::kMetrics, see #EnableMetrics. 0216 void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout); 0217 0218 /// Shows the values of the i-th entry/row, starting with 0 for the first entry. By default, 0219 /// prints the output in JSON format. 0220 /// Uses the visitor pattern to traverse through each field of the given entry. 0221 void Show(NTupleSize_t index, std::ostream &output = std::cout); 0222 0223 /// Analogous to Fill(), fills the default entry of the model. Returns false at the end of the ntuple. 0224 /// On I/O errors, raises an exception. 0225 void LoadEntry(NTupleSize_t index) 0226 { 0227 // TODO(jblomer): can be templated depending on the factory method / constructor 0228 if (R__unlikely(!fModel)) { 0229 fModel = fSource->GetSharedDescriptorGuard()->CreateModel(); 0230 ConnectModel(*fModel); 0231 } 0232 LoadEntry(index, fModel->GetDefaultEntry()); 0233 } 0234 /// Fills a user provided entry after checking that the entry has been instantiated from the ntuple model 0235 void LoadEntry(NTupleSize_t index, REntry &entry) { entry.Read(index); } 0236 0237 /// Returns an iterator over the entry indices of the RNTuple. 0238 /// 0239 /// **Example: iterate over all entries and print each entry in JSON format** 0240 /// ~~~ {.cpp} 0241 /// #include <ROOT/RNTupleReader.hxx> 0242 /// using ROOT::Experimental::ENTupleShowFormat; 0243 /// using ROOT::Experimental::RNTupleReader; 0244 /// 0245 /// #include <iostream> 0246 /// 0247 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root"); 0248 /// for (auto i : ntuple->GetEntryRange()) { 0249 /// ntuple->Show(i); 0250 /// } 0251 /// ~~~ 0252 RNTupleGlobalRange GetEntryRange() { return RNTupleGlobalRange(0, GetNEntries()); } 0253 0254 /// Provides access to an individual field that can contain either a scalar value or a collection, e.g. 0255 /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index 0256 /// field of a collection itself, like GetView<NTupleSize_t>("particle"). 0257 /// 0258 /// Raises an exception if there is no field with the given name. 0259 /// 0260 /// **Example: iterate over a field named "pt" of type `float`** 0261 /// ~~~ {.cpp} 0262 /// #include <ROOT/RNTupleReader.hxx> 0263 /// using ROOT::Experimental::RNTupleReader; 0264 /// 0265 /// #include <iostream> 0266 /// 0267 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root"); 0268 /// auto pt = ntuple->GetView<float>("pt"); 0269 /// 0270 /// for (auto i : ntuple->GetEntryRange()) { 0271 /// std::cout << i << ": " << pt(i) << "\n"; 0272 /// } 0273 /// ~~~ 0274 template <typename T> 0275 RNTupleView<T, false> GetView(std::string_view fieldName) 0276 { 0277 return GetView<T>(RetrieveFieldId(fieldName)); 0278 } 0279 0280 template <typename T> 0281 RNTupleView<T, true> GetView(std::string_view fieldName, std::shared_ptr<T> objPtr) 0282 { 0283 return GetView<T>(RetrieveFieldId(fieldName), objPtr); 0284 } 0285 0286 template <typename T> 0287 RNTupleView<T, false> GetView(DescriptorId_t fieldId) 0288 { 0289 return RNTupleView<T, false>(fieldId, fSource.get()); 0290 } 0291 0292 template <typename T> 0293 RNTupleView<T, true> GetView(DescriptorId_t fieldId, std::shared_ptr<T> objPtr) 0294 { 0295 return RNTupleView<T, true>(fieldId, fSource.get(), objPtr); 0296 } 0297 0298 /// Raises an exception if: 0299 /// * there is no field with the given name or, 0300 /// * the field is not a collection 0301 RNTupleCollectionView GetCollectionView(std::string_view fieldName) 0302 { 0303 auto fieldId = fSource->GetSharedDescriptorGuard()->FindFieldId(fieldName); 0304 if (fieldId == kInvalidDescriptorId) { 0305 throw RException(R__FAIL("no field named '" + std::string(fieldName) + "' in RNTuple '" + 0306 fSource->GetSharedDescriptorGuard()->GetName() + "'")); 0307 } 0308 return GetCollectionView(fieldId); 0309 } 0310 0311 RNTupleCollectionView GetCollectionView(DescriptorId_t fieldId) 0312 { 0313 return RNTupleCollectionView(fieldId, fSource.get()); 0314 } 0315 0316 RIterator begin() { return RIterator(0); } 0317 RIterator end() { return RIterator(GetNEntries()); } 0318 0319 /// Enable performance measurements (decompression time, bytes read from storage, etc.) 0320 /// 0321 /// **Example: inspect the reader metrics after loading every entry** 0322 /// ~~~ {.cpp} 0323 /// #include <ROOT/RNTupleReader.hxx> 0324 /// using ROOT::Experimental::ENTupleInfo; 0325 /// using ROOT::Experimental::RNTupleReader; 0326 /// 0327 /// #include <iostream> 0328 /// 0329 /// auto ntuple = RNTupleReader::Open("myNTuple", "some/file.root"); 0330 /// // metrics must be turned on beforehand 0331 /// ntuple->EnableMetrics(); 0332 /// 0333 /// for (auto i : ntuple->GetEntryRange()) { 0334 /// ntuple->LoadEntry(i); 0335 /// } 0336 /// ntuple->PrintInfo(ENTupleInfo::kMetrics); 0337 /// ~~~ 0338 void EnableMetrics() { fMetrics.Enable(); } 0339 const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; } 0340 }; // class RNTupleReader 0341 0342 } // namespace Experimental 0343 } // namespace ROOT 0344 0345 #endif // ROOT7_RNTupleReader
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |