|
|
|||
File indexing completed on 2025-12-16 10:30:01
0001 /// \file ROOT/RNTupleProcessor.hxx 0002 /// \ingroup NTuple 0003 /// \author Florine de Geus <florine.de.geus@cern.ch> 0004 /// \date 2024-03-26 0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback 0006 /// is welcome! 0007 0008 /************************************************************************* 0009 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. * 0010 * All rights reserved. * 0011 * * 0012 * For the licensing terms see $ROOTSYS/LICENSE. * 0013 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0014 *************************************************************************/ 0015 0016 #ifndef ROOT_RNTupleProcessor 0017 #define ROOT_RNTupleProcessor 0018 0019 #include <ROOT/REntry.hxx> 0020 #include <ROOT/RError.hxx> 0021 #include <ROOT/RNTupleDescriptor.hxx> 0022 #include <ROOT/RNTupleJoinTable.hxx> 0023 #include <ROOT/RNTupleModel.hxx> 0024 #include <ROOT/RNTupleTypes.hxx> 0025 #include <ROOT/RNTupleProcessorEntry.hxx> 0026 #include <ROOT/RPageStorage.hxx> 0027 0028 #include <memory> 0029 #include <string> 0030 #include <string_view> 0031 #include <vector> 0032 0033 namespace ROOT { 0034 namespace Experimental { 0035 0036 namespace Internal { 0037 struct RNTupleProcessorEntryLoader; 0038 } // namespace Internal 0039 0040 // clang-format off 0041 /** 0042 \class ROOT::Experimental::RNTupleOpenSpec 0043 \ingroup NTuple 0044 \brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor. 0045 0046 An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the 0047 TDirectory (or any of its subclasses) that contains the RNTuple. 0048 0049 Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python. 
0050 */ 0051 // clang-format on 0052 class RNTupleOpenSpec { 0053 friend class RNTupleProcessor; 0054 friend class RNTupleSingleProcessor; 0055 friend class RNTupleJoinProcessor; 0056 0057 private: 0058 std::string fNTupleName; 0059 std::variant<std::string, TDirectory *> fStorage; 0060 0061 public: 0062 RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {} 0063 RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {} 0064 0065 std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const; 0066 }; 0067 0068 // clang-format off 0069 /** 0070 \class ROOT::Experimental::RNTupleProcessorOptionalPtr<T> 0071 \ingroup NTuple 0072 \brief The RNTupleProcessorOptionalPtr provides access to values from fields present in an RNTupleProcessor, with support 0073 and checks for missing values. 0074 */ 0075 // clang-format on 0076 template <typename T> 0077 class RNTupleProcessorOptionalPtr { 0078 friend class RNTupleProcessor; 0079 0080 private: 0081 Internal::RNTupleProcessorEntry *fProcessorEntry; 0082 Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex; 0083 0084 RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, 0085 Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx) 0086 : fProcessorEntry(processorEntry), fFieldIndex(fieldIdx) 0087 { 0088 } 0089 0090 public: 0091 ///////////////////////////////////////////////////////////////////////////// 0092 /// \brief Check if the pointer currently holds a valid value. 0093 bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); } 0094 0095 ///////////////////////////////////////////////////////////////////////////// 0096 /// \brief Get a shared pointer to the field value managed by the processor's entry. 0097 /// 0098 /// \return A `std::shared_ptr<T>` if the field is valid in the current entry, or a `nullptr` otherwise. 
0099 std::shared_ptr<T> GetPtr() const 0100 { 0101 if (fProcessorEntry->IsValidField(fFieldIndex)) 0102 return fProcessorEntry->GetPtr<T>(fFieldIndex); 0103 0104 return nullptr; 0105 } 0106 0107 ///////////////////////////////////////////////////////////////////////////// 0108 /// \brief Get a non-owning pointer to the field value managed by the processor's entry. 0109 /// 0110 /// \return A `T*` if the field is valid in the current entry, or a `nullptr` otherwise. 0111 T *GetRawPtr() const { return GetPtr().get(); } 0112 0113 ///////////////////////////////////////////////////////////////////////////// 0114 /// \brief Bind the value to `valuePtr`. 0115 /// 0116 /// \param[in] valuePtr Pointer to bind the value to. 0117 /// 0118 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for 0119 /// example when a field is not present in one of the chained processors or when during a join operation, no matching 0120 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading 0121 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing 0122 /// its data through this interface, to ensure that only valid data can be read. 0123 void BindRawPtr(T *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); } 0124 0125 ///////////////////////////////////////////////////////////////////////////// 0126 /// \brief Get a reference to the field value managed by the processor's entry. 0127 /// 0128 /// Throws an exception if the field is invalid in the processor's current entry. 
0129 const T &operator*() const 0130 { 0131 if (auto ptr = GetPtr()) 0132 return *ptr; 0133 else 0134 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) + 0135 "\" because it has no value for the current entry")); 0136 } 0137 0138 ///////////////////////////////////////////////////////////////////////////// 0139 /// \brief Access the field value managed by the processor's entry. 0140 /// 0141 /// Throws an exception if the field is invalid in the processor's current entry. 0142 const T *operator->() const 0143 { 0144 if (auto ptr = GetPtr()) 0145 return ptr.get(); 0146 else 0147 throw RException(R__FAIL("cannot read \"" + fProcessorEntry->FindFieldName(fFieldIndex) + 0148 "\" because it has no value for the current entry")); 0149 } 0150 }; 0151 0152 // clang-format off 0153 /** 0154 \class ROOT::Experimental::RNTupleProcessorOptionalPtr<void> 0155 \ingroup NTuple 0156 \brief Specialization of RNTupleProcessorOptionalPtr<T> for `void`-type pointers. 0157 */ 0158 // clang-format on 0159 template <> 0160 class RNTupleProcessorOptionalPtr<void> { 0161 friend class RNTupleProcessor; 0162 0163 private: 0164 Internal::RNTupleProcessorEntry *fProcessorEntry; 0165 Internal::RNTupleProcessorEntry::FieldIndex_t fFieldIndex; 0166 0167 RNTupleProcessorOptionalPtr(Internal::RNTupleProcessorEntry *processorEntry, 0168 Internal::RNTupleProcessorEntry::FieldIndex_t fieldIdx) 0169 : fProcessorEntry(processorEntry), fFieldIndex(fieldIdx) 0170 { 0171 } 0172 0173 public: 0174 ///////////////////////////////////////////////////////////////////////////// 0175 /// \brief Check if the pointer currently holds a valid value. 0176 bool HasValue() const { return fProcessorEntry->IsValidField(fFieldIndex); } 0177 0178 ///////////////////////////////////////////////////////////////////////////// 0179 /// \brief Get the pointer to the field value managed by the processor's entry. 
0180 /// 0181 /// \return A `std::shared_ptr<void>` if the field is valid in the current entry, or a `nullptr` otherwise. 0182 std::shared_ptr<void> GetPtr() const 0183 { 0184 if (fProcessorEntry->IsValidField(fFieldIndex)) 0185 return fProcessorEntry->GetPtr<void>(fFieldIndex); 0186 0187 return nullptr; 0188 } 0189 0190 ///////////////////////////////////////////////////////////////////////////// 0191 /// \brief Get a non-owning pointer to the field value managed by the processor's entry. 0192 /// 0193 /// \return A `void*` if the field is valid in the current entry, or a `nullptr` otherwise. 0194 void *GetRawPtr() const { return GetPtr().get(); } 0195 0196 ///////////////////////////////////////////////////////////////////////////// 0197 /// \brief Bind the value to `valuePtr`. 0198 /// 0199 /// \param[in] valuePtr Pointer to bind the value to. 0200 /// 0201 /// \warning Use this function with care! Values may not always be valid for every entry during processing, for 0202 /// example when a field is not present in one of the chained processors or when during a join operation, no matching 0203 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading 0204 /// invalid data. After binding a pointer to an `RNTupleProcessorOptionalPtr`, we *strongly* recommend only accessing 0205 /// its data through this interface, to ensure that only valid data can be read. 0206 void BindRawPtr(void *valuePtr) { fProcessorEntry->BindRawPtr(fFieldIndex, valuePtr); } 0207 }; 0208 0209 // clang-format off 0210 /** 0211 \class ROOT::Experimental::RNTupleProcessor 0212 \ingroup NTuple 0213 \brief Interface for iterating over entries of vertically ("chained") and/or horizontally ("joined") combined RNTuples. 
0214 0215 Example usage (see ntpl012_processor_chain.C and ntpl015_processor_join.C for bigger examples): 0216 0217 ~~~{.cpp} 0218 #include <ROOT/RNTupleProcessor.hxx> 0219 using ROOT::Experimental::RNTupleProcessor; 0220 using ROOT::Experimental::RNTupleOpenSpec; 0221 0222 std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}}; 0223 auto processor = RNTupleProcessor::CreateChain(ntuples); 0224 0225 auto pt = processor->RequestField<float>("pt"); 0226 0227 for (const auto idx : *processor) { 0228 std::cout << "event = " << idx << ", pt = " << *pt << std::endl; 0229 } 0230 ~~~ 0231 0232 An RNTupleProcessor is created either: 0233 1. By providing one or more RNTupleOpenSpecs, each of which contains the name and storage location of a single RNTuple; 0234 2. By providing a previously created RNTupleProcessor. 0235 0236 The RNTupleProcessor provides an iterator which gives access to the index of the current *global* entry of the 0237 processor, i.e. taking into account previously processed RNTuples. 0238 0239 Because the schemas of each RNTuple that are part of an RNTupleProcessor may not necessarily be identical, or because 0240 it can occur that entries are only partially complete in a join-based processor, field values may be marked as 0241 "invalid", at which point their data should not be read. This is handled by the RNTupleProcessorOptionalPtr 0242 that is returned by RequestField(). 
0243 */ 0244 // clang-format on 0245 class RNTupleProcessor { 0246 friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader; // for unit tests 0247 friend class RNTupleSingleProcessor; 0248 friend class RNTupleChainProcessor; 0249 friend class RNTupleJoinProcessor; 0250 0251 protected: 0252 std::string fProcessorName; 0253 std::unique_ptr<ROOT::RNTupleModel> fProtoModel = nullptr; 0254 std::shared_ptr<Internal::RNTupleProcessorEntry> fEntry = nullptr; 0255 std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fFieldIdxs; 0256 0257 /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public 0258 /// interface. 0259 ROOT::NTupleSize_t fNEntries = kInvalidNTupleIndex; 0260 0261 ROOT::NTupleSize_t fNEntriesProcessed = 0; //< Total number of entries processed so far 0262 ROOT::NTupleSize_t fCurrentEntryNumber = 0; //< Current processor entry number 0263 std::size_t fCurrentProcessorNumber = 0; //< Number of the currently open inner processor 0264 0265 ///////////////////////////////////////////////////////////////////////////// 0266 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting 0267 /// an existing one. 0268 virtual void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry) = 0; 0269 0270 ///////////////////////////////////////////////////////////////////////////// 0271 /// \brief Check if the processor already has been initialized. 0272 bool IsInitialized() const { return fProtoModel && fEntry; } 0273 0274 ///////////////////////////////////////////////////////////////////////////// 0275 /// \brief Connect fields to the page source of the processor's underlying RNTuple(s). 0276 /// 0277 /// \param[in] fieldIdxs Indices of the fields to connect. 0278 /// \param[in] provenance Provenance of the processor. 
0279 /// \param[in] updateFields Whether the fields in the entry need to be updated, because the current underlying 0280 /// RNTuple source changed. 0281 virtual void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs, 0282 const Internal::RNTupleProcessorProvenance &provenance, bool updateFields) = 0; 0283 0284 ///////////////////////////////////////////////////////////////////////////// 0285 /// \brief Load the entry identified by the provided entry number. 0286 /// 0287 /// \param[in] entryNumber Entry number to load 0288 /// 0289 /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise. 0290 virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) = 0; 0291 0292 ///////////////////////////////////////////////////////////////////////////// 0293 /// \brief Get the proto model used by the processor. 0294 /// 0295 /// A processor's proto model contains all fields that can be accessed and is inferred from the descriptors of the 0296 /// underlying RNTuples. It is used in RequestField() to check that the requested field is actually valid. 0297 const ROOT::RNTupleModel &GetProtoModel() const 0298 { 0299 assert(fProtoModel); 0300 return *fProtoModel; 0301 } 0302 0303 ///////////////////////////////////////////////////////////////////////////// 0304 /// \brief Get the total number of entries in this processor 0305 virtual ROOT::NTupleSize_t GetNEntries() = 0; 0306 0307 ///////////////////////////////////////////////////////////////////////////// 0308 /// \brief Check if a field exists on-disk and can be read by the processor. 0309 /// 0310 /// \param[in] fieldName Name of the field to check. 0311 virtual bool CanReadFieldFromDisk(std::string_view fieldName) = 0; 0312 0313 ///////////////////////////////////////////////////////////////////////////// 0314 /// \brief Add a field to the entry. 0315 /// 0316 /// 0317 /// \param[in] fieldName Name of the field to add. 
0318 /// \param[in] valuePtr Pointer to bind to the field's value in the entry. If this is a `nullptr`, a pointer will be 0319 /// created. 0320 /// \param[in] provenance Provenance of the processor. 0321 /// 0322 /// \return The index of the newly added field in the entry. 0323 /// 0324 /// In case the field was already present in the entry, the index of the existing field is returned. 0325 virtual ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> 0326 AddFieldToEntry(std::string_view fieldName, void *valuePtr, 0327 const Internal::RNTupleProcessorProvenance &provenance) = 0; 0328 0329 ///////////////////////////////////////////////////////////////////////////// 0330 /// \brief Add the entry mappings for this processor to the provided join table. 0331 /// 0332 /// \param[in] joinTable the join table to map the entries to. 0333 /// \param[in] entryOffset In case the entry mapping is added from a chain, the offset of the entry indexes to use 0334 /// with respect to the processor's position in the chain. 0335 virtual void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) = 0; 0336 0337 ///////////////////////////////////////////////////////////////////////////// 0338 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure(). 0339 /// 0340 /// \param[in,out] output Output stream to print to. 0341 virtual void PrintStructureImpl(std::ostream &output) const = 0; 0342 0343 ///////////////////////////////////////////////////////////////////////////// 0344 /// \brief Create a new base RNTupleProcessor. 0345 /// 0346 /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for 0347 /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary 0348 /// RNTuple for RNTupleJoinProcessor. 
0349 RNTupleProcessor(std::string_view processorName) : fProcessorName(processorName) {} 0350 0351 public: 0352 RNTupleProcessor(const RNTupleProcessor &) = delete; 0353 RNTupleProcessor(RNTupleProcessor &&) = delete; 0354 RNTupleProcessor &operator=(const RNTupleProcessor &) = delete; 0355 RNTupleProcessor &operator=(RNTupleProcessor &&) = delete; 0356 virtual ~RNTupleProcessor() = default; 0357 0358 ///////////////////////////////////////////////////////////////////////////// 0359 /// \brief Get the total number of entries processed so far. 0360 ROOT::NTupleSize_t GetNEntriesProcessed() const { return fNEntriesProcessed; } 0361 0362 ///////////////////////////////////////////////////////////////////////////// 0363 /// \brief Get the entry number that is currently being processed. 0364 ROOT::NTupleSize_t GetCurrentEntryNumber() const { return fCurrentEntryNumber; } 0365 0366 ///////////////////////////////////////////////////////////////////////////// 0367 /// \brief Get the number of the inner processor currently being read. 0368 /// 0369 /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned. 0370 std::size_t GetCurrentProcessorNumber() const { return fCurrentProcessorNumber; } 0371 0372 ///////////////////////////////////////////////////////////////////////////// 0373 /// \brief Get the name of the processor. 0374 /// 0375 /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying 0376 /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the 0377 /// primary processor for RNTupleJoinProcessor. 0378 const std::string &GetProcessorName() const { return fProcessorName; } 0379 0380 ///////////////////////////////////////////////////////////////////////////// 0381 /// \brief Request access to a field for reading during processing. 0382 /// 0383 /// \tparam T Type of the requested field. 
0384 /// 0385 /// \param[in] fieldName Name of the requested field. 0386 /// 0387 /// \return An RNTupleProcessorOptionalPtr, which provides access to the field's value. 0388 /// 0389 /// \warning Provide a `valuePtr` with care! Values may not always be valid for every entry during processing, for 0390 /// example when a field is not present in one of the chained processors or when during a join operation, no matching 0391 /// entry in the auxiliary processor can be found. Reading `valuePtr` as-is therefore comes with the risk of reading 0392 /// invalid data. After passing a pointer to `RequestField`, we *strongly* recommend only accessing its data through 0393 /// the interface of the returned `RNTupleProcessorOptionalPtr`, to ensure that only valid data can be read. 0394 template <typename T> 0395 RNTupleProcessorOptionalPtr<T> RequestField(std::string_view fieldName, void *valuePtr = nullptr) 0396 { 0397 Initialize(fEntry); 0398 // TODO handle alternative (compatible field types) 0399 auto fieldIdx = AddFieldToEntry(fieldName, valuePtr, Internal::RNTupleProcessorProvenance()).Unwrap(); 0400 return RNTupleProcessorOptionalPtr<T>(fEntry.get(), fieldIdx); 0401 } 0402 0403 ///////////////////////////////////////////////////////////////////////////// 0404 /// \brief Print a graphical representation of the processor composition. 0405 /// 0406 /// \param[in,out] output Stream to print to (default is stdout). 
0407 /// 0408 /// ### Example: 0409 /// The structure of a processor representing a join between a single primary RNTuple and a chain of two auxiliary 0410 /// RNTuples will be printed as follows: 0411 /// ~~~ 0412 /// +-----------------------------+ +-----------------------------+ 0413 /// | ntuple | | ntuple_aux | 0414 /// | ntuple.root | | ntuple_aux1.root | 0415 /// +-----------------------------+ +-----------------------------+ 0416 /// +-----------------------------+ 0417 /// | ntuple_aux | 0418 /// | ntuple_aux2.root | 0419 /// +-----------------------------+ 0420 /// ~~~ 0421 void PrintStructure(std::ostream &output = std::cout) { PrintStructureImpl(output); } 0422 0423 // clang-format off 0424 /** 0425 \class ROOT::Experimental::RNTupleProcessor::RIterator 0426 \ingroup NTuple 0427 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof. 0428 */ 0429 // clang-format on 0430 class RIterator { 0431 private: 0432 RNTupleProcessor &fProcessor; 0433 ROOT::NTupleSize_t fCurrentEntryNumber; 0434 0435 public: 0436 using iterator_category = std::input_iterator_tag; 0437 using iterator = RIterator; 0438 using value_type = ROOT::NTupleSize_t; 0439 using difference_type = std::ptrdiff_t; 0440 using pointer = ROOT::NTupleSize_t *; 0441 using reference = ROOT::NTupleSize_t &; 0442 0443 RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber) 0444 : fProcessor(processor), fCurrentEntryNumber(entryNumber) 0445 { 0446 if (!fProcessor.fEntry) { 0447 fCurrentEntryNumber = ROOT::kInvalidNTupleIndex; 0448 } 0449 // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already 0450 // know there is nothing to load. 
0451 if (fCurrentEntryNumber != ROOT::kInvalidNTupleIndex) { 0452 fProcessor.Connect(fProcessor.fEntry->GetFieldIndices(), Internal::RNTupleProcessorProvenance(), 0453 /*updateFields=*/false); 0454 fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber); 0455 } 0456 } 0457 0458 iterator operator++() 0459 { 0460 fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber + 1); 0461 return *this; 0462 } 0463 0464 iterator operator++(int) 0465 { 0466 auto obj = *this; 0467 ++(*this); 0468 return obj; 0469 } 0470 0471 reference operator*() { return fCurrentEntryNumber; } 0472 0473 friend bool operator!=(const iterator &lh, const iterator &rh) 0474 { 0475 return lh.fCurrentEntryNumber != rh.fCurrentEntryNumber; 0476 } 0477 friend bool operator==(const iterator &lh, const iterator &rh) 0478 { 0479 return lh.fCurrentEntryNumber == rh.fCurrentEntryNumber; 0480 } 0481 }; 0482 0483 RIterator begin() { return RIterator(*this, 0); } 0484 RIterator end() { return RIterator(*this, ROOT::kInvalidNTupleIndex); } 0485 0486 ///////////////////////////////////////////////////////////////////////////// 0487 /// \brief Create an RNTupleProcessor for a single RNTuple. 0488 /// 0489 /// \param[in] ntuple The name and storage location of the RNTuple to process. 0490 /// \param[in] processorName The name to give to the processor. If empty, the name of the input RNTuple is used. 0491 /// 0492 /// \return A pointer to the newly created RNTupleProcessor. 0493 static std::unique_ptr<RNTupleProcessor> Create(RNTupleOpenSpec ntuple, std::string_view processorName = ""); 0494 0495 ///////////////////////////////////////////////////////////////////////////// 0496 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples. 0497 /// 0498 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process. 0499 /// \param[in] processorName The name to give to the processor. If empty, the name of the first RNTuple is used. 
0500 /// 0501 /// \return A pointer to the newly created RNTupleProcessor. 0502 static std::unique_ptr<RNTupleProcessor> 0503 CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::string_view processorName = ""); 0504 0505 ///////////////////////////////////////////////////////////////////////////// 0506 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors. 0507 /// 0508 /// \param[in] innerProcessors A list with the processors to chain. 0509 /// \param[in] processorName The name to give to the processor. If empty, the name of the first inner processor is 0510 /// used. 0511 /// 0512 /// \return A pointer to the newly created RNTupleProcessor. 0513 static std::unique_ptr<RNTupleProcessor> 0514 CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors, std::string_view processorName = ""); 0515 0516 ///////////////////////////////////////////////////////////////////////////// 0517 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples. 0518 /// 0519 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential 0520 /// order. 0521 /// \param[in] auxNTuple The name and location of the RNTuple to join the primary RNTuple with. The order in which 0522 /// its entries are processed is determined by the primary RNTuple and doesn't necessarily have to be sequential. 0523 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned. 0524 /// The join is made based on the combined join field values, and therefore each field has to be present in each 0525 /// specified RNTuple. If an empty list is provided, it is assumed that the specified ntuple are fully aligned. 0526 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary RNTuple is used. 0527 /// 0528 /// \return A pointer to the newly created RNTupleProcessor. 
0529 static std::unique_ptr<RNTupleProcessor> CreateJoin(RNTupleOpenSpec primaryNTuple, RNTupleOpenSpec auxNTuple, 0530 const std::vector<std::string> &joinFields, 0531 std::string_view processorName = ""); 0532 0533 ///////////////////////////////////////////////////////////////////////////// 0534 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples. 0535 /// 0536 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order. 0537 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are 0538 /// processed is determined by the primary processor and doesn't necessarily have to be sequential. 0539 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned. 0540 /// The join is made based on the combined join field values, and therefore each field has to be present in each 0541 /// specified processors. If an empty list is provided, it is assumed that the specified processors are fully 0542 /// aligned. 0543 /// \param[in] processorName The name to give to the processor. If empty, the name of the primary processor is used. 0544 /// 0545 /// \return A pointer to the newly created RNTupleProcessor. 0546 static std::unique_ptr<RNTupleProcessor> 0547 CreateJoin(std::unique_ptr<RNTupleProcessor> primaryProcessor, std::unique_ptr<RNTupleProcessor> auxProcessor, 0548 const std::vector<std::string> &joinFields, std::string_view processorName = ""); 0549 }; 0550 0551 // clang-format off 0552 /** 0553 \class ROOT::Experimental::RNTupleSingleProcessor 0554 \ingroup NTuple 0555 \brief Processor specialization for processing a single RNTuple. 
0556 */ 0557 // clang-format on 0558 class RNTupleSingleProcessor : public RNTupleProcessor { 0559 friend class RNTupleProcessor; 0560 0561 private: 0562 RNTupleOpenSpec fNTupleSpec; 0563 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource; 0564 0565 ///////////////////////////////////////////////////////////////////////////// 0566 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting 0567 /// an existing one. 0568 /// 0569 /// At this point, the page source for the underlying RNTuple of the processor will be created and opened. 0570 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final; 0571 0572 ///////////////////////////////////////////////////////////////////////////// 0573 /// \brief Connect the provided fields indices in the entry to their on-disk fields. 0574 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs, 0575 const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance(), 0576 bool updateFields = false) final; 0577 0578 ///////////////////////////////////////////////////////////////////////////// 0579 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this 0580 /// processor). 0581 /// 0582 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry 0583 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; 0584 0585 ///////////////////////////////////////////////////////////////////////////// 0586 /// \brief Get the total number of entries in this processor. 0587 ROOT::NTupleSize_t GetNEntries() final 0588 { 0589 Initialize(); 0590 if (fNEntries == ROOT::kInvalidNTupleIndex) 0591 Connect(fFieldIdxs); 0592 return fNEntries; 0593 } 0594 0595 ///////////////////////////////////////////////////////////////////////////// 0596 /// \brief Check if a field exists on-disk and can be read by the processor. 
0597 /// 0598 /// \sa RNTupleProcessor::CanReadFieldFromDisk() 0599 bool CanReadFieldFromDisk(std::string_view fieldName) final; 0600 0601 ///////////////////////////////////////////////////////////////////////////// 0602 /// \brief Add a field to the entry. 0603 /// 0604 /// \sa RNTupleProcessor::AddFieldToEntry() 0605 ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> AddFieldToEntry( 0606 std::string_view fieldName, void *valuePtr = nullptr, 0607 const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final; 0608 0609 ///////////////////////////////////////////////////////////////////////////// 0610 /// \brief Add the entry mappings for this processor to the provided join table. 0611 /// 0612 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable 0613 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final; 0614 0615 ///////////////////////////////////////////////////////////////////////////// 0616 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure(). 0617 /// 0618 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl 0619 void PrintStructureImpl(std::ostream &output) const final; 0620 0621 ///////////////////////////////////////////////////////////////////////////// 0622 /// \brief Construct a new RNTupleProcessor for processing a single RNTuple. 0623 /// 0624 /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process. 0625 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is 0626 /// the name of the underlying RNTuple. 
0627 RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName); 0628 0629 public: 0630 RNTupleSingleProcessor(const RNTupleSingleProcessor &) = delete; 0631 RNTupleSingleProcessor(RNTupleSingleProcessor &&) = delete; 0632 RNTupleSingleProcessor &operator=(const RNTupleSingleProcessor &) = delete; 0633 RNTupleSingleProcessor &operator=(RNTupleSingleProcessor &&) = delete; 0634 ~RNTupleSingleProcessor() override 0635 { 0636 // The proto model needs to be deleted before fPageSource. 0637 fProtoModel.release(); 0638 }; 0639 }; 0640 0641 // clang-format off 0642 /** 0643 \class ROOT::Experimental::RNTupleChainProcessor 0644 \ingroup NTuple 0645 \brief Processor specialization for vertically combined (*chained*) RNTupleProcessors. 0646 */ 0647 // clang-format on 0648 class RNTupleChainProcessor : public RNTupleProcessor { 0649 friend class RNTupleProcessor; 0650 0651 private: 0652 std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors; 0653 std::vector<ROOT::NTupleSize_t> fInnerNEntries; 0654 0655 Internal::RNTupleProcessorProvenance fProvenance; 0656 0657 ///////////////////////////////////////////////////////////////////////////// 0658 /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting 0659 /// an existing one. 0660 void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final; 0661 0662 ///////////////////////////////////////////////////////////////////////////// 0663 /// \brief Connect the provided fields indices in the entry to their on-disk fields. 
0664 /// 0665 /// \sa RNTupleProcessor::Connect() 0666 void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs, 0667 const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance(), 0668 bool updateFields = false) final; 0669 0670 ///////////////////////////////////////////////////////////////////////////// 0671 /// \brief Update the entry to reflect any missing fields in the current inner processor. 0672 void ConnectInnerProcessor(std::size_t processorNumber); 0673 0674 ///////////////////////////////////////////////////////////////////////////// 0675 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this 0676 /// processor). 0677 /// 0678 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry 0679 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; 0680 0681 ///////////////////////////////////////////////////////////////////////////// 0682 /// \brief Get the total number of entries in this processor. 0683 /// 0684 /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly! 0685 ROOT::NTupleSize_t GetNEntries() final; 0686 0687 ///////////////////////////////////////////////////////////////////////////// 0688 /// \brief Check if a field exists on-disk and can be read by the processor. 0689 /// 0690 /// \sa RNTupleProcessor::CanReadFieldFromDisk() 0691 bool CanReadFieldFromDisk(std::string_view fieldName) final 0692 { 0693 return fInnerProcessors[fCurrentProcessorNumber]->CanReadFieldFromDisk(fieldName); 0694 } 0695 0696 ///////////////////////////////////////////////////////////////////////////// 0697 /// \brief Add a field to the entry. 
///
   /// \param[in] fieldName Name of the field to add.
   /// \param[in] valuePtr Optional pointer associated with the field; defaults to `nullptr`. See
   /// RNTupleProcessor::AddFieldToEntry() for its semantics.
   /// \param[in] provenance Provenance to use for the field; defaults to a default-constructed
   /// Internal::RNTupleProcessorProvenance.
   ///
   /// \sa RNTupleProcessor::AddFieldToEntry()
   ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> AddFieldToEntry(
      std::string_view fieldName, void *valuePtr = nullptr,
      const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Add the entry mappings for this processor to the provided join table.
   ///
   /// \param[in,out] joinTable The join table to which the entry mappings are added.
   /// \param[in] entryOffset Entry offset, `0` by default. NOTE(review): the exact meaning of the offset is defined by
   /// the base class; confirm there.
   ///
   /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable
   void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Processor-specific implementation for printing its structure, called by PrintStructure().
   ///
   /// \param[in,out] output The stream to print to.
   ///
   /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl
   void PrintStructureImpl(std::ostream &output) const final;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Construct a new RNTupleChainProcessor.
   ///
   /// \param[in] processors The inner processors to chain together.
   /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this
   /// is the name of the first inner processor.
   ///
   /// The inner processors are processed in the order in which they are specified.
RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName);

public:
   // A chain processor is neither copyable nor movable.
   RNTupleChainProcessor(const RNTupleChainProcessor &) = delete;
   RNTupleChainProcessor(RNTupleChainProcessor &&) = delete;
   RNTupleChainProcessor &operator=(const RNTupleChainProcessor &) = delete;
   RNTupleChainProcessor &operator=(RNTupleChainProcessor &&) = delete;
   ~RNTupleChainProcessor() override = default;
};

// clang-format off
/**
\class ROOT::Experimental::RNTupleJoinProcessor
\ingroup NTuple
\brief Processor specialization for horizontally combined (*joined*) RNTupleProcessors.
*/
// clang-format on
class RNTupleJoinProcessor : public RNTupleProcessor {
   friend class RNTupleProcessor;

private:
   /// The primary processor; it is consulted first when checking whether a field can be read from disk.
   std::unique_ptr<RNTupleProcessor> fPrimaryProcessor;
   /// The auxiliary processor; its fields are addressed as `<auxiliary processor name>.<field name>`.
   std::unique_ptr<RNTupleProcessor> fAuxiliaryProcessor;

   /// NOTE(review): presumably the names of the fields on which the join is made; confirm against the constructor.
   std::vector<std::string> fJoinFieldNames;
   /// NOTE(review): presumably the entry indices of the join fields; confirm against the implementation.
   std::set<Internal::RNTupleProcessorEntry::FieldIndex_t> fJoinFieldIdxs;

   std::unique_ptr<Internal::RNTupleJoinTable> fJoinTable;
   /// Starts out as `false`; NOTE(review): presumably set once `fJoinTable` has been populated.
   bool fJoinTableIsBuilt = false;

   /// NOTE(review): presumably the entry indices of the fields that belong to the auxiliary processor.
   std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> fAuxiliaryFieldIdxs;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Initialize the processor, by setting `fProtoModel` and creating an (initially empty) `fEntry`, or setting
   /// an existing one.
   ///
   /// \param[in] entry An existing entry to use, or `nullptr` (the default) to create a new one.
   void Initialize(std::shared_ptr<Internal::RNTupleProcessorEntry> entry = nullptr) final;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Connect the provided field indices in the entry to their on-disk fields.
///
   /// \param[in] fieldIdxs Indices of the fields in the entry that should be connected.
   /// \param[in] provenance Provenance to use; defaults to a default-constructed provenance.
   /// \param[in] updateFields `false` by default. See RNTupleProcessor::Connect() for its meaning.
   ///
   /// \sa RNTupleProcessor::Connect()
   void Connect(const std::unordered_set<Internal::RNTupleProcessorEntry::FieldIndex_t> &fieldIdxs,
                const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance(),
                bool updateFields = false) final;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Load the entry identified by the provided entry number of the primary processor.
   ///
   /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry
   ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Get the total number of entries in this processor.
   ROOT::NTupleSize_t GetNEntries() final;

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Set the processor's proto model by combining the primary and auxiliary models.
   ///
   /// \param[in] primaryModel The proto model of the primary processor.
   /// \param[in] auxModel The proto model of the auxiliary processor.
   ///
   /// To prevent field name clashes when the models have fields with duplicate names, the fields from the auxiliary
   /// model are stored as an anonymous record, and subsequently registered as subfields in the join model.
   /// This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`.
   void SetProtoModel(std::unique_ptr<ROOT::RNTupleModel> primaryModel, std::unique_ptr<ROOT::RNTupleModel> auxModel);

   /////////////////////////////////////////////////////////////////////////////
   /// \brief Set the validity for all fields in the auxiliary processor at once.
0791 void SetAuxiliaryFieldValidity(bool validity); 0792 0793 ///////////////////////////////////////////////////////////////////////////// 0794 /// \brief Check if a field exists on-disk and can be read by the processor. 0795 /// 0796 /// \sa RNTupleProcessor::CanReadFieldFromDisk() 0797 bool CanReadFieldFromDisk(std::string_view fieldName) final 0798 { 0799 if (!fPrimaryProcessor->CanReadFieldFromDisk(fieldName)) { 0800 if (fieldName.find(fAuxiliaryProcessor->GetProcessorName()) == 0) 0801 fieldName = fieldName.substr(fAuxiliaryProcessor->GetProcessorName().size() + 1); 0802 return fAuxiliaryProcessor->CanReadFieldFromDisk(fieldName); 0803 } 0804 0805 return true; 0806 } 0807 0808 ///////////////////////////////////////////////////////////////////////////// 0809 /// \brief Add a field to the entry. 0810 /// 0811 /// \sa RNTupleProcessor::AddFieldToEntry() 0812 ROOT::RResult<Internal::RNTupleProcessorEntry::FieldIndex_t> AddFieldToEntry( 0813 std::string_view fieldName, void *valuePtr = nullptr, 0814 const Internal::RNTupleProcessorProvenance &provenance = Internal::RNTupleProcessorProvenance()) final; 0815 0816 ///////////////////////////////////////////////////////////////////////////// 0817 /// \brief Add the entry mappings for this processor to the provided join table. 0818 /// 0819 /// \sa ROOT::Experimental::RNTupleProcessor::AddEntriesToJoinTable 0820 void AddEntriesToJoinTable(Internal::RNTupleJoinTable &joinTable, ROOT::NTupleSize_t entryOffset = 0) final; 0821 0822 ///////////////////////////////////////////////////////////////////////////// 0823 /// \brief Processor-specific implementation for printing its structure, called by PrintStructure(). 0824 /// 0825 /// \sa ROOT::Experimental::RNTupleProcessor::PrintStructureImpl 0826 void PrintStructureImpl(std::ostream &output) const final; 0827 0828 ///////////////////////////////////////////////////////////////////////////// 0829 /// \brief Construct a new RNTupleJoinProcessor. 
0830 /// \param[in] primaryProcessor The primary processor. Its entries are processed in sequential order. 0831 /// \param[in] auxProcessor The processor to join the primary processor with. The order in which its entries are 0832 /// processed is determined by the primary processor and doesn't necessarily have to be sequential. 0833 /// \param[in] joinFields The names of the fields on which to join, in case the specified processors are unaligned. 0834 /// The join is made based on the combined join field values, and therefore each field has to be present in each 0835 /// specified processor. If an empty list is provided, it is assumed that the processors are fully aligned. 0836 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this 0837 /// is the name of the primary processor. 0838 RNTupleJoinProcessor(std::unique_ptr<RNTupleProcessor> primaryProcessor, 0839 std::unique_ptr<RNTupleProcessor> auxProcessor, const std::vector<std::string> &joinFields, 0840 std::string_view processorName); 0841 0842 public: 0843 RNTupleJoinProcessor(const RNTupleJoinProcessor &) = delete; 0844 RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &) = delete; 0845 RNTupleJoinProcessor(RNTupleJoinProcessor &&) = delete; 0846 RNTupleJoinProcessor operator=(RNTupleJoinProcessor &&) = delete; 0847 ~RNTupleJoinProcessor() override = default; 0848 }; 0849 0850 } // namespace Experimental 0851 } // namespace ROOT 0852 0853 #endif // ROOT_RNTupleProcessor
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|