![]() |
|
|||
Warning, file /include/root/ROOT/RNTupleProcessor.hxx was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 /// \file ROOT/RNTupleProcessor.hxx 0002 /// \ingroup NTuple 0003 /// \author Florine de Geus <florine.de.geus@cern.ch> 0004 /// \date 2024-03-26 0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback 0006 /// is welcome! 0007 0008 /************************************************************************* 0009 * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers. * 0010 * All rights reserved. * 0011 * * 0012 * For the licensing terms see $ROOTSYS/LICENSE. * 0013 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0014 *************************************************************************/ 0015 0016 #ifndef ROOT_RNTupleProcessor 0017 #define ROOT_RNTupleProcessor 0018 0019 #include <ROOT/REntry.hxx> 0020 #include <ROOT/RError.hxx> 0021 #include <ROOT/RFieldToken.hxx> 0022 #include <ROOT/RNTupleDescriptor.hxx> 0023 #include <ROOT/RNTupleJoinTable.hxx> 0024 #include <ROOT/RNTupleModel.hxx> 0025 #include <ROOT/RNTupleUtil.hxx> 0026 #include <ROOT/RPageStorage.hxx> 0027 0028 #include <memory> 0029 #include <string> 0030 #include <string_view> 0031 #include <vector> 0032 0033 namespace ROOT { 0034 namespace Experimental { 0035 0036 namespace Internal { 0037 struct RNTupleProcessorEntryLoader; 0038 } // namespace Internal 0039 0040 // clang-format off 0041 /** 0042 \class ROOT::Experimental::RNTupleOpenSpec 0043 \ingroup NTuple 0044 \brief Specification of the name and location of an RNTuple, used for creating a new RNTupleProcessor. 0045 0046 An RNTupleOpenSpec can be created by providing either a string with a path to the ROOT file or a pointer to the 0047 TDirectory (or any of its subclasses) that contains the RNTuple. 0048 0049 Note that the RNTupleOpenSpec is *write-only*, to prevent usability issues with Python. 0050 */ 0051 // clang-format on 0052 class RNTupleOpenSpec { 0053 friend class RNTupleProcessor; 0054 friend class RNTupleSingleProcessor; 0055 friend class RNTupleJoinProcessor; 0056 0057 private: 0058 std::string fNTupleName; 0059 std::variant<std::string, TDirectory *> fStorage; 0060 0061 public: 0062 RNTupleOpenSpec(std::string_view n, TDirectory *s) : fNTupleName(n), fStorage(s) {} 0063 RNTupleOpenSpec(std::string_view n, const std::string &s) : fNTupleName(n), fStorage(s) {} 0064 0065 std::unique_ptr<ROOT::Internal::RPageSource> CreatePageSource() const; 0066 }; 0067 0068 // clang-format off 0069 /** 0070 \class ROOT::Experimental::RNTupleProcessor 0071 \ingroup NTuple 0072 \brief Interface for iterating over entries of RNTuples and vertically concatenated RNTuples (chains). 0073 0074 Example usage (see ntpl012_processor.C for a full example): 0075 0076 ~~~{.cpp} 0077 #include <ROOT/RNTupleProcessor.hxx> 0078 using ROOT::Experimental::RNTupleProcessor; 0079 using ROOT::Experimental::RNTupleOpenSpec; 0080 0081 std::vector<RNTupleOpenSpec> ntuples = {{"ntuple1", "ntuple1.root"}, {"ntuple2", "ntuple2.root"}}; 0082 auto processor = RNTupleProcessor::CreateChain(ntuples); 0083 0084 for (const auto &entry : processor) { 0085 std::cout << "pt = " << *entry.GetPtr<float>("pt") << std::endl; 0086 } 0087 ~~~ 0088 0089 An RNTupleProcessor is created by providing one or more RNTupleOpenSpecs, each of which contains the name and storage 0090 location of a single RNTuple. The RNTuples are processed in the order in which they were provided. 0091 0092 The RNTupleProcessor constructor also (optionally) accepts an RNTupleModel, which determines which fields should be 0093 read. If no model is provided, a default model based on the descriptor of the first specified RNTuple will be used. 0094 If a field that was present in the first RNTuple is not found in a subsequent one, an error will be thrown. 0095 0096 The RNTupleProcessor provides an iterator which gives access to the REntry containing the field data for the current 0097 entry. Additional bookkeeping information can be obtained through the RNTupleProcessor itself. 0098 */ 0099 // clang-format on 0100 class RNTupleProcessor { 0101 friend struct ROOT::Experimental::Internal::RNTupleProcessorEntryLoader; // for unit tests 0102 friend class RNTupleSingleProcessor; 0103 friend class RNTupleChainProcessor; 0104 friend class RNTupleJoinProcessor; 0105 0106 protected: 0107 // clang-format off 0108 /** 0109 \class ROOT::Experimental::RNTupleProcessor::RFieldContext 0110 \ingroup NTuple 0111 \brief Manager for a field as part of the RNTupleProcessor. 0112 0113 An RFieldContext contains two fields: a proto-field which is not connected to any page source but serves as the 0114 blueprint for this particular field, and a concrete field that is connected to the page source currently connected 0115 to the RNTupleProcessor for reading. When a new page source is connected, the current concrete field gets reset. A 0116 new concrete field that is connected to this new page source is subsequently created from the proto-field. 0117 */ 0118 // clang-format on 0119 class RFieldContext { 0120 friend class RNTupleProcessor; 0121 friend class RNTupleSingleProcessor; 0122 friend class RNTupleChainProcessor; 0123 friend class RNTupleJoinProcessor; 0124 0125 private: 0126 std::unique_ptr<ROOT::RFieldBase> fProtoField; 0127 std::unique_ptr<ROOT::RFieldBase> fConcreteField; 0128 ROOT::RFieldToken fToken; 0129 // Which RNTuple the field belongs to, in case the field belongs to an auxiliary RNTuple, according to the order 0130 // in which it was specified. For chained RNTuples, this value will always be 0. 0131 std::size_t fNTupleIdx; 0132 0133 public: 0134 RFieldContext(std::unique_ptr<ROOT::RFieldBase> protoField, ROOT::RFieldToken token, std::size_t ntupleIdx = 0) 0135 : fProtoField(std::move(protoField)), fToken(token), fNTupleIdx(ntupleIdx) 0136 { 0137 } 0138 0139 const ROOT::RFieldBase &GetProtoField() const { return *fProtoField; } 0140 /// Concrete pages need to be reset explicitly before the page source they belong to is destroyed. 0141 void ResetConcreteField() { fConcreteField.reset(); } 0142 void SetConcreteField() { fConcreteField = fProtoField->Clone(fProtoField->GetFieldName()); } 0143 bool IsAuxiliary() const { return fNTupleIdx > 0; } 0144 }; 0145 0146 std::string fProcessorName; 0147 std::vector<RNTupleOpenSpec> fNTuples; 0148 std::unique_ptr<ROOT::REntry> fEntry; 0149 std::unique_ptr<ROOT::Internal::RPageSource> fPageSource; 0150 /// Maps the (qualified) field name to its corresponding field context. 0151 std::unordered_map<std::string, RFieldContext> fFieldContexts; 0152 0153 std::unique_ptr<ROOT::RNTupleModel> fModel; 0154 0155 /// Total number of entries. Only to be used internally by the processor, not meant to be exposed in the public 0156 /// interface. 0157 ROOT::NTupleSize_t fNEntries = kInvalidNTupleIndex; 0158 0159 ROOT::NTupleSize_t fNEntriesProcessed = 0; //< Total number of entries processed so far 0160 ROOT::NTupleSize_t fCurrentEntryNumber = 0; //< Current processor entry number 0161 std::size_t fCurrentProcessorNumber = 0; //< Number of the currently open inner processor 0162 0163 ///////////////////////////////////////////////////////////////////////////// 0164 /// \brief Create and connect a concrete field to the current page source, based on its proto field. 0165 void ConnectField(RFieldContext &fieldContext, ROOT::Internal::RPageSource &pageSource, ROOT::REntry &entry); 0166 0167 ///////////////////////////////////////////////////////////////////////////// 0168 /// \brief Load the entry identified by the provided entry number. 0169 /// 0170 /// \param[in] entryNumber Entry number to load 0171 /// 0172 /// \return `entryNumber` if the entry was successfully loaded, `kInvalidNTupleIndex` otherwise. 0173 virtual ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) = 0; 0174 0175 ///////////////////////////////////////////////////////////////////////////// 0176 /// \brief Point the entry's field values of the processor to the pointers from the provided entry. 0177 /// 0178 /// \param[in] entry The entry whose field values to use. 0179 virtual void SetEntryPointers(const ROOT::REntry &entry) = 0; 0180 0181 ///////////////////////////////////////////////////////////////////////////// 0182 /// \brief Get the total number of entries in this processor 0183 virtual ROOT::NTupleSize_t GetNEntries() = 0; 0184 0185 ///////////////////////////////////////////////////////////////////////////// 0186 /// \brief Create a new base RNTupleProcessor. 0187 /// 0188 /// \param[in] processorName Name of the processor. By default, this is the name of the underlying RNTuple for 0189 /// RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the primary 0190 /// RNTuple for RNTupleJoinProcessor. 0191 /// \param[in] model The RNTupleModel representing the entries returned by the processor. 0192 /// 0193 /// \note Before processing, a model *must* exist. However, this is handled downstream by the RNTupleProcessor's 0194 /// factory functions (CreateSingle, CreateChain and CreateJoin) and constructors. 0195 RNTupleProcessor(std::string_view processorName, std::unique_ptr<ROOT::RNTupleModel> model) 0196 : fProcessorName(processorName), fModel(std::move(model)) 0197 { 0198 } 0199 0200 public: 0201 RNTupleProcessor(const RNTupleProcessor &) = delete; 0202 RNTupleProcessor(RNTupleProcessor &&) = delete; 0203 RNTupleProcessor &operator=(const RNTupleProcessor &) = delete; 0204 RNTupleProcessor &operator=(RNTupleProcessor &&) = delete; 0205 virtual ~RNTupleProcessor() = default; 0206 0207 ///////////////////////////////////////////////////////////////////////////// 0208 /// \brief Get the total number of entries processed so far. 0209 ROOT::NTupleSize_t GetNEntriesProcessed() const { return fNEntriesProcessed; } 0210 0211 ///////////////////////////////////////////////////////////////////////////// 0212 /// \brief Get the entry number that is currently being processed. 0213 ROOT::NTupleSize_t GetCurrentEntryNumber() const { return fCurrentEntryNumber; } 0214 0215 ///////////////////////////////////////////////////////////////////////////// 0216 /// \brief Get the number of the inner processor currently being read. 0217 /// 0218 /// This method is only relevant for the RNTupleChainProcessor. For the other processors, 0 is always returned. 0219 std::size_t GetCurrentProcessorNumber() const { return fCurrentProcessorNumber; } 0220 0221 ///////////////////////////////////////////////////////////////////////////// 0222 /// \brief Get the name of the processor. 0223 /// 0224 /// Unless this name was explicitly specified during creation of the processor, this is the name of the underlying 0225 /// RNTuple for RNTupleSingleProcessor, the name of the first processor for RNTupleChainProcessor, or the name of the 0226 /// primary RNTuple for RNTupleJoinProcessor. 0227 const std::string &GetProcessorName() const { return fProcessorName; } 0228 0229 ///////////////////////////////////////////////////////////////////////////// 0230 /// \brief Get the model used by the processor. 0231 const ROOT::RNTupleModel &GetModel() const { return *fModel; } 0232 0233 ///////////////////////////////////////////////////////////////////////////// 0234 /// \brief Get a reference to the entry used by the processor. 0235 /// 0236 /// \return A reference to the entry used by the processor. 0237 const ROOT::REntry &GetEntry() const { return *fEntry; } 0238 0239 // clang-format off 0240 /** 0241 \class ROOT::Experimental::RNTupleProcessor::RIterator 0242 \ingroup NTuple 0243 \brief Iterator over the entries of an RNTuple, or vertical concatenation thereof. 0244 */ 0245 // clang-format on 0246 class RIterator { 0247 private: 0248 RNTupleProcessor &fProcessor; 0249 ROOT::NTupleSize_t fCurrentEntryNumber; 0250 0251 public: 0252 using iterator_category = std::forward_iterator_tag; 0253 using iterator = RIterator; 0254 using value_type = ROOT::REntry; 0255 using difference_type = std::ptrdiff_t; 0256 using pointer = ROOT::REntry *; 0257 using reference = const ROOT::REntry &; 0258 0259 RIterator(RNTupleProcessor &processor, ROOT::NTupleSize_t entryNumber) 0260 : fProcessor(processor), fCurrentEntryNumber(entryNumber) 0261 { 0262 // This constructor is called with kInvalidNTupleIndex for RNTupleProcessor::end(). In that case, we already 0263 // know there is nothing to load. 0264 if (fCurrentEntryNumber != ROOT::kInvalidNTupleIndex) { 0265 fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber); 0266 } 0267 } 0268 0269 iterator operator++() 0270 { 0271 fCurrentEntryNumber = fProcessor.LoadEntry(fCurrentEntryNumber + 1); 0272 return *this; 0273 } 0274 0275 iterator operator++(int) 0276 { 0277 auto obj = *this; 0278 ++(*this); 0279 return obj; 0280 } 0281 0282 reference operator*() { return fProcessor.GetEntry(); } 0283 0284 friend bool operator!=(const iterator &lh, const iterator &rh) 0285 { 0286 return lh.fCurrentEntryNumber != rh.fCurrentEntryNumber; 0287 } 0288 friend bool operator==(const iterator &lh, const iterator &rh) 0289 { 0290 return lh.fCurrentEntryNumber == rh.fCurrentEntryNumber; 0291 } 0292 }; 0293 0294 RIterator begin() { return RIterator(*this, 0); } 0295 RIterator end() { return RIterator(*this, ROOT::kInvalidNTupleIndex); } 0296 0297 ///////////////////////////////////////////////////////////////////////////// 0298 /// \brief Create an RNTupleProcessor for a single RNTuple. 0299 /// 0300 /// \param[in] ntuple The name and storage location of the RNTuple to process. 0301 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided, 0302 /// one will be created based on the descriptor of the first ntuple specified. 0303 /// 0304 /// \return A pointer to the newly created RNTupleProcessor. 0305 static std::unique_ptr<RNTupleProcessor> 0306 Create(RNTupleOpenSpec ntuple, std::unique_ptr<ROOT::RNTupleModel> model = nullptr); 0307 0308 ///////////////////////////////////////////////////////////////////////////// 0309 /// \brief Create an RNTupleProcessor for a single RNTuple. 0310 /// 0311 /// \param[in] ntuple The name and storage location of the RNTuple to process. 0312 /// \param[in] processorName The name to give to the processor. Use 0313 /// Create(const RNTupleOpenSpec &, std::unique_ptr<RNTupleModel>) to automatically use the name of the input RNTuple 0314 /// instead. 0315 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided, 0316 /// one will be created based on the descriptor of the first ntuple specified. 0317 /// 0318 /// \return A pointer to the newly created RNTupleProcessor. 0319 static std::unique_ptr<RNTupleProcessor> 0320 Create(RNTupleOpenSpec ntuple, std::string_view processorName, std::unique_ptr<ROOT::RNTupleModel> model = nullptr); 0321 0322 ///////////////////////////////////////////////////////////////////////////// 0323 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples. 0324 /// 0325 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process. 0326 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided, 0327 /// one will be created based on the descriptor of the first RNTuple specified. 0328 /// 0329 /// \return A pointer to the newly created RNTupleProcessor. 0330 static std::unique_ptr<RNTupleProcessor> 0331 CreateChain(std::vector<RNTupleOpenSpec> ntuples, std::unique_ptr<ROOT::RNTupleModel> model = nullptr); 0332 0333 ///////////////////////////////////////////////////////////////////////////// 0334 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of RNTuples. 0335 /// 0336 /// \param[in] ntuples A list specifying the names and locations of the RNTuples to process. 0337 /// \param[in] processorName The name to give to the processor. Use 0338 /// CreateChain(const RNTupleOpenSpec &, std::unique_ptr<RNTupleModel>) to automatically use the name of the first 0339 /// input RNTuple instead. 0340 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided, 0341 /// one will be created based on the descriptor of the first RNTuple specified. 0342 /// 0343 /// \return A pointer to the newly created RNTupleProcessor. 0344 static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<RNTupleOpenSpec> ntuples, 0345 std::string_view processorName, 0346 std::unique_ptr<ROOT::RNTupleModel> model = nullptr); 0347 0348 ///////////////////////////////////////////////////////////////////////////// 0349 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors. 0350 /// 0351 /// \param[in] innerProcessors A list with the processors to chain. 0352 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided, 0353 /// one will be created based on the model used by the first inner processor. 0354 /// 0355 /// \return A pointer to the newly created RNTupleProcessor. 0356 static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors, 0357 std::unique_ptr<ROOT::RNTupleModel> model = nullptr); 0358 0359 ///////////////////////////////////////////////////////////////////////////// 0360 /// \brief Create an RNTupleProcessor for a *chain* (i.e., a vertical combination) of other RNTupleProcessors. 0361 /// 0362 /// \param[in] innerProcessors A list with the processors to chain. 0363 /// \param[in] processorName The name to give to the processor. Use 0364 /// CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>>, std::unique_ptr<RNTupleModel>) to automatically use 0365 /// the name of the first inner processor instead. 0366 /// \param[in] model An RNTupleModel specifying which fields can be read by the processor. If no model is provided, 0367 /// one will be created based on the model used by the first inner processor. 0368 /// 0369 /// \return A pointer to the newly created RNTupleProcessor. 0370 static std::unique_ptr<RNTupleProcessor> CreateChain(std::vector<std::unique_ptr<RNTupleProcessor>> innerProcessors, 0371 std::string_view processorName, 0372 std::unique_ptr<ROOT::RNTupleModel> model = nullptr); 0373 0374 ///////////////////////////////////////////////////////////////////////////// 0375 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples. 0376 /// 0377 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential 0378 /// order. 0379 /// \param[in] auxNTuples The names and locations of the RNTuples to join the primary RNTuple with. The order in 0380 /// which their entries are processed are determined by the primary RNTuple and doesn't necessarily have to be 0381 /// sequential. 0382 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned. 0383 /// The join is made based on the combined join field values, and therefore each field has to be present in each 0384 /// specified RNTuple. If an empty list is provided, it is assumed that the specified ntuple are fully aligned. 0385 /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple can be read by the 0386 /// processor. If no model is provided, one will be created based on the descriptor of the primary RNTuple. 0387 /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary RNTuple 0388 /// (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the models will 0389 /// be created based on the descriptors of their corresponding RNTuples. This also applies to individual auxiliary 0390 /// RNTuples for which the provided model is a `nullptr`. 0391 /// 0392 /// \return A pointer to the newly created RNTupleProcessor. 0393 static std::unique_ptr<RNTupleProcessor> 0394 CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples, 0395 const std::vector<std::string> &joinFields, std::unique_ptr<ROOT::RNTupleModel> primaryModel = nullptr, 0396 std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels = {}); 0397 0398 ///////////////////////////////////////////////////////////////////////////// 0399 /// \brief Create an RNTupleProcessor for a *join* (i.e., a horizontal combination) of RNTuples. 0400 /// 0401 /// \param[in] primaryNTuple The name and location of the primary RNTuple. Its entries are processed in sequential 0402 /// order. 0403 /// \param[in] auxNTuples The names and locations of the RNTuples to join the primary RNTuple with. The order in 0404 /// which their entries are processed are determined by the primary RNTuple and doesn't necessarily have to be 0405 /// sequential. 0406 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned. 0407 /// The join is made based on the combined join field values, and therefore each field has to be present in each 0408 /// specified RNTuple. If an empty list is provided, it is assumed that the specified RNTuple are fully aligned. 0409 /// \param[in] processorName The name to give to the processor. Use 0410 /// CreateJoin(const RNTupleOpenSpec &, const std::vector<RNTupleOpenSpec> &, const std::vector<std::string> &, 0411 /// std::unique_ptr<RNTupleModel>, std::vector<std::unique_ptr<RNTupleModel>>) to automatically use the name of the 0412 /// input RNTuple instead. 0413 /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple 0414 /// can be read by the processor. If no model is provided, one will be created based on the descriptor of the primary 0415 /// RNTuple. 0416 /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary 0417 /// RNTuple (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the 0418 /// models will be created based on the descriptors of their corresponding RNTuples. This also applies to individual 0419 /// auxiliary RNTuples for which the provided model is a `nullptr`. 0420 /// 0421 /// \return A pointer to the newly created RNTupleProcessor. 0422 static std::unique_ptr<RNTupleProcessor> 0423 CreateJoin(const RNTupleOpenSpec &primaryNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples, 0424 const std::vector<std::string> &joinFields, std::string_view processorName, 0425 std::unique_ptr<ROOT::RNTupleModel> primaryModel = nullptr, 0426 std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels = {}); 0427 }; 0428 0429 // clang-format off 0430 /** 0431 \class ROOT::Experimental::RNTupleSingleProcessor 0432 \ingroup NTuple 0433 \brief Processor specialization for processing a single RNTuple. 0434 */ 0435 // clang-format on 0436 class RNTupleSingleProcessor : public RNTupleProcessor { 0437 friend class RNTupleProcessor; 0438 0439 private: 0440 RNTupleOpenSpec fNTupleSpec; 0441 0442 ///////////////////////////////////////////////////////////////////////////// 0443 /// \brief Connect the page source of the underlying RNTuple. 0444 void Connect(); 0445 0446 ///////////////////////////////////////////////////////////////////////////// 0447 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this 0448 /// processor). 0449 /// 0450 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry 0451 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; 0452 0453 ///////////////////////////////////////////////////////////////////////////// 0454 /// \sa ROOT::Experimental::RNTupleProcessor::SetEntryPointers. 0455 void SetEntryPointers(const ROOT::REntry &entry) final; 0456 0457 ///////////////////////////////////////////////////////////////////////////// 0458 /// \brief Get the total number of entries in this processor. 0459 ROOT::NTupleSize_t GetNEntries() final 0460 { 0461 Connect(); 0462 return fNEntries; 0463 } 0464 0465 ///////////////////////////////////////////////////////////////////////////// 0466 /// \brief Construct a new RNTupleProcessor for processing a single RNTuple. 0467 /// 0468 /// \param[in] ntuple The source specification (name and storage location) for the RNTuple to process. 0469 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::Create, this is 0470 /// the name of the underlying RNTuple. 0471 /// \param[in] model The model that specifies which fields should be read by the processor. 0472 RNTupleSingleProcessor(RNTupleOpenSpec ntuple, std::string_view processorName, 0473 std::unique_ptr<ROOT::RNTupleModel> model); 0474 }; 0475 0476 // clang-format off 0477 /** 0478 \class ROOT::Experimental::RNTupleChainProcessor 0479 \ingroup NTuple 0480 \brief Processor specialization for vertically combined (*chained*) RNTupleProcessors. 0481 */ 0482 // clang-format on 0483 class RNTupleChainProcessor : public RNTupleProcessor { 0484 friend class RNTupleProcessor; 0485 0486 private: 0487 std::vector<std::unique_ptr<RNTupleProcessor>> fInnerProcessors; 0488 std::vector<ROOT::NTupleSize_t> fInnerNEntries; 0489 0490 ///////////////////////////////////////////////////////////////////////////// 0491 /// \brief Load the entry identified by the provided (global) entry number (i.e., considering all RNTuples in this 0492 /// processor). 0493 /// 0494 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry 0495 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; 0496 0497 ///////////////////////////////////////////////////////////////////////////// 0498 /// \sa ROOT::Experimental::RNTupleProcessor::SetEntryPointers. 0499 void SetEntryPointers(const ROOT::REntry &) final; 0500 0501 ///////////////////////////////////////////////////////////////////////////// 0502 /// \brief Get the total number of entries in this processor. 0503 /// 0504 /// \note This requires opening all underlying RNTuples being processed in the chain, and could become costly! 0505 ROOT::NTupleSize_t GetNEntries() final; 0506 0507 ///////////////////////////////////////////////////////////////////////////// 0508 /// \brief Construct a new RNTupleChainProcessor. 0509 /// 0510 /// \param[in] ntuples The source specification (name and storage location) for each RNTuple to process. 0511 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateChain, this 0512 /// is the name of the first inner processor. 0513 /// \param[in] model The model that specifies which fields should be read by the processor. The pointer returned by 0514 /// RNTupleModel::MakeField can be used to access a field's value during the processor iteration. When no model is 0515 /// specified, it is created from the descriptor of the first RNTuple specified in `ntuples`. 0516 /// 0517 /// RNTuples are processed in the order in which they are specified. 0518 RNTupleChainProcessor(std::vector<std::unique_ptr<RNTupleProcessor>> processors, std::string_view processorName, 0519 std::unique_ptr<ROOT::RNTupleModel> model); 0520 }; 0521 0522 // clang-format off 0523 /** 0524 \class ROOT::Experimental::RNTupleJoinProcessor 0525 \ingroup NTuple 0526 \brief Processor specialization for horizontally combined (*joined*) RNTuples. 0527 */ 0528 // clang-format on 0529 class RNTupleJoinProcessor : public RNTupleProcessor { 0530 friend class RNTupleProcessor; 0531 0532 private: 0533 std::vector<std::unique_ptr<ROOT::Internal::RPageSource>> fAuxiliaryPageSources; 0534 /// Tokens representing the join fields present in the main RNTuple 0535 std::vector<ROOT::RFieldToken> fJoinFieldTokens; 0536 std::vector<std::unique_ptr<Internal::RNTupleJoinTable>> fJoinTables; 0537 bool fJoinTablesAreBuilt = false; 0538 0539 bool HasJoinTable() const { return fJoinTables.size() > 0; } 0540 0541 ///////////////////////////////////////////////////////////////////////////// 0542 /// \brief Load the entry identified by the provided entry number of the primary RNTuple. 0543 /// 0544 /// \sa ROOT::Experimental::RNTupleProcessor::LoadEntry 0545 ROOT::NTupleSize_t LoadEntry(ROOT::NTupleSize_t entryNumber) final; 0546 0547 ///////////////////////////////////////////////////////////////////////////// 0548 /// \sa ROOT::Experimental::RNTupleProcessor::SetEntryPointers. 0549 void SetEntryPointers(const ROOT::REntry &) final; 0550 0551 ///////////////////////////////////////////////////////////////////////////// 0552 /// \brief Get the total number of entries in this processor. 0553 ROOT::NTupleSize_t GetNEntries() final { return fNEntries; } 0554 0555 ///////////////////////////////////////////////////////////////////////////// 0556 /// \brief Set fModel by combining the primary and auxiliary models. 0557 /// 0558 /// \param[in] primaryModel The model of the primary RNTuple. 0559 /// \param[in] auxModels Models of the auxiliary RNTuples. 0560 /// 0561 /// To prevent field name clashes when one or more models have fields with duplicate names, fields from each 0562 /// auxiliary model are stored as a anonymous record, and subsequently registered as subfields in the join model. 0563 /// This way, they can be accessed from the processor's entry as `auxNTupleName.fieldName`. 0564 void SetModel(std::unique_ptr<ROOT::RNTupleModel> primaryModel, 0565 std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels); 0566 0567 ///////////////////////////////////////////////////////////////////////////// 0568 /// \brief Connect all fields, once the primary and all auxiliary RNTuples have been added. 0569 void ConnectFields(); 0570 0571 ///////////////////////////////////////////////////////////////////////////// 0572 /// \brief Populate fJoinFieldTokens with tokens for join fields belonging to the main RNTuple in the join model. 0573 /// 0574 /// \param[in] joinFields The names of the fields used in the join. 0575 void SetJoinFieldTokens(const std::vector<std::string> &joinFields) 0576 { 0577 fJoinFieldTokens.reserve(joinFields.size()); 0578 for (const auto &fieldName : joinFields) { 0579 fJoinFieldTokens.emplace_back(fEntry->GetToken(fieldName)); 0580 } 0581 } 0582 0583 ///////////////////////////////////////////////////////////////////////////// 0584 /// \brief Construct a new RNTupleJoinProcessor. 0585 /// 0586 /// \param[in] mainNTuple The source specification (name and storage location) of the primary RNTuple. 0587 /// \param[in] auxNTUples The source specifications (name and storage location) of the auxiliary RNTuples. 0588 /// \param[in] joinFields The names of the fields on which to join, in case the specified RNTuples are unaligned. 0589 /// The join is made based on the combined join field values, and therefore each field has to be present in each 0590 /// specified RNTuple. If an empty list is provided, it is assumed that the RNTuples are fully aligned. 0591 /// \param[in] processorName Name of the processor. Unless specified otherwise in RNTupleProcessor::CreateJoin, this 0592 /// is the name of the main RNTuple. 0593 /// \param[in] primaryModel An RNTupleModel specifying which fields from the primary RNTuple can be read by the 0594 /// processor. If no model is provided, one will be created based on the descriptor of the primary RNTuple. 0595 /// \param[in] auxModels A list of RNTupleModels specifying which fields from the corresponding auxiliary RNTuple 0596 /// (according to the order of `auxNTuples`) can be read by the processor. If this vector is empty, the models will 0597 /// be created based on the descriptors of their corresponding RNTuples. This also applies to individual auxiliary 0598 /// RNTuples for which the provided model is a `nullptr`. 0599 RNTupleJoinProcessor(const RNTupleOpenSpec &mainNTuple, const std::vector<RNTupleOpenSpec> &auxNTuples, 0600 const std::vector<std::string> &joinFields, std::string_view processorName, 0601 std::unique_ptr<ROOT::RNTupleModel> primaryModel = nullptr, 0602 std::vector<std::unique_ptr<ROOT::RNTupleModel>> auxModels = {}); 0603 0604 public: 0605 RNTupleJoinProcessor(const RNTupleJoinProcessor &) = delete; 0606 RNTupleJoinProcessor operator=(const RNTupleJoinProcessor &) = delete; 0607 RNTupleJoinProcessor(RNTupleJoinProcessor &&) = delete; 0608 RNTupleJoinProcessor operator=(RNTupleJoinProcessor &&) = delete; 0609 ~RNTupleJoinProcessor() override 0610 { 0611 for (auto &[_, fieldContext] : fFieldContexts) { 0612 fieldContext.ResetConcreteField(); 0613 } 0614 } 0615 }; 0616 0617 } // namespace Experimental 0618 } // namespace ROOT 0619 0620 #endif // ROOT_RNTupleProcessor
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |