![]() |
|
|||
File indexing completed on 2025-09-16 09:08:34
0001 /// \file ROOT/RNTupleModel.hxx 0002 /// \ingroup NTuple 0003 /// \author Jakob Blomer <jblomer@cern.ch> 0004 /// \date 2018-10-04 0005 0006 /************************************************************************* 0007 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. * 0008 * All rights reserved. * 0009 * * 0010 * For the licensing terms see $ROOTSYS/LICENSE. * 0011 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0012 *************************************************************************/ 0013 0014 #ifndef ROOT_RNTupleModel 0015 #define ROOT_RNTupleModel 0016 0017 #include <ROOT/REntry.hxx> 0018 #include <ROOT/RError.hxx> 0019 #include <ROOT/RField.hxx> 0020 #include <ROOT/RFieldToken.hxx> 0021 #include <ROOT/RNTupleUtil.hxx> 0022 #include <string_view> 0023 0024 #include <cstdint> 0025 #include <functional> 0026 #include <memory> 0027 #include <string> 0028 #include <unordered_map> 0029 #include <unordered_set> 0030 #include <utility> 0031 0032 namespace ROOT { 0033 0034 class RNTupleWriteOptions; 0035 class RNTupleModel; 0036 class RNTupleWriter; 0037 0038 namespace Experimental { 0039 namespace Detail { 0040 class RRawPtrWriteEntry; 0041 } // namespace Detail 0042 } // namespace Experimental 0043 0044 namespace Internal { 0045 class RProjectedFields; 0046 0047 ROOT::RFieldZero &GetFieldZeroOfModel(RNTupleModel &model); 0048 RProjectedFields &GetProjectedFieldsOfModel(RNTupleModel &model); 0049 0050 // clang-format off 0051 /** 0052 \class ROOT::Internal::RProjectedFields 0053 \ingroup NTuple 0054 \brief Container for the projected fields of an RNTupleModel 0055 0056 Projected fields are fields whose columns are reused from existing fields. Projected fields are not attached 0057 to the model's zero field but form a separate hierarchy with their own zero field (which is stored in this class). 0058 Only the real source fields are written to: projected fields are stored as metadata 0059 (header) information only. Only top-level projected fields are supported because otherwise the layout of types 0060 could be altered in unexpected ways. 0061 This class owns the hierarchy of projected fields and keeps the mapping between them and their backing source fields. 0062 */ 0063 // clang-format on 0064 class RProjectedFields { 0065 public: 0066 /// The map keys are the projected target fields, the map values are the backing source fields 0067 /// Note that sub fields are treated individually and indepently of their parent field 0068 using FieldMap_t = std::unordered_map<const ROOT::RFieldBase *, const ROOT::RFieldBase *>; 0069 0070 private: 0071 explicit RProjectedFields(std::unique_ptr<ROOT::RFieldZero> fieldZero) : fFieldZero(std::move(fieldZero)) {} 0072 /// The projected fields are attached to this zero field 0073 std::unique_ptr<ROOT::RFieldZero> fFieldZero; 0074 /// Maps the source fields from fModel to the target projected fields attached to fFieldZero 0075 FieldMap_t fFieldMap; 0076 /// The model this set of projected fields belongs to 0077 const RNTupleModel *fModel; 0078 0079 /// Asserts that the passed field is a valid target of the source field provided in the field map. 0080 /// Checks the field without looking into sub fields. 0081 RResult<void> EnsureValidMapping(const ROOT::RFieldBase *target, const FieldMap_t &fieldMap); 0082 0083 public: 0084 explicit RProjectedFields(const RNTupleModel &model) 0085 : fFieldZero(std::make_unique<ROOT::RFieldZero>()), fModel(&model) 0086 { 0087 } 0088 RProjectedFields(const RProjectedFields &) = delete; 0089 RProjectedFields(RProjectedFields &&) = default; 0090 RProjectedFields &operator=(const RProjectedFields &) = delete; 0091 RProjectedFields &operator=(RProjectedFields &&) = default; 0092 ~RProjectedFields() = default; 0093 0094 /// Clones this container and all the projected fields it owns. `newModel` must be a clone of the model 0095 /// that this RProjectedFields was constructed with. 0096 std::unique_ptr<RProjectedFields> Clone(const RNTupleModel &newModel) const; 0097 0098 ROOT::RFieldZero &GetFieldZero() { return *fFieldZero; } 0099 const ROOT::RFieldBase *GetSourceField(const ROOT::RFieldBase *target) const; 0100 /// Adds a new projected field. The field map needs to provide valid source fields of fModel for 'field' 0101 /// and each of its sub fields. 0102 RResult<void> Add(std::unique_ptr<ROOT::RFieldBase> field, const FieldMap_t &fieldMap); 0103 bool IsEmpty() const { return fFieldZero->begin() == fFieldZero->end(); } 0104 }; 0105 0106 } // namespace Internal 0107 0108 // clang-format off 0109 /** 0110 \class ROOT::RNTupleModel 0111 \ingroup NTuple 0112 \brief The RNTupleModel encapulates the schema of an RNTuple. 0113 0114 The RNTupleModel comprises a collection of hierarchically organized fields. From a model, "entries" 0115 can be extracted or created. For convenience, the RNTupleModel provides a default entry unless it is created as a "bare model". 0116 Models have a unique model identifier that facilitates checking whether entries are compatible with it 0117 (i.e.: have been extracted from that model). 0118 0119 A model is subject to state transitions during its lifetime: it starts in a *building* state, in which fields can be 0120 added and modified. Once the schema is finalized, the model gets *frozen*. Only frozen models can create entries. 0121 From frozen, models move into an *expired* state. In this state, the model is only partially usable: it can be cloned 0122 and queried, but it can't be unfrozen anymore and no new entries can be created. This state is used for models 0123 that were used for writing and are no longer connected to a page sink. 0124 0125 ``` 0126 (Model gets created) 0127 | 0128 | (passed to a Sink (detached from 0129 ____v______ or explicitly __________ Sink after ___________ 0130 | | frozen) | | writing) | | 0131 | Building |---------------->| Frozen |-------------->| Expired | 0132 |___________|<----------------|__________| |___________| 0133 (explicitly 0134 unfrozen) 0135 ``` 0136 0137 */ 0138 // clang-format on 0139 class RNTupleModel { 0140 friend ROOT::RFieldZero &Internal::GetFieldZeroOfModel(RNTupleModel &); 0141 friend Internal::RProjectedFields &Internal::GetProjectedFieldsOfModel(RNTupleModel &); 0142 0143 public: 0144 /// User-provided function that describes the mapping of existing source fields to projected fields in terms 0145 /// of fully qualified field names. The mapping function is called with the qualified field names of the provided 0146 /// field and the subfields. It should return the qualified field names used as a mapping source. 0147 /// See AddProjectedFields() for more details. 0148 using FieldMappingFunc_t = std::function<std::string(const std::string &)>; 0149 0150 class RUpdater; 0151 0152 private: 0153 /// The states a model can be in. Possible transitions are between kBuilding and kFrozen 0154 /// and from kFrozen to kExpired. 0155 /// See RNTupleModel for the state transition graph. 0156 enum class EState { 0157 kBuilding, 0158 kFrozen, 0159 kExpired 0160 }; 0161 0162 /// Hierarchy of fields consisting of simple types and collections (sub trees) 0163 std::unique_ptr<ROOT::RFieldZero> fFieldZero; 0164 /// Contains field values corresponding to the created top-level fields, as well as registered subfields 0165 std::unique_ptr<ROOT::REntry> fDefaultEntry; 0166 /// Keeps track of which field names are taken, including projected field names. 0167 std::unordered_set<std::string> fFieldNames; 0168 /// Free text set by the user 0169 std::string fDescription; 0170 /// The set of projected top-level fields 0171 std::unique_ptr<Internal::RProjectedFields> fProjectedFields; 0172 /// Keeps track of which subfields have been registered to be included in entries belonging to this model. 0173 std::unordered_set<std::string> fRegisteredSubfields; 0174 /// Every model has a unique ID to distinguish it from other models. Entries are linked to models via the ID. 0175 /// Cloned models get a new model ID. Expired models are cloned into frozen models. 0176 std::uint64_t fModelId = 0; 0177 /// Models have a separate schema ID to remember that the clone of a frozen model still has the same schema. 0178 std::uint64_t fSchemaId = 0; 0179 /// Changed by Freeze() / Unfreeze() and by the RUpdater. 0180 EState fModelState = EState::kBuilding; 0181 0182 /// Checks that user-provided field names are valid in the context of this RNTupleModel. 0183 /// Throws an RException for invalid names, empty names (which is reserved for the zero field) and duplicate field 0184 /// names. 0185 void EnsureValidFieldName(std::string_view fieldName); 0186 0187 /// Throws an RException if fFrozen is true 0188 void EnsureNotFrozen() const; 0189 0190 /// Throws an RException if fDefaultEntry is nullptr 0191 void EnsureNotBare() const; 0192 0193 /// The field name can be a top-level field or a nested field. Returns nullptr if the field is not in the model. 0194 ROOT::RFieldBase *FindField(std::string_view fieldName) const; 0195 0196 /// Add a subfield to the provided entry. If `initializeValue` is false, a nullptr will be bound to the entry value 0197 /// (used in bare models). 0198 void AddSubfield(std::string_view fieldName, ROOT::REntry &entry, bool initializeValue = true) const; 0199 0200 RNTupleModel(std::unique_ptr<ROOT::RFieldZero> fieldZero); 0201 0202 public: 0203 RNTupleModel(const RNTupleModel &) = delete; 0204 RNTupleModel &operator=(const RNTupleModel &) = delete; 0205 ~RNTupleModel() = default; 0206 0207 std::unique_ptr<RNTupleModel> Clone() const; 0208 static std::unique_ptr<RNTupleModel> Create(); 0209 static std::unique_ptr<RNTupleModel> Create(std::unique_ptr<ROOT::RFieldZero> fieldZero); 0210 /// Creates a "bare model", i.e. an RNTupleModel with no default entry 0211 static std::unique_ptr<RNTupleModel> CreateBare(); 0212 /// Creates a "bare model", i.e. an RNTupleModel with no default entry, with the given field zero. 0213 static std::unique_ptr<RNTupleModel> CreateBare(std::unique_ptr<ROOT::RFieldZero> fieldZero); 0214 0215 /// Creates a new field given a `name` or `{name, description}` pair and a 0216 /// corresponding, default-constructed value that is managed by a shared pointer. 0217 /// 0218 /// **Example: create some fields and fill an %RNTuple** 0219 /// ~~~ {.cpp} 0220 /// #include <ROOT/RNTupleModel.hxx> 0221 /// #include <ROOT/RNTupleWriter.hxx> 0222 /// using ROOT::RNTupleWriter; 0223 /// 0224 /// #include <vector> 0225 /// 0226 /// auto model = ROOT::RNTupleModel::Create(); 0227 /// auto pt = model->MakeField<float>("pt"); 0228 /// auto vec = model->MakeField<std::vector<int>>("vec"); 0229 /// 0230 /// // The RNTuple is written to disk when the RNTupleWriter goes out of scope 0231 /// { 0232 /// auto writer = RNTupleWriter::Recreate(std::move(model), "myNTuple", "myFile.root"); 0233 /// for (int i = 0; i < 100; i++) { 0234 /// *pt = static_cast<float>(i); 0235 /// *vec = {i, i+1, i+2}; 0236 /// writer->Fill(); 0237 /// } 0238 /// } 0239 /// ~~~ 0240 /// 0241 /// **Example: create a field with a description** 0242 /// ~~~ {.cpp} 0243 /// #include <ROOT/RNTupleModel.hxx> 0244 /// 0245 /// auto model = ROOT::RNTupleModel::Create(); 0246 /// auto hadronFlavour = model->MakeField<float>( 0247 /// "hadronFlavour", "flavour from hadron ghost clustering" 0248 /// ); 0249 /// ~~~ 0250 template <typename T> 0251 std::shared_ptr<T> MakeField(std::string_view name, std::string_view description = "") 0252 { 0253 EnsureNotFrozen(); 0254 EnsureValidFieldName(name); 0255 auto field = std::make_unique<ROOT::RField<T>>(name); 0256 field->SetDescription(description); 0257 std::shared_ptr<T> ptr; 0258 if (fDefaultEntry) 0259 ptr = fDefaultEntry->AddValue<T>(*field); 0260 fFieldNames.insert(field->GetFieldName()); 0261 fFieldZero->Attach(std::move(field)); 0262 return ptr; 0263 } 0264 0265 /// Adds a field whose type is not known at compile time. No shared pointer is returned in this case: 0266 /// pointers should be retrieved or bound via REntry. 0267 /// 0268 /// Throws an RException if the field is null. 0269 void AddField(std::unique_ptr<ROOT::RFieldBase> field); 0270 0271 /// Register a subfield so it can be accessed directly from entries belonging to the model. Because registering a 0272 /// subfield does not fundamentally change the model, previously created entries will not be invalidated, nor 0273 /// modified in any way; a registered subfield is merely an accessor added to the default entry (if present) and any 0274 /// entries created afterwards. Note that previously created entries won't have this subfield added to them. 0275 /// 0276 /// Using models with registered subfields for writing is not allowed. Attempting to do so will result in an 0277 /// exception. 0278 /// 0279 /// Throws an RException if the provided subfield could not be found in the model. 0280 void RegisterSubfield(std::string_view qualifiedFieldName); 0281 0282 /// Adds a top-level field based on existing fields. 0283 /// 0284 /// The mapping function takes one argument, which is a string containing the name of the projected field. The return 0285 /// value of the mapping function should be the name of the (existing) field onto which the projection is made. 0286 /// **Example** 0287 /// ~~~ {.cpp} 0288 /// auto model = RNTupleModel::Create(); 0289 /// model->MakeField<float>("met"); 0290 /// auto metProjection = ROOT::RFieldBase::Create("missingE", "float").Unwrap(); 0291 /// model->AddProjectedField(std::move(metProjection), [](const std::string &) { return "met"; }); 0292 /// ~~~ 0293 /// 0294 /// Adding projections for collection fields is also possible, as long as they follow the same schema structure. For 0295 /// example, a projection of a collection of structs onto a collection of scalars is possible, but a projection of a 0296 /// collection of a collection of scalars onto a collection of scalars is not. 0297 /// 0298 /// In the case of projections for nested fields, the mapping function must provide a mapping for every nesting 0299 /// level. 0300 /// **Example** 0301 /// ~~~ {.cpp} 0302 /// struct P { int x, y; }; 0303 /// 0304 /// auto model = RNTupleModel::Create(); 0305 /// model->MakeField<std::vector<P>>("points"); 0306 /// auto pxProjection = ROOT::RFieldBase::Create("pxs", "std::vector<int>").Unwrap(); 0307 /// model->AddProjectedField(std::move(pxProjection), [](const std::string &fieldName) { 0308 /// if (fieldName == "pxs") 0309 /// return "points"; 0310 /// else 0311 /// return "points._0.x"; 0312 /// }); 0313 /// ~~~ 0314 /// 0315 /// Creating projections for fields containing `std::variant` or fixed-size arrays is unsupported. 0316 RResult<void> AddProjectedField(std::unique_ptr<ROOT::RFieldBase> field, FieldMappingFunc_t mapping); 0317 0318 /// Transitions an RNTupleModel from the *building* state to the *frozen* state, disabling adding additional fields 0319 /// and enabling creating entries from it. Freezing an already-frozen model is a no-op. Throws an RException if the 0320 /// model is in the *expired* state. See RNTupleModel for more detailed explanation on the state transitions. 0321 void Freeze(); 0322 /// Transitions an RNTupleModel from the *frozen* state back to the *building* state, invalidating all previously 0323 /// created entries, re-enabling adding additional fields and disabling creating entries from it. Unfreezing a model 0324 /// that is already in the *building* state is a no-op. Throws an RException if the model is in the *expired* state. 0325 /// See RNTupleModel for a more detailed explanation on the state transitions. 0326 void Unfreeze(); 0327 /// Transitions an RNTupleModel from the *frozen* state to the *expired* state, invalidating all previously created 0328 /// entries, disabling creating new entries from it and disabling further state transitions. Expiring a model that is 0329 /// already expired is a no-op. Throws an RException if the model is in the *building* state. See RNTupleModel for a 0330 /// more detailed explanation on the state transitions. 0331 void Expire(); 0332 /// \see Expire() 0333 bool IsExpired() const { return fModelState == EState::kExpired; } 0334 /// \see Freeze() 0335 bool IsFrozen() const { return (fModelState == EState::kFrozen) || (fModelState == EState::kExpired); } 0336 /// \see CreateBare() 0337 bool IsBare() const { return !fDefaultEntry; } 0338 std::uint64_t GetModelId() const { return fModelId; } 0339 std::uint64_t GetSchemaId() const { return fSchemaId; } 0340 0341 /// Creates a new entry with default values for each field. 0342 std::unique_ptr<REntry> CreateEntry() const; 0343 /// Creates a "bare entry", i.e. a entry with all null values. The user needs to explicitly call BindValue() or 0344 /// BindRawPtr() to set memory addresses before serializing / deserializing the entry. 0345 std::unique_ptr<REntry> CreateBareEntry() const; 0346 std::unique_ptr<Experimental::Detail::RRawPtrWriteEntry> CreateRawPtrWriteEntry() const; 0347 /// Creates a token to be used in REntry methods to address a field present in the entry 0348 ROOT::RFieldToken GetToken(std::string_view fieldName) const; 0349 /// Calls the given field's CreateBulk() method. Throws an RException if no field with the given name exists. 0350 ROOT::RFieldBase::RBulkValues CreateBulk(std::string_view fieldName) const; 0351 0352 /// Retrieves the default entry of this model. 0353 /// Throws an RException if this is a bare model (i.e. if it was created with CreateBare()). 0354 REntry &GetDefaultEntry(); 0355 /// \see GetDefaultEntry() 0356 const REntry &GetDefaultEntry() const; 0357 0358 /// Retrieves the field zero of this model, i.e. the root of the field hierarchy. 0359 /// This may be used to make adjustments on the field hierarchy before the model is frozen. 0360 ROOT::RFieldZero &GetMutableFieldZero(); 0361 /// Retrieves the field zero of this model, i.e. the root of the field hierarchy. 0362 const ROOT::RFieldZero &GetConstFieldZero() const { return *fFieldZero; } 0363 /// Retrieves the field with fully-qualified name `fieldName`. 0364 /// Dot-separated names are used to walk down the field hierarchy: e.g. `"parent.child"` should 0365 /// be used to retrieve a field with name `"child"` whose parent is the top-level field with name `"parent"`. 0366 /// Throws an RException if no field is found with the given name. 0367 ROOT::RFieldBase &GetMutableField(std::string_view fieldName); 0368 /// \see GetMutableField() 0369 const ROOT::RFieldBase &GetConstField(std::string_view fieldName) const; 0370 0371 const std::string &GetDescription() const { return fDescription; } 0372 void SetDescription(std::string_view description); 0373 0374 /// Get the names of the fields currently present in the model, including projected fields. Registered subfields 0375 /// are not included, use GetRegisteredSubfieldnames() for this. 0376 const std::unordered_set<std::string> &GetFieldNames() const { return fFieldNames; } 0377 /// Get the (qualified) names of subfields that have been registered (via RegisterSubfield()) to be included in 0378 /// entries from this model. 0379 const std::unordered_set<std::string> &GetRegisteredSubfieldNames() const { return fRegisteredSubfields; } 0380 0381 /// Estimate the memory usage for this model during writing 0382 /// 0383 /// This will return an estimate in bytes for the internal page and compression buffers. The value should be 0384 /// understood per sequential RNTupleWriter or per RNTupleFillContext created for an RNTupleParallelWriter 0385 /// constructed with this model. 0386 std::size_t EstimateWriteMemoryUsage(const ROOT::RNTupleWriteOptions &options = ROOT::RNTupleWriteOptions()) const; 0387 }; 0388 0389 namespace Internal { 0390 0391 // clang-format off 0392 /** 0393 \class ROOT::Internal::RNTupleModelChangeset 0394 \ingroup NTuple 0395 \brief The incremental changes to a `RNTupleModel` 0396 0397 Represents a set of alterations to a `RNTupleModel` that happened after the model is used to initialize a `RPageSink` 0398 instance. This object can be used to communicate metadata updates to a `RPageSink`. 0399 You will not normally use this directly; see `RNTupleModel::RUpdater` instead. 0400 */ 0401 // clang-format on 0402 struct RNTupleModelChangeset { 0403 RNTupleModel &fModel; 0404 /// Points to the fields in fModel that were added as part of an updater transaction 0405 std::vector<ROOT::RFieldBase *> fAddedFields; 0406 /// Points to the projected fields in fModel that were added as part of an updater transaction 0407 std::vector<ROOT::RFieldBase *> fAddedProjectedFields; 0408 0409 RNTupleModelChangeset(RNTupleModel &model) : fModel(model) {} 0410 bool IsEmpty() const { return fAddedFields.empty() && fAddedProjectedFields.empty(); } 0411 0412 void AddField(std::unique_ptr<ROOT::RFieldBase> field); 0413 0414 /// \see RNTupleModel::AddProjectedField() 0415 ROOT::RResult<void> 0416 AddProjectedField(std::unique_ptr<ROOT::RFieldBase> field, RNTupleModel::FieldMappingFunc_t mapping); 0417 }; 0418 0419 } // namespace Internal 0420 0421 /// A model is usually immutable after passing it to an `RNTupleWriter`. However, for the rare 0422 /// cases that require changing the model after the fact, `RUpdater` provides limited support for 0423 /// incremental updates, e.g. addition of new fields. 0424 /// 0425 /// See `RNTupleWriter::CreateModelUpdater()` for an example. 0426 class RNTupleModel::RUpdater { 0427 private: 0428 ROOT::RNTupleWriter &fWriter; 0429 Internal::RNTupleModelChangeset fOpenChangeset; 0430 std::uint64_t fNewModelId = 0; ///< The model ID after committing 0431 0432 public: 0433 explicit RUpdater(ROOT::RNTupleWriter &writer); 0434 ~RUpdater() { CommitUpdate(); } 0435 /// Begin a new set of alterations to the underlying model. As a side effect, all REntry 0436 /// instances related to the model are invalidated. 0437 void BeginUpdate(); 0438 /// Commit changes since the last call to `BeginUpdate()`. All the invalidated REntries remain 0439 /// invalid. `CreateEntry()` or `CreateBareEntry()` can be used to create an REntry that 0440 /// matches the new model. Upon completion, `BeginUpdate()` can be called again to begin a new set of changes. 0441 void CommitUpdate(); 0442 0443 template <typename T> 0444 std::shared_ptr<T> MakeField(std::string_view name, std::string_view description = "") 0445 { 0446 auto objPtr = fOpenChangeset.fModel.MakeField<T>(name, description); 0447 auto fieldZero = fOpenChangeset.fModel.fFieldZero.get(); 0448 auto it = 0449 std::find_if(fieldZero->begin(), fieldZero->end(), [&](const auto &f) { return f.GetFieldName() == name; }); 0450 R__ASSERT(it != fieldZero->end()); 0451 fOpenChangeset.fAddedFields.emplace_back(&(*it)); 0452 return objPtr; 0453 } 0454 0455 void AddField(std::unique_ptr<ROOT::RFieldBase> field); 0456 0457 /// \see RNTupleModel::AddProjectedField() 0458 RResult<void> AddProjectedField(std::unique_ptr<ROOT::RFieldBase> field, FieldMappingFunc_t mapping); 0459 }; 0460 0461 } // namespace ROOT 0462 0463 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |