|
||||
File indexing completed on 2025-01-18 10:10:46
/// \file ROOT/RNTupleModel.hxx
/// \ingroup NTuple ROOT7
/// \author Jakob Blomer <jblomer@cern.ch>
/// \date 2018-10-04
/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
/// is welcome!

/*************************************************************************
 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef ROOT7_RNTupleModel
#define ROOT7_RNTupleModel

#include <ROOT/REntry.hxx>
#include <ROOT/RError.hxx>
#include <ROOT/RField.hxx>
#include <ROOT/RNTupleUtil.hxx>
#include <string_view>

#include <algorithm>
#include <cstdint>
#include <functional>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

namespace ROOT {
namespace Experimental {

class RNTupleCollectionWriter;
class RNTupleModel;
class RNTupleWriter;

namespace Internal {
class RPageSinkBuf;

// clang-format off
/**
\class ROOT::Experimental::Internal::RNTupleModelChangeset
\ingroup NTuple
\brief The incremental changes to a `RNTupleModel`

Represents a set of alterations to a `RNTupleModel` that happened after the model is used to initialize a `RPageSink`
instance. This object can be used to communicate metadata updates to a `RPageSink`.
You will not normally use this directly; see `RNTupleModel::RUpdater` instead.
0052 */ 0053 // clang-format on 0054 struct RNTupleModelChangeset { 0055 RNTupleModel &fModel; 0056 /// Points to the fields in fModel that were added as part of an updater transaction 0057 std::vector<RFieldBase *> fAddedFields; 0058 /// Points to the projected fields in fModel that were added as part of an updater transaction 0059 std::vector<RFieldBase *> fAddedProjectedFields; 0060 0061 RNTupleModelChangeset(RNTupleModel &model) : fModel(model) {} 0062 bool IsEmpty() const { return fAddedFields.empty() && fAddedProjectedFields.empty(); } 0063 }; 0064 0065 } // namespace Internal 0066 0067 // clang-format off 0068 /** 0069 \class ROOT::Experimental::RNTupleModel 0070 \ingroup NTuple 0071 \brief The RNTupleModel encapulates the schema of an ntuple. 0072 0073 The ntuple model comprises a collection of hierarchically organized fields. From a model, "entries" 0074 can be extracted. For convenience, the model provides a default entry unless it is created as a "bare model". 0075 Models have a unique model identifier that faciliates checking whether entries are compatible with it 0076 (i.e.: have been extracted from that model). 0077 0078 A model is subject to a state transition during its lifetime: it starts in a building state, in which fields can be 0079 added and modified. Once the schema is finalized, the model gets frozen. Only frozen models can create entries. 
0080 */ 0081 // clang-format on 0082 class RNTupleModel { 0083 public: 0084 /// A wrapper over a field name and an optional description; used in `AddField()` and `RUpdater::AddField()` 0085 struct NameWithDescription_t { 0086 NameWithDescription_t(const char *name) : fName(name) {} 0087 NameWithDescription_t(const std::string &name) : fName(name) {} 0088 NameWithDescription_t(std::string_view name) : fName(name) {} 0089 NameWithDescription_t(std::string_view name, std::string_view descr) : fName(name), fDescription(descr) {} 0090 0091 std::string_view fName; 0092 std::string_view fDescription = ""; 0093 }; 0094 0095 /// Projected fields are fields whose columns are reused from existing fields. Projected fields are not attached 0096 /// to the models zero field. Only the real source fields are written to, projected fields are stored as meta-data 0097 /// (header) information only. Only top-level projected fields are supported because otherwise the layout of types 0098 /// could be altered in unexpected ways. 0099 /// All projected fields and the source fields used to back them are kept in this class. 0100 class RProjectedFields { 0101 public: 0102 /// The map keys are the projected target fields, the map values are the backing source fields 0103 /// Note that sub fields are treated individually and indepently of their parent field 0104 using FieldMap_t = std::unordered_map<const RFieldBase *, const RFieldBase *>; 0105 0106 private: 0107 explicit RProjectedFields(std::unique_ptr<RFieldZero> fieldZero) : fFieldZero(std::move(fieldZero)) {} 0108 /// The projected fields are attached to this zero field 0109 std::unique_ptr<RFieldZero> fFieldZero; 0110 /// Maps the source fields from fModel to the target projected fields attached to fFieldZero 0111 FieldMap_t fFieldMap; 0112 /// The model this set of projected fields belongs to 0113 const RNTupleModel *fModel; 0114 0115 /// Asserts that the passed field is a valid target of the source field provided in the field map. 
0116 /// Checks the field without looking into sub fields. 0117 RResult<void> EnsureValidMapping(const RFieldBase *target, const FieldMap_t &fieldMap); 0118 0119 public: 0120 explicit RProjectedFields(const RNTupleModel *model) : fFieldZero(std::make_unique<RFieldZero>()), fModel(model) 0121 { 0122 } 0123 RProjectedFields(const RProjectedFields &) = delete; 0124 RProjectedFields(RProjectedFields &&) = default; 0125 RProjectedFields &operator=(const RProjectedFields &) = delete; 0126 RProjectedFields &operator=(RProjectedFields &&) = default; 0127 ~RProjectedFields() = default; 0128 0129 /// The new model needs to be a clone of fModel 0130 std::unique_ptr<RProjectedFields> Clone(const RNTupleModel *newModel) const; 0131 0132 RFieldZero *GetFieldZero() const { return fFieldZero.get(); } 0133 const RFieldBase *GetSourceField(const RFieldBase *target) const; 0134 /// Adds a new projected field. The field map needs to provide valid source fields of fModel for 'field' 0135 /// and each of its sub fields. 0136 RResult<void> Add(std::unique_ptr<RFieldBase> field, const FieldMap_t &fieldMap); 0137 bool IsEmpty() const { return fFieldZero->begin() == fFieldZero->end(); } 0138 }; 0139 0140 /// A model is usually immutable after passing it to an `RNTupleWriter`. However, for the rare 0141 /// cases that require changing the model after the fact, `RUpdater` provides limited support for 0142 /// incremental updates, e.g. addition of new fields. 0143 /// 0144 /// See `RNTupleWriter::CreateModelUpdater()` for an example. 0145 class RUpdater { 0146 private: 0147 RNTupleWriter &fWriter; 0148 Internal::RNTupleModelChangeset fOpenChangeset; 0149 std::uint64_t fNewModelId = 0; ///< The model ID after committing 0150 0151 public: 0152 explicit RUpdater(RNTupleWriter &writer); 0153 ~RUpdater() { CommitUpdate(); } 0154 /// Begin a new set of alterations to the underlying model. As a side effect, all `REntry` instances related to 0155 /// the model are invalidated. 
0156 void BeginUpdate(); 0157 /// Commit changes since the last call to `BeginUpdate()`. All the invalidated `REntry`s remain invalid. 0158 /// `CreateEntry()` or `CreateBareEntry()` can be used to create an `REntry` that matching the new model. 0159 /// Upon completion, `BeginUpdate()` can be called again to begin a new set of changes. 0160 void CommitUpdate(); 0161 0162 template <typename T, typename... ArgsT> 0163 std::shared_ptr<T> MakeField(const NameWithDescription_t &fieldNameDesc, ArgsT &&...args) 0164 { 0165 auto objPtr = fOpenChangeset.fModel.MakeField<T>(fieldNameDesc, std::forward<ArgsT>(args)...); 0166 auto fieldZero = fOpenChangeset.fModel.fFieldZero.get(); 0167 auto it = std::find_if(fieldZero->begin(), fieldZero->end(), 0168 [&](const auto &f) { return f.GetFieldName() == fieldNameDesc.fName; }); 0169 R__ASSERT(it != fieldZero->end()); 0170 fOpenChangeset.fAddedFields.emplace_back(&(*it)); 0171 return objPtr; 0172 } 0173 0174 void AddField(std::unique_ptr<RFieldBase> field); 0175 0176 RResult<void> 0177 AddProjectedField(std::unique_ptr<RFieldBase> field, std::function<std::string(const std::string &)> mapping); 0178 }; 0179 0180 private: 0181 /// Hierarchy of fields consisting of simple types and collections (sub trees) 0182 std::unique_ptr<RFieldZero> fFieldZero; 0183 /// Contains field values corresponding to the created top-level fields 0184 std::unique_ptr<REntry> fDefaultEntry; 0185 /// Keeps track of which field names are taken, including projected field names. 0186 std::unordered_set<std::string> fFieldNames; 0187 /// Free text set by the user 0188 std::string fDescription; 0189 /// The set of projected top-level fields 0190 std::unique_ptr<RProjectedFields> fProjectedFields; 0191 /// Every model has a unique ID to distinguish it from other models. Entries are linked to models via the ID. 0192 /// Cloned models get a new model ID. 0193 std::uint64_t fModelId = 0; 0194 /// Changed by Freeze() / Unfreeze() and by the RUpdater. 
0195 bool fIsFrozen = false; 0196 0197 /// Checks that user-provided field names are valid in the context 0198 /// of this NTuple model. Throws an RException for invalid names. 0199 void EnsureValidFieldName(std::string_view fieldName); 0200 0201 /// Throws an RException if fFrozen is true 0202 void EnsureNotFrozen() const; 0203 0204 /// Throws an RException if fDefaultEntry is nullptr 0205 void EnsureNotBare() const; 0206 0207 /// The field name can be a top-level field or a nested field. Returns nullptr if the field is not in the model. 0208 RFieldBase *FindField(std::string_view fieldName) const; 0209 0210 RNTupleModel(std::unique_ptr<RFieldZero> fieldZero); 0211 0212 public: 0213 RNTupleModel(const RNTupleModel&) = delete; 0214 RNTupleModel& operator =(const RNTupleModel&) = delete; 0215 ~RNTupleModel() = default; 0216 0217 std::unique_ptr<RNTupleModel> Clone() const; 0218 static std::unique_ptr<RNTupleModel> Create(); 0219 static std::unique_ptr<RNTupleModel> Create(std::unique_ptr<RFieldZero> fieldZero); 0220 /// A bare model has no default entry 0221 static std::unique_ptr<RNTupleModel> CreateBare(); 0222 static std::unique_ptr<RNTupleModel> CreateBare(std::unique_ptr<RFieldZero> fieldZero); 0223 0224 /// Creates a new field given a `name` or `{name, description}` pair and a 0225 /// corresponding value that is managed by a shared pointer. 
0226 /// 0227 /// **Example: create some fields and fill an %RNTuple** 0228 /// ~~~ {.cpp} 0229 /// #include <ROOT/RNTupleModel.hxx> 0230 /// #include <ROOT/RNTupleWriter.hxx> 0231 /// using ROOT::Experimental::RNTupleModel; 0232 /// using ROOT::Experimental::RNTupleWriter; 0233 /// 0234 /// #include <vector> 0235 /// 0236 /// auto model = RNTupleModel::Create(); 0237 /// auto pt = model->MakeField<float>("pt"); 0238 /// auto vec = model->MakeField<std::vector<int>>("vec"); 0239 /// 0240 /// // The RNTuple is written to disk when the RNTupleWriter goes out of scope 0241 /// { 0242 /// auto writer = RNTupleWriter::Recreate(std::move(model), "myNTuple", "myFile.root"); 0243 /// for (int i = 0; i < 100; i++) { 0244 /// *pt = static_cast<float>(i); 0245 /// *vec = {i, i+1, i+2}; 0246 /// writer->Fill(); 0247 /// } 0248 /// } 0249 /// ~~~ 0250 /// 0251 /// **Example: create a field with an initial value** 0252 /// ~~~ {.cpp} 0253 /// #include <ROOT/RNTupleModel.hxx> 0254 /// using ROOT::Experimental::RNTupleModel; 0255 /// 0256 /// auto model = RNTupleModel::Create(); 0257 /// // pt's initial value is 42.0 0258 /// auto pt = model->MakeField<float>("pt", 42.0); 0259 /// ~~~ 0260 /// **Example: create a field with a description** 0261 /// ~~~ {.cpp} 0262 /// #include <ROOT/RNTupleModel.hxx> 0263 /// using ROOT::Experimental::RNTupleModel; 0264 /// 0265 /// auto model = RNTupleModel::Create(); 0266 /// auto hadronFlavour = model->MakeField<float>({ 0267 /// "hadronFlavour", "flavour from hadron ghost clustering" 0268 /// }); 0269 /// ~~~ 0270 template <typename T, typename... 
ArgsT> 0271 std::shared_ptr<T> MakeField(const NameWithDescription_t &fieldNameDesc, ArgsT &&...args) 0272 { 0273 EnsureNotFrozen(); 0274 EnsureValidFieldName(fieldNameDesc.fName); 0275 auto field = std::make_unique<RField<T>>(fieldNameDesc.fName); 0276 field->SetDescription(fieldNameDesc.fDescription); 0277 std::shared_ptr<T> ptr; 0278 if (fDefaultEntry) 0279 ptr = fDefaultEntry->AddValue<T>(*field, std::forward<ArgsT>(args)...); 0280 fFieldNames.insert(field->GetFieldName()); 0281 fFieldZero->Attach(std::move(field)); 0282 return ptr; 0283 } 0284 0285 /// Adds a field whose type is not known at compile time. Thus there is no shared pointer returned. 0286 /// 0287 /// Throws an exception if the field is null. 0288 void AddField(std::unique_ptr<RFieldBase> field); 0289 0290 /// Adds a top-level field based on existing fields. The mapping function is called with the qualified field names 0291 /// of the provided field and the subfields. It should return the qualified field names used as a mapping source. 0292 /// Projected fields can only be used for models used to write data. 0293 RResult<void> 0294 AddProjectedField(std::unique_ptr<RFieldBase> field, std::function<std::string(const std::string &)> mapping); 0295 const RProjectedFields &GetProjectedFields() const { return *fProjectedFields; } 0296 0297 void Freeze(); 0298 void Unfreeze(); 0299 bool IsFrozen() const { return fIsFrozen; } 0300 std::uint64_t GetModelId() const { return fModelId; } 0301 0302 /// Ingests a model for a sub collection and attaches it to the current model 0303 /// 0304 /// Throws an exception if collectionModel is null. 0305 std::shared_ptr<RNTupleCollectionWriter> 0306 MakeCollection(std::string_view fieldName, std::unique_ptr<RNTupleModel> collectionModel); 0307 0308 std::unique_ptr<REntry> CreateEntry() const; 0309 /// In a bare entry, all values point to nullptr. 
The resulting entry shall use BindValue() in order 0310 /// set memory addresses to be serialized / deserialized 0311 std::unique_ptr<REntry> CreateBareEntry() const; 0312 /// Creates a token to be used in REntry methods to address a top-level field 0313 REntry::RFieldToken GetToken(std::string_view fieldName) const; 0314 /// Calls the given field's CreateBulk() method. Throws an exception if no field with the given name exists. 0315 RFieldBase::RBulk CreateBulk(std::string_view fieldName) const; 0316 0317 REntry &GetDefaultEntry(); 0318 const REntry &GetDefaultEntry() const; 0319 0320 /// Non-const access to the root field is used to commit clusters during writing 0321 /// and to set the on-disk field IDs when connecting a model to a page source or sink. 0322 RFieldZero &GetFieldZero(); 0323 const RFieldZero &GetFieldZero() const { return *fFieldZero; } 0324 const RFieldBase &GetField(std::string_view fieldName) const; 0325 0326 std::string GetDescription() const { return fDescription; } 0327 void SetDescription(std::string_view description); 0328 }; 0329 0330 } // namespace Experimental 0331 } // namespace ROOT 0332 0333 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |