Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-16 09:08:34

0001 /// \file ROOT/RNTupleModel.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2018-10-04
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RNTupleModel
0015 #define ROOT_RNTupleModel
0016 
0017 #include <ROOT/REntry.hxx>
0018 #include <ROOT/RError.hxx>
0019 #include <ROOT/RField.hxx>
0020 #include <ROOT/RFieldToken.hxx>
0021 #include <ROOT/RNTupleUtil.hxx>
0022 #include <string_view>
0023 
0024 #include <cstdint>
0025 #include <functional>
0026 #include <memory>
0027 #include <string>
0028 #include <unordered_map>
0029 #include <unordered_set>
0030 #include <utility>
0031 
0032 namespace ROOT {
0033 
0034 class RNTupleWriteOptions; // forward declaration; parameter type of RNTupleModel::EstimateWriteMemoryUsage()
0035 class RNTupleModel; // forward declaration; needed by the Internal helpers declared ahead of the class itself
0036 class RNTupleWriter; // forward declaration; referenced by RNTupleModel::RUpdater
0037 
0038 namespace Experimental {
0039 namespace Detail {
0040 class RRawPtrWriteEntry; // forward declaration; pointee of RNTupleModel::CreateRawPtrWriteEntry()'s return value
0041 } // namespace Detail
0042 } // namespace Experimental
0043 
0044 namespace Internal {
0045 class RProjectedFields; // forward declaration; the class is defined further down in this namespace
0046 
0047 ROOT::RFieldZero &GetFieldZeroOfModel(RNTupleModel &model); // friend of RNTupleModel; presumably returns its fFieldZero
0048 RProjectedFields &GetProjectedFieldsOfModel(RNTupleModel &model); // friend of RNTupleModel; presumably returns its fProjectedFields
0049 
0050 // clang-format off
0051 /**
0052 \class ROOT::Internal::RProjectedFields
0053 \ingroup NTuple
0054 \brief Container for the projected fields of an RNTupleModel
0055 
0056 Projected fields are fields whose columns are reused from existing fields. Projected fields are not attached
0057 to the model's zero field but form a separate hierarchy with their own zero field (which is stored in this class).
0058 Only the real source fields are written to: projected fields are stored as metadata
0059 (header) information only. Only top-level projected fields are supported because otherwise the layout of types
0060 could be altered in unexpected ways.
0061 This class owns the hierarchy of projected fields and keeps the mapping between them and their backing source fields.
0062 */
0063 // clang-format on
0064 class RProjectedFields {
0065 public:
0066    /// The map keys are the projected target fields, the map values are the backing source fields
0067    /// Note that sub fields are treated individually and independently of their parent field
0068    using FieldMap_t = std::unordered_map<const ROOT::RFieldBase *, const ROOT::RFieldBase *>;
0069 
0070 private:
0071    explicit RProjectedFields(std::unique_ptr<ROOT::RFieldZero> fieldZero) : fFieldZero(std::move(fieldZero)) {}
0072    /// The projected fields are attached to this zero field
0073    std::unique_ptr<ROOT::RFieldZero> fFieldZero;
0074    /// Maps the target projected fields attached to fFieldZero to their backing source fields from fModel
0075    FieldMap_t fFieldMap;
0076    /// The model this set of projected fields belongs to; null by default, as the private constructor does not set it
0077    const RNTupleModel *fModel = nullptr;
0078 
0079    /// Asserts that the passed field is a valid target of the source field provided in the field map.
0080    /// Checks the field without looking into sub fields.
0081    RResult<void> EnsureValidMapping(const ROOT::RFieldBase *target, const FieldMap_t &fieldMap);
0082 
0083 public:
0084    explicit RProjectedFields(const RNTupleModel &model)
0085       : fFieldZero(std::make_unique<ROOT::RFieldZero>()), fModel(&model)
0086    {
0087    }
0088    RProjectedFields(const RProjectedFields &) = delete;
0089    RProjectedFields(RProjectedFields &&) = default;
0090    RProjectedFields &operator=(const RProjectedFields &) = delete;
0091    RProjectedFields &operator=(RProjectedFields &&) = default;
0092    ~RProjectedFields() = default;
0093 
0094    /// Clones this container and all the projected fields it owns. `newModel` must be a clone of the model
0095    /// that this RProjectedFields was constructed with.
0096    std::unique_ptr<RProjectedFields> Clone(const RNTupleModel &newModel) const;
0097 
0098    ROOT::RFieldZero &GetFieldZero() { return *fFieldZero; } ///< The zero field the projected fields are attached to
0099    const ROOT::RFieldBase *GetSourceField(const ROOT::RFieldBase *target) const; ///< Source field backing `target`
0100    /// Adds a new projected field. The field map needs to provide valid source fields of fModel for 'field'
0101    /// and each of its sub fields.
0102    RResult<void> Add(std::unique_ptr<ROOT::RFieldBase> field, const FieldMap_t &fieldMap);
0103    bool IsEmpty() const { return fFieldZero->begin() == fFieldZero->end(); } ///< True if the zero field has no sub fields
0104 };
0105 
0106 } // namespace Internal
0107 
0108 // clang-format off
0109 /**
0110 \class ROOT::RNTupleModel
0111 \ingroup NTuple
0112 \brief The RNTupleModel encapsulates the schema of an RNTuple.
0113 
0114 The RNTupleModel comprises a collection of hierarchically organized fields. From a model, "entries"
0115 can be extracted or created. For convenience, the RNTupleModel provides a default entry unless it is created as a "bare model".
0116 Models have a unique model identifier that facilitates checking whether entries are compatible with it
0117 (i.e.: have been extracted from that model).
0118 
0119 A model is subject to state transitions during its lifetime: it starts in a *building* state, in which fields can be
0120 added and modified.  Once the schema is finalized, the model gets *frozen*.  Only frozen models can create entries.
0121 From frozen, models move into an *expired* state. In this state, the model is only partially usable: it can be cloned
0122 and queried, but it can't be unfrozen anymore and no new entries can be created.  This state is used for models
0123 that were used for writing and are no longer connected to a page sink.
0124 
0125 ```
0126 (Model gets created)
0127      |
0128      |       (passed to a Sink            (detached from
0129  ____v______  or explicitly    __________  Sink after     ___________
0130 |           | frozen)         |          | writing)      |           |
0131 | Building  |---------------->|  Frozen  |-------------->|  Expired  |
0132 |___________|<----------------|__________|               |___________|
0133              (explicitly
0134               unfrozen)
0135 ```
0136 
0137 */
0138 // clang-format on
0139 class RNTupleModel {
0140    friend ROOT::RFieldZero &Internal::GetFieldZeroOfModel(RNTupleModel &);
0141    friend Internal::RProjectedFields &Internal::GetProjectedFieldsOfModel(RNTupleModel &);
0142 
0143 public:
0144    /// User-provided function that describes the mapping of existing source fields to projected fields in terms
0145    /// of fully qualified field names. The mapping function is called with the qualified field names of the provided
0146    /// field and the subfields. It should return the qualified field names used as a mapping source.
0147    /// See AddProjectedField() for more details.
0148    using FieldMappingFunc_t = std::function<std::string(const std::string &)>;
0149 
0150    class RUpdater;
0151 
0152 private:
0153    /// The states a model can be in. Possible transitions are between kBuilding and kFrozen
0154    /// and from kFrozen to kExpired.
0155    /// See RNTupleModel for the state transition graph.
0156    enum class EState {
0157       kBuilding,
0158       kFrozen,
0159       kExpired
0160    };
0161 
0162    /// Hierarchy of fields consisting of simple types and collections (sub trees)
0163    std::unique_ptr<ROOT::RFieldZero> fFieldZero;
0164    /// Contains field values corresponding to the created top-level fields, as well as registered subfields
0165    std::unique_ptr<ROOT::REntry> fDefaultEntry;
0166    /// Keeps track of which field names are taken, including projected field names.
0167    std::unordered_set<std::string> fFieldNames;
0168    /// Free text set by the user
0169    std::string fDescription;
0170    /// The set of projected top-level fields
0171    std::unique_ptr<Internal::RProjectedFields> fProjectedFields;
0172    /// Keeps track of which subfields have been registered to be included in entries belonging to this model.
0173    std::unordered_set<std::string> fRegisteredSubfields;
0174    /// Every model has a unique ID to distinguish it from other models. Entries are linked to models via the ID.
0175    /// Cloned models get a new model ID. Expired models are cloned into frozen models.
0176    std::uint64_t fModelId = 0;
0177    /// Models have a separate schema ID to remember that the clone of a frozen model still has the same schema.
0178    std::uint64_t fSchemaId = 0;
0179    /// Changed by Freeze() / Unfreeze() and by the RUpdater.
0180    EState fModelState = EState::kBuilding;
0181 
0182    /// Checks that user-provided field names are valid in the context of this RNTupleModel.
0183    /// Throws an RException for invalid names, empty names (the empty name is reserved for the zero field) and
0184    /// duplicate field names.
0185    void EnsureValidFieldName(std::string_view fieldName);
0186 
0187    /// Throws an RException if the model is frozen; the state is tracked in fModelState (presumably checked via IsFrozen())
0188    void EnsureNotFrozen() const;
0189 
0190    /// Throws an RException if fDefaultEntry is nullptr
0191    void EnsureNotBare() const;
0192 
0193    /// The field name can be a top-level field or a nested field. Returns nullptr if the field is not in the model.
0194    ROOT::RFieldBase *FindField(std::string_view fieldName) const;
0195 
0196    /// Add a subfield to the provided entry. If `initializeValue` is false, a nullptr will be bound to the entry value
0197    /// (used in bare models).
0198    void AddSubfield(std::string_view fieldName, ROOT::REntry &entry, bool initializeValue = true) const;
0199 
0200    RNTupleModel(std::unique_ptr<ROOT::RFieldZero> fieldZero); ///< Private; presumably used by the Create*() factories
0201 
0202 public:
0203    RNTupleModel(const RNTupleModel &) = delete;
0204    RNTupleModel &operator=(const RNTupleModel &) = delete;
0205    ~RNTupleModel() = default;
0206 
0207    std::unique_ptr<RNTupleModel> Clone() const; ///< Cloned models get a new model ID (see fModelId)
0208    static std::unique_ptr<RNTupleModel> Create();
0209    static std::unique_ptr<RNTupleModel> Create(std::unique_ptr<ROOT::RFieldZero> fieldZero);
0210    /// Creates a "bare model", i.e. an RNTupleModel with no default entry
0211    static std::unique_ptr<RNTupleModel> CreateBare();
0212    /// Creates a "bare model", i.e. an RNTupleModel with no default entry, with the given field zero.
0213    static std::unique_ptr<RNTupleModel> CreateBare(std::unique_ptr<ROOT::RFieldZero> fieldZero);
0214 
0215    /// Creates a new field given a `name` or `{name, description}` pair and a
0216    /// corresponding, default-constructed value that is managed by a shared pointer (null if the model is bare).
0217    ///
0218    /// **Example: create some fields and fill an %RNTuple**
0219    /// ~~~ {.cpp}
0220    /// #include <ROOT/RNTupleModel.hxx>
0221    /// #include <ROOT/RNTupleWriter.hxx>
0222    /// using ROOT::RNTupleWriter;
0223    ///
0224    /// #include <vector>
0225    ///
0226    /// auto model = ROOT::RNTupleModel::Create();
0227    /// auto pt = model->MakeField<float>("pt");
0228    /// auto vec = model->MakeField<std::vector<int>>("vec");
0229    ///
0230    /// // The RNTuple is written to disk when the RNTupleWriter goes out of scope
0231    /// {
0232    ///    auto writer = RNTupleWriter::Recreate(std::move(model), "myNTuple", "myFile.root");
0233    ///    for (int i = 0; i < 100; i++) {
0234    ///       *pt = static_cast<float>(i);
0235    ///       *vec = {i, i+1, i+2};
0236    ///       writer->Fill();
0237    ///    }
0238    /// }
0239    /// ~~~
0240    ///
0241    /// **Example: create a field with a description**
0242    /// ~~~ {.cpp}
0243    /// #include <ROOT/RNTupleModel.hxx>
0244    ///
0245    /// auto model = ROOT::RNTupleModel::Create();
0246    /// auto hadronFlavour = model->MakeField<float>(
0247    ///    "hadronFlavour", "flavour from hadron ghost clustering"
0248    /// );
0249    /// ~~~
0250    template <typename T>
0251    std::shared_ptr<T> MakeField(std::string_view name, std::string_view description = "")
0252    {
0253       EnsureNotFrozen();
0254       EnsureValidFieldName(name);
0255       auto field = std::make_unique<ROOT::RField<T>>(name);
0256       field->SetDescription(description);
0257       std::shared_ptr<T> ptr;
0258       if (fDefaultEntry) // bare models have no default entry, so `ptr` stays null for them
0259          ptr = fDefaultEntry->AddValue<T>(*field);
0260       fFieldNames.insert(field->GetFieldName()); // read the name before ownership of `field` is moved away below
0261       fFieldZero->Attach(std::move(field));
0262       return ptr;
0263    }
0264 
0265    /// Adds a field whose type is not known at compile time. No shared pointer is returned in this case:
0266    /// pointers should be retrieved or bound via REntry.
0267    ///
0268    /// Throws an RException if the field is null.
0269    void AddField(std::unique_ptr<ROOT::RFieldBase> field);
0270 
0271    /// Register a subfield so it can be accessed directly from entries belonging to the model. Because registering a
0272    /// subfield does not fundamentally change the model, previously created entries will not be invalidated, nor
0273    /// modified in any way; a registered subfield is merely an accessor added to the default entry (if present) and any
0274    /// entries created afterwards. Note that previously created entries won't have this subfield added to them.
0275    ///
0276    /// Using models with registered subfields for writing is not allowed. Attempting to do so will result in an
0277    /// exception.
0278    ///
0279    /// Throws an RException if the provided subfield could not be found in the model.
0280    void RegisterSubfield(std::string_view qualifiedFieldName);
0281 
0282    /// Adds a top-level field based on existing fields.
0283    ///
0284    /// The mapping function takes one argument, which is a string containing the name of the projected field. The return
0285    /// value of the mapping function should be the name of the (existing) field onto which the projection is made.
0286    /// **Example**
0287    /// ~~~ {.cpp}
0288    /// auto model = RNTupleModel::Create();
0289    /// model->MakeField<float>("met");
0290    /// auto metProjection = ROOT::RFieldBase::Create("missingE", "float").Unwrap();
0291    /// model->AddProjectedField(std::move(metProjection), [](const std::string &) { return "met"; });
0292    /// ~~~
0293    ///
0294    /// Adding projections for collection fields is also possible, as long as they follow the same schema structure. For
0295    /// example, a projection of a collection of structs onto a collection of scalars is possible, but a projection of a
0296    /// collection of a collection of scalars onto a collection of scalars is not.
0297    ///
0298    /// In the case of projections for nested fields, the mapping function must provide a mapping for every nesting
0299    /// level.
0300    /// **Example**
0301    /// ~~~ {.cpp}
0302    /// struct P { int x, y; };
0303    ///
0304    /// auto model = RNTupleModel::Create();
0305    /// model->MakeField<std::vector<P>>("points");
0306    /// auto pxProjection = ROOT::RFieldBase::Create("pxs", "std::vector<int>").Unwrap();
0307    /// model->AddProjectedField(std::move(pxProjection), [](const std::string &fieldName) {
0308    ///   if (fieldName == "pxs")
0309    ///     return "points";
0310    ///   else
0311    ///     return "points._0.x";
0312    /// });
0313    /// ~~~
0314    ///
0315    /// Creating projections for fields containing `std::variant` or fixed-size arrays is unsupported.
0316    RResult<void> AddProjectedField(std::unique_ptr<ROOT::RFieldBase> field, FieldMappingFunc_t mapping);
0317 
0318    /// Transitions an RNTupleModel from the *building* state to the *frozen* state, disabling adding additional fields
0319    /// and enabling creating entries from it. Freezing an already-frozen model is a no-op. Throws an RException if the
0320    /// model is in the *expired* state. See RNTupleModel for a more detailed explanation on the state transitions.
0321    void Freeze();
0322    /// Transitions an RNTupleModel from the *frozen* state back to the *building* state, invalidating all previously
0323    /// created entries, re-enabling adding additional fields and disabling creating entries from it. Unfreezing a model
0324    /// that is already in the *building* state is a no-op. Throws an RException if the model is in the *expired* state.
0325    /// See RNTupleModel for a more detailed explanation on the state transitions.
0326    void Unfreeze();
0327    /// Transitions an RNTupleModel from the *frozen* state to the *expired* state, invalidating all previously created
0328    /// entries, disabling creating new entries from it and disabling further state transitions. Expiring a model that is
0329    /// already expired is a no-op. Throws an RException if the model is in the *building* state. See RNTupleModel for a
0330    /// more detailed explanation on the state transitions.
0331    void Expire();
0332    /// \see Expire()
0333    bool IsExpired() const { return fModelState == EState::kExpired; }
0334    /// Note that an expired model also counts as frozen. \see Freeze()
0335    bool IsFrozen() const { return (fModelState == EState::kFrozen) || (fModelState == EState::kExpired); }
0336    /// \see CreateBare()
0337    bool IsBare() const { return !fDefaultEntry; }
0338    std::uint64_t GetModelId() const { return fModelId; } ///< ID that links entries to this model
0339    std::uint64_t GetSchemaId() const { return fSchemaId; } ///< Schema ID; marks clones of a frozen model as same-schema
0340 
0341    /// Creates a new entry with default values for each field.
0342    std::unique_ptr<REntry> CreateEntry() const;
0343    /// Creates a "bare entry", i.e. an entry with all null values. The user needs to explicitly call BindValue() or
0344    /// BindRawPtr() to set memory addresses before serializing / deserializing the entry.
0345    std::unique_ptr<REntry> CreateBareEntry() const;
0346    std::unique_ptr<Experimental::Detail::RRawPtrWriteEntry> CreateRawPtrWriteEntry() const;
0347    /// Creates a token to be used in REntry methods to address a field present in the entry
0348    ROOT::RFieldToken GetToken(std::string_view fieldName) const;
0349    /// Calls the given field's CreateBulk() method. Throws an RException if no field with the given name exists.
0350    ROOT::RFieldBase::RBulkValues CreateBulk(std::string_view fieldName) const;
0351 
0352    /// Retrieves the default entry of this model.
0353    /// Throws an RException if this is a bare model (i.e. if it was created with CreateBare()).
0354    REntry &GetDefaultEntry();
0355    /// \see GetDefaultEntry()
0356    const REntry &GetDefaultEntry() const;
0357 
0358    /// Retrieves the field zero of this model, i.e. the root of the field hierarchy.
0359    /// This may be used to make adjustments on the field hierarchy before the model is frozen.
0360    ROOT::RFieldZero &GetMutableFieldZero();
0361    /// Retrieves the field zero of this model, i.e. the root of the field hierarchy.
0362    const ROOT::RFieldZero &GetConstFieldZero() const { return *fFieldZero; }
0363    /// Retrieves the field with fully-qualified name `fieldName`.
0364    /// Dot-separated names are used to walk down the field hierarchy: e.g. `"parent.child"` should
0365    /// be used to retrieve a field with name `"child"` whose parent is the top-level field with name `"parent"`.
0366    /// Throws an RException if no field is found with the given name.
0367    ROOT::RFieldBase &GetMutableField(std::string_view fieldName);
0368    /// \see GetMutableField()
0369    const ROOT::RFieldBase &GetConstField(std::string_view fieldName) const;
0370 
0371    const std::string &GetDescription() const { return fDescription; }
0372    void SetDescription(std::string_view description);
0373 
0374    /// Get the names of the fields currently present in the model, including projected fields. Registered subfields
0375    /// are not included, use GetRegisteredSubfieldNames() for this.
0376    const std::unordered_set<std::string> &GetFieldNames() const { return fFieldNames; }
0377    /// Get the (qualified) names of subfields that have been registered (via RegisterSubfield()) to be included in
0378    /// entries from this model.
0379    const std::unordered_set<std::string> &GetRegisteredSubfieldNames() const { return fRegisteredSubfields; }
0380 
0381    /// Estimate the memory usage for this model during writing
0382    ///
0383    /// This will return an estimate in bytes for the internal page and compression buffers. The value should be
0384    /// understood per sequential RNTupleWriter or per RNTupleFillContext created for an RNTupleParallelWriter
0385    /// constructed with this model.
0386    std::size_t EstimateWriteMemoryUsage(const ROOT::RNTupleWriteOptions &options = ROOT::RNTupleWriteOptions()) const;
0387 };
0388 
0389 namespace Internal {
0390 
0391 // clang-format off
0392 /**
0393 \class ROOT::Internal::RNTupleModelChangeset
0394 \ingroup NTuple
0395 \brief The incremental changes to a `RNTupleModel`
0396 
0397 Represents a set of alterations to an `RNTupleModel` that happen after the model has been used to initialize an `RPageSink`
0398 instance. This object can be used to communicate metadata updates to an `RPageSink`.
0399 You will not normally use this directly; see `RNTupleModel::RUpdater` instead.
0400 */
0401 // clang-format on
0402 struct RNTupleModelChangeset {
0403    RNTupleModel &fModel; ///< The model being altered; held by reference, so it must outlive the changeset
0404    /// Points to the fields in fModel that were added as part of an updater transaction
0405    std::vector<ROOT::RFieldBase *> fAddedFields; // NOTE(review): <vector> is not included directly by this header
0406    /// Points to the projected fields in fModel that were added as part of an updater transaction
0407    std::vector<ROOT::RFieldBase *> fAddedProjectedFields;
0408 
0409    RNTupleModelChangeset(RNTupleModel &model) : fModel(model) {} // NOTE(review): not `explicit` — confirm intended
0410    bool IsEmpty() const { return fAddedFields.empty() && fAddedProjectedFields.empty(); } ///< True if nothing was added
0411    /// Presumably adds `field` to fModel and records it in fAddedFields — confirm against the implementation.
0412    void AddField(std::unique_ptr<ROOT::RFieldBase> field);
0413 
0414    /// \see RNTupleModel::AddProjectedField()
0415    ROOT::RResult<void>
0416    AddProjectedField(std::unique_ptr<ROOT::RFieldBase> field, RNTupleModel::FieldMappingFunc_t mapping);
0417 };
0418 
0419 } // namespace Internal
0420 
0421 /// A model is usually immutable after passing it to an `RNTupleWriter`. However, for the rare
0422 /// cases that require changing the model after the fact, `RUpdater` provides limited support for
0423 /// incremental updates, e.g. addition of new fields.
0424 ///
0425 /// See `RNTupleWriter::CreateModelUpdater()` for an example.
0426 class RNTupleModel::RUpdater {
0427 private:
0428    ROOT::RNTupleWriter &fWriter;
0429    Internal::RNTupleModelChangeset fOpenChangeset; ///< Changes accumulated since the last commit
0430    std::uint64_t fNewModelId = 0; ///< The model ID after committing
0431 
0432 public:
0433    explicit RUpdater(ROOT::RNTupleWriter &writer);
0434    ~RUpdater() { CommitUpdate(); } // NOTE(review): commits on destruction; a throwing CommitUpdate() would terminate — confirm
0435    /// Begin a new set of alterations to the underlying model. As a side effect, all REntry
0436    /// instances related to the model are invalidated.
0437    void BeginUpdate();
0438    /// Commit changes since the last call to `BeginUpdate()`. All the invalidated REntries remain
0439    /// invalid. `CreateEntry()` or `CreateBareEntry()` can be used to create an REntry that
0440    /// matches the new model. Upon completion, `BeginUpdate()` can be called again to begin a new set of changes.
0441    void CommitUpdate();
0442    /// Like RNTupleModel::MakeField(), but additionally records the new top-level field in the open changeset.
0443    template <typename T>
0444    std::shared_ptr<T> MakeField(std::string_view name, std::string_view description = "")
0445    {
0446       auto objPtr = fOpenChangeset.fModel.MakeField<T>(name, description); // the model validates the name and state
0447       auto fieldZero = fOpenChangeset.fModel.fFieldZero.get();
0448       auto it = // NOTE(review): std::find_if needs <algorithm>, which this header does not include directly
0449          std::find_if(fieldZero->begin(), fieldZero->end(), [&](const auto &f) { return f.GetFieldName() == name; });
0450       R__ASSERT(it != fieldZero->end()); // the field was attached just above, so the lookup must succeed
0451       fOpenChangeset.fAddedFields.emplace_back(&(*it)); // non-owning pointer; the field stays owned by the model
0452       return objPtr;
0453    }
0454 
0455    void AddField(std::unique_ptr<ROOT::RFieldBase> field);
0456 
0457    /// \see RNTupleModel::AddProjectedField()
0458    RResult<void> AddProjectedField(std::unique_ptr<ROOT::RFieldBase> field, FieldMappingFunc_t mapping);
0459 };
0460 
0461 } // namespace ROOT
0462 
0463 #endif