Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-16 09:08:32

0001 /// \file ROOT/RFieldBase.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2018-10-09
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RFieldBase
0015 #define ROOT_RFieldBase
0016 
0017 #include <ROOT/RColumn.hxx>
0018 #include <ROOT/RCreateFieldOptions.hxx>
0019 #include <ROOT/RNTupleRange.hxx>
0020 #include <ROOT/RNTupleUtil.hxx>
0021 
0022 #include <cstddef>
0023 #include <functional>
0024 #include <iterator>
0025 #include <memory>
0026 #include <new>
0027 #include <string>
0028 #include <string_view>
0029 #include <vector>
0030 
0031 namespace ROOT {
0032 
0033 class RFieldBase;
0034 class RClassField;
0035 
0036 namespace Detail {
0037 class RFieldVisitor;
0038 } // namespace Detail
0039 
0040 namespace Experimental {
0041 
0042 class RNTupleJoinProcessor;
0043 
0044 namespace Detail {
0045 class RRawPtrWriteEntry;
0046 } // namespace Detail
0047 
0048 } // namespace Experimental
0049 
0050 namespace Internal {
0051 
0052 class RPageSink;
0053 class RPageSource;
0054 struct RFieldCallbackInjector;
0055 struct RFieldRepresentationModifier;
0056 
0057 // TODO(jblomer): find a better way to not have these methods in the RFieldBase public API
0058 void CallFlushColumnsOnField(RFieldBase &);
0059 void CallCommitClusterOnField(RFieldBase &);
0060 void CallConnectPageSinkOnField(RFieldBase &, ROOT::Internal::RPageSink &, ROOT::NTupleSize_t firstEntry = 0);
0061 void CallConnectPageSourceOnField(RFieldBase &, ROOT::Internal::RPageSource &);
0062 ROOT::RResult<std::unique_ptr<ROOT::RFieldBase>>
0063 CallFieldBaseCreate(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options,
0064                     const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId);
0065 
0066 } // namespace Internal
0067 
0068 // clang-format off
0069 /**
0070 \class ROOT::RFieldBase
0071 \ingroup NTuple
0072 \brief A field translates read and write calls from/to underlying columns to/from tree values
0073 
0074 A field is a serializable C++ type or a container for a collection of subfields. The RFieldBase and its
0075 type-safe descendants provide the object to column mapper. They map C++ objects to primitive columns.  The
0076 mapping is trivial for simple types such as 'double'. Complex types resolve to multiple primitive columns.
0077 The field knows based on its type and the field name the type(s) and name(s) of the columns.
0078 
0079 Note: the class hierarchy starting at RFieldBase is not meant to be extended by user-provided child classes.
0080 This is and can only be partially enforced through C++.
0081 */
0082 // clang-format on
0083 class RFieldBase {
0084    friend class ROOT::RClassField;                             // to mark members as artificial
0085    friend class ROOT::Experimental::RNTupleJoinProcessor;      // needs ConstructValue
0086    friend class ROOT::Experimental::Detail::RRawPtrWriteEntry; // to call Append()
0087    friend struct ROOT::Internal::RFieldCallbackInjector;       // used for unit tests
0088    friend struct ROOT::Internal::RFieldRepresentationModifier; // used for unit tests
0089    friend void Internal::CallFlushColumnsOnField(RFieldBase &);
0090    friend void Internal::CallCommitClusterOnField(RFieldBase &);
0091    friend void Internal::CallConnectPageSinkOnField(RFieldBase &, ROOT::Internal::RPageSink &, ROOT::NTupleSize_t);
0092    friend void Internal::CallConnectPageSourceOnField(RFieldBase &, ROOT::Internal::RPageSource &);
0093    friend ROOT::RResult<std::unique_ptr<ROOT::RFieldBase>>
0094    Internal::CallFieldBaseCreate(const std::string &fieldName, const std::string &typeName,
0095                                  const ROOT::RCreateFieldOptions &options, const ROOT::RNTupleDescriptor *desc,
0096                                  ROOT::DescriptorId_t fieldId);
0097 
0098    using ReadCallback_t = std::function<void(void *)>;
0099 
0100 protected:
0101    /// A functor to release the memory acquired by CreateValue() (memory and constructor).
0102    /// This implementation works for types with a trivial destructor. More complex fields implement a derived deleter.
0103    /// The deleter is operational without the field object and thus can be used to destruct/release a value after
0104    /// the field has been destructed.
class RDeleter {
public:
   virtual ~RDeleter() = default;

   /// Disposes of the value at `objPtr`. This base version runs no destructor; it only hands the storage back.
   /// \param objPtr Address of the value to dispose of
   /// \param dtorOnly If true, the storage is left untouched and the call has no effect here
   virtual void operator()(void *objPtr, bool dtorOnly)
   {
      if (dtorOnly)
         return;
      operator delete(objPtr);
   }
};
0114 
0115    /// A deleter for templated RFieldBase descendants where the value type is known.
0116    template <typename T>
0117    class RTypedDeleter : public RDeleter {
0118    public:
0119       void operator()(void *objPtr, bool dtorOnly) final
0120       {
0121          std::destroy_at(static_cast<T *>(objPtr));
0122          RDeleter::operator()(objPtr, dtorOnly);
0123       }
0124    };
0125 
0126    // We cannot directly use RFieldBase::RDeleter as a shared pointer deleter due to slicing. We use this
0127    // wrapper class to store a polymorphic pointer to the actual deleter.
0128    struct RSharedPtrDeleter {
0129       std::unique_ptr<RFieldBase::RDeleter> fDeleter;
0130       void operator()(void *objPtr) { fDeleter->operator()(objPtr, false /* dtorOnly*/); }
0131       explicit RSharedPtrDeleter(std::unique_ptr<RFieldBase::RDeleter> deleter) : fDeleter(std::move(deleter)) {}
0132    };
0133 
0134 public:
0135    static constexpr std::uint32_t kInvalidTypeVersion = -1U;
enum {
   /// Any bit pattern in the allocated memory is a valid value, so no constructor call is required.
   /// Fields with this trait have a no-op ConstructValue() implementation.
   kTraitTriviallyConstructible = 1 << 0,
   /// The destructor is a no-op; freeing the memory is all the cleanup the type needs.
   kTraitTriviallyDestructible = 1 << 1,
   /// Fundamental type that maps as-is to a single column and can be accessed through RField<T>::Map()
   kTraitMappable = 1 << 2,
   /// The TClass checksum is set and valid
   kTraitTypeChecksum = 1 << 3,
   /// The field is an RInvalidField instance; a `static_cast` to that type is safe
   kTraitInvalidField = 1 << 4,
   /// User defined type without dictionaries that was reconstructed from the on-disk information
   kTraitEmulatedField = 1 << 5,

   /// Convenience combination: trivially constructible and trivially destructible
   kTraitTrivialType = kTraitTriviallyConstructible | kTraitTriviallyDestructible
};
0156 
0157    using ColumnRepresentation_t = std::vector<ROOT::ENTupleColumnType>;
0158 
0159    /// During its lifetime, a field undergoes the following possible state transitions:
0160    ///
0161    ///  [*] --> Unconnected --> ConnectedToSink ----
0162    ///               |      |                      |
0163    ///               |      --> ConnectedToSource ---> [*]
0164    ///               |                             |
0165    ///               -------------------------------
enum class EState {
   kUnconnected,     ///< Neither a page sink nor a page source is attached
   kConnectedToSink, ///< Attached to a page sink
   kConnectedToSource ///< Attached to a page source
};
0171 
0172    // clang-format off
0173    /**
0174    \class ROOT::RFieldBase::RColumnRepresentations
0175    \ingroup NTuple
0176    \brief The list of column representations a field can have.
0177 
0178    Some fields have multiple possible column representations, e.g. with or without split encoding.
0179    All column representations supported for writing also need to be supported for reading. In addition,
0180    fields can support extra column representations for reading only, e.g. a 64bit integer reading from a
0181    32bit column.
0182    The defined column representations must be supported by corresponding column packing/unpacking implementations,
0183    i.e. for the example above, the unpacking of 32bit ints to 64bit pages must be implemented in RColumnElement.hxx
0184    */
0185    // clang-format on
0186    class RColumnRepresentations {
0187    public:
0188       /// A list of column representations
0189       using Selection_t = std::vector<ColumnRepresentation_t>;
0190 
0191       RColumnRepresentations();
0192       RColumnRepresentations(const Selection_t &serializationTypes, const Selection_t &deserializationExtraTypes);
0193 
0194       /// The first column list from `fSerializationTypes` is the default for writing.
0195       const ColumnRepresentation_t &GetSerializationDefault() const { return fSerializationTypes[0]; }
0196       const Selection_t &GetSerializationTypes() const { return fSerializationTypes; }
0197       const Selection_t &GetDeserializationTypes() const { return fDeserializationTypes; }
0198 
0199    private:
0200       Selection_t fSerializationTypes;
0201       /// The union of the serialization types and the deserialization extra types passed during construction.
0202       /// Duplicates the serialization types list but the benefit is that GetDeserializationTypes() does not need to
0203       /// compile the list.
0204       Selection_t fDeserializationTypes;
0205    }; // class RColumnRepresentations
0206 
0207    class RValue;
0208    class RBulkValues;
0209 
0210 private:
0211    /// The field name relative to its parent field
0212    std::string fName;
0213    /// The C++ type captured by this field
0214    std::string fType;
0215    /// The role of this field in the data model structure
0216    ROOT::ENTupleStructure fStructure;
0217    /// For fixed sized arrays, the array length
0218    std::size_t fNRepetitions;
0219    /// A field qualifies as simple if it is mappable (which implies it has a single principal column),
0220    /// and it is not an artificial field and has no post-read callback
0221    bool fIsSimple;
0222    /// A field that is not backed on disk but computed, e.g. a default-constructed missing field or
0223    /// a field whose data is created by I/O customization rules. Subfields of artificial fields are
0224    /// artificial, too.
0225    bool fIsArtificial = false;
0226    /// When the columns are connected to a page source or page sink, the field represents a field id in the
0227    /// corresponding RNTuple descriptor. This on-disk ID is set in RPageSink::Create() for writing and by
0228    /// RFieldDescriptor::CreateField() when recreating a field / model from the stored descriptor.
0229    ROOT::DescriptorId_t fOnDiskId = ROOT::kInvalidDescriptorId;
0230    /// Free text set by the user
0231    std::string fDescription;
0232    /// Changed by ConnectTo[Sink,Source], reset by Clone()
0233    EState fState = EState::kUnconnected;
0234 
0235    void InvokeReadCallbacks(void *target)
0236    {
0237       for (const auto &func : fReadCallbacks)
0238          func(target);
0239    }
0240 
0241    /// Translate an entry index to a column element index of the principal column and vice versa. These functions
0242    /// take into account the role and number of repetitions on each level of the field hierarchy as follows:
0243    /// - Top level fields: element index == entry index
0244    /// - Record fields propagate their principal column index to the principal columns of direct descendant fields
0245    /// - Collection and variant fields set the principal column index of their children to 0
0246    ///
0247    /// The column element index also depends on the number of repetitions of each field in the hierarchy, e.g., given a
0248    /// field with type `std::array<std::array<float, 4>, 2>`, this function returns 8 for the innermost field.
0249    ROOT::NTupleSize_t EntryToColumnElementIndex(ROOT::NTupleSize_t globalIndex) const;
0250 
0251    /// Flushes data from active columns
0252    void FlushColumns();
0253    /// Flushes data from active columns to disk and calls CommitClusterImpl()
0254    void CommitCluster();
0255    /// Fields and their columns live in the void until connected to a physical page storage.  Only once connected, data
0256    /// can be read or written.  In order to find the field in the page storage, the field's on-disk ID has to be set.
0257    /// \param firstEntry The global index of the first entry with on-disk data for the connected field
0258    void ConnectPageSink(ROOT::Internal::RPageSink &pageSink, ROOT::NTupleSize_t firstEntry = 0);
0259    /// Connects the field and its subfield tree to the given page source. Once connected, data can be read.
0260    /// Only unconnected fields may be connected, i.e. the method is not idempotent. The field ID has to be set prior to
0261    /// calling this function. For subfields, a field ID may or may not be set. If the field ID is unset, it will be
0262    /// determined using the page source descriptor, based on the parent field ID and the subfield name.
0263    void ConnectPageSource(ROOT::Internal::RPageSource &pageSource);
0264 
0265    void SetArtificial()
0266    {
0267       fIsSimple = false;
0268       fIsArtificial = true;
0269       for (auto &field : fSubfields) {
0270          field->SetArtificial();
0271       }
0272    }
0273 
0274 protected:
0275    struct RBulkSpec;
0276 
0277    /// Collections and classes own subfields
0278    std::vector<std::unique_ptr<RFieldBase>> fSubfields;
0279    /// Subfields point to their mother field
0280    RFieldBase *fParent;
0281    /// All fields that have columns have a distinct main column. E.g., for simple fields (`float`, `int`, ...), the
0282    /// principal column corresponds to the field type. For collection fields except fixed-sized arrays,
0283    /// the main column is the offset field.  Class fields have no column of their own.
0284    /// When reading, points to any column of the column team of the active representation. Usually, this is just
0285    /// the first column.
0286    /// When writing, points to the first column index of the currently active (not suppressed) column representation.
0287    ROOT::Internal::RColumn *fPrincipalColumn = nullptr;
0288    /// Some fields have a second column in their column representation. In this case, `fAuxiliaryColumn` points into
0289    /// `fAvailableColumns` to the column that immediately follows the column `fPrincipalColumn` points to.
0290    ROOT::Internal::RColumn *fAuxiliaryColumn = nullptr;
0291    /// The columns are connected either to a sink or to a source (not to both); they are owned by the field.
0292    /// Contains all columns of all representations in order of representation and column index.
0293    std::vector<std::unique_ptr<ROOT::Internal::RColumn>> fAvailableColumns;
0294    /// Properties of the type that allow for optimizations of collections of that type
0295    std::uint32_t fTraits = 0;
0296    /// A typedef or using name that was used when creating the field
0297    std::string fTypeAlias;
0298    /// List of functions to be called after reading a value
0299    std::vector<ReadCallback_t> fReadCallbacks;
0300    /// C++ type version cached from the descriptor after a call to ConnectPageSource()
0301    std::uint32_t fOnDiskTypeVersion = kInvalidTypeVersion;
0302    /// TClass checksum cached from the descriptor after a call to ConnectPageSource(). Only set
0303    /// for classes with dictionaries.
0304    std::uint32_t fOnDiskTypeChecksum = 0;
0305    /// Pointers into the static vector returned by RColumnRepresentations::GetSerializationTypes() when
0306    /// SetColumnRepresentatives() is called. Otherwise (if empty) GetColumnRepresentatives() returns a vector
0307    /// with a single element, the default representation.  Always empty for artificial fields.
0308    std::vector<std::reference_wrapper<const ColumnRepresentation_t>> fColumnRepresentatives;
0309 
0310    /// Factory method for the field's type. The caller owns the returned pointer
0311    void *CreateObjectRawPtr() const;
0312 
0313    /// Helpers for generating columns. We use the fact that most fields have the same C++/memory types
0314    /// for all their column representations.
0315    /// Where possible, we call the helpers not from the header to reduce compilation time.
0316    template <std::uint32_t ColumnIndexT, typename HeadT, typename... TailTs>
0317    void GenerateColumnsImpl(const ColumnRepresentation_t &representation, std::uint16_t representationIndex)
0318    {
0319       assert(ColumnIndexT < representation.size());
0320       auto &column = fAvailableColumns.emplace_back(
0321          ROOT::Internal::RColumn::Create<HeadT>(representation[ColumnIndexT], ColumnIndexT, representationIndex));
0322 
0323       // Initially, the first two columns become the active column representation
0324       if (representationIndex == 0 && !fPrincipalColumn) {
0325          fPrincipalColumn = column.get();
0326       } else if (representationIndex == 0 && !fAuxiliaryColumn) {
0327          fAuxiliaryColumn = column.get();
0328       } else {
0329          // We currently have no fields with more than 2 columns in its column representation
0330          R__ASSERT(representationIndex > 0);
0331       }
0332 
0333       if constexpr (sizeof...(TailTs))
0334          GenerateColumnsImpl<ColumnIndexT + 1, TailTs...>(representation, representationIndex);
0335    }
0336 
0337    /// For writing, use the currently set column representative
0338    template <typename... ColumnCppTs>
0339    void GenerateColumnsImpl()
0340    {
0341       if (fColumnRepresentatives.empty()) {
0342          fAvailableColumns.reserve(sizeof...(ColumnCppTs));
0343          GenerateColumnsImpl<0, ColumnCppTs...>(GetColumnRepresentations().GetSerializationDefault(), 0);
0344       } else {
0345          const auto N = fColumnRepresentatives.size();
0346          fAvailableColumns.reserve(N * sizeof...(ColumnCppTs));
0347          for (unsigned i = 0; i < N; ++i) {
0348             GenerateColumnsImpl<0, ColumnCppTs...>(fColumnRepresentatives[i].get(), i);
0349          }
0350       }
0351    }
0352 
0353    /// For reading, use the on-disk column list
0354    template <typename... ColumnCppTs>
0355    void GenerateColumnsImpl(const ROOT::RNTupleDescriptor &desc)
0356    {
0357       std::uint16_t representationIndex = 0;
0358       do {
0359          const auto &onDiskTypes = EnsureCompatibleColumnTypes(desc, representationIndex);
0360          if (onDiskTypes.empty())
0361             break;
0362          GenerateColumnsImpl<0, ColumnCppTs...>(onDiskTypes, representationIndex);
0363          fColumnRepresentatives.emplace_back(onDiskTypes);
0364          if (representationIndex > 0) {
0365             for (std::size_t i = 0; i < sizeof...(ColumnCppTs); ++i) {
0366                fAvailableColumns[i]->MergeTeams(
0367                   *fAvailableColumns[representationIndex * sizeof...(ColumnCppTs) + i].get());
0368             }
0369          }
0370          representationIndex++;
0371       } while (true);
0372    }
0373 
0374    /// Implementations in derived classes should return a static RColumnRepresentations object. The default
0375    /// implementation does not attach any columns to the field.
0376    virtual const RColumnRepresentations &GetColumnRepresentations() const;
0377    /// Implementations in derived classes should create the backing columns corresponding to the field type for
0378    /// writing. The default implementation does not attach any columns to the field.
0379    virtual void GenerateColumns() {}
0380    /// Implementations in derived classes should create the backing columns corresponding to the field type for reading.
0381    /// The default implementation does not attach any columns to the field. The method should check, using the page
0382    /// source and `fOnDiskId`, if the column types match and throw if they don't.
0383    virtual void GenerateColumns(const ROOT::RNTupleDescriptor & /*desc*/) {}
0384    /// Returns the on-disk column types found in the provided descriptor for `fOnDiskId` and the given
0385    /// representation index. If there are no columns for the given representation index, return an empty
0386    /// ColumnRepresentation_t list. Otherwise, the returned reference points into the static array returned by
0387    /// GetColumnRepresentations().
0388    /// Throws an exception if the types on disk don't match any of the deserialization types from
0389    /// GetColumnRepresentations().
0390    const ColumnRepresentation_t &
0391    EnsureCompatibleColumnTypes(const ROOT::RNTupleDescriptor &desc, std::uint16_t representationIndex) const;
0392    /// When connecting a field to a page sink, the field's default column representation is subject
0393    /// to adjustment according to the write options. E.g., if compression is turned off, encoded columns
0394    /// are changed to their unencoded counterparts.
0395    void AutoAdjustColumnTypes(const ROOT::RNTupleWriteOptions &options);
0396 
0397    /// Called by Clone(), which additionally copies the on-disk ID
0398    virtual std::unique_ptr<RFieldBase> CloneImpl(std::string_view newName) const = 0;
0399 
0400    /// Constructs value in a given location of size at least GetValueSize(). Called by the base class' CreateValue().
0401    virtual void ConstructValue(void *where) const = 0;
0402    virtual std::unique_ptr<RDeleter> GetDeleter() const { return std::make_unique<RDeleter>(); }
0403    /// Allow derived classes to call ConstructValue(void *) and GetDeleter() on other (sub)fields.
0404    static void CallConstructValueOn(const RFieldBase &other, void *where) { other.ConstructValue(where); }
0405    static std::unique_ptr<RDeleter> GetDeleterOf(const RFieldBase &other) { return other.GetDeleter(); }
0406 
0407    /// Operations on values of complex types, e.g. ones that involve multiple columns or for which no direct
0408    /// column type exists.
0409    virtual std::size_t AppendImpl(const void *from);
0410    virtual void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to);
0411    virtual void ReadInClusterImpl(RNTupleLocalIndex localIndex, void *to);
0412 
0413    /// Write the given value into columns. The value object has to be of the same type as the field.
0414    /// Returns the number of uncompressed bytes written.
0415    std::size_t Append(const void *from);
0416 
0417    /// Populate a single value with data from the field. The memory location pointed to by `to` needs to be of the
0418    /// fitting type. The fast path is conditioned by the field qualifying as simple, i.e. maps as-is
0419    /// to a single column and has no read callback.
0420    void Read(ROOT::NTupleSize_t globalIndex, void *to)
0421    {
0422       if (fIsSimple)
0423          return (void)fPrincipalColumn->Read(globalIndex, to);
0424 
0425       if (!fIsArtificial) {
0426          if (fTraits & kTraitMappable)
0427             fPrincipalColumn->Read(globalIndex, to);
0428          else
0429             ReadGlobalImpl(globalIndex, to);
0430       }
0431       if (R__unlikely(!fReadCallbacks.empty()))
0432          InvokeReadCallbacks(to);
0433    }
0434 
0435    /// Populate a single value with data from the field. The memory location pointed to by `to` needs to be of the
0436    /// fitting type. The fast path is conditioned by the field qualifying as simple, i.e. maps as-is
0437    /// to a single column and has no read callback.
0438    void Read(RNTupleLocalIndex localIndex, void *to)
0439    {
0440       if (fIsSimple)
0441          return (void)fPrincipalColumn->Read(localIndex, to);
0442 
0443       if (!fIsArtificial) {
0444          if (fTraits & kTraitMappable)
0445             fPrincipalColumn->Read(localIndex, to);
0446          else
0447             ReadInClusterImpl(localIndex, to);
0448       }
0449       if (R__unlikely(!fReadCallbacks.empty()))
0450          InvokeReadCallbacks(to);
0451    }
0452 
0453    /// General implementation of bulk read. Loop over the required range and read values that are required
0454    /// and not already present. Derived classes may implement more optimized versions of this method.
0455    /// See ReadBulk() for the return value.
0456    virtual std::size_t ReadBulkImpl(const RBulkSpec &bulkSpec);
0457 
0458    /// Returns the number of newly available values, that is the number of bools in `bulkSpec.fMaskAvail` that
0459    /// flipped from false to true. As a special return value, `kAllSet` can be used if all values are read
0460    /// independent from the masks.
0461    std::size_t ReadBulk(const RBulkSpec &bulkSpec);
0462 
0463    /// Allow derived classes to call Append() and Read() on other (sub)fields.
0464    static std::size_t CallAppendOn(RFieldBase &other, const void *from) { return other.Append(from); }
0465    static void CallReadOn(RFieldBase &other, RNTupleLocalIndex localIndex, void *to) { other.Read(localIndex, to); }
0466    static void CallReadOn(RFieldBase &other, ROOT::NTupleSize_t globalIndex, void *to) { other.Read(globalIndex, to); }
0467    static void *CallCreateObjectRawPtrOn(RFieldBase &other) { return other.CreateObjectRawPtr(); }
0468 
0469    /// Fields may need direct access to the principal column of their subfields, e.g. in RRVecField::ReadBulk()
0470    static ROOT::Internal::RColumn *GetPrincipalColumnOf(const RFieldBase &other) { return other.fPrincipalColumn; }
0471 
0472    /// Set a user-defined function to be called after reading a value, giving a chance to inspect and/or modify the
0473    /// value object.
0474    /// Returns an index that can be used to remove the callback.
0475    size_t AddReadCallback(ReadCallback_t func);
0476    void RemoveReadCallback(size_t idx);
0477 
0478    // Perform housekeeping tasks for global to cluster-local index translation
0479    virtual void CommitClusterImpl() {}
0480    // The field can indicate that it needs to register extra type information in the on-disk schema.
0481    // In this case, a callback from the page sink to the field will be registered on connect, so that the
0482    // extra type information can be collected when the dataset gets committed.
0483    virtual bool HasExtraTypeInfo() const { return false; }
0484    // The page sink's callback when the data set gets committed will call this method to get the field's extra
0485    // type information. This has to happen at the end of writing because the type information may change depending
0486    // on the data that's written, e.g. for polymorphic types in the streamer field.
0487    virtual ROOT::RExtraTypeInfoDescriptor GetExtraTypeInfo() const { return ROOT::RExtraTypeInfoDescriptor(); }
0488 
0489    /// Add a new subfield to the list of nested fields
0490    void Attach(std::unique_ptr<RFieldBase> child);
0491 
0492    /// Called by ConnectPageSource() before connecting; derived classes may override this as appropriate
0493    virtual void BeforeConnectPageSource(ROOT::Internal::RPageSource &) {}
0494 
0495    /// Called by ConnectPageSource() once connected; derived classes may override this as appropriate
0496    virtual void AfterConnectPageSource() {}
0497 
0498    /// Factory method to resurrect a field from the stored on-disk type information.  This overload takes an already
0499    /// normalized type name and type alias.
0500    /// `desc` and `fieldId` must be passed if `options.fEmulateUnknownTypes` is true, otherwise they can be left blank.
0501    static RResult<std::unique_ptr<RFieldBase>>
0502    Create(const std::string &fieldName, const std::string &typeName, const ROOT::RCreateFieldOptions &options,
0503           const ROOT::RNTupleDescriptor *desc, ROOT::DescriptorId_t fieldId);
0504 
0505 public:
0506    template <bool IsConstT>
0507    class RSchemaIteratorTemplate;
0508    using RSchemaIterator = RSchemaIteratorTemplate<false>;
0509    using RConstSchemaIterator = RSchemaIteratorTemplate<true>;
0510 
0511    // This is used in CreateObject() and is specialized for void
template <typename T>
struct RCreateObjectDeleter {
   // Generic case: objects handed out by CreateObject<T>() are released by the standard deleter for T
   using deleter = std::default_delete<T>;
};
0516 
0517    /// Used in the return value of the Check() method
struct RCheckResult {
   /// Qualified name of the field that caused the error
   std::string fFieldName;
   /// Name of the type that corresponds to the (sub)field
   std::string fTypeName;
   /// Reason for the failure, e.g. unsupported type
   std::string fErrMsg;
};
0523 
0524    /// The constructor creates the underlying column objects and connects them to either a sink or a source.
0525    /// If `isSimple` is `true`, the trait `kTraitMappable` is automatically set on construction. However, the
0526    /// field might be demoted to non-simple if a post-read callback is set.
0527    RFieldBase(std::string_view name, std::string_view type, ROOT::ENTupleStructure structure, bool isSimple,
0528               std::size_t nRepetitions = 0);
0529    RFieldBase(const RFieldBase &) = delete;
0530    RFieldBase(RFieldBase &&) = default;
0531    RFieldBase &operator=(const RFieldBase &) = delete;
0532    RFieldBase &operator=(RFieldBase &&) = default;
0533    virtual ~RFieldBase() = default;
0534 
0535    /// Copies the field and its subfields using a possibly new name and a new, unconnected set of columns
0536    std::unique_ptr<RFieldBase> Clone(std::string_view newName) const;
0537 
0538    /// Factory method to create a field from a certain type given as string.
0539    /// Note that the provided type name must be a valid C++ type name. Template arguments of templated types
0540    /// must be type names or integers (e.g., no expressions).
0541    static RResult<std::unique_ptr<RFieldBase>>
0542    Create(const std::string &fieldName, const std::string &typeName);
0543 
0544    /// Checks if the given type is supported by RNTuple. In case of success, the result vector is empty.
0545    /// Otherwise there is an error record for each failing subfield (subtype).
0546    static std::vector<RCheckResult> Check(const std::string &fieldName, const std::string &typeName);
0547 
0548    /// Generates an object of the field type and allocates new initialized memory according to the type.
0549    /// Implemented at the end of this header because the implementation is using RField<T>::TypeName()
0550    /// The returned object can be released with `delete`, i.e. it is valid to call:
0551    /// ~~~{.cpp}
0552    ///    auto ptr = field->CreateObject();
0553    ///    delete ptr.release();
0554    /// ~~~
0555    ///
0556    /// Note that CreateObject<void>() is supported. The returned `unique_ptr` has a custom deleter that reports an error
0557    /// if it is called. The intended use of the returned `unique_ptr<void>` is to call `release()`. In this way, the
0558    /// transfer of pointer ownership is explicit.
0559    template <typename T>
0560    std::unique_ptr<T, typename RCreateObjectDeleter<T>::deleter> CreateObject() const;
0561    /// Generates an object of the field's type, wraps it in a shared pointer and returns it as an RValue connected to
0562    /// the field.
0563    RValue CreateValue();
0564    /// Creates a new, initially empty bulk.
0565    /// RBulkValues::ReadBulk() will construct the array of values. The memory of the value array is managed by the
0566    /// RBulkValues class.
0567    RBulkValues CreateBulk();
0568    /// Creates a value from a memory location with an already constructed object
0569    RValue BindValue(std::shared_ptr<void> objPtr);
0570    /// Creates the list of direct child values given an existing value for this field. E.g. a single value for the
0571    /// correct `std::variant` or all the elements of a collection. The default implementation assumes no subvalues
0572    /// and returns an empty vector.
0573    virtual std::vector<RValue> SplitValue(const RValue &value) const;
0574    /// The number of bytes taken by a value of the appropriate type
0575    virtual size_t GetValueSize() const = 0;
0576    /// As a rule of thumb, the alignment is equal to the size of the type. There are, however, various exceptions
0577    /// to this rule depending on OS and CPU architecture. So enforce the alignment to be explicitly spelled out.
0578    virtual size_t GetAlignment() const = 0;
0579    std::uint32_t GetTraits() const { return fTraits; }
0580    bool HasReadCallbacks() const { return !fReadCallbacks.empty(); }
0581 
0582    const std::string &GetFieldName() const { return fName; }
0583    /// Returns the field name and parent field names separated by dots (`grandparent.parent.child`)
0584    std::string GetQualifiedFieldName() const;
0585    const std::string &GetTypeName() const { return fType; }
0586    const std::string &GetTypeAlias() const { return fTypeAlias; }
0587    ROOT::ENTupleStructure GetStructure() const { return fStructure; }
0588    std::size_t GetNRepetitions() const { return fNRepetitions; }
0589    const RFieldBase *GetParent() const { return fParent; }
0590    std::vector<RFieldBase *> GetMutableSubfields();
0591    std::vector<const RFieldBase *> GetConstSubfields() const;
0592    bool IsSimple() const { return fIsSimple; }
0593    bool IsArtificial() const { return fIsArtificial; }
0594    /// Get the field's description
0595    const std::string &GetDescription() const { return fDescription; }
0596    void SetDescription(std::string_view description);
0597    EState GetState() const { return fState; }
0598 
0599    ROOT::DescriptorId_t GetOnDiskId() const { return fOnDiskId; }
0600    void SetOnDiskId(ROOT::DescriptorId_t id);
0601 
0602    /// Returns the `fColumnRepresentative` pointee or, if unset (always the case for artificial fields), the field's
0603    /// default representative
0604    RColumnRepresentations::Selection_t GetColumnRepresentatives() const;
0605    /// Fixes a column representative. This can only be done _before_ connecting the field to a page sink.
0606    /// Otherwise, or if the provided representation is not in the list of GetColumnRepresentations(),
0607    /// an exception is thrown
0608    void SetColumnRepresentatives(const RColumnRepresentations::Selection_t &representatives);
0609    /// Returns true if no explicit column representative was set, i.e. if `fColumnRepresentatives` is empty
0610    bool HasDefaultColumnRepresentative() const { return fColumnRepresentatives.empty(); }
0611 
0612    /// Indicates an evolution of the mapping scheme from C++ type to columns
0613    virtual std::uint32_t GetFieldVersion() const { return 0; }
0614    /// Indicates an evolution of the C++ type itself
0615    virtual std::uint32_t GetTypeVersion() const { return 0; }
0616    /// Return the current TClass reported checksum of this class. Only valid if `kTraitTypeChecksum` is set.
0617    virtual std::uint32_t GetTypeChecksum() const { return 0; }
0618    /// Return the C++ type version stored in the field descriptor; only valid after a call to ConnectPageSource()
0619    std::uint32_t GetOnDiskTypeVersion() const { return fOnDiskTypeVersion; }
0620    /// Return checksum stored in the field descriptor; only valid after a call to ConnectPageSource(),
0621    /// if the field stored a type checksum
0622    std::uint32_t GetOnDiskTypeChecksum() const { return fOnDiskTypeChecksum; }
0623 
0624    RSchemaIterator begin();
0625    RSchemaIterator end();
0626    RConstSchemaIterator begin() const;
0627    RConstSchemaIterator end() const;
0628    RConstSchemaIterator cbegin() const;
0629    RConstSchemaIterator cend() const;
0630 
0631    virtual void AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) const;
0632 }; // class RFieldBase
0633 
0634 /// Iterates over the subtree of fields in depth-first search order.
     /// The iterator keeps a stack of positions; the top of the stack is the field the iterator currently points to.
     /// Two iterators compare equal if they currently point to the same field; the rest of the stack is not compared.
0635 template <bool IsConstT>
0636 class RFieldBase::RSchemaIteratorTemplate {
0637 private:
        /// A field together with its index in the parent's list of subfields
0638    struct Position {
0639       using FieldPtr_t = std::conditional_t<IsConstT, const RFieldBase *, RFieldBase *>;
           /// Creates a position that points to no field (null pointer, index -1)
0640       Position() : fFieldPtr(nullptr), fIdxInParent(-1) {}
0641       Position(FieldPtr_t fieldPtr, int idxInParent) : fFieldPtr(fieldPtr), fIdxInParent(idxInParent) {}
0642       FieldPtr_t fFieldPtr; ///< The field at this position
0643       int fIdxInParent; ///< Index into `fParent->fSubfields`; -1 for the default position and once Advance() ran out
0644    };
0645    /// The stack of nodes visited when walking down the tree of fields
0646    std::vector<Position> fStack;
0647 
0648 public:
0649    using iterator = RSchemaIteratorTemplate<IsConstT>;
0650    using iterator_category = std::forward_iterator_tag;
0651    using difference_type = std::ptrdiff_t;
0652    using value_type = std::conditional_t<IsConstT, const RFieldBase, RFieldBase>;
0653    using pointer = std::conditional_t<IsConstT, const RFieldBase *, RFieldBase *>;
0654    using reference = std::conditional_t<IsConstT, const RFieldBase &, RFieldBase &>;
0655 
        /// Creates an iterator whose stack holds a single default position (null field pointer).
        /// Such an iterator must not be dereferenced or advanced, because both would dereference the null pointer.
0656    RSchemaIteratorTemplate() { fStack.emplace_back(Position()); }
        /// Creates an iterator that points to `val`, which has index `idxInParent` in its parent's list of subfields
0657    RSchemaIteratorTemplate(pointer val, int idxInParent) { fStack.emplace_back(Position(val, idxInParent)); }
0658    ~RSchemaIteratorTemplate() {}
0659    /// Given that the iterator points to a valid field which is not the end iterator, go to the next field
0660    /// in depth-first search order
0661    void Advance()
0662    {
0663       auto itr = fStack.rbegin();
           // Descend first: if the current field has subfields, continue with its first subfield
0664       if (!itr->fFieldPtr->fSubfields.empty()) {
0665          fStack.emplace_back(Position(itr->fFieldPtr->fSubfields[0].get(), 0));
0666          return;
0667       }
0668 
           // Otherwise try the next sibling of the current field
0669       unsigned int nextIdxInParent = ++(itr->fIdxInParent);
           // No sibling left on this level: go up one level and try the next sibling there
0670       while (nextIdxInParent >= itr->fFieldPtr->fParent->fSubfields.size()) {
0671          if (fStack.size() == 1) {
                 // Cannot go further up: leave the iterator pointing to the parent of the bottom-most field, index -1.
                 // NOTE(review): presumably this is the state that compares equal to RFieldBase::end() -- confirm there.
0672             itr->fFieldPtr = itr->fFieldPtr->fParent;
0673             itr->fIdxInParent = -1;
0674             return;
0675          }
0676          fStack.pop_back();
0677          itr = fStack.rbegin();
0678          nextIdxInParent = ++(itr->fIdxInParent);
0679       }
           // A next sibling exists on the current level: replace the top of the stack with it
0680       itr->fFieldPtr = itr->fFieldPtr->fParent->fSubfields[nextIdxInParent].get();
0681    }
0682 
        /// Advances the iterator and returns a copy of its state from before the advance
0683    iterator operator++(int) /* postfix */
0684    {
0685       auto r = *this;
0686       Advance();
0687       return r;
0688    }
        /// Advances the iterator and returns a reference to it
0689    iterator &operator++() /* prefix */
0690    {
0691       Advance();
0692       return *this;
0693    }
        /// Both accessors refer to the field on top of the stack
0694    reference operator*() const { return *fStack.back().fFieldPtr; }
0695    pointer operator->() const { return fStack.back().fFieldPtr; }
        /// Equality and inequality only compare the field pointers on top of the two stacks
0696    bool operator==(const iterator &rh) const { return fStack.back().fFieldPtr == rh.fStack.back().fFieldPtr; }
0697    bool operator!=(const iterator &rh) const { return fStack.back().fFieldPtr != rh.fStack.back().fFieldPtr; }
0698 };
0699 
0700 /// Points to an object with RNTuple I/O support and keeps a pointer to the corresponding field.
0701 /// Fields can create RValue objects through RFieldBase::CreateValue(), RFieldBase::BindValue() or
0702 /// RFieldBase::SplitValue().
0703 class RFieldBase::RValue {
0704    friend class RFieldBase;
0705 
0706 private:
0707    RFieldBase *fField = nullptr;  ///< The field that created the RValue
0708    /// Set by Bind() or by RFieldBase::CreateValue(), RFieldBase::SplitValue() or RFieldBase::BindValue()
0709    std::shared_ptr<void> fObjPtr;
        /// Private: only RValue itself and its friend RFieldBase can create a value from a field and an object pointer
0710    RValue(RFieldBase *field, std::shared_ptr<void> objPtr) : fField(field), fObjPtr(objPtr) {}
0711 
0712 public:
0713    RValue(const RValue &) = default;
0714    RValue &operator=(const RValue &) = default;
0715    RValue(RValue &&other) = default;
0716    RValue &operator=(RValue &&other) = default;
0717    ~RValue() = default;
0718 
        /// Forwards the bound object to the field's Append() and returns its result
0719    std::size_t Append() { return fField->Append(fObjPtr.get()); }
        /// Forwards to the field's Read() with `globalIndex` and the bound object
0720    void Read(ROOT::NTupleSize_t globalIndex) { fField->Read(globalIndex, fObjPtr.get()); }
        /// Forwards to the field's Read() with `localIndex` and the bound object
0721    void Read(RNTupleLocalIndex localIndex) { fField->Read(localIndex, fObjPtr.get()); }
        /// Replaces the current object pointer by `objPtr`
0722    void Bind(std::shared_ptr<void> objPtr) { fObjPtr = objPtr; }
0723    void BindRawPtr(void *rawPtr);
0724    /// Replace the current object pointer by a pointer to a new object constructed by the field
0725    void EmplaceNew() { fObjPtr = fField->CreateValue().GetPtr<void>(); }
0726 
        /// Returns the object pointer converted with `std::static_pointer_cast<T>`.
        /// `T` is not checked against the actual type of the object.
0727    template <typename T>
0728    std::shared_ptr<T> GetPtr() const
0729    {
0730       return std::static_pointer_cast<T>(fObjPtr);
0731    }
0732 
        /// Returns a const reference to the object interpreted as `T` (unchecked `static_cast`).
        /// The object pointer is dereferenced and therefore must not be null.
0733    template <typename T>
0734    const T &GetRef() const
0735    {
0736       return *static_cast<T *>(fObjPtr.get());
0737    }
0738 
        /// Returns the field stored in `fField`; the pointer is dereferenced and therefore must not be null
0739    const RFieldBase &GetField() const { return *fField; }
0740 };
0741 
0742 /// Input parameter to RFieldBase::ReadBulk() and RFieldBase::ReadBulkImpl().
0743 /// See the RBulkValues class documentation for more information.
0744 struct RFieldBase::RBulkSpec {
0745    /// Possible return value of ReadBulk() and ReadBulkImpl(), which indicates that the full bulk range was read
0746    /// independently of the provided masks.
0747    static const std::size_t kAllSet = std::size_t(-1);
0748 
0749    RNTupleLocalIndex fFirstIndex; ///< Start of the bulk range
0750    std::size_t fCount = 0;        ///< Size of the bulk range
0751    /// A bool array of size `fCount`, indicating the required values in the requested range
0752    const bool *fMaskReq = nullptr;
0753    bool *fMaskAvail = nullptr; ///< A bool array of size `fCount`, indicating the valid values in fValues
0754    /// The destination area, which has to be an array of valid objects of the correct type large enough to hold the bulk
0755    /// range.
0756    void *fValues = nullptr;
0757    /// Reference to memory owned by the RBulkValues class. The field implementing ReadBulkImpl() may use `fAuxData` as
0758    /// memory that stays persistent between calls.
0759    std::vector<unsigned char> *fAuxData = nullptr;
0760 };
0761 
0762 // clang-format off
0763 /**
0764 \class ROOT::RFieldBase::RBulkValues
0765 \ingroup NTuple
0766 \brief Points to an array of objects with RNTuple I/O support, used for bulk reading.
0767 
0768 Similar to RValue, but manages an array of consecutive values. Bulks have to come from the same cluster.
0769 Bulk I/O works with two masks (bool arrays): the mask of all the available entries in the current bulk and the mask
0770 of the required entries in a bulk read. The idea is that a single bulk may serve multiple read operations
0771 on the same range, where in each read operation a different subset of values is required.
0772 The memory of the value array is managed by the RBulkValues class.
0773 */
0774 // clang-format on
0775 class RFieldBase::RBulkValues {
0776 private:
0777    friend class RFieldBase;
0778 
0779    RFieldBase *fField = nullptr;                   ///< The field that created the array of values
0780    std::unique_ptr<RFieldBase::RDeleter> fDeleter; ///< Cached deleter of fField
0781    void *fValues = nullptr;                        ///< Pointer to the start of the array
0782    std::size_t fValueSize = 0;                     ///< Cached copy of RFieldBase::GetValueSize()
0783    std::size_t fCapacity = 0;                      ///< The size of the array memory block in number of values
0784    std::size_t fSize = 0;              ///< The number of available values in the array (provided their mask is set)
0785    bool fIsAdopted = false;            ///< True if the user provides the memory buffer for fValues
0786    std::unique_ptr<bool[]> fMaskAvail; ///< Masks invalid values in the array
0787    std::size_t fNValidValues = 0;      ///< The number of elements set to true in fMaskAvail
0788    RNTupleLocalIndex fFirstIndex;      ///< Index of the first value of the array
0789    /// Reading arrays of complex values may require additional memory, for instance for the elements of
0790    /// arrays of vectors. A pointer to the `fAuxData` array is passed to the field's ReadBulk method.
0791    /// The RBulkValues class does not modify the array in-between calls to the field's ReadBulk method.
0792    std::vector<unsigned char> fAuxData;
0793 
0794    void ReleaseValues();
0795    /// Sets a new range for the bulk. If there is enough capacity, the `fValues` array will be reused.
0796    /// Otherwise a new array is allocated. After reset, fMaskAvail is false for all values.
0797    void Reset(RNTupleLocalIndex firstIndex, std::size_t size);
0798    void CountValidValues();
0799 
        /// Returns true if the `size` values starting at `firstIndex` are in the same cluster as the current bulk range
        /// and lie completely within `[fFirstIndex, fFirstIndex + fSize)`.
0800    bool ContainsRange(RNTupleLocalIndex firstIndex, std::size_t size) const
0801    {
0802       if (firstIndex.GetClusterId() != fFirstIndex.GetClusterId())
0803          return false;
0804       return (firstIndex.GetIndexInCluster() >= fFirstIndex.GetIndexInCluster()) &&
0805              ((firstIndex.GetIndexInCluster() + size) <= (fFirstIndex.GetIndexInCluster() + fSize));
0806    }
0807 
        /// Returns the address of the value with index `idx` in the array, i.e. `fValues` plus `idx * fValueSize` bytes
0808    void *GetValuePtrAt(std::size_t idx) const { return reinterpret_cast<unsigned char *>(fValues) + idx * fValueSize; }
0809 
        /// Creates an empty bulk for `field` and caches the field's deleter and value size
0810    explicit RBulkValues(RFieldBase *field)
0811       : fField(field), fDeleter(field->GetDeleter()), fValueSize(field->GetValueSize())
0812    {
0813    }
0814 
0815 public:
0816    ~RBulkValues();
0817    RBulkValues(const RBulkValues &) = delete;
0818    RBulkValues &operator=(const RBulkValues &) = delete;
0819    RBulkValues(RBulkValues &&other);
0820    RBulkValues &operator=(RBulkValues &&other);
0821 
0822    /// Sets `fValues` and `fSize`/`fCapacity` to the given values. The capacity is specified in number of values.
0823    /// Once a buffer is adopted, an attempt to read more values than available throws an exception.
0824    void AdoptBuffer(void *buf, std::size_t capacity);
0825 
0826    /// Reads `size` values from the associated field, starting from `firstIndex`. Note that the index is given
0827    /// relative to a certain cluster. The return value points to the array of read objects.
0828    /// The `maskReq` parameter is a bool array of at least `size` elements. Only objects for which the mask is
0829    /// true are guaranteed to be read in the returned value array. A `nullptr` means to read all elements.
0830    void *ReadBulk(RNTupleLocalIndex firstIndex, const bool *maskReq, std::size_t size)
0831    {
           // Start over with a new range unless the request lies within the range the bulk currently covers
0832       if (!ContainsRange(firstIndex, size))
0833          Reset(firstIndex, size);
0834 
0835       // We may read a subrange of the currently available range
0836       auto offset = firstIndex.GetIndexInCluster() - fFirstIndex.GetIndexInCluster();
0837 
           // Every value of the current range is already marked valid, so nothing needs to be read
0838       if (fNValidValues == fSize)
0839          return GetValuePtrAt(offset);
0840 
           // Describe the requested subrange; mask and value pointers are shifted by `offset` into the bulk's arrays
0841       RBulkSpec bulkSpec;
0842       bulkSpec.fFirstIndex = firstIndex;
0843       bulkSpec.fCount = size;
0844       bulkSpec.fMaskReq = maskReq;
0845       bulkSpec.fMaskAvail = &fMaskAvail[offset];
0846       bulkSpec.fValues = GetValuePtrAt(offset);
0847       bulkSpec.fAuxData = &fAuxData;
0848       auto nRead = fField->ReadBulk(bulkSpec);
0849       if (nRead == RBulkSpec::kAllSet) {
              // The field reported the full requested range as read. If that range is the whole bulk, all values are
              // valid; for a subrange, recount because values outside of it may or may not be valid.
0850          if ((offset == 0) && (size == fSize)) {
0851             fNValidValues = fSize;
0852          } else {
0853             CountValidValues();
0854          }
0855       } else {
              // Otherwise the return value is added to the running count of valid values
0856          fNValidValues += nRead;
0857       }
0858       return GetValuePtrAt(offset);
0859    }
0860 
0861    /// Overload to read all elements in the given cluster range.
0862    void *ReadBulk(ROOT::RNTupleLocalRange range) { return ReadBulk(*range.begin(), nullptr, range.size()); }
0863 };
0864 
0865 namespace Internal {
0866 // At some point, RFieldBase::OnClusterCommit() may allow for a user-defined callback to change the
0867 // column representation. For now, we inject this for testing and internal use only.
0868 struct RFieldRepresentationModifier {
        /// Points `fPrincipalColumn` (and `fAuxiliaryColumn`, if set) of `field` to the columns that belong to the
        /// column representation with index `newRepresentationIdx`.
        /// Asserts that the index is smaller than the number of column representatives, that the first representative
        /// has one or two columns and that the field already has a principal column.
0869    static void SetPrimaryColumnRepresentation(RFieldBase &field, std::uint16_t newRepresentationIdx)
0870    {
0871       R__ASSERT(newRepresentationIdx < field.fColumnRepresentatives.size());
           // Number of columns per representation, taken from the first representative
0872       const auto N = field.fColumnRepresentatives[0].get().size();
0873       R__ASSERT(N >= 1 && N <= 2);
0874       R__ASSERT(field.fPrincipalColumn);
           // The indexing implies that `fAvailableColumns` stores the N columns of representation i starting at i * N
           // NOTE(review): confirm this layout where `fAvailableColumns` is filled
0875       field.fPrincipalColumn = field.fAvailableColumns[newRepresentationIdx * N].get();
0876       if (field.fAuxiliaryColumn) {
              // An auxiliary column is only expected for two-column representations; it is the second column
0877          R__ASSERT(N == 2);
0878          field.fAuxiliaryColumn = field.fAvailableColumns[newRepresentationIdx * N + 1].get();
0879       }
0880    }
0881 };
0882 } // namespace Internal
0883 } // namespace ROOT
0884 
0885 #endif