Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:10:45

0001 /// \file ROOT/RNTupleDescriptor.hxx
0002 /// \ingroup NTuple ROOT7
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
0005 /// \date 2018-07-19
0006 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0007 /// is welcome!
0008 
0009 /*************************************************************************
0010  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0011  * All rights reserved.                                                  *
0012  *                                                                       *
0013  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0014  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0015  *************************************************************************/
0016 
0017 #ifndef ROOT7_RNTupleDescriptor
0018 #define ROOT7_RNTupleDescriptor
0019 
0020 #include <ROOT/RColumnModel.hxx>
0021 #include <ROOT/RError.hxx>
0022 #include <ROOT/RNTupleSerialize.hxx>
0023 #include <ROOT/RNTupleUtil.hxx>
0024 #include <ROOT/RSpan.hxx>
0025 #include <string_view>
0026 
0027 #include <algorithm>
0028 #include <chrono>
0029 #include <functional>
0030 #include <iterator>
0031 #include <map>
0032 #include <memory>
0033 #include <ostream>
0034 #include <vector>
0035 #include <set>
0036 #include <string>
0037 #include <unordered_map>
0038 #include <unordered_set>
0039 
0040 namespace ROOT {
0041 namespace Experimental {
0042 
0043 class RFieldBase; // used below in the return type of RFieldDescriptor::CreateField()
0044 class RNTupleDescriptor; // declared early: RFieldDescriptor::CreateField() takes it as a parameter
0045 class RNTupleModel; // NOTE(review): not referenced in the part of the header reviewed here
0046 
0047 namespace Internal {
0048 class RColumnElementBase; // parameter type of RClusterDescriptor::RPageRange::ExtendToFitColumnRange()
0049 } // namespace Internal
0050 
0051 namespace Internal { // builder classes; the descriptor classes below declare them as friends
0052 class RColumnDescriptorBuilder;
0053 class RColumnGroupDescriptorBuilder;
0054 class RClusterDescriptorBuilder;
0055 class RClusterGroupDescriptorBuilder;
0056 class RFieldDescriptorBuilder;
0057 class RNTupleDescriptorBuilder;
0058 } // namespace Internal
0059 
0060 // clang-format off
0061 /**
0062 \class ROOT::Experimental::RFieldDescriptor
0063 \ingroup NTuple
0064 \brief Meta-data stored for every field of an ntuple
0065 */
0066 // clang-format on
0067 class RFieldDescriptor {
     // The data members are private and the class offers no setters; only the two befriended builder classes
     // have direct access to them.
0068    friend class Internal::RNTupleDescriptorBuilder;
0069    friend class Internal::RFieldDescriptorBuilder;
0070 
0071 private:
0072    DescriptorId_t fFieldId = kInvalidDescriptorId; ///< ID of this field; kInvalidDescriptorId by default
0073    /// The version of the C++-type-to-column translation mechanics
0074    std::uint32_t fFieldVersion = 0;
0075    /// The version of the C++ type itself
0076    std::uint32_t fTypeVersion = 0;
0077    /// The leaf name, not including parent fields
0078    std::string fFieldName;
0079    /// Free text set by the user
0080    std::string fFieldDescription;
0081    /// The C++ type that was used when writing the field
0082    std::string fTypeName;
0083    /// A typedef or using directive that resolved to the type name during field creation
0084    std::string fTypeAlias;
0085    /// The number of elements per entry for fixed-size arrays
0086    std::uint64_t fNRepetitions = 0;
0087    /// The structural information carried by this field in the data model tree
0088    ENTupleStructure fStructure = ENTupleStructure::kInvalid;
0089    /// Establishes sub field relationships, such as classes and collections
0090    DescriptorId_t fParentId = kInvalidDescriptorId;
0091    /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
0092    /// order of sub fields.
0093    std::vector<DescriptorId_t> fLinkIds;
0094 
0095 public:
0096    RFieldDescriptor() = default;
0097    RFieldDescriptor(const RFieldDescriptor &other) = delete; ///< Not copyable; see Clone() for an explicit copy
0098    RFieldDescriptor &operator =(const RFieldDescriptor &other) = delete;
0099    RFieldDescriptor(RFieldDescriptor &&other) = default; ///< Movable
0100    RFieldDescriptor &operator =(RFieldDescriptor &&other) = default;
0101 
0102    bool operator==(const RFieldDescriptor &other) const;
0103    /// Get a copy of the descriptor
0104    RFieldDescriptor Clone() const;
0105    /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
0106    /// access to sub fields, which is provided by the ntuple descriptor argument.
0107    std::unique_ptr<RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc) const;
0108 
     // Read-only accessors. The string getters return by value, i.e. every call copies the string.
0109    DescriptorId_t GetId() const { return fFieldId; }
0110    std::uint32_t GetFieldVersion() const { return fFieldVersion; }
0111    std::uint32_t GetTypeVersion() const { return fTypeVersion; }
0112    std::string GetFieldName() const { return fFieldName; }
0113    std::string GetFieldDescription() const { return fFieldDescription; }
0114    std::string GetTypeName() const { return fTypeName; }
0115    std::string GetTypeAlias() const { return fTypeAlias; }
0116    std::uint64_t GetNRepetitions() const { return fNRepetitions; }
0117    ENTupleStructure GetStructure() const { return fStructure; }
0118    DescriptorId_t GetParentId() const { return fParentId; }
0119    const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; } ///< IDs of the sub fields, by reference
0120 };
0121 
0122 
0123 // clang-format off
0124 /**
0125 \class ROOT::Experimental::RColumnDescriptor
0126 \ingroup NTuple
0127 \brief Meta-data stored for every column of an ntuple
0128 */
0129 // clang-format on
0130 class RColumnDescriptor {
     // The data members are private and the class offers no setters; only the two befriended builder classes
     // have direct access to them.
0131    friend class Internal::RColumnDescriptorBuilder;
0132    friend class Internal::RNTupleDescriptorBuilder;
0133 
0134 private:
0135    /// The actual column identifier, which is the link to the corresponding field
0136    DescriptorId_t fLogicalColumnId = kInvalidDescriptorId;
0137    /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
0138    DescriptorId_t fPhysicalColumnId = kInvalidDescriptorId;
0139    /// Contains the column type and whether it is sorted
0140    RColumnModel fModel;
0141    /// Every column belongs to one and only one field
0142    DescriptorId_t fFieldId = kInvalidDescriptorId;
0143    /// A field can be serialized into several columns, which are numbered from zero to $n$.
     /// Zero-initialized so that GetIndex() on a default-constructed descriptor never reads an indeterminate value.
0144    std::uint32_t fIndex = 0;
0145    /// Specifies the index for the first stored element for this column. For deferred columns the value is greater
0146    /// than 0
0147    std::uint64_t fFirstElementIndex = 0U;
0148 
0149 public:
0150    RColumnDescriptor() = default;
0151    RColumnDescriptor(const RColumnDescriptor &other) = delete; ///< Not copyable; see Clone() for an explicit copy
0152    RColumnDescriptor &operator =(const RColumnDescriptor &other) = delete;
0153    RColumnDescriptor(RColumnDescriptor &&other) = default; ///< Movable
0154    RColumnDescriptor &operator =(RColumnDescriptor &&other) = default;
0155 
0156    bool operator==(const RColumnDescriptor &other) const;
0157    /// Get a copy of the descriptor
0158    RColumnDescriptor Clone() const;
0159 
0160    DescriptorId_t GetLogicalId() const { return fLogicalColumnId; }
0161    DescriptorId_t GetPhysicalId() const { return fPhysicalColumnId; }
0162    RColumnModel GetModel() const { return fModel; } ///< Returns a copy of the column model
0163    std::uint32_t GetIndex() const { return fIndex; }
0164    DescriptorId_t GetFieldId() const { return fFieldId; }
     /// A column is an alias column if its physical ID differs from its logical ID
0165    bool IsAliasColumn() const { return fPhysicalColumnId != fLogicalColumnId; }
0166    std::uint64_t GetFirstElementIndex() const { return fFirstElementIndex; }
     /// A column is deferred if its first stored element has an index greater than zero
0167    bool IsDeferredColumn() const { return fFirstElementIndex > 0; }
0168 };
0169 
0170 // clang-format off
0171 /**
0172 \class ROOT::Experimental::RColumnGroupDescriptor
0173 \ingroup NTuple
0174 \brief Meta-data for a set of columns; non-trivial column groups are used for sharded clusters
0175 
0176 Clusters can span a subset of columns. Such subsets are described as a column group. An empty column group
0177 is used to denote the column group of all the columns. Every ntuple has at least one column group.
0178 */
0179 // clang-format on
0180 class RColumnGroupDescriptor {
0181    friend class Internal::RColumnGroupDescriptorBuilder;
0182 
0183 private:
0184    DescriptorId_t fColumnGroupId = kInvalidDescriptorId; ///< ID of this column group
0185    std::unordered_set<DescriptorId_t> fPhysicalColumnIds; ///< Member columns; an empty set denotes all columns
0186 
0187 public:
0188    RColumnGroupDescriptor() = default;
0189    RColumnGroupDescriptor(const RColumnGroupDescriptor &other) = delete; ///< Not copyable
0190    RColumnGroupDescriptor &operator=(const RColumnGroupDescriptor &other) = delete;
0191    RColumnGroupDescriptor(RColumnGroupDescriptor &&other) = default; ///< Movable
0192    RColumnGroupDescriptor &operator=(RColumnGroupDescriptor &&other) = default;
0193 
0194    bool operator==(const RColumnGroupDescriptor &other) const;
0195 
0196    DescriptorId_t GetId() const { return fColumnGroupId; }
0197    const std::unordered_set<DescriptorId_t> &GetPhysicalColumnIds() const { return fPhysicalColumnIds; }
     /// True if the given physical column is part of this group. An empty ID set matches every column, so a
     /// group without explicit members contains any ID that is passed in.
0198    bool Contains(DescriptorId_t physicalId) const
0199    {
0200       return fPhysicalColumnIds.empty() || fPhysicalColumnIds.count(physicalId) > 0;
0201    }
     /// True if this is the group of all columns, i.e. no explicit column IDs are stored
0202    bool HasAllColumns() const { return fPhysicalColumnIds.empty(); }
0203 };
0204 
0205 // clang-format off
0206 /**
0207 \class ROOT::Experimental::RClusterDescriptor
0208 \ingroup NTuple
0209 \brief Meta-data for a set of ntuple clusters
0210 
0211 The cluster descriptor is built in two phases.  In a first phase, the descriptor has only an ID.
0212 In a second phase, the event range, column group, page locations and column ranges are added.
0213 Both phases are populated by the RClusterDescriptorBuilder.
0214 Clusters usually span across all available columns but in some cases they can describe only a subset of the columns,
0215 for instance when describing friend ntuples.
0216 */
0217 // clang-format on
0218 class RClusterDescriptor {
0219    friend class Internal::RClusterDescriptorBuilder;
0220 
0221 public:
0222    /// The window of element indexes of a particular column in a particular cluster
0223    struct RColumnRange {
0224       DescriptorId_t fPhysicalColumnId = kInvalidDescriptorId;
0225       /// A 64bit element index
0226       NTupleSize_t fFirstElementIndex = kInvalidNTupleIndex;
0227       /// The number of column elements in the cluster
0228       ClusterSize_t fNElements = kInvalidClusterIndex;
0229       /// The usual format for ROOT compression settings (see Compression.h).
0230       /// The pages of a particular column in a particular cluster are all compressed with the same settings.
0231       int fCompressionSettings = kUnknownCompressionSettings;
0232 
0233       // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
0234       // Should this be done on the field level?
0235 
           /// Two column ranges are equal if all four data members are equal
0236       bool operator==(const RColumnRange &other) const {
0237          return fPhysicalColumnId == other.fPhysicalColumnId && fFirstElementIndex == other.fFirstElementIndex &&
0238                 fNElements == other.fNElements && fCompressionSettings == other.fCompressionSettings;
0239       }
0240 
           /// True if `index` lies in the half-open interval [fFirstElementIndex, fFirstElementIndex + fNElements)
0241       bool Contains(NTupleSize_t index) const {
0242          return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
0243       }
0244    };
0245 
0246    /// Records the partition of data into pages for a particular column in a particular cluster
0247    class RPageRange {
0248       friend class Internal::RClusterDescriptorBuilder;
0249       /// Extend this RPageRange to fit the given RColumnRange, i.e. prepend as many synthetic RPageInfos as needed to
0250       /// cover the range in `columnRange`. `RPageInfo`s are constructed to contain as many elements of type `element`
0251       /// given a page size limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
0252       /// This function is used to make up `RPageRange`s for clusters that contain deferred columns.
0253       /// \return The number of column elements covered by the synthesized RPageInfos
0254       std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element,
0255                                          std::size_t pageSize);
0256 
0257    public:
0258       /// We do not need to store the element size / uncompressed page size because we know to which column
0259       /// the page belongs
0260       struct RPageInfo {
0261          /// The sum of the elements of all the pages must match the corresponding fNElements field in fColumnRanges
0262          std::uint32_t fNElements = std::uint32_t(-1);
0263          /// The meaning of fLocator depends on the storage backend.
0264          RNTupleLocator fLocator;
0265 
              /// Two page infos are equal if both the element count and the locator are equal
0266          bool operator==(const RPageInfo &other) const {
0267             return fNElements == other.fNElements && fLocator == other.fLocator;
0268          }
0269       };
           /// An RPageInfo together with the position of the page inside its page range
0270       struct RPageInfoExtended : RPageInfo {
0271          /// Index (in cluster) of the first element in page.
0272          ClusterSize_t::ValueType fFirstInPage = 0;
0273          /// Page number in the corresponding RPageRange.
0274          NTupleSize_t fPageNo = 0;
0275 
0276          RPageInfoExtended() = default;
              /// Copies the page info `pi` and stores `i` as first-in-page index and `n` as page number
0277          RPageInfoExtended(const RPageInfo &pi, ClusterSize_t::ValueType i, NTupleSize_t n)
0278             : RPageInfo(pi), fFirstInPage(i), fPageNo(n)
0279          {
0280          }
0281       };
0282 
0283       RPageRange() = default;
0284       RPageRange(const RPageRange &other) = delete; ///< Not copyable; use Clone() for an explicit copy
0285       RPageRange &operator =(const RPageRange &other) = delete;
0286       RPageRange(RPageRange &&other) = default; ///< Movable
0287       RPageRange &operator =(RPageRange &&other) = default;
0288 
           /// Returns a new page range with the same physical column ID and a copy of the page infos
0289       RPageRange Clone() const {
0290          RPageRange clone;
0291          clone.fPhysicalColumnId = fPhysicalColumnId;
0292          clone.fPageInfos = fPageInfos;
0293          return clone;
0294       }
0295 
0296       /// Find the page in the RPageRange that contains the given element. The element must exist.
0297       RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const;
0298 
           // Public data members
0299       DescriptorId_t fPhysicalColumnId = kInvalidDescriptorId;
0300       std::vector<RPageInfo> fPageInfos;
0301 
           /// Two page ranges are equal if they have the same physical column ID and equal page infos
0302       bool operator==(const RPageRange &other) const {
0303          return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
0304       }
0305    };
0306 
0307 private:
0308    DescriptorId_t fClusterId = kInvalidDescriptorId;
0309    /// Clusters can be swapped by adjusting the entry offsets
0310    NTupleSize_t fFirstEntryIndex = kInvalidNTupleIndex;
0311    // TODO(jblomer): change to std::uint64_t
0312    ClusterSize_t fNEntries = kInvalidClusterIndex;
0313 
        // Both maps are looked up by physical column ID in the accessors below
0314    std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
0315    std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
0316 
0317 public:
0318    RClusterDescriptor() = default;
0319    RClusterDescriptor(const RClusterDescriptor &other) = delete; ///< Not copyable; see Clone()
0320    RClusterDescriptor &operator =(const RClusterDescriptor &other) = delete;
0321    RClusterDescriptor(RClusterDescriptor &&other) = default; ///< Movable
0322    RClusterDescriptor &operator =(RClusterDescriptor &&other) = default;
0323 
0324    RClusterDescriptor Clone() const;
0325 
0326    bool operator==(const RClusterDescriptor &other) const;
0327 
0328    DescriptorId_t GetId() const { return fClusterId; }
0329    NTupleSize_t GetFirstEntryIndex() const { return fFirstEntryIndex; }
0330    ClusterSize_t GetNEntries() const { return fNEntries; }
        /// Uses std::unordered_map::at(), i.e. throws std::out_of_range if no column range is stored for `physicalId`;
        /// ContainsColumn() can be used to check beforehand.
0331    const RColumnRange &GetColumnRange(DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
        /// Uses std::unordered_map::at(), i.e. throws std::out_of_range if no page range is stored for `physicalId`
0332    const RPageRange &GetPageRange(DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
        /// True if a column range is stored for the given physical column ID
0333    bool ContainsColumn(DescriptorId_t physicalId) const
0334    {
0335       return fColumnRanges.find(physicalId) != fColumnRanges.end();
0336    }
0337    std::unordered_set<DescriptorId_t> GetColumnIds() const;
0338    std::uint64_t GetBytesOnStorage() const;
0339 };
0340 
0341 // clang-format off
0342 /**
0343 \class ROOT::Experimental::RClusterGroupDescriptor
0344 \ingroup NTuple
0345 \brief Clusters are bundled in cluster groups.
0346 
0347 Very large ntuples or combined ntuples (chains, friends) contain multiple cluster groups. The cluster groups
0348 may contain sharded clusters.
0349 Every ntuple has at least one cluster group.  The clusters in a cluster group are ordered corresponding to
0350 the order of page locations in the page list envelope that belongs to the cluster group (see format specification)
0351 */
0352 // clang-format on
0353 class RClusterGroupDescriptor {
0354    friend class Internal::RClusterGroupDescriptorBuilder;
0355 
0356 private:
0357    DescriptorId_t fClusterGroupId = kInvalidDescriptorId;
0358    /// The cluster IDs can be empty if the corresponding page list is not loaded.
0359    std::vector<DescriptorId_t> fClusterIds;
0360    /// The page list that corresponds to the cluster group
0361    RNTupleLocator fPageListLocator;
0362    /// Uncompressed size of the page list
0363    std::uint64_t fPageListLength = 0;
0364    /// The minimum first entry number of the clusters in the cluster group
0365    std::uint64_t fMinEntry = 0;
0366    /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
0367    std::uint64_t fEntrySpan = 0;
0368    /// Number of clusters is always known even if the cluster IDs are not (yet) populated
0369    std::uint32_t fNClusters = 0;
0370 
0371 public:
0372    RClusterGroupDescriptor() = default;
0373    RClusterGroupDescriptor(const RClusterGroupDescriptor &other) = delete; ///< Not copyable; see Clone()
0374    RClusterGroupDescriptor &operator=(const RClusterGroupDescriptor &other) = delete;
0375    RClusterGroupDescriptor(RClusterGroupDescriptor &&other) = default; ///< Movable
0376    RClusterGroupDescriptor &operator=(RClusterGroupDescriptor &&other) = default;
0377 
0378    RClusterGroupDescriptor Clone() const;
0379    // Creates a clone without the cluster IDs
0380    RClusterGroupDescriptor CloneSummary() const;
0381 
0382    bool operator==(const RClusterGroupDescriptor &other) const;
0383 
0384    DescriptorId_t GetId() const { return fClusterGroupId; }
0385    std::uint32_t GetNClusters() const { return fNClusters; }
0386    RNTupleLocator GetPageListLocator() const { return fPageListLocator; } ///< Returns a copy of the locator
0387    std::uint64_t GetPageListLength() const { return fPageListLength; }
0388    const std::vector<DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
0389    std::uint64_t GetMinEntry() const { return fMinEntry; }
0390    std::uint64_t GetEntrySpan() const { return fEntrySpan; }
0391    /// A cluster group is loaded in two stages. Stage one loads only the summary information.
0392    /// Stage two loads the list of cluster IDs.
        /// The check is "cluster ID list is non-empty"; a group whose ID list is empty therefore reports false
        /// whether or not stage two has run.
0393    bool HasClusterDetails() const { return !fClusterIds.empty(); }
0394 };
0395 
0396 // clang-format off
0397 /**
0398 \class ROOT::Experimental::RNTupleDescriptor
0399 \ingroup NTuple
0400 \brief The on-storage meta-data of an ntuple
0401 
0402 Represents the on-disk (on storage) information about an ntuple. The meta-data consists of a header and one or
0403 several footers. The header carries the ntuple schema, i.e. the fields and the associated columns and their
0404 relationships. The footer(s) carry information about one or several clusters. For every cluster, a footer stores
0405 its location and size, and for every column the range of element indexes as well as a list of pages and page
0406 locations.
0407 
0408 The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
0409 for ntuple objects (pages, clusters, ...).  It is supposed to be usable by all RPageStorage implementations.
0410 
0411 The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
0412 the concept of frames: header, footer, and substructures have a preamble with version numbers and the size of the
0413 written struct. This allows for forward and backward compatibility when the meta-data evolves.
0414 */
0415 // clang-format on
0416 class RNTupleDescriptor {
0417    friend class Internal::RNTupleDescriptorBuilder;
0418 
0419 public:
0420    class RHeaderExtension;
0421 
0422 private:
0423    /// The ntuple name needs to be unique in a given storage location (file)
0424    std::string fName;
0425    /// Free text from the user
0426    std::string fDescription;
0427 
0428    std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
0429    std::uint64_t fOnDiskHeaderSize = 0; ///< Set by the descriptor builder when deserialized
0430    std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
0431 
0432    std::uint64_t fNEntries = 0;         ///< Updated by the descriptor builder when the cluster groups are added
0433    std::uint64_t fNClusters = 0;        ///< Updated by the descriptor builder when the cluster groups are added
0434    std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
0435 
0436    /**
0437     * Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
0438     * active page locations.  During the lifetime of the descriptor, page location information for clusters
0439     * can be added or removed.  When this happens, the generation should be increased, so that users of the
0440     * descriptor know that the information changed.  The generation is increased, e.g., by the page source's
0441     * exclusive lock guard around the descriptor.  It is used, e.g., by the descriptor cache in RNTupleReader.
0442     */
0443    std::uint64_t fGeneration = 0;
0444 
0445    std::set<unsigned int> fFeatureFlags;
0446    std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
0447    std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
0448    std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
0449    /// May contain only a subset of all the available clusters, e.g. the clusters of the current file
0450    /// from a chain of files
0451    std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
0452    std::unique_ptr<RHeaderExtension> fHeaderExtension;
0453 
0454 public:
0455    static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
0456 
0457    // clang-format off
0458    /**
0459    \class ROOT::Experimental::RNTupleDescriptor::RHeaderExtension
0460    \ingroup NTuple
0461    \brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
0462    */
0463    // clang-format on
0464    class RHeaderExtension {
        // The mutators (AddFieldId, AddColumn) are private; only the befriended descriptor builder can call them
0465       friend class Internal::RNTupleDescriptorBuilder;
0466 
0467    private:
0468       /// Contains the list of field IDs that are part of the header extension; the corresponding columns are
0469       /// available via `GetColumnIterable()`.
0470       std::vector<DescriptorId_t> fFields;
0471       /// Number of logical and physical columns; updated by the descriptor builder when columns are added
0472       std::uint64_t fNLogicalColumns = 0;
0473       std::uint64_t fNPhysicalColumns = 0;
0474 
           /// Appends `id` to the list of extension fields; no check for duplicates is made
0475       void AddFieldId(DescriptorId_t id) { fFields.push_back(id); }
           /// Counts one more column: every column counts as logical column, only non-alias columns also count as
           /// physical column
0476       void AddColumn(bool isAliasColumn)
0477       {
0478          fNLogicalColumns++;
0479          if (!isAliasColumn)
0480             fNPhysicalColumns++;
0481       }
0482 
0483    public:
0484       std::size_t GetNFields() const { return fFields.size(); }
           // The two column counters are stored as std::uint64_t and converted to std::size_t on return
0485       std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
0486       std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
0487       /// Return a vector containing the IDs of the top-level fields defined in the extension header
0488       std::vector<DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
0489    };
0490 
0491    // clang-format off
0492    /**
0493    \class ROOT::Experimental::RNTupleDescriptor::RColumnDescriptorIterable
0494    \ingroup NTuple
0495    \brief Used to loop over a field's associated columns
0496    */
0497    // clang-format on
0498    class RColumnDescriptorIterable {
0499    private:
0500       /// The associated NTuple for this range.
0501       const RNTupleDescriptor &fNTuple;
0502       /// The descriptor ids of the columns ordered by index id
0503       std::vector<DescriptorId_t> fColumns = {};
0504 
0505       void CollectColumnIds(DescriptorId_t fieldId);
0506    public:
           /// Iterator over the column IDs stored in the enclosing iterable. It keeps references to the ntuple
           /// descriptor and to the iterable's ID list, so it must not outlive either of them.
0507       class RIterator {
0508       private:
0509          /// The enclosing range's NTuple.
0510          const RNTupleDescriptor &fNTuple;
0511          /// The enclosing range's descriptor id list.
0512          const std::vector<DescriptorId_t> &fColumns;
              /// Position in fColumns; equal to fColumns.size() for the end iterator
0513          std::size_t fIndex = 0;
0514       public:
0515          using iterator_category = std::forward_iterator_tag;
0516          using iterator = RIterator;
              // Dereferencing yields a column descriptor, so that is the value type (it used to name
              // RFieldDescriptor, which did not match `pointer` and `reference` below).
0517          using value_type = RColumnDescriptor;
0518          using difference_type = std::ptrdiff_t;
0519          using pointer = RColumnDescriptor *;
0520          using reference = const RColumnDescriptor &;
0521 
0522          RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
0523             : fNTuple(ntuple), fColumns(columns), fIndex(index) {}
              /// Pre-increment; returns a copy of the advanced iterator
0524          iterator operator++() { ++fIndex; return *this; }
              /// Looks up the column descriptor of the current ID; std::vector::at() throws std::out_of_range when
              /// the end iterator is dereferenced
0525          reference operator*() { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
              // Comparison only looks at the position, not at the referenced ntuple or ID list
0526          bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0527          bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0528       };
0529 
0530       RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field);
0531       RColumnDescriptorIterable(const RNTupleDescriptor &ntuple);
0532 
0533       RIterator begin() { return RIterator(fNTuple, fColumns, 0); }
0534       RIterator end() { return RIterator(fNTuple, fColumns, fColumns.size()); }
0535    };
0536 
0537    // clang-format off
0538    /**
0539    \class ROOT::Experimental::RNTupleDescriptor::RFieldDescriptorIterable
0540    \ingroup NTuple
0541    \brief Used to loop over a field's child fields
0542    */
0543    // clang-format on
0544    class RFieldDescriptorIterable {
0545    private:
0546       /// The associated NTuple for this range.
0547       const RNTupleDescriptor& fNTuple;
0548       /// The descriptor ids of the child fields. These may be sorted using
0549       /// a comparison function.
0550       std::vector<DescriptorId_t> fFieldChildren = {};
0551 
0552    public:
           /// Iterator over the child field IDs stored in the enclosing iterable. It keeps references to the ntuple
           /// descriptor and to the iterable's ID list, so it must not outlive either of them.
0553       class RIterator {
0554       private:
0555          /// The enclosing range's NTuple.
0556          const RNTupleDescriptor& fNTuple;
0557          /// The enclosing range's descriptor id list.
0558          const std::vector<DescriptorId_t>& fFieldChildren;
              /// Position in fFieldChildren; equal to fFieldChildren.size() for the end iterator
0559          std::size_t fIndex = 0;
0560       public:
0561          using iterator_category = std::forward_iterator_tag;
0562          using iterator = RIterator;
0563          using value_type = RFieldDescriptor;
0564          using difference_type = std::ptrdiff_t;
0565          using pointer = RFieldDescriptor*;
0566          using reference = const RFieldDescriptor&;
0567 
0568          RIterator(const RNTupleDescriptor& ntuple, const std::vector<DescriptorId_t>& fieldChildren,
0569             std::size_t index) : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index) {}
              /// Pre-increment; returns a copy of the advanced iterator
0570          iterator operator++() { ++fIndex; return *this; }
              /// Looks up the field descriptor of the current ID; std::vector::at() throws std::out_of_range when
              /// the end iterator is dereferenced
0571          reference operator*() {
0572             return fNTuple.GetFieldDescriptor(
0573                fFieldChildren.at(fIndex)
0574             );
0575          }
              // Comparison only looks at the position, not at the referenced ntuple or ID list
0576          bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
0577          bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
0578       };
           /// Iterate over the children of `field` in the order given by its link IDs (the ID list is copied)
0579       RFieldDescriptorIterable(const RNTupleDescriptor& ntuple, const RFieldDescriptor& field)
0580          : fNTuple(ntuple), fFieldChildren(field.GetLinkIds()) {}
0581       /// Sort the range using an arbitrary comparison function.
           /// The comparator receives two child field IDs and is passed to std::sort on the copied ID list.
0582       RFieldDescriptorIterable(const RNTupleDescriptor& ntuple, const RFieldDescriptor& field,
0583          const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator)
0584          : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
0585       {
0586          std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
0587       }
0588       RIterator begin() {
0589          return RIterator(fNTuple, fFieldChildren, 0);
0590       }
0591       RIterator end() {
0592          return RIterator(fNTuple, fFieldChildren, fFieldChildren.size());
0593       }
0594    };
0595 
0596    // clang-format off
0597    /**
0598    \class ROOT::Experimental::RNTupleDescriptor::RClusterGroupDescriptorIterable
0599    \ingroup NTuple
0600    \brief Used to loop over all the cluster groups of an ntuple (in unspecified order)
0601 
0602    Enumerate all cluster group IDs from the cluster group descriptor.  No specific order can be assumed, use
0603    FindNextClusterGroupId and FindPrevClusterGroupId to traverse cluster groups by entry number.
0604    */
0605    // clang-format on
0606    class RClusterGroupDescriptorIterable {
0607    private:
0608       /// The associated NTuple for this range.
0609       const RNTupleDescriptor &fNTuple;
0610 
0611    public:
           /// Iterator that addresses a cluster group by its position in the ntuple descriptor's
           /// fClusterGroupDescriptors map. It keeps a reference to the descriptor and must not outlive it.
0612       class RIterator {
0613       private:
0614          /// The enclosing range's NTuple.
0615          const RNTupleDescriptor &fNTuple;
              /// Number of steps from the beginning of the cluster group map
0616          std::size_t fIndex = 0;
0617 
0618       public:
0619          using iterator_category = std::forward_iterator_tag;
0620          using iterator = RIterator;
0621          using value_type = RClusterGroupDescriptor;
0622          using difference_type = std::ptrdiff_t;
0623          using pointer = RClusterGroupDescriptor *;
0624          using reference = const RClusterGroupDescriptor &;
0625 
0626          RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
              /// Pre-increment; returns a copy of the advanced iterator
0627          iterator operator++()
0628          {
0629             ++fIndex;
0630             return *this;
0631          }
              /// Every dereference starts at the beginning of the map and advances fIndex steps, so the cost of one
              /// dereference grows with the position. The index is not range-checked.
0632          reference operator*()
0633          {
0634             auto it = fNTuple.fClusterGroupDescriptors.begin();
0635             std::advance(it, fIndex);
0636             return it->second;
0637          }
              // Comparison only looks at the position, not at the referenced ntuple
0638          bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0639          bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0640       };
0641 
0642       RClusterGroupDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
0643       RIterator begin() { return RIterator(fNTuple, 0); }
           // NOTE(review): assumes GetNClusterGroups() equals the size of fClusterGroupDescriptors - confirm
0644       RIterator end() { return RIterator(fNTuple, fNTuple.GetNClusterGroups()); }
0645    };
0646 
0647    // clang-format off
0648    /**
0649    \class ROOT::Experimental::RNTupleDescriptor::RClusterDescriptorIterable
0650    \ingroup NTuple
0651    \brief Used to loop over all the clusters of an ntuple (in unspecified order)
0652 
0653    Enumerate all cluster IDs from the cluster descriptor.  No specific order can be assumed, use
0654    FindNextClusterId and FindPrevClusterId to traverse clusters by entry number.
0655    */
0656    // clang-format on
0657    class RClusterDescriptorIterable {
0658    private:
0659       /// The associated NTuple for this range.
0660       const RNTupleDescriptor &fNTuple;
0661    public:
           /// Iterator that addresses a cluster by its position in the ntuple descriptor's fClusterDescriptors map.
           /// It keeps a reference to the descriptor and must not outlive it.
0662       class RIterator {
0663       private:
0664          /// The enclosing range's NTuple.
0665          const RNTupleDescriptor &fNTuple;
              /// Number of steps from the beginning of the cluster map
0666          std::size_t fIndex = 0;
0667       public:
0668          using iterator_category = std::forward_iterator_tag;
0669          using iterator = RIterator;
0670          using value_type = RClusterDescriptor;
0671          using difference_type = std::ptrdiff_t;
0672          using pointer = RClusterDescriptor *;
0673          using reference = const RClusterDescriptor &;
0674 
0675          RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
              /// Pre-increment; returns a copy of the advanced iterator
0676          iterator operator++() { ++fIndex; return *this; }
              /// Every dereference starts at the beginning of the map and advances fIndex steps, so the cost of one
              /// dereference grows with the position. The index is not range-checked.
0677          reference operator*() {
0678             auto it = fNTuple.fClusterDescriptors.begin();
0679             std::advance(it, fIndex);
0680             return it->second;
0681          }
              // Comparison only looks at the position, not at the referenced ntuple
0682          bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0683          bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0684       };
0685 
0686       RClusterDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) { }
0687       RIterator begin() { return RIterator(fNTuple, 0); }
           // NOTE(review): assumes GetNActiveClusters() equals the size of fClusterDescriptors - confirm
0688       RIterator end() { return RIterator(fNTuple, fNTuple.GetNActiveClusters()); }
0689    };
0690 
   // Special member functions: the descriptor is default-constructible and movable, but not copyable.

   /// Compiler-generated default constructor.
   RNTupleDescriptor() = default;
   /// Copy construction is disabled.
   RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
   /// Copy assignment is disabled.
   RNTupleDescriptor &operator=(const RNTupleDescriptor &other) = delete;
   /// Compiler-generated move constructor.
   RNTupleDescriptor(RNTupleDescriptor &&other) = default;
   /// Compiler-generated move assignment.
   RNTupleDescriptor &operator=(RNTupleDescriptor &&other) = default;
0696 
0697    std::unique_ptr<RNTupleDescriptor> Clone() const;
0698 
0699    bool operator ==(const RNTupleDescriptor &other) const;
0700 
0701    std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
0702    std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
0703    std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
0704 
0705    const RFieldDescriptor& GetFieldDescriptor(DescriptorId_t fieldId) const {
0706       return fFieldDescriptors.at(fieldId);
0707    }
0708    const RColumnDescriptor& GetColumnDescriptor(DescriptorId_t columnId) const {
0709       return fColumnDescriptors.at(columnId);
0710    }
0711    const RClusterGroupDescriptor &GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
0712    {
0713       return fClusterGroupDescriptors.at(clusterGroupId);
0714    }
0715    const RClusterDescriptor& GetClusterDescriptor(DescriptorId_t clusterId) const {
0716       return fClusterDescriptors.at(clusterId);
0717    }
0718 
0719    RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor& fieldDesc) const {
0720       return RFieldDescriptorIterable(*this, fieldDesc);
0721    }
0722    RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor& fieldDesc,
0723       const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
0724    {
0725       return RFieldDescriptorIterable(*this, fieldDesc, comparator);
0726    }
0727    RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const {
0728       return GetFieldIterable(GetFieldDescriptor(fieldId));
0729    }
0730    RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId,
0731       const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
0732    {
0733       return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
0734    }
0735    RFieldDescriptorIterable GetTopLevelFields() const {
0736       return GetFieldIterable(GetFieldZeroId());
0737    }
0738    RFieldDescriptorIterable GetTopLevelFields(
0739       const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
0740    {
0741       return GetFieldIterable(GetFieldZeroId(), comparator);
0742    }
0743 
0744    RColumnDescriptorIterable GetColumnIterable() const
0745    {
0746       return RColumnDescriptorIterable(*this);
0747    }
0748    RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
0749    {
0750       return RColumnDescriptorIterable(*this, fieldDesc);
0751    }
0752    RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
0753    {
0754       return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
0755    }
0756 
0757    RClusterGroupDescriptorIterable GetClusterGroupIterable() const { return RClusterGroupDescriptorIterable(*this); }
0758 
0759    RClusterDescriptorIterable GetClusterIterable() const
0760    {
0761       return RClusterDescriptorIterable(*this);
0762    }
0763 
0764    std::string GetName() const { return fName; }
0765    std::string GetDescription() const { return fDescription; }
0766 
0767    std::size_t GetNFields() const { return fFieldDescriptors.size(); }
0768    std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
0769    std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
0770    std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
0771    std::size_t GetNClusters() const { return fNClusters; }
0772    std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
0773 
0774    /// We know the number of entries from adding the cluster summaries
0775    NTupleSize_t GetNEntries() const { return fNEntries; }
0776    NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const;
0777 
0778    /// Returns the logical parent of all top-level NTuple data fields.
0779    DescriptorId_t GetFieldZeroId() const;
0780    const RFieldDescriptor &GetFieldZero() const { return GetFieldDescriptor(GetFieldZeroId()); }
0781    DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
0782    /// Searches for a top-level field
0783    DescriptorId_t FindFieldId(std::string_view fieldName) const;
0784    DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
0785    DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
0786    DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const;
0787    DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const;
0788    DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const;
0789 
0790    /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
0791    /// In case of invalid field ID, an empty string is returned.
0792    std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
0793 
0794    bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
0795    std::vector<std::uint64_t> GetFeatureFlags() const;
0796 
0797    /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
0798    const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
0799 
0800    /// Methods to load and drop cluster group details (cluster IDs and page locations)
0801    RResult<void> AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
0802    RResult<void> DropClusterGroupDetails(DescriptorId_t clusterGroupId);
0803 
0804    std::uint64_t GetGeneration() const { return fGeneration; }
0805    void IncGeneration() { fGeneration++; }
0806 
0807    /// Re-create the C++ model from the stored meta-data
0808    std::unique_ptr<RNTupleModel> CreateModel() const;
0809    void PrintInfo(std::ostream &output) const;
0810 };
0811 
0812 namespace Internal {
0813 
0814 // clang-format off
0815 /**
0816 \class ROOT::Experimental::Internal::RColumnDescriptorBuilder
0817 \ingroup NTuple
0818 \brief A helper class for piece-wise construction of an RColumnDescriptor
0819 
0820 Dangling column descriptors can become actual descriptors when added to an
0821 RNTupleDescriptorBuilder instance and then linked to their fields.
0822 */
0823 // clang-format on
0824 class RColumnDescriptorBuilder {
0825 private:
0826    RColumnDescriptor fColumn = RColumnDescriptor();
0827 public:
0828    /// Make an empty column descriptor builder.
0829    RColumnDescriptorBuilder() = default;
0830 
0831    RColumnDescriptorBuilder &LogicalColumnId(DescriptorId_t logicalColumnId)
0832    {
0833       fColumn.fLogicalColumnId = logicalColumnId;
0834       return *this;
0835    }
0836    RColumnDescriptorBuilder &PhysicalColumnId(DescriptorId_t physicalColumnId)
0837    {
0838       fColumn.fPhysicalColumnId = physicalColumnId;
0839       return *this;
0840    }
0841    RColumnDescriptorBuilder& Model(const RColumnModel &model) {
0842       fColumn.fModel = model;
0843       return *this;
0844    }
0845    RColumnDescriptorBuilder& FieldId(DescriptorId_t fieldId) {
0846       fColumn.fFieldId = fieldId;
0847       return *this;
0848    }
0849    RColumnDescriptorBuilder& Index(std::uint32_t index) {
0850       fColumn.fIndex = index;
0851       return *this;
0852    }
0853    RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
0854    {
0855       fColumn.fFirstElementIndex = firstElementIdx;
0856       return *this;
0857    }
0858    DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
0859    /// Attempt to make a column descriptor. This may fail if the column
0860    /// was not given enough information to make a proper descriptor.
0861    RResult<RColumnDescriptor> MakeDescriptor() const;
0862 };
0863 
0864 
0865 // clang-format off
0866 /**
0867 \class ROOT::Experimental::Internal::RFieldDescriptorBuilder
0868 \ingroup NTuple
0869 \brief A helper class for piece-wise construction of an RFieldDescriptor
0870 
0871 Dangling field descriptors describe a single field in isolation. They are
0872 missing the necessary relationship information (parent field, any child fields)
0873 required to describe a real NTuple field.
0874 
0875 Dangling field descriptors can only become actual descriptors when added to an
0876 RNTupleDescriptorBuilder instance and then linked to other fields.
0877 */
0878 // clang-format on
0879 class RFieldDescriptorBuilder {
0880 private:
0881    RFieldDescriptor fField = RFieldDescriptor();
0882 public:
0883    /// Make an empty dangling field descriptor.
0884    RFieldDescriptorBuilder() = default;
0885    /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
0886    /// Relationship information is lost during the conversion to a
0887    /// dangling descriptor:
0888    /// * Parent id is reset to an invalid id.
0889    /// * Field children ids are forgotten.
0890    ///
0891    /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
0892    explicit RFieldDescriptorBuilder(const RFieldDescriptor& fieldDesc);
0893 
0894    /// Make a new RFieldDescriptorBuilder based off a live NTuple field.
0895    static RFieldDescriptorBuilder FromField(const RFieldBase &field);
0896 
0897    RFieldDescriptorBuilder& FieldId(DescriptorId_t fieldId) {
0898       fField.fFieldId = fieldId;
0899       return *this;
0900    }
0901    RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
0902    {
0903       fField.fFieldVersion = fieldVersion;
0904       return *this;
0905    }
0906    RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
0907    {
0908       fField.fTypeVersion = typeVersion;
0909       return *this;
0910    }
0911    RFieldDescriptorBuilder& ParentId(DescriptorId_t id) {
0912       fField.fParentId = id;
0913       return *this;
0914    }
0915    RFieldDescriptorBuilder& FieldName(const std::string& fieldName) {
0916       fField.fFieldName = fieldName;
0917       return *this;
0918    }
0919    RFieldDescriptorBuilder& FieldDescription(const std::string& fieldDescription) {
0920       fField.fFieldDescription = fieldDescription;
0921       return *this;
0922    }
0923    RFieldDescriptorBuilder& TypeName(const std::string& typeName) {
0924       fField.fTypeName = typeName;
0925       return *this;
0926    }
0927    RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
0928    {
0929       fField.fTypeAlias = typeAlias;
0930       return *this;
0931    }
0932    RFieldDescriptorBuilder& NRepetitions(std::uint64_t nRepetitions) {
0933       fField.fNRepetitions = nRepetitions;
0934       return *this;
0935    }
0936    RFieldDescriptorBuilder& Structure(const ENTupleStructure& structure) {
0937       fField.fStructure = structure;
0938       return *this;
0939    }
0940    DescriptorId_t GetParentId() const { return fField.fParentId; }
0941    /// Attempt to make a field descriptor. This may fail if the dangling field
0942    /// was not given enough information to make a proper descriptor.
0943    RResult<RFieldDescriptor> MakeDescriptor() const;
0944 };
0945 
0946 
0947 // clang-format off
0948 /**
0949 \class ROOT::Experimental::Internal::RClusterDescriptorBuilder
0950 \ingroup NTuple
0951 \brief A helper class for piece-wise construction of an RClusterDescriptor
0952 
0953 The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
0954 piecewise addition of page locations.
0955 */
0956 // clang-format on
0957 class RClusterDescriptorBuilder {
0958 private:
0959    RClusterDescriptor fCluster;
0960 
0961 public:
0962    RClusterDescriptorBuilder &ClusterId(DescriptorId_t clusterId)
0963    {
0964       fCluster.fClusterId = clusterId;
0965       return *this;
0966    }
0967 
0968    RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
0969    {
0970       fCluster.fFirstEntryIndex = firstEntryIndex;
0971       return *this;
0972    }
0973 
0974    RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
0975    {
0976       fCluster.fNEntries = nEntries;
0977       return *this;
0978    }
0979 
0980    RResult<void> CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex,
0981                                    std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
0982 
0983    /// Add column and page ranges for deferred columns missing in this cluster.  The locator type for the synthesized
0984    /// page ranges is `kTypePageZero`.  All the page sources must be able to populate the 'zero' page from such locator.
0985    /// Any call to `CommitColumnRange()` should happen before calling this function.
0986    RClusterDescriptorBuilder &AddDeferredColumnRanges(const RNTupleDescriptor &desc);
0987 
0988    /// Move out the full cluster descriptor including page locations
0989    RResult<RClusterDescriptor> MoveDescriptor();
0990 };
0991 
0992 // clang-format off
0993 /**
0994 \class ROOT::Experimental::Internal::RClusterGroupDescriptorBuilder
0995 \ingroup NTuple
0996 \brief A helper class for piece-wise construction of an RClusterGroupDescriptor
0997 */
0998 // clang-format on
0999 class RClusterGroupDescriptorBuilder {
1000 private:
1001    RClusterGroupDescriptor fClusterGroup;
1002 
1003 public:
1004    RClusterGroupDescriptorBuilder() = default;
1005    static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc);
1006 
1007    RClusterGroupDescriptorBuilder &ClusterGroupId(DescriptorId_t clusterGroupId)
1008    {
1009       fClusterGroup.fClusterGroupId = clusterGroupId;
1010       return *this;
1011    }
1012    RClusterGroupDescriptorBuilder &PageListLocator(const RNTupleLocator &pageListLocator)
1013    {
1014       fClusterGroup.fPageListLocator = pageListLocator;
1015       return *this;
1016    }
1017    RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1018    {
1019       fClusterGroup.fPageListLength = pageListLength;
1020       return *this;
1021    }
1022    RClusterGroupDescriptorBuilder &MinEntry(std::uint64_t minEntry)
1023    {
1024       fClusterGroup.fMinEntry = minEntry;
1025       return *this;
1026    }
1027    RClusterGroupDescriptorBuilder &EntrySpan(std::uint64_t entrySpan)
1028    {
1029       fClusterGroup.fEntrySpan = entrySpan;
1030       return *this;
1031    }
1032    RClusterGroupDescriptorBuilder &NClusters(std::uint32_t nClusters)
1033    {
1034       fClusterGroup.fNClusters = nClusters;
1035       return *this;
1036    }
1037    void AddClusters(const std::vector<DescriptorId_t> &clusterIds)
1038    {
1039       if (clusterIds.size() != fClusterGroup.GetNClusters())
1040          throw RException(R__FAIL("mismatch of number of clusters"));
1041       fClusterGroup.fClusterIds = clusterIds;
1042    }
1043 
1044    RResult<RClusterGroupDescriptor> MoveDescriptor();
1045 };
1046 
1047 // clang-format off
1048 /**
1049 \class ROOT::Experimental::Internal::RColumnGroupDescriptorBuilder
1050 \ingroup NTuple
1051 \brief A helper class for piece-wise construction of an RColumnGroupDescriptor
1052 */
1053 // clang-format on
1054 class RColumnGroupDescriptorBuilder {
1055 private:
1056    RColumnGroupDescriptor fColumnGroup;
1057 
1058 public:
1059    RColumnGroupDescriptorBuilder() = default;
1060 
1061    RColumnGroupDescriptorBuilder &ColumnGroupId(DescriptorId_t columnGroupId)
1062    {
1063       fColumnGroup.fColumnGroupId = columnGroupId;
1064       return *this;
1065    }
1066    void AddColumn(DescriptorId_t physicalId) { fColumnGroup.fPhysicalColumnIds.insert(physicalId); }
1067 
1068    RResult<RColumnGroupDescriptor> MoveDescriptor();
1069 };
1070 
1071 // clang-format off
1072 /**
1073 \class ROOT::Experimental::Internal::RNTupleDescriptorBuilder
1074 \ingroup NTuple
1075 \brief A helper class for piece-wise construction of an RNTupleDescriptor
1076 
1077 Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1078 */
1079 // clang-format on
1080 class RNTupleDescriptorBuilder {
1081 private:
1082    RNTupleDescriptor fDescriptor;
1083    RResult<void> EnsureFieldExists(DescriptorId_t fieldId) const;
1084 public:
1085    /// Checks whether invariants hold:
1086    /// * NTuple name is valid
1087    /// * Fields have valid parent and child ids
1088    RResult<void> EnsureValidDescriptor() const;
1089    const RNTupleDescriptor& GetDescriptor() const { return fDescriptor; }
1090    RNTupleDescriptor MoveDescriptor();
1091 
1092    void SetNTuple(const std::string_view name, const std::string_view description);
1093    void SetFeature(unsigned int flag);
1094 
1095    void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1096    void SetOnDiskHeaderSize(std::uint64_t size) { fDescriptor.fOnDiskHeaderSize = size; }
1097    /// The real footer size also include the page list envelopes
1098    void AddToOnDiskFooterSize(std::uint64_t size) { fDescriptor.fOnDiskFooterSize += size; }
1099 
1100    void AddField(const RFieldDescriptor& fieldDesc);
1101    RResult<void> AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId);
1102 
1103    void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId,
1104                   const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx = 0U);
1105    RResult<void> AddColumn(RColumnDescriptor &&columnDesc);
1106 
1107    RResult<void> AddClusterGroup(RClusterGroupDescriptor &&clusterGroup);
1108    RResult<void> AddCluster(RClusterDescriptor &&clusterDesc);
1109 
1110    /// Clears so-far stored clusters, fields, and columns and return to a pristine ntuple descriptor
1111    void Reset();
1112 
1113    /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1114    /// annotated as begin part of the header extension.
1115    void BeginHeaderExtension();
1116 };
1117 
1118 } // namespace Internal
1119 } // namespace Experimental
1120 } // namespace ROOT
1121 
1122 #endif // ROOT7_RNTupleDescriptor