Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-11-11 10:17:37

0001 /// \file ROOT/RNTupleDescriptor.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
0005 /// \date 2018-07-19
0006 
0007 /*************************************************************************
0008  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0009  * All rights reserved.                                                  *
0010  *                                                                       *
0011  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0012  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0013  *************************************************************************/
0014 
0015 #ifndef ROOT_RNTupleDescriptor
0016 #define ROOT_RNTupleDescriptor
0017 
0018 #include <ROOT/RCreateFieldOptions.hxx>
0019 #include <ROOT/RError.hxx>
0020 #include <ROOT/RNTupleSerialize.hxx>
0021 #include <ROOT/RNTupleUtil.hxx>
0022 #include <ROOT/RSpan.hxx>
0023 
0024 #include <TError.h>
0025 
0026 #include <algorithm>
0027 #include <chrono>
0028 #include <cmath>
0029 #include <functional>
0030 #include <iterator>
0031 #include <map>
0032 #include <memory>
0033 #include <optional>
0034 #include <ostream>
0035 #include <vector>
0036 #include <set>
0037 #include <string>
0038 #include <string_view>
0039 #include <unordered_map>
0040 #include <unordered_set>
0041 
0042 namespace ROOT {
0043 
0044 class RFieldBase;
0045 class RNTupleModel;
0046 
0047 namespace Internal {
0048 class RColumnElementBase;
0049 }
0050 
0051 class RNTupleDescriptor;
0052 
0053 namespace Internal {
0054 class RColumnDescriptorBuilder;
0055 class RClusterDescriptorBuilder;
0056 class RClusterGroupDescriptorBuilder;
0057 class RExtraTypeInfoDescriptorBuilder;
0058 class RFieldDescriptorBuilder;
0059 class RNTupleDescriptorBuilder;
0060 
0061 RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc);
0062 } // namespace Internal
0063 
0064 // clang-format off
0065 /**
0066 \class ROOT::RFieldDescriptor
0067 \ingroup NTuple
0068 \brief Metadata stored for every field of an RNTuple
0069 */
0070 // clang-format on
0071 class RFieldDescriptor final {
     // All data members are private and this class exposes no setters; the befriended builder classes
     // have access to the private state.
0072    friend class Internal::RNTupleDescriptorBuilder;
0073    friend class Internal::RFieldDescriptorBuilder;
0074 
0075 private:
        /// The ID of this field; defaults to kInvalidDescriptorId
0076    ROOT::DescriptorId_t fFieldId = ROOT::kInvalidDescriptorId;
0077    /// The version of the C++-type-to-column translation mechanics
0078    std::uint32_t fFieldVersion = 0;
0079    /// The version of the C++ type itself
0080    std::uint32_t fTypeVersion = 0;
0081    /// The leaf name, not including parent fields
0082    std::string fFieldName;
0083    /// Free text set by the user
0084    std::string fFieldDescription;
0085    /// The C++ type that was used when writing the field
0086    std::string fTypeName;
0087    /// A typedef or using directive that resolved to the type name during field creation
0088    std::string fTypeAlias;
0089    /// The number of elements per entry for fixed-size arrays
0090    std::uint64_t fNRepetitions = 0;
0091    /// The structural information carried by this field in the data model tree
0092    ROOT::ENTupleStructure fStructure = ROOT::ENTupleStructure::kInvalid;
0093    /// Establishes sub field relationships, such as classes and collections
0094    ROOT::DescriptorId_t fParentId = ROOT::kInvalidDescriptorId;
0095    /// For projected fields, the source field ID
0096    ROOT::DescriptorId_t fProjectionSourceId = ROOT::kInvalidDescriptorId;
0097    /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
0098    /// order of sub fields.
0099    std::vector<ROOT::DescriptorId_t> fLinkIds;
0100    /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
0101    /// list of logical column ids. For example, the second column of the third column representation is
0102    /// fLogicalColumnIds[2 * fColumnCardinality + 1]
0103    std::uint32_t fColumnCardinality = 0;
0104    /// The ordered list of columns attached to this field: first by representation index then by column index.
0105    std::vector<ROOT::DescriptorId_t> fLogicalColumnIds;
0106    /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
0107    /// identify types by their checksum
0108    std::optional<std::uint32_t> fTypeChecksum;
0109 
0110 public:
        // The descriptor is move-only: implicit copies are disabled and Clone() is the explicit way to duplicate it.
0111    RFieldDescriptor() = default;
0112    RFieldDescriptor(const RFieldDescriptor &other) = delete;
0113    RFieldDescriptor &operator=(const RFieldDescriptor &other) = delete;
0114    RFieldDescriptor(RFieldDescriptor &&other) = default;
0115    RFieldDescriptor &operator=(RFieldDescriptor &&other) = default;
0116 
        /// Equality comparison; defined out of line (not visible in this header)
0117    bool operator==(const RFieldDescriptor &other) const;
0118    /// Get a copy of the descriptor
0119    RFieldDescriptor Clone() const;
0120 
0121    /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
0122    /// access to sub fields, which is provided by the RNTupleDescriptor argument.
        /// \param ntplDesc The descriptor that gives access to the sub fields
        /// \param options Field creation options; default-constructed if omitted
        /// \return An owning pointer to the newly created field
0123    std::unique_ptr<ROOT::RFieldBase>
0124    CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options = {}) const;
0125 
        // Read-only accessors. Scalars and the optional checksum are returned by value; strings and ID lists are
        // returned by const reference, so the references are only valid as long as this descriptor is alive.
0126    ROOT::DescriptorId_t GetId() const { return fFieldId; }
0127    std::uint32_t GetFieldVersion() const { return fFieldVersion; }
0128    std::uint32_t GetTypeVersion() const { return fTypeVersion; }
0129    const std::string &GetFieldName() const { return fFieldName; }
0130    const std::string &GetFieldDescription() const { return fFieldDescription; }
0131    const std::string &GetTypeName() const { return fTypeName; }
0132    const std::string &GetTypeAlias() const { return fTypeAlias; }
0133    std::uint64_t GetNRepetitions() const { return fNRepetitions; }
0134    ROOT::ENTupleStructure GetStructure() const { return fStructure; }
0135    ROOT::DescriptorId_t GetParentId() const { return fParentId; }
0136    ROOT::DescriptorId_t GetProjectionSourceId() const { return fProjectionSourceId; }
        /// The IDs of the sub fields (see fLinkIds)
0137    const std::vector<ROOT::DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
0138    const std::vector<ROOT::DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
0139    std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
        /// Empty optional if no type checksum has been stored for this field
0140    std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
        /// A field counts as projected as soon as its projection source ID differs from kInvalidDescriptorId
0141    bool IsProjectedField() const { return fProjectionSourceId != ROOT::kInvalidDescriptorId; }
0142    /// Tells if the field describes a user-defined class rather than a fundamental type, a collection, or one of the
0143    /// natively supported stdlib classes.
0144    /// The dictionary does not need to be available for this method.
0145    bool IsCustomClass() const;
0146 };
0147 
0148 // clang-format off
0149 /**
0150 \class ROOT::RColumnDescriptor
0151 \ingroup NTuple
0152 \brief Metadata stored for every column of an RNTuple
0153 */
0154 // clang-format on
0155 class RColumnDescriptor final {
     // All data members are private and this class exposes no setters; the befriended builder classes
     // have access to the private state.
0156    friend class Internal::RColumnDescriptorBuilder;
0157    friend class Internal::RNTupleDescriptorBuilder;
0158 
0159 public:
        /// A pair of doubles (minimum, maximum) describing the value range of a column.
0160    struct RValueRange {
0161       double fMin = 0, fMax = 0;
0162 
0163       RValueRange() = default;
0164       RValueRange(double min, double max) : fMin(min), fMax(max) {}
           /// Implicit conversion from a (min, max) pair
0165       RValueRange(std::pair<double, double> range) : fMin(range.first), fMax(range.second) {}
0166 
           /// Exact (bitwise-value) comparison of both bounds; no tolerance is applied
0167       bool operator==(RValueRange other) const { return fMin == other.fMin && fMax == other.fMax; }
0168       bool operator!=(RValueRange other) const { return !(*this == other); }
0169    };
0170 
0171 private:
0172    /// The actual column identifier, which is the link to the corresponding field
0173    ROOT::DescriptorId_t fLogicalColumnId = ROOT::kInvalidDescriptorId;
0174    /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
0175    ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
0176    /// Every column belongs to one and only one field
0177    ROOT::DescriptorId_t fFieldId = ROOT::kInvalidDescriptorId;
0178    /// The absolute value specifies the index for the first stored element for this column.
0179    /// For deferred columns the absolute value is larger than zero.
0180    /// Negative values specify a suppressed and deferred column.
        // NOTE(review): the initializer below is an unsigned literal assigned to a signed member; the value is zero
        // either way.
0181    std::int64_t fFirstElementIndex = 0U;
0182    /// A field can be serialized into several columns, which are numbered from zero to $n$
0183    std::uint32_t fIndex = 0;
0184    /// A field may use multiple column representations, which are numbered from zero to $m$.
0185    /// Every representation has the same number of columns.
0186    std::uint16_t fRepresentationIndex = 0;
0187    /// The size in bits of elements of this column. Most columns have the size fixed by their type
0188    /// but low-precision float columns have variable bit widths.
0189    std::uint16_t fBitsOnStorage = 0;
0190    /// The on-disk column type
0191    ROOT::ENTupleColumnType fType = ROOT::ENTupleColumnType::kUnknown;
0192    /// Optional value range (used e.g. by quantized real fields)
0193    std::optional<RValueRange> fValueRange;
0194 
0195 public:
        // The descriptor is move-only: implicit copies are disabled and Clone() is the explicit way to duplicate it.
0196    RColumnDescriptor() = default;
0197    RColumnDescriptor(const RColumnDescriptor &other) = delete;
0198    RColumnDescriptor &operator=(const RColumnDescriptor &other) = delete;
0199    RColumnDescriptor(RColumnDescriptor &&other) = default;
0200    RColumnDescriptor &operator=(RColumnDescriptor &&other) = default;
0201 
        /// Equality comparison; defined out of line (not visible in this header)
0202    bool operator==(const RColumnDescriptor &other) const;
0203    /// Get a copy of the descriptor
0204    RColumnDescriptor Clone() const;
0205 
        // Read-only accessors; all of them return by value.
0206    ROOT::DescriptorId_t GetLogicalId() const { return fLogicalColumnId; }
0207    ROOT::DescriptorId_t GetPhysicalId() const { return fPhysicalColumnId; }
0208    ROOT::DescriptorId_t GetFieldId() const { return fFieldId; }
0209    std::uint32_t GetIndex() const { return fIndex; }
0210    std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
        /// Returns the magnitude of fFirstElementIndex, i.e. the first element index with the sign (which
        /// marks suppressed deferred columns) stripped
0211    std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
0212    std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
0213    ROOT::ENTupleColumnType GetType() const { return fType; }
        /// Empty optional if no value range has been set for this column
0214    std::optional<RValueRange> GetValueRange() const { return fValueRange; }
        /// A column is an alias column if its physical ID differs from its logical ID
0215    bool IsAliasColumn() const { return fPhysicalColumnId != fLogicalColumnId; }
        /// True for any non-zero first element index, regardless of its sign
0216    bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
        /// True only if the first element index is negative
0217    bool IsSuppressedDeferredColumn() const { return fFirstElementIndex < 0; }
0218 };
0219 
0220 // clang-format off
0221 /**
0222 \class ROOT::RClusterDescriptor
0223 \ingroup NTuple
0224 \brief Metadata for RNTuple clusters
0225 
0226 The cluster descriptor is built in two phases.  In a first phase, the descriptor has only an ID.
0227 In a second phase, the event range, column group, page locations and column ranges are added.
0228 Both phases are populated by the RClusterDescriptorBuilder.
0229 Clusters span across all available columns in the RNTuple.
0230 */
0231 // clang-format on
0232 class RClusterDescriptor final {
0233    friend class Internal::RClusterDescriptorBuilder;
0234 
0235 public:
0236    // clang-format off
0237    /**
0238    \class ROOT::RClusterDescriptor::RColumnRange
0239    \ingroup NTuple
0240    \brief The window of element indexes of a particular column in a particular cluster
0241    */
0242    // clang-format on
0243    class RColumnRange final {
0244       ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
0245       /// The global index of the first column element in the cluster
0246       ROOT::NTupleSize_t fFirstElementIndex = ROOT::kInvalidNTupleIndex;
0247       /// The number of column elements in the cluster
0248       ROOT::NTupleSize_t fNElements = ROOT::kInvalidNTupleIndex;
0249       /// The usual format for ROOT compression settings (see Compression.h).
0250       /// The pages of a particular column in a particular cluster are all compressed with the same settings.
0251       /// If unset, the compression settings are undefined (deferred columns, suppressed columns).
0252       std::optional<std::uint32_t> fCompressionSettings;
0253       /// Suppressed columns have an empty page range and unknown compression settings.
0254       /// Their element index range, however, is aligned with the corresponding column of the
0255       /// primary column representation (see Section "Suppressed Columns" in the specification)
0256       bool fIsSuppressed = false;
0257 
0258       // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
0259       // Should this be done on the field level?
0260 
0261    public:
           /// Leaves the column ID, first element index and element count at their invalid sentinel values
0262       RColumnRange() = default;
0263 
           /// Initializes all members from the given values; `suppressed` defaults to false
0264       RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex,
0265                    ROOT::NTupleSize_t nElements, std::optional<std::uint32_t> compressionSettings,
0266                    bool suppressed = false)
0267          : fPhysicalColumnId(physicalColumnId),
0268            fFirstElementIndex(firstElementIndex),
0269            fNElements(nElements),
0270            fCompressionSettings(compressionSettings),
0271            fIsSuppressed(suppressed)
0272       {
0273       }
0274 
0275       ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
0276       void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
0277 
0278       ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
0279       void SetFirstElementIndex(ROOT::NTupleSize_t idx) { fFirstElementIndex = idx; }
           /// Shifts the first element index forward by `by`
0280       void IncrementFirstElementIndex(ROOT::NTupleSize_t by) { fFirstElementIndex += by; }
0281 
0282       ROOT::NTupleSize_t GetNElements() const { return fNElements; }
0283       void SetNElements(ROOT::NTupleSize_t n) { fNElements = n; }
           /// Grows the element count by `by`
0284       void IncrementNElements(ROOT::NTupleSize_t by) { fNElements += by; }
0285 
0286       std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
0287       void SetCompressionSettings(std::optional<std::uint32_t> comp) { fCompressionSettings = comp; }
0288 
0289       bool IsSuppressed() const { return fIsSuppressed; }
0290       void SetIsSuppressed(bool suppressed) { fIsSuppressed = suppressed; }
0291 
           /// Member-wise comparison of all five data members
0292       bool operator==(const RColumnRange &other) const
0293       {
0294          return fPhysicalColumnId == other.fPhysicalColumnId && fFirstElementIndex == other.fFirstElementIndex &&
0295                 fNElements == other.fNElements && fCompressionSettings == other.fCompressionSettings &&
0296                 fIsSuppressed == other.fIsSuppressed;
0297       }
0298 
           /// Tells if `index` lies in the half-open interval [fFirstElementIndex, fFirstElementIndex + fNElements)
0299       bool Contains(ROOT::NTupleSize_t index) const
0300       {
0301          return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
0302       }
0303    };
0304 
0305    // clang-format off
0306    /**
0307    \class ROOT::RClusterDescriptor::RPageInfo
0308    \ingroup NTuple
0309    \brief Information about a single page in the context of a cluster's page range.
0310    */
0311    // clang-format on
0312    // NOTE: We do not need to store the element size / uncompressed page size because we know to which column
0313    // the page belongs
0314    struct RPageInfo {
0315    private:
0316       /// The sum of the elements of all the pages must match the corresponding `fNElements` field in `fColumnRanges`
           /// Defaults to the largest 32-bit unsigned value.
0317       std::uint32_t fNElements = std::uint32_t(-1);
0318       /// The meaning of `fLocator` depends on the storage backend.
0319       RNTupleLocator fLocator;
0320       /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
0321       bool fHasChecksum = false;
0322 
0323    public:
0324       RPageInfo() = default;
0325       RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
0326          : fNElements(nElements), fLocator(locator), fHasChecksum(hasChecksum)
0327       {
0328       }
0329 
           /// Compares the element count and the locator.
           // NOTE(review): fHasChecksum does not take part in the comparison -- confirm whether this is intended.
0330       bool operator==(const RPageInfo &other) const
0331       {
0332          return fNElements == other.fNElements && fLocator == other.fLocator;
0333       }
0334 
0335       std::uint32_t GetNElements() const { return fNElements; }
0336       void SetNElements(std::uint32_t n) { fNElements = n; }
0337 
           // The non-const overload hands out a mutable reference so that the locator can be modified in place.
0338       const RNTupleLocator &GetLocator() const { return fLocator; }
0339       RNTupleLocator &GetLocator() { return fLocator; }
0340       void SetLocator(const RNTupleLocator &locator) { fLocator = locator; }
0341 
0342       bool HasChecksum() const { return fHasChecksum; }
0343       void SetHasChecksum(bool hasChecksum) { fHasChecksum = hasChecksum; }
0344    };
0345 
0346    // clang-format off
0347    /**
0348    \class ROOT::RClusterDescriptor::RPageInfoExtended
0349    \ingroup NTuple
0350    \brief Additional information about a page in an in-memory RPageRange.
0351 
0352    Used by RPageRange::Find() to return information relative to the RPageRange.  This information is not stored on disk
0353    and we don't need to keep it in memory because it can be easily recomputed.
0354    */
0355    // clang-format on
0356    struct RPageInfoExtended final : RPageInfo {
0357    private:
0358       /// Index (in cluster) of the first element in page.
0359       ROOT::NTupleSize_t fFirstElementIndex = 0;
0360       /// Page number in the corresponding RPageRange.
0361       ROOT::NTupleSize_t fPageNumber = 0;
0362 
0363    public:
0364       RPageInfoExtended() = default;
           /// Copies `pageInfo` into the base class and records the page's position within its page range
0365       RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
0366          : RPageInfo(pageInfo), fFirstElementIndex(firstElementIndex), fPageNumber(pageNumber)
0367       {
0368       }
0369 
0370       ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
0371       void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage) { fFirstElementIndex = firstInPage; }
0372 
0373       ROOT::NTupleSize_t GetPageNumber() const { return fPageNumber; }
0374       void SetPageNumber(ROOT::NTupleSize_t pageNumber) { fPageNumber = pageNumber; }
0375    };
0376 
0377    // clang-format off
0378    /**
0379    \class ROOT::RClusterDescriptor::RPageRange
0380    \ingroup NTuple
0381    \brief Records the partition of data into pages for a particular column in a particular cluster
0382    */
0383    // clang-format on
0384    class RPageRange final {
0385       friend class Internal::RClusterDescriptorBuilder;
0386 
0387    private:
0388       /// \brief Extend this RPageRange to fit the given RColumnRange.
0389       ///
0390       /// To do so, prepend as many synthetic RPageInfos as needed to cover the range in `columnRange`.
0391       /// RPageInfos are constructed to contain as many elements of type `element` given a page size
0392       /// limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
0393       /// This function is used to make up RPageRanges for clusters that contain deferred columns.
0394       /// \return The number of column elements covered by the synthesized RPageInfos
0395       std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange,
0396                                          const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize);
0397 
0398       /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
0399       /// up to and including a given index. Used for binary search in Find().
0400       std::vector<ROOT::NTupleSize_t> fCumulativeNElements;
0401 
0402       ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
0403       std::vector<RPageInfo> fPageInfos;
0404 
0405    public:
           // Move-only: implicit copies are disabled and Clone() is the explicit way to duplicate a page range.
0406       RPageRange() = default;
0407       RPageRange(const RPageRange &other) = delete;
0408       RPageRange &operator=(const RPageRange &other) = delete;
0409       RPageRange(RPageRange &&other) = default;
0410       RPageRange &operator=(RPageRange &&other) = default;
0411 
           /// Returns a member-wise copy (column ID, page infos and cumulative element counts)
0412       RPageRange Clone() const
0413       {
0414          RPageRange clone;
0415          clone.fPhysicalColumnId = fPhysicalColumnId;
0416          clone.fPageInfos = fPageInfos;
0417          clone.fCumulativeNElements = fCumulativeNElements;
0418          return clone;
0419       }
0420 
0421       /// Find the page in the RPageRange that contains the given element. The element must exist.
0422       RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const;
0423 
0424       ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
0425       void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
0426 
           // NOTE(review): the non-const overload allows direct modification of the page list; fCumulativeNElements
           // is not touched by these accessors -- confirm how callers keep the two vectors in sync.
0427       const std::vector<RPageInfo> &GetPageInfos() const { return fPageInfos; }
0428       std::vector<RPageInfo> &GetPageInfos() { return fPageInfos; }
0429 
           /// Compares the column ID and the page infos; fCumulativeNElements is not compared
0430       bool operator==(const RPageRange &other) const
0431       {
0432          return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
0433       }
0434    };
0435 
0436 private:
0437    ROOT::DescriptorId_t fClusterId = ROOT::kInvalidDescriptorId;
0438    /// Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges
0439    ROOT::NTupleSize_t fFirstEntryIndex = ROOT::kInvalidNTupleIndex;
0440    ROOT::NTupleSize_t fNEntries = ROOT::kInvalidNTupleIndex;
0441 
        /// The column ranges of this cluster; looked up by physical column ID (see GetColumnRange())
0442    std::unordered_map<ROOT::DescriptorId_t, RColumnRange> fColumnRanges;
        /// The page ranges of this cluster; looked up by physical column ID (see GetPageRange())
0443    std::unordered_map<ROOT::DescriptorId_t, RPageRange> fPageRanges;
0444 
0445 public:
0446    class RColumnRangeIterable;
0447 
        // Move-only: implicit copies are disabled and Clone() is the explicit way to duplicate the descriptor.
0448    RClusterDescriptor() = default;
0449    RClusterDescriptor(const RClusterDescriptor &other) = delete;
0450    RClusterDescriptor &operator=(const RClusterDescriptor &other) = delete;
0451    RClusterDescriptor(RClusterDescriptor &&other) = default;
0452    RClusterDescriptor &operator=(RClusterDescriptor &&other) = default;
0453 
0454    RClusterDescriptor Clone() const;
0455 
0456    bool operator==(const RClusterDescriptor &other) const;
0457 
0458    ROOT::DescriptorId_t GetId() const { return fClusterId; }
0459    ROOT::NTupleSize_t GetFirstEntryIndex() const { return fFirstEntryIndex; }
0460    ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
        // The two lookups below use unordered_map::at(), which throws std::out_of_range if the cluster has no
        // entry for `physicalId`; use ContainsColumn() to test for a column range first.
0461    const RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
0462    const RPageRange &GetPageRange(ROOT::DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
0463    /// Returns an iterable over the column ranges of this cluster. The iteration order is unspecified.
0464    RColumnRangeIterable GetColumnRangeIterable() const;
        /// Tells if a column range is registered for the given physical column ID
0465    bool ContainsColumn(ROOT::DescriptorId_t physicalId) const
0466    {
0467       return fColumnRanges.find(physicalId) != fColumnRanges.end();
0468    }
0469    std::uint64_t GetNBytesOnStorage() const;
0470 };
0471 
0472 class RClusterDescriptor::RColumnRangeIterable final {
     // Lightweight view that allows range-based iteration over the column ranges of a cluster descriptor.
     // The iterators dereference to the RColumnRange values only; the column ID keys of the underlying map
     // are not exposed. The iteration order is the (unspecified) order of the underlying unordered_map.
0473 private:
        /// The descriptor whose column ranges are iterated. Held by reference: the iterable and its iterators
        /// must not outlive the descriptor.
0474    const RClusterDescriptor &fDesc;
0475 
0476 public:
        /// Read-only iterator that wraps a const_iterator of the descriptor's column range map
0477    class RIterator final {
0478    private:
0479       using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RColumnRange>::const_iterator;
0480       /// The wrapped map iterator
0481       Iter_t fIter;
0482 
0483    public:
0484       using iterator_category = std::forward_iterator_tag;
0485       using iterator = RIterator;
0486       using value_type = RColumnRange;
0487       using difference_type = std::ptrdiff_t;
0488       using pointer = const RColumnRange *;
0489       using reference = const RColumnRange &;
0490 
0491       RIterator(Iter_t iter) : fIter(iter) {}
           /// Prefix increment; returns a reference to this iterator instead of a copy
0492       iterator &operator++()
0493       {
0494          ++fIter;
0495          return *this;
0496       }
           // Dereferencing does not modify the iterator, hence both operators are const-qualified.
0497       reference operator*() const { return fIter->second; }
0498       pointer operator->() const { return &fIter->second; }
0499       bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
0500       bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
0501    };
0502 
0503    explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
0504 
        // begin(), end() and size() only read through the const descriptor reference, so they are usable on a
        // const iterable as well.
0505    RIterator begin() const { return RIterator{fDesc.fColumnRanges.cbegin()}; }
0506    RIterator end() const { return RIterator{fDesc.fColumnRanges.cend()}; }
        /// The number of column ranges in the cluster
0507    size_t size() const { return fDesc.fColumnRanges.size(); }
0508 };
0509 
0510 // clang-format off
0511 /**
0512 \class ROOT::RClusterGroupDescriptor
0513 \ingroup NTuple
0514 \brief Clusters are bundled in cluster groups.
0515 
0516 Very large RNTuples can contain multiple cluster groups to organize cluster metadata.
0517 Every RNTuple has at least one cluster group.  The clusters in a cluster group are ordered
0518 corresponding to their first entry number.
0519 */
0520 // clang-format on
0521 class RClusterGroupDescriptor final {
     // All data members are private and this class exposes no setters; the befriended builder class
     // has access to the private state.
0522    friend class Internal::RClusterGroupDescriptorBuilder;
0523 
0524 private:
0525    ROOT::DescriptorId_t fClusterGroupId = ROOT::kInvalidDescriptorId;
0526    /// The cluster IDs can be empty if the corresponding page list is not loaded.
0527    /// Otherwise, cluster ids are sorted by first entry number.
0528    std::vector<ROOT::DescriptorId_t> fClusterIds;
0529    /// The page list that corresponds to the cluster group
0530    RNTupleLocator fPageListLocator;
0531    /// Uncompressed size of the page list
0532    std::uint64_t fPageListLength = 0;
0533    /// The minimum first entry number of the clusters in the cluster group
0534    std::uint64_t fMinEntry = 0;
0535    /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
0536    std::uint64_t fEntrySpan = 0;
0537    /// Number of clusters is always known even if the cluster IDs are not (yet) populated
0538    std::uint32_t fNClusters = 0;
0539 
0540 public:
        // Move-only: implicit copies are disabled; Clone() and CloneSummary() are the explicit ways to duplicate.
0541    RClusterGroupDescriptor() = default;
0542    RClusterGroupDescriptor(const RClusterGroupDescriptor &other) = delete;
0543    RClusterGroupDescriptor &operator=(const RClusterGroupDescriptor &other) = delete;
0544    RClusterGroupDescriptor(RClusterGroupDescriptor &&other) = default;
0545    RClusterGroupDescriptor &operator=(RClusterGroupDescriptor &&other) = default;
0546 
0547    RClusterGroupDescriptor Clone() const;
0548    /// Creates a clone without the cluster IDs
0549    RClusterGroupDescriptor CloneSummary() const;
0550 
        /// Equality comparison; defined out of line (not visible in this header)
0551    bool operator==(const RClusterGroupDescriptor &other) const;
0552 
0553    ROOT::DescriptorId_t GetId() const { return fClusterGroupId; }
0554    std::uint32_t GetNClusters() const { return fNClusters; }
        /// Returns a copy of the page list locator
0555    RNTupleLocator GetPageListLocator() const { return fPageListLocator; }
0556    std::uint64_t GetPageListLength() const { return fPageListLength; }
        /// The returned reference is only valid as long as this descriptor is alive
0557    const std::vector<ROOT::DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
0558    std::uint64_t GetMinEntry() const { return fMinEntry; }
0559    std::uint64_t GetEntrySpan() const { return fEntrySpan; }
0560    /// A cluster group is loaded in two stages. Stage one loads only the summary information.
0561    /// Stage two loads the list of cluster IDs.
        /// The check is based on whether the list of cluster IDs is non-empty.
0562    bool HasClusterDetails() const { return !fClusterIds.empty(); }
0563 };
0564 
0565 /// Used in RExtraTypeInfoDescriptor
0566 enum class EExtraTypeInfoIds {
        /// The value that RExtraTypeInfoDescriptor::fContentId is initialized with
0567    kInvalid,
        // NOTE(review): presumably marks content that holds streamer info records -- confirm against the
        // serialization code.
0568    kStreamerInfo
0569 };
0570 
0571 // clang-format off
0572 /**
0573 \class ROOT::RExtraTypeInfoDescriptor
0574 \ingroup NTuple
0575 \brief Field specific extra type information from the header / extension header
0576 
0577 Currently only used by streamer fields to store RNTuple-wide list of streamer info records.
0578 */
0579 // clang-format on
0580 class RExtraTypeInfoDescriptor final {
     // All data members are private and this class exposes no setters; the befriended builder class
     // has access to the private state.
0581    friend class Internal::RExtraTypeInfoDescriptorBuilder;
0582 
0583 private:
0584    /// Specifies the meaning of the extra information
0585    EExtraTypeInfoIds fContentId = EExtraTypeInfoIds::kInvalid;
0586    /// Type version the extra type information is bound to
0587    std::uint32_t fTypeVersion = 0;
0588    /// The type name the extra information refers to; empty for RNTuple-wide extra information
0589    std::string fTypeName;
0590    /// The content format depends on the content ID and may be binary
0591    std::string fContent;
0592 
0593 public:
        // Move-only: implicit copies are disabled and Clone() is the explicit way to duplicate the descriptor.
0594    RExtraTypeInfoDescriptor() = default;
0595    RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other) = delete;
0596    RExtraTypeInfoDescriptor &operator=(const RExtraTypeInfoDescriptor &other) = delete;
0597    RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other) = default;
0598    RExtraTypeInfoDescriptor &operator=(RExtraTypeInfoDescriptor &&other) = default;
0599 
        /// Equality comparison; defined out of line (not visible in this header)
0600    bool operator==(const RExtraTypeInfoDescriptor &other) const;
0601 
0602    RExtraTypeInfoDescriptor Clone() const;
0603 
        // Read-only accessors. The string accessors return const references that are only valid as long as this
        // descriptor is alive. Since the content may be binary, treat GetContent() as a byte buffer and rely on
        // its size() rather than on null termination.
0604    EExtraTypeInfoIds GetContentId() const { return fContentId; }
0605    std::uint32_t GetTypeVersion() const { return fTypeVersion; }
0606    const std::string &GetTypeName() const { return fTypeName; }
0607    const std::string &GetContent() const { return fContent; }
0608 };
0609 
0610 // clang-format off
0611 /**
0612 \class ROOT::RNTupleDescriptor
0613 \ingroup NTuple
0614 \brief The on-storage metadata of an RNTuple
0615 
0616 Represents the on-disk (on storage) information about an RNTuple. The metadata consists of a header, a footer, and
0617 potentially multiple page lists.
0618 The header carries the RNTuple schema, i.e. the fields and the associated columns and their relationships.
0619 The footer carries information about one or several cluster groups and links to their page lists.
0620 For every cluster group, a page list envelope stores cluster summaries and page locations.
0621 For every cluster, it stores for every column the range of element indexes as well as a list of pages and page
0622 locations.
0623 
0624 The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
0625 for RNTuple objects (pages, clusters, ...).  It is supposed to be usable by all RPageStorage implementations.
0626 
0627 The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
0628 the concept of envelopes and frames: header, footer, and page list envelopes have a preamble with a type ID and length.
0629 Substructures are serialized in frames and have a size and number of items (for list frames). This allows for forward
0630 and backward compatibility when the metadata evolves.
0631 */
0632 // clang-format on
0633 class RNTupleDescriptor final {
0634    friend class Internal::RNTupleDescriptorBuilder; // the builder sets the private members of this class
0635    friend RNTupleDescriptor Internal::CloneDescriptorSchema(const RNTupleDescriptor &desc);
0636
0637 public:
0638    class RHeaderExtension; // defined after this class; declared early for fHeaderExtension
0639
0640 private:
0641    /// The RNTuple name needs to be unique in a given storage location (file)
0642    std::string fName;
0643    /// Free text from the user
0644    std::string fDescription;
0645
0646    ROOT::DescriptorId_t fFieldZeroId = ROOT::kInvalidDescriptorId; ///< Set by the descriptor builder
0647
0648    std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
0649
0650    std::set<unsigned int> fFeatureFlags; ///< Feature flags that are set; see HasFeature()
0651    std::unordered_map<ROOT::DescriptorId_t, RFieldDescriptor> fFieldDescriptors; ///< Keyed by field ID
0652    std::unordered_map<ROOT::DescriptorId_t, RColumnDescriptor> fColumnDescriptors; ///< One entry per logical column
0653
0654    std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors; ///< See GetExtraTypeInfoIterable()
0655    std::unique_ptr<RHeaderExtension> fHeaderExtension; ///< Null if there is no header extension
0656
0657    //// All fields above are part of the schema and are cloned when creating a new descriptor from a given one
0658    //// (see CloneSchema())
0659
0660    std::uint16_t fVersionEpoch = 0; ///< Set by the descriptor builder when deserialized
0661    std::uint16_t fVersionMajor = 0; ///< Set by the descriptor builder when deserialized
0662    std::uint16_t fVersionMinor = 0; ///< Set by the descriptor builder when deserialized
0663    std::uint16_t fVersionPatch = 0; ///< Set by the descriptor builder when deserialized
0664
0665    std::uint64_t fOnDiskHeaderSize = 0;    ///< Set by the descriptor builder when deserialized
0666    std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
0667    std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
0668
0669    std::uint64_t fNEntries = 0;  ///< Updated by the descriptor builder when the cluster groups are added
0670    std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
0671
0672    /// \brief The generation of the descriptor
0673    ///
0674    /// Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
0675    /// active page locations.  During the lifetime of the descriptor, page location information for clusters
0676    /// can be added or removed.  When this happens, the generation should be increased, so that users of the
0677    /// descriptor know that the information changed.  The generation is increased, e.g., by the page source's
0678    /// exclusive lock guard around the descriptor.  It is used, e.g., by the descriptor cache in RNTupleReader.
0679    std::uint64_t fGeneration = 0;
0680
0681    std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors; ///< Keyed by group ID
0682    /// References cluster groups sorted by entry range and thus allows for binary search.
0683    /// Note that this list is empty during the descriptor building process and will only be
0684    /// created when the final descriptor is extracted from the builder.
0685    std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
0686    /// Potentially a subset of all the available clusters; keyed by cluster ID (see GetClusterDescriptor())
0687    std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
0688
0689    // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
0690    ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const;
0691
0692    /// Creates a descriptor containing only the schema information about this RNTuple, i.e. all the information needed
0693    /// to create a new RNTuple with the same schema as this one but not necessarily the same clustering. This is used
0694    /// when merging two RNTuples.
0695    RNTupleDescriptor CloneSchema() const;
0696
0697 public:
0698    static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
0699
0700    class RColumnDescriptorIterable; // the iterable helper types are defined after this class
0701    class RFieldDescriptorIterable;
0702    class RClusterGroupDescriptorIterable;
0703    class RClusterDescriptorIterable;
0704    class RExtraTypeInfoDescriptorIterable;
0705
0706    /// Modifiers passed to CreateModel()
0707    struct RCreateModelOptions {
0708    private:
0709       /// If set to true, projected fields will be reconstructed as such. This will prevent the model from being
0710       /// used with an RNTupleReader, but it is useful, e.g., to accurately merge data.
0711       bool fReconstructProjections = false;
0712       /// By default, creating a model will fail if any of the reconstructed fields contains an unknown column type
0713       /// or an unknown field structural role.
0714       /// If this option is enabled, the model will be created and all fields containing unknown data (directly
0715       /// or indirectly) will be skipped instead.
0716       bool fForwardCompatible = false;
0717       /// If true, the model will be created without a default entry (bare model).
0718       bool fCreateBare = false;
0719       /// If true, fields with a user defined type that have no available dictionaries will be reconstructed
0720       /// as record fields from the on-disk information; otherwise, they will cause an error.
0721       bool fEmulateUnknownTypes = false;
0722
0723    public:
0724       RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
0725
0726       void SetReconstructProjections(bool v) { fReconstructProjections = v; }
0727       bool GetReconstructProjections() const { return fReconstructProjections; }
0728
0729       void SetForwardCompatible(bool v) { fForwardCompatible = v; }
0730       bool GetForwardCompatible() const { return fForwardCompatible; }
0731
0732       void SetCreateBare(bool v) { fCreateBare = v; }
0733       bool GetCreateBare() const { return fCreateBare; }
0734
0735       void SetEmulateUnknownTypes(bool v) { fEmulateUnknownTypes = v; }
0736       bool GetEmulateUnknownTypes() const { return fEmulateUnknownTypes; }
0737    };
0738
0739    RNTupleDescriptor() = default;
0740    RNTupleDescriptor(const RNTupleDescriptor &other) = delete; // not copyable; see Clone()
0741    RNTupleDescriptor &operator=(const RNTupleDescriptor &other) = delete;
0742    RNTupleDescriptor(RNTupleDescriptor &&other) = default; // movable
0743    RNTupleDescriptor &operator=(RNTupleDescriptor &&other) = default;
0744
0745    RNTupleDescriptor Clone() const; // NOTE(review): presumably a complete copy, unlike CloneSchema() -- confirm
0746
0747    bool operator==(const RNTupleDescriptor &other) const;
0748
0749    std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
0750    std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
0751    std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
0752
0753    const RFieldDescriptor &GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
0754    {
0755       return fFieldDescriptors.at(fieldId); // throws std::out_of_range for an unknown ID
0756    }
0757    const RColumnDescriptor &GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
0758    {
0759       return fColumnDescriptors.at(columnId); // throws std::out_of_range for an unknown ID
0760    }
0761    const RClusterGroupDescriptor &GetClusterGroupDescriptor(ROOT::DescriptorId_t clusterGroupId) const
0762    {
0763       return fClusterGroupDescriptors.at(clusterGroupId); // throws std::out_of_range for an unknown ID
0764    }
0765    const RClusterDescriptor &GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
0766    {
0767       return fClusterDescriptors.at(clusterId); // throws std::out_of_range for an unknown ID
0768    }
0769
0770    RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
0771    RFieldDescriptorIterable
0772    GetFieldIterable(const RFieldDescriptor &fieldDesc,
0773                     const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
0774    RFieldDescriptorIterable GetFieldIterable(ROOT::DescriptorId_t fieldId) const;
0775    RFieldDescriptorIterable
0776    GetFieldIterable(ROOT::DescriptorId_t fieldId,
0777                     const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
0778
0779    RFieldDescriptorIterable GetTopLevelFields() const;
0780    RFieldDescriptorIterable
0781    GetTopLevelFields(const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
0782
0783    RColumnDescriptorIterable GetColumnIterable() const;
0784    RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
0785    RColumnDescriptorIterable GetColumnIterable(ROOT::DescriptorId_t fieldId) const;
0786
0787    RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
0788
0789    RClusterDescriptorIterable GetClusterIterable() const;
0790
0791    RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
0792
0793    const std::string &GetName() const { return fName; }
0794    const std::string &GetDescription() const { return fDescription; }
0795
0796    std::size_t GetNFields() const { return fFieldDescriptors.size(); }
0797    std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
0798    std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
0799    std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
0800    std::size_t GetNClusters() const { return fNClusters; } // counter maintained by the descriptor builder
0801    std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); } // may be fewer than GetNClusters()
0802    std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
0803
0804    /// We know the number of entries from adding the cluster summaries
0805    ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
0806    ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const;
0807
0808    /// Returns the logical parent of all top-level RNTuple data fields.
0809    ROOT::DescriptorId_t GetFieldZeroId() const { return fFieldZeroId; }
0810    const RFieldDescriptor &GetFieldZero() const { return GetFieldDescriptor(GetFieldZeroId()); }
0811    ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const;
0812    /// Searches for a top-level field
0813    ROOT::DescriptorId_t FindFieldId(std::string_view fieldName) const;
0814    ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
0815                                             std::uint16_t representationIndex) const;
0816    ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
0817                                              std::uint16_t representationIndex) const;
0818    ROOT::DescriptorId_t FindClusterId(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t index) const;
0819    ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const;
0820    ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const;
0821
0822    /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
0823    /// In case of invalid field ID, an empty string is returned.
0824    std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const;
0825
0826    /// Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type name.
0827    std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const;
0828
0829    bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
0830    std::vector<std::uint64_t> GetFeatureFlags() const;
0831
0832    /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
0833    const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
0834
0835    /// Methods to load and drop cluster group details (cluster IDs and page locations)
0836    RResult<void>
0837    AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
0838    RResult<void> DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId);
0839
0840    std::uint64_t GetGeneration() const { return fGeneration; }
0841    void IncGeneration() { fGeneration++; }
0842
0843    /// Re-create the C++ model from the stored metadata
0844    std::unique_ptr<ROOT::RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
0845    void PrintInfo(std::ostream &output) const;
0846 };
0847 
0848 // clang-format off
0849 /**
0850 \class ROOT::RNTupleDescriptor::RColumnDescriptorIterable
0851 \ingroup NTuple
0852 \brief Used to loop over a field's associated columns
0853 */
0854 // clang-format on
0855 class RNTupleDescriptor::RColumnDescriptorIterable final {
0856 private:
0857    /// The associated RNTuple for this range.
0858    const RNTupleDescriptor &fNTuple;
0859    /// The descriptor ids of the columns ordered by field, representation, and column index
0860    std::vector<ROOT::DescriptorId_t> fColumns = {};
0861
0862 public:
0863    class RIterator final { // resolves the stored column IDs into column descriptors on dereference
0864    private:
0865       /// The enclosing range's RNTuple.
0866       const RNTupleDescriptor &fNTuple;
0867       /// The enclosing range's descriptor id list.
0868       const std::vector<ROOT::DescriptorId_t> &fColumns;
0869       std::size_t fIndex = 0; ///< Position within fColumns; fColumns.size() denotes the end iterator
0870
0871    public:
0872       using iterator_category = std::forward_iterator_tag;
0873       using iterator = RIterator;
0874       using value_type = RColumnDescriptor; // the type that `reference` and `pointer` refer to
0875       using difference_type = std::ptrdiff_t;
0876       using pointer = const RColumnDescriptor *;
0877       using reference = const RColumnDescriptor &;
0878
0879       RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &columns, std::size_t index)
0880          : fNTuple(ntuple), fColumns(columns), fIndex(index)
0881       {
0882       }
0883       iterator &operator++() // pre-increment; returns this iterator rather than a copy of it
0884       {
0885          ++fIndex;
0886          return *this;
0887       }
0888       reference operator*() const { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
0889       pointer operator->() const { return &fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
0890       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } // compares positions only
0891       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } // compares positions only
0892    };
0893
0894    RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc);
0895    RColumnDescriptorIterable(const RNTupleDescriptor &ntuple);
0896
0897    RIterator begin() const { return RIterator(fNTuple, fColumns, 0); }
0898    RIterator end() const { return RIterator(fNTuple, fColumns, fColumns.size()); }
0899    std::size_t size() const { return fColumns.size(); } // number of column IDs in this range
0900 };
0901 
0902 // clang-format off
0903 /**
0904 \class ROOT::RNTupleDescriptor::RFieldDescriptorIterable
0905 \ingroup NTuple
0906 \brief Used to loop over a field's child fields
0907 */
0908 // clang-format on
0909 class RNTupleDescriptor::RFieldDescriptorIterable final {
0910 private:
0911    /// The associated RNTuple for this range.
0912    const RNTupleDescriptor &fNTuple;
0913    /// The descriptor IDs of the child fields, copied from the field's link IDs. These may be sorted using
0914    /// a comparison function.
0915    std::vector<ROOT::DescriptorId_t> fFieldChildren = {};
0916
0917 public:
0918    class RIterator final { // resolves the stored field IDs into field descriptors on dereference
0919    private:
0920       /// The enclosing range's RNTuple.
0921       const RNTupleDescriptor &fNTuple;
0922       /// The enclosing range's descriptor id list.
0923       const std::vector<ROOT::DescriptorId_t> &fFieldChildren;
0924       std::size_t fIndex = 0; ///< Position within fFieldChildren; its size denotes the end iterator
0925
0926    public:
0927       using iterator_category = std::forward_iterator_tag;
0928       using iterator = RIterator;
0929       using value_type = RFieldDescriptor;
0930       using difference_type = std::ptrdiff_t;
0931       using pointer = const RFieldDescriptor *; // const, in line with `reference`
0932       using reference = const RFieldDescriptor &;
0933
0934       RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &fieldChildren,
0935                 std::size_t index)
0936          : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index)
0937       {
0938       }
0939       iterator &operator++() // pre-increment; returns this iterator rather than a copy of it
0940       {
0941          ++fIndex;
0942          return *this;
0943       }
0944       reference operator*() const { return fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
0945       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } // compares positions only
0946       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } // compares positions only
0947    };
0948    RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
0949       : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
0950    {
0951    }
0952    /// Sort the range using an arbitrary comparison function.
0953    RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field,
0954                             const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
0955       : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
0956    {
0957       std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
0958    }
0959    RIterator begin() const { return RIterator(fNTuple, fFieldChildren, 0); }
0960    RIterator end() const { return RIterator(fNTuple, fFieldChildren, fFieldChildren.size()); }
0961 };
0962 
0963 // clang-format off
0964 /**
0965 \class ROOT::RNTupleDescriptor::RClusterGroupDescriptorIterable
0966 \ingroup NTuple
0967 \brief Used to loop over all the cluster groups of an RNTuple (in unspecified order)
0968 
0969 Enumerate all cluster group IDs from the descriptor.  No specific order can be assumed.
0970 */
0971 // clang-format on
0972 class RNTupleDescriptor::RClusterGroupDescriptorIterable final {
0973 private:
0974    /// The associated RNTuple for this range.
0975    const RNTupleDescriptor &fNTuple;
0976
0977 public:
0978    class RIterator final { // addresses the cluster group descriptors of the RNTuple by position
0979    private:
0980       /// The enclosing range's RNTuple.
0981       const RNTupleDescriptor &fNTuple;
0982       std::size_t fIndex = 0; ///< Number of steps from the beginning of the cluster group map
0983
0984    public:
0985       using iterator_category = std::forward_iterator_tag;
0986       using iterator = RIterator;
0987       using value_type = RClusterGroupDescriptor;
0988       using difference_type = std::ptrdiff_t;
0989       using pointer = const RClusterGroupDescriptor *; // const, in line with `reference`
0990       using reference = const RClusterGroupDescriptor &;
0991
0992       RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
0993       iterator &operator++() // pre-increment; returns this iterator rather than a copy of it
0994       {
0995          ++fIndex;
0996          return *this;
0997       }
0998       reference operator*() const
0999       {
1000          auto it = fNTuple.fClusterGroupDescriptors.begin();
1001          std::advance(it, fIndex); // walks the map from its beginning, i.e. costs time linear in fIndex
1002          return it->second;
1003       }
1004       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } // compares positions only
1005       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } // compares positions only
1006    };
1007
1008    RClusterGroupDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1009    RIterator begin() const { return RIterator(fNTuple, 0); }
1010    RIterator end() const { return RIterator(fNTuple, fNTuple.GetNClusterGroups()); }
1011 };
1012 
1013 // clang-format off
1014 /**
1015 \class ROOT::RNTupleDescriptor::RClusterDescriptorIterable
1016 \ingroup NTuple
1017 \brief Used to loop over all the clusters of an RNTuple (in unspecified order)
1018 
1019 Enumerate all cluster IDs from all cluster descriptors.  No specific order can be assumed, use
1020 RNTupleDescriptor::FindNextClusterId() and RNTupleDescriptor::FindPrevClusterId() to traverse
1021 clusters by entry number.
1022 */
1023 // clang-format on
1024 class RNTupleDescriptor::RClusterDescriptorIterable final {
1025 private:
1026    /// The associated RNTuple for this range.
1027    const RNTupleDescriptor &fNTuple;
1028
1029 public:
1030    class RIterator final { // addresses the cluster descriptors held by the RNTuple descriptor by position
1031    private:
1032       /// The enclosing range's RNTuple.
1033       const RNTupleDescriptor &fNTuple;
1034       std::size_t fIndex = 0; ///< Number of steps from the beginning of the cluster descriptor map
1035
1036    public:
1037       using iterator_category = std::forward_iterator_tag;
1038       using iterator = RIterator;
1039       using value_type = RClusterDescriptor;
1040       using difference_type = std::ptrdiff_t;
1041       using pointer = const RClusterDescriptor *; // const, in line with `reference`
1042       using reference = const RClusterDescriptor &;
1043
1044       RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
1045       iterator &operator++() // pre-increment; returns this iterator rather than a copy of it
1046       {
1047          ++fIndex;
1048          return *this;
1049       }
1050       reference operator*() const
1051       {
1052          auto it = fNTuple.fClusterDescriptors.begin();
1053          std::advance(it, fIndex); // walks the map from its beginning, i.e. costs time linear in fIndex
1054          return it->second;
1055       }
1056       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; } // compares positions only
1057       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; } // compares positions only
1058    };
1059
1060    RClusterDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1061    RIterator begin() const { return RIterator(fNTuple, 0); }
1062    RIterator end() const { return RIterator(fNTuple, fNTuple.GetNActiveClusters()); }
1063 };
1064 
1065 // clang-format off
1066 /**
1067 \class ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
1068 \ingroup NTuple
1069 \brief Used to loop over all the extra type info records of an RNTuple (in unspecified order)
1070 */
1071 // clang-format on
1072 class RNTupleDescriptor::RExtraTypeInfoDescriptorIterable final {
1073 private:
1074    /// The associated RNTuple for this range.
1075    const RNTupleDescriptor &fNTuple;
1076
1077 public:
1078    /// Iterator that addresses the extra type info descriptors of the RNTuple by position.
1079    class RIterator final {
1080    private:
1081       /// The enclosing range's RNTuple.
1082       const RNTupleDescriptor &fNTuple;
1083       std::size_t fIndex = 0;
1084
1085    public:
1086       using iterator_category = std::forward_iterator_tag;
1087       using iterator = RIterator;
1088       using value_type = RExtraTypeInfoDescriptor;
1089       using difference_type = std::ptrdiff_t;
1090       using pointer = const RExtraTypeInfoDescriptor *;
1091       using reference = const RExtraTypeInfoDescriptor &;
1092
1093       RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
1094       /// Pre-increment; returns this iterator rather than a copy of it.
1095       iterator &operator++()
1096       {
1097          ++fIndex;
1098          return *this;
1099       }
1100       /// Accesses the descriptor at the current position. The position is not range-checked, hence the end
1101       /// iterator must not be dereferenced.
1102       reference operator*() const { return fNTuple.fExtraTypeInfoDescriptors[fIndex]; }
1103       /// Iterators compare by position only; the referenced RNTuple is not taken into account.
1104       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1105       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1106    };
1107
1108    RExtraTypeInfoDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1109    RIterator begin() const { return RIterator(fNTuple, 0); }
1110    RIterator end() const { return RIterator(fNTuple, fNTuple.GetNExtraTypeInfos()); }
1111 };
1112 
1113 // clang-format off
1114 /**
1115 \class ROOT::RNTupleDescriptor::RHeaderExtension
1116 \ingroup NTuple
1117 \brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
1118 */
1119 // clang-format on
1120 class RNTupleDescriptor::RHeaderExtension final {
1121    friend class Internal::RNTupleDescriptorBuilder;
1122
1123 private:
1124    /// IDs of all fields of late model extensions, kept in the order in which the fields were added because
1125    /// serialization has to write the fields in that order.
1126    std::vector<ROOT::DescriptorId_t> fFieldIdsOrder;
1127    /// The same field IDs, arranged for efficient lookup. When a column gets added to the extension header,
1128    /// the lookup tells whether the column belongs to a field of the header extension or to a field of the
1129    /// regular header that gets extended by additional column representations.
1130    std::unordered_set<ROOT::DescriptorId_t> fFieldIdsLookup;
1131    /// Logical IDs of the columns that extend fields of the regular header with additional column representations.
1132    /// During serialization, these columns are not picked up as columns of `fFieldIdsOrder`; they need to be
1133    /// serialized in the extension header without re-serializing the field.
1134    std::vector<ROOT::DescriptorId_t> fExtendedColumnRepresentations;
1135    /// Number of logical and physical columns; both counters are advanced in MarkExtendedColumn()
1136    std::uint32_t fNLogicalColumns = 0;
1137    std::uint32_t fNPhysicalColumns = 0;
1138
1139    /// Registers `fieldDesc` as an extended field, i.e. a field of the header extension (e.g. one added through late
1140    /// model extension). The field descriptor should also have been added to the RNTuple descriptor itself.
1141    void MarkExtendedField(const RFieldDescriptor &fieldDesc)
1142    {
1143       const auto fieldId = fieldDesc.GetId();
1144       fFieldIdsOrder.push_back(fieldId);
1145       fFieldIdsLookup.insert(fieldId);
1146    }
1147
1148    /// Registers `columnDesc` as an extended column, i.e. a column of the header extension (e.g. one added through
1149    /// late model extension as an additional representation of an existing column). The column descriptor should
1150    /// also have been added to the RNTuple descriptor itself, alongside the non-extended columns.
1151    void MarkExtendedColumn(const RColumnDescriptor &columnDesc)
1152    {
1153       ++fNLogicalColumns;
1154       if (!columnDesc.IsAliasColumn())
1155          ++fNPhysicalColumns;
1156       // A column whose field is not part of the extension adds a representation to a field of the regular header.
1157       if (fFieldIdsLookup.find(columnDesc.GetFieldId()) == fFieldIdsLookup.end())
1158          fExtendedColumnRepresentations.push_back(columnDesc.GetLogicalId());
1159    }
1160
1161 public:
1162    std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
1163    std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
1164    std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1165    const std::vector<ROOT::DescriptorId_t> &GetExtendedColumnRepresentations() const
1166    {
1167       return fExtendedColumnRepresentations;
1168    }
1169    /// Returns the IDs of the top-level fields defined in the extension header, in the order of their addition.
1170    /// The result is computed from `desc` on request. It cannot be collected while fields are being registered
1171    /// (see MarkExtendedField()) because, at that point in time, a field is not yet linked into the schema
1172    /// tree.
1173    std::vector<ROOT::DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
1174
1175    bool ContainsField(ROOT::DescriptorId_t fieldId) const
1176    {
1177       return fFieldIdsLookup.count(fieldId) != 0;
1178    }
1179    bool ContainsExtendedColumnRepresentation(ROOT::DescriptorId_t columnId) const
1180    {
1181       const auto last = fExtendedColumnRepresentations.end();
1182       return std::find(fExtendedColumnRepresentations.begin(), last, columnId) != last;
1183    }
1184 };
1185 
1186 namespace Internal {
1187 
1188 // clang-format off
1189 /**
1190 \class ROOT::Internal::RColumnDescriptorBuilder
1191 \ingroup NTuple
1192 \brief A helper class for piece-wise construction of an RColumnDescriptor
1193 
1194 Dangling column descriptors can become actual descriptors when added to an
1195 RNTupleDescriptorBuilder instance and then linked to their fields.
1196 */
1197 // clang-format on
1198 class RColumnDescriptorBuilder final {
1199 private:
1200    RColumnDescriptor fColumn = RColumnDescriptor(); ///< The column descriptor that is being assembled
1201
1202 public:
1203    /// Creates a builder that starts from a default-constructed column descriptor.
1204    RColumnDescriptorBuilder() = default;
1205
1206    /// Sets the logical column ID. Like every setter of this class, returns the builder itself for call chaining.
1207    RColumnDescriptorBuilder &LogicalColumnId(ROOT::DescriptorId_t logicalColumnId) {
1208       fColumn.fLogicalColumnId = logicalColumnId;
1209       return *this;
1210    }
1211    /// Sets the physical column ID.
1212    RColumnDescriptorBuilder &PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId) {
1213       fColumn.fPhysicalColumnId = physicalColumnId;
1214       return *this;
1215    }
1216    /// Sets the bits-on-storage value of the column.
1217    RColumnDescriptorBuilder &BitsOnStorage(std::uint16_t bitsOnStorage) {
1218       fColumn.fBitsOnStorage = bitsOnStorage;
1219       return *this;
1220    }
1221    /// Sets the column type.
1222    RColumnDescriptorBuilder &Type(ROOT::ENTupleColumnType type) {
1223       fColumn.fType = type;
1224       return *this;
1225    }
1226    /// Sets the field ID of the column.
1227    RColumnDescriptorBuilder &FieldId(ROOT::DescriptorId_t fieldId) {
1228       fColumn.fFieldId = fieldId;
1229       return *this;
1230    }
1231    /// Sets the column index.
1232    RColumnDescriptorBuilder &Index(std::uint32_t index) {
1233       fColumn.fIndex = index;
1234       return *this;
1235    }
1236    /// Sets the first element index.
1237    RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx) {
1238       fColumn.fFirstElementIndex = firstElementIdx;
1239       return *this;
1240    }
1241    /// Negates a positive first element index; the index must be non-zero (the sign presumably encodes the state).
1242    RColumnDescriptorBuilder &SetSuppressedDeferred() {
1243       R__ASSERT(fColumn.fFirstElementIndex != 0);
1244       if (auto &firstIdx = fColumn.fFirstElementIndex; firstIdx > 0)
1245          firstIdx = -firstIdx;
1246       return *this;
1247    }
1248    /// Sets the representation index.
1249    RColumnDescriptorBuilder &RepresentationIndex(std::uint16_t representationIndex) {
1250       fColumn.fRepresentationIndex = representationIndex;
1251       return *this;
1252    }
1253    /// Sets the value range from its two bounds.
1254    RColumnDescriptorBuilder &ValueRange(double min, double max) {
1255       fColumn.fValueRange = {min, max};
1256       return *this;
1257    }
1258    /// Sets the value range from an optional; an empty optional leaves the column without a value range.
1259    RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange) {
1260       fColumn.fValueRange = valueRange;
1261       return *this;
1262    }
1263    ROOT::DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
1264    ROOT::DescriptorId_t GetRepresentationIndex() const { return fColumn.fRepresentationIndex; }
1265    /// Tries to turn the collected information into a column descriptor. The attempt may fail if the
1266    /// builder was not given enough information to make a proper descriptor.
1267    RResult<RColumnDescriptor> MakeDescriptor() const;
1268 };
1269 
1270 // clang-format off
1271 /**
1272 \class ROOT::Internal::RFieldDescriptorBuilder
1273 \ingroup NTuple
1274 \brief A helper class for piece-wise construction of an RFieldDescriptor
1275 
1276 Dangling field descriptors describe a single field in isolation. They are
1277 missing the necessary relationship information (parent field, any child fields)
1278 required to describe a real RNTuple field.
1279 
1280 Dangling field descriptors can only become actual descriptors when added to an
1281 RNTupleDescriptorBuilder instance and then linked to other fields.
1282 */
1283 // clang-format on
1284 class RFieldDescriptorBuilder final {
1285 private:
1286    RFieldDescriptor fField = RFieldDescriptor(); ///< The field descriptor that is being assembled
1287
1288 public:
1289    /// Creates a builder for an empty dangling field descriptor.
1290    RFieldDescriptorBuilder() = default;
1291    /// Creates a builder that starts from an existing field descriptor.
1292    /// The conversion to a dangling descriptor drops the relationship information:
1293    ///
1294    /// * The parent id is reset to an invalid id.
1295    /// * The ids of the child fields are forgotten.
1296    ///
1297    /// These properties have to be set again through RNTupleDescriptorBuilder::AddFieldLink().
1298    explicit RFieldDescriptorBuilder(const RFieldDescriptor &fieldDesc);
1299
1300    /// Creates a builder that starts from a live RNTuple field.
1301    static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field);
1302
1303    /// Sets the field ID. Like every setter of this class, returns the builder itself for call chaining.
1304    RFieldDescriptorBuilder &FieldId(ROOT::DescriptorId_t fieldId) {
1305       fField.fFieldId = fieldId;
1306       return *this;
1307    }
1308    /// Sets the field version.
1309    RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion) {
1310       fField.fFieldVersion = fieldVersion;
1311       return *this;
1312    }
1313    /// Sets the type version.
1314    RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion) {
1315       fField.fTypeVersion = typeVersion;
1316       return *this;
1317    }
1318    /// Sets the ID of the parent field.
1319    RFieldDescriptorBuilder &ParentId(ROOT::DescriptorId_t id) {
1320       fField.fParentId = id;
1321       return *this;
1322    }
1323    /// Sets the projection source ID.
1324    RFieldDescriptorBuilder &ProjectionSourceId(ROOT::DescriptorId_t id) {
1325       fField.fProjectionSourceId = id;
1326       return *this;
1327    }
1328    /// Sets the field name.
1329    RFieldDescriptorBuilder &FieldName(const std::string &fieldName) {
1330       fField.fFieldName = fieldName;
1331       return *this;
1332    }
1333    /// Sets the field description.
1334    RFieldDescriptorBuilder &FieldDescription(const std::string &fieldDescription) {
1335       fField.fFieldDescription = fieldDescription;
1336       return *this;
1337    }
1338    /// Sets the type name.
1339    RFieldDescriptorBuilder &TypeName(const std::string &typeName) {
1340       fField.fTypeName = typeName;
1341       return *this;
1342    }
1343    /// Sets the type alias.
1344    RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias) {
1345       fField.fTypeAlias = typeAlias;
1346       return *this;
1347    }
1348    /// Sets the number of repetitions.
1349    RFieldDescriptorBuilder &NRepetitions(std::uint64_t nRepetitions) {
1350       fField.fNRepetitions = nRepetitions;
1351       return *this;
1352    }
1353    /// Sets the structure of the field.
1354    RFieldDescriptorBuilder &Structure(const ROOT::ENTupleStructure &structure) {
1355       fField.fStructure = structure;
1356       return *this;
1357    }
1358    /// Sets the type checksum; an empty optional leaves the field without a type checksum.
1359    RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum) {
1360       fField.fTypeChecksum = typeChecksum;
1361       return *this;
1362    }
1363    ROOT::DescriptorId_t GetParentId() const { return fField.fParentId; }
1364    /// Tries to turn the collected information into a field descriptor. The attempt may fail if the
1365    /// dangling field was not given enough information to make a proper descriptor.
1366    RResult<RFieldDescriptor> MakeDescriptor() const;
1367 };
1368 
1369 // clang-format off
1370 /**
1371 \class ROOT::Internal::RClusterDescriptorBuilder
1372 \ingroup NTuple
1373 \brief A helper class for piece-wise construction of an RClusterDescriptor
1374 
1375 The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1376 piecewise addition of page locations.
1377 */
1378 // clang-format on
1379 class RClusterDescriptorBuilder final {
1380 private:
1381    RClusterDescriptor fCluster;
1382 
1383 public:
1384    RClusterDescriptorBuilder &ClusterId(ROOT::DescriptorId_t clusterId)
1385    {
1386       fCluster.fClusterId = clusterId;
1387       return *this;
1388    }
1389 
1390    RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
1391    {
1392       fCluster.fFirstEntryIndex = firstEntryIndex;
1393       return *this;
1394    }
1395 
1396    RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
1397    {
1398       fCluster.fNEntries = nEntries;
1399       return *this;
1400    }
1401 
1402    RResult<void> CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1403                                    std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
1404 
1405    /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1406    /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1407    /// MarkSuppressedColumnRange() took place.
1408    RResult<void> MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId);
1409 
1410    /// Sets the first element index and number of elements for all the suppressed column ranges.
1411    /// The information is taken from the corresponding columns from the primary representation.
1412    /// Needs to be called when all the columns (suppressed and regular) where added.
1413    RResult<void> CommitSuppressedColumnRanges(const RNTupleDescriptor &desc);
1414 
1415    /// Add column and page ranges for columns created during late model extension missing in this cluster.  The locator
1416    /// type for the synthesized page ranges is `kTypePageZero`.  All the page sources must be able to populate the
1417    /// 'zero' page from such locator. Any call to CommitColumnRange() and CommitSuppressedColumnRanges()
1418    /// should happen before calling this function.
1419    RClusterDescriptorBuilder &AddExtendedColumnRanges(const RNTupleDescriptor &desc);
1420 
1421    const RClusterDescriptor::RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId)
1422    {
1423       return fCluster.GetColumnRange(physicalId);
1424    }
1425 
1426    /// Move out the full cluster descriptor including page locations
1427    RResult<RClusterDescriptor> MoveDescriptor();
1428 };
1429 
1430 // clang-format off
1431 /**
1432 \class ROOT::Internal::RClusterGroupDescriptorBuilder
1433 \ingroup NTuple
1434 \brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1435 */
1436 // clang-format on
1437 class RClusterGroupDescriptorBuilder final {
1438 private:
1439    RClusterGroupDescriptor fClusterGroup;
1440 
1441 public:
1442    RClusterGroupDescriptorBuilder() = default;
1443    static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc);
1444 
1445    RClusterGroupDescriptorBuilder &ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
1446    {
1447       fClusterGroup.fClusterGroupId = clusterGroupId;
1448       return *this;
1449    }
1450    RClusterGroupDescriptorBuilder &PageListLocator(const RNTupleLocator &pageListLocator)
1451    {
1452       fClusterGroup.fPageListLocator = pageListLocator;
1453       return *this;
1454    }
1455    RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1456    {
1457       fClusterGroup.fPageListLength = pageListLength;
1458       return *this;
1459    }
1460    RClusterGroupDescriptorBuilder &MinEntry(std::uint64_t minEntry)
1461    {
1462       fClusterGroup.fMinEntry = minEntry;
1463       return *this;
1464    }
1465    RClusterGroupDescriptorBuilder &EntrySpan(std::uint64_t entrySpan)
1466    {
1467       fClusterGroup.fEntrySpan = entrySpan;
1468       return *this;
1469    }
1470    RClusterGroupDescriptorBuilder &NClusters(std::uint32_t nClusters)
1471    {
1472       fClusterGroup.fNClusters = nClusters;
1473       return *this;
1474    }
1475    void AddSortedClusters(const std::vector<ROOT::DescriptorId_t> &clusterIds)
1476    {
1477       if (clusterIds.size() != fClusterGroup.GetNClusters())
1478          throw RException(R__FAIL("mismatch of number of clusters"));
1479       fClusterGroup.fClusterIds = clusterIds;
1480    }
1481 
1482    RResult<RClusterGroupDescriptor> MoveDescriptor();
1483 };
1484 
1485 // clang-format off
1486 /**
1487 \class ROOT::Internal::RExtraTypeInfoDescriptorBuilder
1488 \ingroup NTuple
1489 \brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1490 */
1491 // clang-format on
1492 class RExtraTypeInfoDescriptorBuilder final {
1493 private:
1494    RExtraTypeInfoDescriptor fExtraTypeInfo;
1495 
1496 public:
1497    RExtraTypeInfoDescriptorBuilder() = default;
1498 
1499    RExtraTypeInfoDescriptorBuilder &ContentId(EExtraTypeInfoIds contentId)
1500    {
1501       fExtraTypeInfo.fContentId = contentId;
1502       return *this;
1503    }
1504    RExtraTypeInfoDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1505    {
1506       fExtraTypeInfo.fTypeVersion = typeVersion;
1507       return *this;
1508    }
1509    RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1510    {
1511       fExtraTypeInfo.fTypeName = typeName;
1512       return *this;
1513    }
1514    RExtraTypeInfoDescriptorBuilder &Content(const std::string &content)
1515    {
1516       fExtraTypeInfo.fContent = content;
1517       return *this;
1518    }
1519 
1520    RResult<RExtraTypeInfoDescriptor> MoveDescriptor();
1521 };
1522 
1523 // clang-format off
1524 /**
1525 \class ROOT::Internal::RNTupleDescriptorBuilder
1526 \ingroup NTuple
1527 \brief A helper class for piece-wise construction of an RNTupleDescriptor
1528 
1529 Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1530 */
1531 // clang-format on
1532 class RNTupleDescriptorBuilder final {
1533 private:
1534    RNTupleDescriptor fDescriptor;
1535    RResult<void> EnsureFieldExists(ROOT::DescriptorId_t fieldId) const;
1536 
1537 public:
1538    /// Checks whether invariants hold:
1539    /// * RNTuple epoch is valid
1540    /// * RNTuple name is valid
1541    /// * Fields have valid parents
1542    /// * Number of columns is constant across column representations
1543    RResult<void> EnsureValidDescriptor() const;
1544    const RNTupleDescriptor &GetDescriptor() const { return fDescriptor; }
1545    RNTupleDescriptor MoveDescriptor();
1546 
1547    /// Copies the "schema" part of `descriptor` into the builder's descriptor.
1548    /// This resets the builder's descriptor.
1549    void SetSchemaFromExisting(const RNTupleDescriptor &descriptor);
1550 
1551    void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor,
1552                    std::uint16_t versionPatch);
1553    void SetVersionForWriting();
1554 
1555    void SetNTuple(const std::string_view name, const std::string_view description);
1556    void SetFeature(unsigned int flag);
1557 
1558    void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1559    void SetOnDiskHeaderSize(std::uint64_t size) { fDescriptor.fOnDiskHeaderSize = size; }
1560    /// The real footer size also include the page list envelopes
1561    void AddToOnDiskFooterSize(std::uint64_t size) { fDescriptor.fOnDiskFooterSize += size; }
1562 
1563    void AddField(const RFieldDescriptor &fieldDesc);
1564    RResult<void> AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId);
1565    RResult<void> AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId);
1566 
1567    // The field that the column belongs to has to be already available. For fields with multiple columns,
1568    // the columns need to be added in order of the column index
1569    RResult<void> AddColumn(RColumnDescriptor &&columnDesc);
1570 
1571    RResult<void> AddClusterGroup(RClusterGroupDescriptor &&clusterGroup);
1572    RResult<void> AddCluster(RClusterDescriptor &&clusterDesc);
1573 
1574    RResult<void> AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1575    void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1576 
1577    /// Clears so-far stored clusters, fields, and columns and return to a pristine RNTupleDescriptor
1578    void Reset();
1579 
1580    /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1581    /// annotated as begin part of the header extension.
1582    void BeginHeaderExtension();
1583 
1584    /// \brief Shift column IDs of alias columns by `offset`
1585    ///
1586    /// If the descriptor is constructed in pieces consisting of physical and alias columns
1587    /// (regular and projected fields), the natural column order would be
1588    ///   - Physical and alias columns of piece one
1589    ///   - Physical and alias columns of piece two
1590    ///   - etc.
1591    /// What we want, however, are first all physical column IDs and then all alias column IDs.
1592    /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1593    /// column IDs in the projected field descriptors.  In this way, a new piece of physical and alias columns can
1594    /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1595    ///   - Physical columns of piece one
1596    ///   - Physical columns of piece two
1597    ///   - ...
1598    //    - Logical columns of piece one
1599    ///   - Logical columns of piece two
1600    ///   - ...
1601    void ShiftAliasColumns(std::uint32_t offset);
1602 
1603    /// Get the streamer info records for custom classes. Currently requires the corresponding dictionaries to be loaded.
1604    ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const;
1605 };
1606 
1607 inline RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
1608 {
1609    return desc.CloneSchema();
1610 }
1611 
1612 } // namespace Internal
1613 } // namespace ROOT
1614 
1615 #endif // ROOT_RNTupleDescriptor