Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 09:14:30

0001 /// \file ROOT/RNTupleDescriptor.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
0005 /// \date 2018-07-19
0006 
0007 /*************************************************************************
0008  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0009  * All rights reserved.                                                  *
0010  *                                                                       *
0011  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0012  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0013  *************************************************************************/
0014 
0015 #ifndef ROOT_RNTupleDescriptor
0016 #define ROOT_RNTupleDescriptor
0017 
0018 #include <ROOT/RCreateFieldOptions.hxx>
0019 #include <ROOT/RError.hxx>
0020 #include <ROOT/RNTupleSerialize.hxx>
0021 #include <ROOT/RNTupleUtil.hxx>
0022 #include <ROOT/RSpan.hxx>
0023 
0024 #include <TError.h>
0025 
0026 #include <algorithm>
0027 #include <chrono>
0028 #include <cmath>
0029 #include <functional>
0030 #include <iterator>
0031 #include <map>
0032 #include <memory>
0033 #include <optional>
0034 #include <ostream>
0035 #include <vector>
0036 #include <set>
0037 #include <string>
0038 #include <string_view>
0039 #include <unordered_map>
0040 #include <unordered_set>
0041 
0042 namespace ROOT {
0043 
0044 class RFieldBase; // forward declaration; used below only through std::unique_ptr<ROOT::RFieldBase>
0045 class RNTupleModel;
0046 
0047 namespace Internal {
0048 class RColumnElementBase; // forward declaration; used below only by const reference
0049 }
0050 
0051 class RNTupleDescriptor;
0052 
0053 namespace Internal {
0054 class RColumnDescriptorBuilder; // the builder classes are declared friends of the descriptor classes below
0055 class RClusterDescriptorBuilder;
0056 class RClusterGroupDescriptorBuilder;
0057 class RExtraTypeInfoDescriptorBuilder;
0058 class RFieldDescriptorBuilder;
0059 class RNTupleDescriptorBuilder;
0060 
0061 RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc); // declared a friend of RNTupleDescriptor
0062 } // namespace Internal
0063 
0064 // clang-format off
0065 /**
0066 \class ROOT::RFieldDescriptor
0067 \ingroup NTuple
0068 \brief Metadata stored for every field of an RNTuple
0069 */
0070 // clang-format on
0071 class RFieldDescriptor final {
0072    friend class Internal::RNTupleDescriptorBuilder; // friends are granted access to the private members
0073    friend class Internal::RFieldDescriptorBuilder;
0074 
0075 private:
0076    ROOT::DescriptorId_t fFieldId = ROOT::kInvalidDescriptorId; ///< Returned by GetId(); defaults to the invalid ID
0077    /// The version of the C++-type-to-column translation mechanics
0078    std::uint32_t fFieldVersion = 0;
0079    /// The version of the C++ type itself
0080    std::uint32_t fTypeVersion = 0;
0081    /// The leaf name, not including parent fields
0082    std::string fFieldName;
0083    /// Free text set by the user
0084    std::string fFieldDescription;
0085    /// The C++ type that was used when writing the field
0086    std::string fTypeName;
0087    /// A typedef or using directive that resolved to the type name during field creation
0088    std::string fTypeAlias;
0089    /// The number of elements per entry for fixed-size arrays
0090    std::uint64_t fNRepetitions = 0;
0091    /// The structural information carried by this field in the data model tree
0092    ROOT::ENTupleStructure fStructure = ROOT::ENTupleStructure::kInvalid;
0093    /// Establishes sub field relationships, such as classes and collections
0094    ROOT::DescriptorId_t fParentId = ROOT::kInvalidDescriptorId;
0095    /// For projected fields, the source field ID; kInvalidDescriptorId otherwise (see IsProjectedField())
0096    ROOT::DescriptorId_t fProjectionSourceId = ROOT::kInvalidDescriptorId;
0097    /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
0098    /// order of sub fields.
0099    std::vector<ROOT::DescriptorId_t> fLinkIds;
0100    /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
0101    /// list of logical column ids. For example, the second column of the third column representation is
0102    /// fLogicalColumnIds[2 * fColumnCardinality + 1]
0103    std::uint32_t fColumnCardinality = 0;
0104    /// The ordered list of columns attached to this field: first by representation index then by column index.
0105    std::vector<ROOT::DescriptorId_t> fLogicalColumnIds;
0106    /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
0107    /// identify types by their checksum
0108    std::optional<std::uint32_t> fTypeChecksum;
0109 
0110 public:
0111    RFieldDescriptor() = default;
0112    RFieldDescriptor(const RFieldDescriptor &other) = delete; // not copyable; Clone() provides an explicit copy
0113    RFieldDescriptor &operator=(const RFieldDescriptor &other) = delete;
0114    RFieldDescriptor(RFieldDescriptor &&other) = default; // movable
0115    RFieldDescriptor &operator=(RFieldDescriptor &&other) = default;
0116 
0117    bool operator==(const RFieldDescriptor &other) const; // declared here, defined out of line
0118    /// Get a copy of the descriptor
0119    RFieldDescriptor Clone() const;
0120 
0121    /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
0122    /// access to sub fields, which is provided by the RNTupleDescriptor argument.
0123    std::unique_ptr<ROOT::RFieldBase>
0124    CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options = {}) const;
0125 
0126    ROOT::DescriptorId_t GetId() const { return fFieldId; } // the accessors below return the stored members unchanged
0127    std::uint32_t GetFieldVersion() const { return fFieldVersion; }
0128    std::uint32_t GetTypeVersion() const { return fTypeVersion; }
0129    const std::string &GetFieldName() const { return fFieldName; }
0130    const std::string &GetFieldDescription() const { return fFieldDescription; }
0131    const std::string &GetTypeName() const { return fTypeName; }
0132    const std::string &GetTypeAlias() const { return fTypeAlias; }
0133    std::uint64_t GetNRepetitions() const { return fNRepetitions; }
0134    ROOT::ENTupleStructure GetStructure() const { return fStructure; }
0135    ROOT::DescriptorId_t GetParentId() const { return fParentId; }
0136    ROOT::DescriptorId_t GetProjectionSourceId() const { return fProjectionSourceId; }
0137    const std::vector<ROOT::DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
0138    const std::vector<ROOT::DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
0139    std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
0140    std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; } // empty if no checksum was stored
0141    bool IsProjectedField() const { return fProjectionSourceId != ROOT::kInvalidDescriptorId; } // true iff a source ID is set
0142    /// Tells if the field describes a user-defined class rather than a fundamental type, a collection, or one of the
0143    /// natively supported stdlib classes.
0144    /// The dictionary does not need to be available for this method.
0145    bool IsCustomClass() const;
0146 };
0147 
0148 // clang-format off
0149 /**
0150 \class ROOT::RColumnDescriptor
0151 \ingroup NTuple
0152 \brief Metadata stored for every column of an RNTuple
0153 */
0154 // clang-format on
0155 class RColumnDescriptor final {
0156    friend class Internal::RColumnDescriptorBuilder; // friends are granted access to the private members
0157    friend class Internal::RNTupleDescriptorBuilder;
0158 
0159 public:
0160    struct RValueRange { // closed interval [fMin, fMax] stored as two doubles
0161       double fMin = 0, fMax = 0;
0162 
0163       RValueRange() = default;
0164       RValueRange(double min, double max) : fMin(min), fMax(max) {}
0165       RValueRange(std::pair<double, double> range) : fMin(range.first), fMax(range.second) {} // implicit from a pair
0166 
0167       bool operator==(RValueRange other) const { return fMin == other.fMin && fMax == other.fMax; } // exact comparison
0168       bool operator!=(RValueRange other) const { return !(*this == other); }
0169    };
0170 
0171 private:
0172    /// The actual column identifier, which is the link to the corresponding field
0173    ROOT::DescriptorId_t fLogicalColumnId = ROOT::kInvalidDescriptorId;
0174    /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
0175    ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
0176    /// Every column belongs to one and only one field
0177    ROOT::DescriptorId_t fFieldId = ROOT::kInvalidDescriptorId;
0178    /// The absolute value specifies the index for the first stored element for this column.
0179    /// For deferred columns the absolute value is larger than zero.
0180    /// Negative values specify a suppressed and deferred column.
0181    std::int64_t fFirstElementIndex = 0; // signed on purpose: the sign carries the "suppressed" flag
0182    /// A field can be serialized into several columns, which are numbered from zero to $n$
0183    std::uint32_t fIndex = 0;
0184    /// A field may use multiple column representations, which are numbered from zero to $m$.
0185    /// Every representation has the same number of columns.
0186    std::uint16_t fRepresentationIndex = 0;
0187    /// The size in bits of elements of this column. Most columns have the size fixed by their type
0188    /// but low-precision float columns have variable bit widths.
0189    std::uint16_t fBitsOnStorage = 0;
0190    /// The on-disk column type
0191    ROOT::ENTupleColumnType fType = ROOT::ENTupleColumnType::kUnknown;
0192    /// Optional value range (used e.g. by quantized real fields)
0193    std::optional<RValueRange> fValueRange;
0194 
0195 public:
0196    RColumnDescriptor() = default;
0197    RColumnDescriptor(const RColumnDescriptor &other) = delete; // not copyable; Clone() provides an explicit copy
0198    RColumnDescriptor &operator=(const RColumnDescriptor &other) = delete;
0199    RColumnDescriptor(RColumnDescriptor &&other) = default; // movable
0200    RColumnDescriptor &operator=(RColumnDescriptor &&other) = default;
0201 
0202    bool operator==(const RColumnDescriptor &other) const; // declared here, defined out of line
0203    /// Get a copy of the descriptor
0204    RColumnDescriptor Clone() const;
0205 
0206    ROOT::DescriptorId_t GetLogicalId() const { return fLogicalColumnId; }
0207    ROOT::DescriptorId_t GetPhysicalId() const { return fPhysicalColumnId; }
0208    ROOT::DescriptorId_t GetFieldId() const { return fFieldId; }
0209    std::uint32_t GetIndex() const { return fIndex; }
0210    std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
0211    std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); } // sign stripped, see member doc
0212    std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
0213    ROOT::ENTupleColumnType GetType() const { return fType; }
0214    std::optional<RValueRange> GetValueRange() const { return fValueRange; } // empty if no range was stored
0215    bool IsAliasColumn() const { return fPhysicalColumnId != fLogicalColumnId; } // alias: logical and physical IDs differ
0216    bool IsDeferredColumn() const { return fFirstElementIndex != 0; } // deferred: first element index is non-zero
0217    bool IsSuppressedDeferredColumn() const { return fFirstElementIndex < 0; } // negative index: suppressed and deferred
0218 };
0219 
0220 // clang-format off
0221 /**
0222 \class ROOT::RClusterDescriptor
0223 \ingroup NTuple
0224 \brief Metadata for RNTuple clusters
0225 
0226 The cluster descriptor is built in two phases.  In a first phase, the descriptor has only an ID.
0227 In a second phase, the event range, column group, page locations and column ranges are added.
0228 Both phases are populated by the RClusterDescriptorBuilder.
0229 Clusters span across all available columns in the RNTuple.
0230 */
0231 // clang-format on
0232 class RClusterDescriptor final {
0233    friend class Internal::RClusterDescriptorBuilder; // friend is granted access to the private members
0234 
0235 public:
0236    // clang-format off
0237    /**
0238    \class ROOT::RClusterDescriptor::RColumnRange
0239    \ingroup NTuple
0240    \brief The window of element indexes of a particular column in a particular cluster
0241    */
0242    // clang-format on
0243    class RColumnRange final {
0244       ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
0245       /// The global index of the first column element in the cluster
0246       ROOT::NTupleSize_t fFirstElementIndex = ROOT::kInvalidNTupleIndex;
0247       /// The number of column elements in the cluster
0248       ROOT::NTupleSize_t fNElements = ROOT::kInvalidNTupleIndex;
0249       /// The usual format for ROOT compression settings (see Compression.h).
0250       /// The pages of a particular column in a particular cluster are all compressed with the same settings.
0251       /// If unset, the compression settings are undefined (deferred columns, suppressed columns).
0252       std::optional<std::uint32_t> fCompressionSettings;
0253       /// Suppressed columns have an empty page range and unknown compression settings.
0254       /// Their element index range, however, is aligned with the corresponding column of the
0255       /// primary column representation (see Section "Suppressed Columns" in the specification)
0256       bool fIsSuppressed = false;
0257 
0258       // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
0259       // Should this be done on the field level?
0260 
0261    public:
0262       RColumnRange() = default;
0263 
0264       RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex,
0265                    ROOT::NTupleSize_t nElements, std::optional<std::uint32_t> compressionSettings,
0266                    bool suppressed = false)
0267          : fPhysicalColumnId(physicalColumnId),
0268            fFirstElementIndex(firstElementIndex),
0269            fNElements(nElements),
0270            fCompressionSettings(compressionSettings),
0271            fIsSuppressed(suppressed)
0272       {
0273       }
0274 
0275       ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
0276       void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
0277 
0278       ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
0279       void SetFirstElementIndex(ROOT::NTupleSize_t idx) { fFirstElementIndex = idx; }
0280       void IncrementFirstElementIndex(ROOT::NTupleSize_t by) { fFirstElementIndex += by; }
0281 
0282       ROOT::NTupleSize_t GetNElements() const { return fNElements; }
0283       void SetNElements(ROOT::NTupleSize_t n) { fNElements = n; }
0284       void IncrementNElements(ROOT::NTupleSize_t by) { fNElements += by; }
0285 
0286       std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
0287       void SetCompressionSettings(std::optional<std::uint32_t> comp) { fCompressionSettings = comp; }
0288 
0289       bool IsSuppressed() const { return fIsSuppressed; }
0290       void SetIsSuppressed(bool suppressed) { fIsSuppressed = suppressed; }
0291 
0292       bool operator==(const RColumnRange &other) const // member-wise comparison of all five data members
0293       {
0294          return fPhysicalColumnId == other.fPhysicalColumnId && fFirstElementIndex == other.fFirstElementIndex &&
0295                 fNElements == other.fNElements && fCompressionSettings == other.fCompressionSettings &&
0296                 fIsSuppressed == other.fIsSuppressed;
0297       }
0298 
0299       bool Contains(ROOT::NTupleSize_t index) const // true iff index lies in [fFirstElementIndex, fFirstElementIndex + fNElements)
0300       {
0301          return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
0302       }
0303    };
0304 
0305    // clang-format off
0306    /**
0307    \class ROOT::RClusterDescriptor::RPageInfo
0308    \ingroup NTuple
0309    \brief Information about a single page in the context of a cluster's page range.
0310    */
0311    // clang-format on
0312    // NOTE: We do not need to store the element size / uncompressed page size because we know to which column
0313    // the page belongs
0314    struct RPageInfo {
0315    private:
0316       /// The sum of the elements of all the pages must match the corresponding `fNElements` field in `fColumnRanges`
0317       std::uint32_t fNElements = std::uint32_t(-1);
0318       /// The meaning of `fLocator` depends on the storage backend.
0319       RNTupleLocator fLocator;
0320       /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
0321       bool fHasChecksum = false;
0322 
0323    public:
0324       RPageInfo() = default;
0325       RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
0326          : fNElements(nElements), fLocator(locator), fHasChecksum(hasChecksum)
0327       {
0328       }
0329 
0330       bool operator==(const RPageInfo &other) const // NOTE(review): fHasChecksum is not compared; confirm this is intended
0331       {
0332          return fNElements == other.fNElements && fLocator == other.fLocator;
0333       }
0334 
0335       std::uint32_t GetNElements() const { return fNElements; }
0336       void SetNElements(std::uint32_t n) { fNElements = n; }
0337 
0338       const RNTupleLocator &GetLocator() const { return fLocator; }
0339       RNTupleLocator &GetLocator() { return fLocator; } // mutable access to the stored locator
0340       void SetLocator(const RNTupleLocator &locator) { fLocator = locator; }
0341 
0342       bool HasChecksum() const { return fHasChecksum; }
0343       void SetHasChecksum(bool hasChecksum) { fHasChecksum = hasChecksum; }
0344    };
0345 
0346    // clang-format off
0347    /**
0348    \class ROOT::RClusterDescriptor::RPageInfoExtended
0349    \ingroup NTuple
0350    \brief Additional information about a page in an in-memory RPageRange.
0351 
0352    Used by RPageRange::Find() to return information relative to the RPageRange.  This information is not stored on disk
0353    and we don't need to keep it in memory because it can be easily recomputed.
0354    */
0355    // clang-format on
0356    struct RPageInfoExtended final : RPageInfo {
0357    private:
0358       /// Index (in cluster) of the first element in page.
0359       ROOT::NTupleSize_t fFirstElementIndex = 0;
0360       /// Page number in the corresponding RPageRange.
0361       ROOT::NTupleSize_t fPageNumber = 0;
0362 
0363    public:
0364       RPageInfoExtended() = default;
0365       RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
0366          : RPageInfo(pageInfo), fFirstElementIndex(firstElementIndex), fPageNumber(pageNumber)
0367       {
0368       }
0369 
0370       ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
0371       void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage) { fFirstElementIndex = firstInPage; }
0372 
0373       ROOT::NTupleSize_t GetPageNumber() const { return fPageNumber; }
0374       void SetPageNumber(ROOT::NTupleSize_t pageNumber) { fPageNumber = pageNumber; }
0375    };
0376 
0377    // clang-format off
0378    /**
0379    \class ROOT::RClusterDescriptor::RPageRange
0380    \ingroup NTuple
0381    \brief Records the partition of data into pages for a particular column in a particular cluster
0382    */
0383    // clang-format on
0384    class RPageRange final {
0385       friend class Internal::RClusterDescriptorBuilder;
0386 
0387    private:
0388       /// \brief Extend this RPageRange to fit the given RColumnRange.
0389       ///
0390       /// To do so, prepend as many synthetic RPageInfos as needed to cover the range in `columnRange`.
0391       /// RPageInfos are constructed to contain as many elements of type `element` given a page size
0392       /// limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
0393       /// This function is used to make up RPageRanges for clusters that contain deferred columns.
0394       /// \return The number of column elements covered by the synthesized RPageInfos
0395       std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange,
0396                                          const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize);
0397 
0398       /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
0399       /// up to and including a given index. Used for binary search in Find().
0400       std::vector<ROOT::NTupleSize_t> fCumulativeNElements;
0401 
0402       ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
0403       std::vector<RPageInfo> fPageInfos;
0404 
0405    public:
0406       RPageRange() = default;
0407       RPageRange(const RPageRange &other) = delete; // not copyable; Clone() provides an explicit copy
0408       RPageRange &operator=(const RPageRange &other) = delete;
0409       RPageRange(RPageRange &&other) = default;
0410       RPageRange &operator=(RPageRange &&other) = default;
0411 
0412       RPageRange Clone() const // copies the column ID, the page infos and the cumulative element counts
0413       {
0414          RPageRange clone;
0415          clone.fPhysicalColumnId = fPhysicalColumnId;
0416          clone.fPageInfos = fPageInfos;
0417          clone.fCumulativeNElements = fCumulativeNElements;
0418          return clone;
0419       }
0420 
0421       /// Find the page in the RPageRange that contains the given element. The element must exist.
0422       RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const;
0423 
0424       ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
0425       void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
0426 
0427       const std::vector<RPageInfo> &GetPageInfos() const { return fPageInfos; }
0428       std::vector<RPageInfo> &GetPageInfos() { return fPageInfos; } // NOTE(review): edits made through this reference do not touch fCumulativeNElements
0429 
0430       bool operator==(const RPageRange &other) const // compares column ID and page infos; fCumulativeNElements is not compared
0431       {
0432          return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
0433       }
0434    };
0435 
0436 private:
0437    ROOT::DescriptorId_t fClusterId = ROOT::kInvalidDescriptorId;
0438    /// Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges
0439    ROOT::NTupleSize_t fFirstEntryIndex = ROOT::kInvalidNTupleIndex;
0440    ROOT::NTupleSize_t fNEntries = ROOT::kInvalidNTupleIndex;
0441 
0442    std::unordered_map<ROOT::DescriptorId_t, RColumnRange> fColumnRanges; ///< Looked up by physical column ID in GetColumnRange()
0443    std::unordered_map<ROOT::DescriptorId_t, RPageRange> fPageRanges; ///< Looked up by physical column ID in GetPageRange()
0444 
0445 public:
0446    class RColumnRangeIterable;
0447 
0448    RClusterDescriptor() = default;
0449    RClusterDescriptor(const RClusterDescriptor &other) = delete; // not copyable; Clone() provides an explicit copy
0450    RClusterDescriptor &operator=(const RClusterDescriptor &other) = delete;
0451    RClusterDescriptor(RClusterDescriptor &&other) = default;
0452    RClusterDescriptor &operator=(RClusterDescriptor &&other) = default;
0453 
0454    RClusterDescriptor Clone() const;
0455 
0456    bool operator==(const RClusterDescriptor &other) const;
0457 
0458    ROOT::DescriptorId_t GetId() const { return fClusterId; }
0459    ROOT::NTupleSize_t GetFirstEntryIndex() const { return fFirstEntryIndex; }
0460    ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
0461    const RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); } // at() throws std::out_of_range for an unknown ID
0462    const RPageRange &GetPageRange(ROOT::DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); } // at() throws std::out_of_range for an unknown ID
0463    /// Returns an iterable over the column ranges of this cluster; the iterator yields the RColumnRange values only. The iteration order is unspecified.
0464    RColumnRangeIterable GetColumnRangeIterable() const;
0465    bool ContainsColumn(ROOT::DescriptorId_t physicalId) const // true iff a column range is registered for physicalId
0466    {
0467       return fColumnRanges.find(physicalId) != fColumnRanges.end();
0468    }
0469    std::uint64_t GetNBytesOnStorage() const;
0470 };
0471 
0472 class RClusterDescriptor::RColumnRangeIterable final { // range-for adapter over the column ranges of one cluster
0473 private:
0474    const RClusterDescriptor &fDesc; ///< Held by reference: the iterable must not outlive the cluster descriptor
0475 
0476 public:
0477    class RIterator final { // forward iterator that exposes only the RColumnRange value of each map entry
0478    private:
0479       using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RColumnRange>::const_iterator;
0480       /// The wrapped map iterator
0481       Iter_t fIter;
0482 
0483    public:
0484       using iterator_category = std::forward_iterator_tag;
0485       using iterator = RIterator;
0486       using value_type = RColumnRange;
0487       using difference_type = std::ptrdiff_t;
0488       using pointer = const RColumnRange *;
0489       using reference = const RColumnRange &;
0490 
0491       RIterator(Iter_t iter) : fIter(iter) {} // intentionally implicit so existing conversions from Iter_t keep working
0492       iterator &operator++() // pre-increment returns *this by reference instead of a copy
0493       {
0494          ++fIter;
0495          return *this;
0496       }
0497       reference operator*() const { return fIter->second; }
0498       pointer operator->() const { return &fIter->second; }
0499       bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
0500       bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
0501    };
0502 
0503    explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
0504 
0505    RIterator begin() const { return RIterator{fDesc.fColumnRanges.cbegin()}; }
0506    RIterator end() const { return RIterator{fDesc.fColumnRanges.cend()}; }
0507    size_t size() const { return fDesc.fColumnRanges.size(); } // number of column ranges in the cluster
0508 };
0509 
0510 // clang-format off
0511 /**
0512 \class ROOT::RClusterGroupDescriptor
0513 \ingroup NTuple
0514 \brief Clusters are bundled in cluster groups.
0515 
0516 Very large RNTuples can contain multiple cluster groups to organize cluster metadata.
0517 Every RNTuple has at least one cluster group.  The clusters in a cluster group are ordered
0518 corresponding to their first entry number.
0519 */
0520 // clang-format on
0521 class RClusterGroupDescriptor final {
0522    friend class Internal::RClusterGroupDescriptorBuilder; // friend is granted access to the private members
0523 
0524 private:
0525    ROOT::DescriptorId_t fClusterGroupId = ROOT::kInvalidDescriptorId;
0526    /// The cluster IDs can be empty if the corresponding page list is not loaded.
0527    /// Otherwise, cluster ids are sorted by first entry number.
0528    std::vector<ROOT::DescriptorId_t> fClusterIds;
0529    /// The page list that corresponds to the cluster group
0530    RNTupleLocator fPageListLocator;
0531    /// Uncompressed size of the page list
0532    std::uint64_t fPageListLength = 0;
0533    /// The minimum first entry number of the clusters in the cluster group
0534    std::uint64_t fMinEntry = 0;
0535    /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
0536    std::uint64_t fEntrySpan = 0;
0537    /// Number of clusters is always known even if the cluster IDs are not (yet) populated
0538    std::uint32_t fNClusters = 0;
0539 
0540 public:
0541    RClusterGroupDescriptor() = default;
0542    RClusterGroupDescriptor(const RClusterGroupDescriptor &other) = delete; // not copyable; use Clone() or CloneSummary()
0543    RClusterGroupDescriptor &operator=(const RClusterGroupDescriptor &other) = delete;
0544    RClusterGroupDescriptor(RClusterGroupDescriptor &&other) = default;
0545    RClusterGroupDescriptor &operator=(RClusterGroupDescriptor &&other) = default;
0546 
0547    RClusterGroupDescriptor Clone() const;
0548    /// Creates a clone without the cluster IDs
0549    RClusterGroupDescriptor CloneSummary() const;
0550 
0551    bool operator==(const RClusterGroupDescriptor &other) const;
0552 
0553    ROOT::DescriptorId_t GetId() const { return fClusterGroupId; }
0554    std::uint32_t GetNClusters() const { return fNClusters; }
0555    RNTupleLocator GetPageListLocator() const { return fPageListLocator; } // returns a copy of the locator
0556    std::uint64_t GetPageListLength() const { return fPageListLength; }
0557    const std::vector<ROOT::DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
0558    std::uint64_t GetMinEntry() const { return fMinEntry; }
0559    std::uint64_t GetEntrySpan() const { return fEntrySpan; }
0560    /// A cluster group is loaded in two stages. Stage one loads only the summary information.
0561    /// Stage two loads the list of cluster IDs. Returns true iff the list of cluster IDs is non-empty.
0562    bool HasClusterDetails() const { return !fClusterIds.empty(); }
0563 };
0564 
0565 /// Content identifier used in RExtraTypeInfoDescriptor to specify the meaning of the extra information
0566 enum class EExtraTypeInfoIds {
0567    kInvalid, ///< Default value of a freshly constructed RExtraTypeInfoDescriptor
0568    kStreamerInfo ///< NOTE(review): presumably marks streamer info records (see RExtraTypeInfoDescriptor docs); confirm
0569 };
0570 
0571 // clang-format off
0572 /**
0573 \class ROOT::RExtraTypeInfoDescriptor
0574 \ingroup NTuple
0575 \brief Field specific extra type information from the header / extension header
0576 
0577 Currently only used by streamer fields to store RNTuple-wide list of streamer info records.
0578 */
0579 // clang-format on
0580 class RExtraTypeInfoDescriptor final {
0581    friend class Internal::RExtraTypeInfoDescriptorBuilder; // friend is granted access to the private members
0582 
0583 private:
0584    /// Specifies the meaning of the extra information
0585    EExtraTypeInfoIds fContentId = EExtraTypeInfoIds::kInvalid;
0586    /// Type version the extra type information is bound to
0587    std::uint32_t fTypeVersion = 0;
0588    /// The type name the extra information refers to; empty for RNTuple-wide extra information
0589    std::string fTypeName;
0590    /// The content format depends on the content ID and may be binary
0591    std::string fContent;
0592 
0593 public:
0594    RExtraTypeInfoDescriptor() = default;
0595    RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other) = delete; // not copyable; Clone() provides an explicit copy
0596    RExtraTypeInfoDescriptor &operator=(const RExtraTypeInfoDescriptor &other) = delete;
0597    RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other) = default;
0598    RExtraTypeInfoDescriptor &operator=(RExtraTypeInfoDescriptor &&other) = default;
0599 
0600    bool operator==(const RExtraTypeInfoDescriptor &other) const; // declared here, defined out of line
0601 
0602    RExtraTypeInfoDescriptor Clone() const;
0603 
0604    EExtraTypeInfoIds GetContentId() const { return fContentId; }
0605    std::uint32_t GetTypeVersion() const { return fTypeVersion; }
0606    const std::string &GetTypeName() const { return fTypeName; }
0607    const std::string &GetContent() const { return fContent; } // may hold binary data, see fContent
0608 };
0609 
0610 // clang-format off
0611 /**
0612 \class ROOT::RNTupleDescriptor
0613 \ingroup NTuple
0614 \brief The on-storage metadata of an RNTuple
0615 
0616 Represents the on-disk (on storage) information about an RNTuple. The metadata consists of a header, a footer, and
0617 potentially multiple page lists.
0618 The header carries the RNTuple schema, i.e. the fields and the associated columns and their relationships.
0619 The footer carries information about one or several cluster groups and links to their page lists.
0620 For every cluster group, a page list envelope stores cluster summaries and page locations.
0621 For every cluster, it stores for every column the range of element indexes as well as a list of pages and page
0622 locations.
0623 
0624 The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
0625 for RNTuple objects (pages, clusters, ...).  It is supposed to be usable by all RPageStorage implementations.
0626 
0627 The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
0628 the concept of envelopes and frames: header, footer, and page list envelopes have a preamble with a type ID and length.
0629 Substructures are serialized in frames and have a size and number of items (for list frames). This allows for forward
0630 and backward compatibility when the metadata evolves.
0631 */
0632 // clang-format on
0633 class RNTupleDescriptor final {
0634    friend class Internal::RNTupleDescriptorBuilder;
0635    friend RNTupleDescriptor Internal::CloneDescriptorSchema(const RNTupleDescriptor &desc);
0636 
0637 public:
0638    class RHeaderExtension;
0639 
0640 private:
0641    /// The RNTuple name needs to be unique in a given storage location (file)
0642    std::string fName;
0643    /// Free text from the user
0644    std::string fDescription;
0645 
0646    ROOT::DescriptorId_t fFieldZeroId = ROOT::kInvalidDescriptorId; ///< Set by the descriptor builder
0647 
0648    std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
0649 
0650    std::set<unsigned int> fFeatureFlags;
0651    std::unordered_map<ROOT::DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
0652    std::unordered_map<ROOT::DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
0653 
0654    std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
0655    std::unique_ptr<RHeaderExtension> fHeaderExtension;
0656 
0657    //// All fields above are part of the schema and are cloned when creating a new descriptor from a given one
0658    //// (see CloneSchema())
0659 
0660    std::uint64_t fOnDiskHeaderSize = 0;    ///< Set by the descriptor builder when deserialized
0661    std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
0662    std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
0663 
0664    std::uint64_t fNEntries = 0;  ///< Updated by the descriptor builder when the cluster groups are added
0665    std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
0666 
0667    /// \brief The generation of the descriptor
0668    ///
0669    /// Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
0670    /// active page locations.  During the lifetime of the descriptor, page location information for clusters
0671    /// can be added or removed.  When this happens, the generation should be increased, so that users of the
0672    /// descriptor know that the information changed.  The generation is increased, e.g., by the page source's
0673    /// exclusive lock guard around the descriptor.  It is used, e.g., by the descriptor cache in RNTupleReader.
0674    std::uint64_t fGeneration = 0;
0675 
0676    std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
0677    /// References cluster groups sorted by entry range and thus allows for binary search.
0678    /// Note that this list is empty during the descriptor building process and will only be
0679    /// created when the final descriptor is extracted from the builder.
0680    std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
0681    /// Potentially a subset of all the available clusters
0682    std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
0683 
0684    // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
0685    ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const;
0686 
0687    /// Creates a descriptor containing only the schema information about this RNTuple, i.e. all the information needed
0688    /// to create a new RNTuple with the same schema as this one but not necessarily the same clustering. This is used
0689    /// when merging two RNTuples.
0690    RNTupleDescriptor CloneSchema() const;
0691 
0692 public:
0693    static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
0694 
0695    class RColumnDescriptorIterable;
0696    class RFieldDescriptorIterable;
0697    class RClusterGroupDescriptorIterable;
0698    class RClusterDescriptorIterable;
0699    class RExtraTypeInfoDescriptorIterable;
0700 
0701    /// Modifiers passed to CreateModel()
0702    struct RCreateModelOptions {
0703    private:
0704       /// If set to true, projected fields will be reconstructed as such. This will prevent the model to be used
0705       /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
0706       bool fReconstructProjections = false;
0707       /// If this option is enabled, the model will be created and all fields containing unknown data (directly
0708       /// or indirectly) will be skipped instead.
0709       /// Normally creating a model will fail if any of the reconstructed fields contains an unknown column type.
0710       bool fForwardCompatible = false;
0711       /// If true, the model will be created without a default entry (bare model).
0712       bool fCreateBare = false;
0713       /// If true, fields with a user defined type that have no available dictionaries will be reconstructed
0714       /// as record fields from the on-disk information; otherwise, they will cause an error.
0715       bool fEmulateUnknownTypes = false;
0716 
0717    public:
0718       RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
0719 
0720       void SetReconstructProjections(bool v) { fReconstructProjections = v; }
0721       bool GetReconstructProjections() const { return fReconstructProjections; }
0722 
0723       void SetForwardCompatible(bool v) { fForwardCompatible = v; }
0724       bool GetForwardCompatible() const { return fForwardCompatible; }
0725 
0726       void SetCreateBare(bool v) { fCreateBare = v; }
0727       bool GetCreateBare() const { return fCreateBare; }
0728 
0729       void SetEmulateUnknownTypes(bool v) { fEmulateUnknownTypes = v; }
0730       bool GetEmulateUnknownTypes() const { return fEmulateUnknownTypes; }
0731    };
0732 
0733    RNTupleDescriptor() = default;
0734    RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
0735    RNTupleDescriptor &operator=(const RNTupleDescriptor &other) = delete;
0736    RNTupleDescriptor(RNTupleDescriptor &&other) = default;
0737    RNTupleDescriptor &operator=(RNTupleDescriptor &&other) = default;
0738 
0739    RNTupleDescriptor Clone() const;
0740 
0741    bool operator==(const RNTupleDescriptor &other) const;
0742 
0743    std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
0744    std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
0745    std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
0746 
0747    const RFieldDescriptor &GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
0748    {
0749       return fFieldDescriptors.at(fieldId);
0750    }
0751    const RColumnDescriptor &GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
0752    {
0753       return fColumnDescriptors.at(columnId);
0754    }
0755    const RClusterGroupDescriptor &GetClusterGroupDescriptor(ROOT::DescriptorId_t clusterGroupId) const
0756    {
0757       return fClusterGroupDescriptors.at(clusterGroupId);
0758    }
0759    const RClusterDescriptor &GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
0760    {
0761       return fClusterDescriptors.at(clusterId);
0762    }
0763 
0764    RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
0765    RFieldDescriptorIterable
0766    GetFieldIterable(const RFieldDescriptor &fieldDesc,
0767                     const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
0768    RFieldDescriptorIterable GetFieldIterable(ROOT::DescriptorId_t fieldId) const;
0769    RFieldDescriptorIterable
0770    GetFieldIterable(ROOT::DescriptorId_t fieldId,
0771                     const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
0772 
0773    RFieldDescriptorIterable GetTopLevelFields() const;
0774    RFieldDescriptorIterable
0775    GetTopLevelFields(const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
0776 
0777    RColumnDescriptorIterable GetColumnIterable() const;
0778    RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
0779    RColumnDescriptorIterable GetColumnIterable(ROOT::DescriptorId_t fieldId) const;
0780 
0781    RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
0782 
0783    RClusterDescriptorIterable GetClusterIterable() const;
0784 
0785    RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
0786 
0787    const std::string &GetName() const { return fName; }
0788    const std::string &GetDescription() const { return fDescription; }
0789 
0790    std::size_t GetNFields() const { return fFieldDescriptors.size(); }
0791    std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
0792    std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
0793    std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
0794    std::size_t GetNClusters() const { return fNClusters; }
0795    std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
0796    std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
0797 
0798    /// We know the number of entries from adding the cluster summaries
0799    ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
0800    ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const;
0801 
0802    /// Returns the logical parent of all top-level RNTuple data fields.
0803    ROOT::DescriptorId_t GetFieldZeroId() const { return fFieldZeroId; }
0804    const RFieldDescriptor &GetFieldZero() const { return GetFieldDescriptor(GetFieldZeroId()); }
0805    ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const;
0806    /// Searches for a top-level field
0807    ROOT::DescriptorId_t FindFieldId(std::string_view fieldName) const;
0808    ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
0809                                             std::uint16_t representationIndex) const;
0810    ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
0811                                              std::uint16_t representationIndex) const;
0812    ROOT::DescriptorId_t FindClusterId(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t index) const;
0813    ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const;
0814    ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const;
0815 
0816    /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
0817    /// In case of invalid field ID, an empty string is returned.
0818    std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const;
0819 
0820    bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
0821    std::vector<std::uint64_t> GetFeatureFlags() const;
0822 
0823    /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
0824    const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
0825 
0826    /// Methods to load and drop cluster group details (cluster IDs and page locations)
0827    RResult<void>
0828    AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
0829    RResult<void> DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId);
0830 
0831    std::uint64_t GetGeneration() const { return fGeneration; }
0832    void IncGeneration() { fGeneration++; }
0833 
0834    /// Re-create the C++ model from the stored metadata
0835    std::unique_ptr<ROOT::RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
0836    void PrintInfo(std::ostream &output) const;
0837 };
0838 
0839 // clang-format off
0840 /**
0841 \class ROOT::RNTupleDescriptor::RColumnDescriptorIterable
0842 \ingroup NTuple
0843 \brief Used to loop over a field's associated columns
0844 */
0845 // clang-format on
0846 class RNTupleDescriptor::RColumnDescriptorIterable final {
0847 private:
0848    /// The associated RNTuple for this range.
0849    const RNTupleDescriptor &fNTuple;
0850    /// The descriptor ids of the columns ordered by field, representation, and column index
0851    std::vector<ROOT::DescriptorId_t> fColumns = {};
0852 
0853 public:
0854    class RIterator final {
0855    private:
0856       /// The enclosing range's RNTuple.
0857       const RNTupleDescriptor &fNTuple;
0858       /// The enclosing range's descriptor id list.
0859       const std::vector<ROOT::DescriptorId_t> &fColumns;
0860       std::size_t fIndex = 0;
0861 
0862    public:
0863       using iterator_category = std::forward_iterator_tag;
0864       using iterator = RIterator;
0865       using value_type = RFieldDescriptor;
0866       using difference_type = std::ptrdiff_t;
0867       using pointer = const RColumnDescriptor *;
0868       using reference = const RColumnDescriptor &;
0869 
0870       RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &columns, std::size_t index)
0871          : fNTuple(ntuple), fColumns(columns), fIndex(index)
0872       {
0873       }
0874       iterator operator++()
0875       {
0876          ++fIndex;
0877          return *this;
0878       }
0879       reference operator*() { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
0880       pointer operator->() { return &fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
0881       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0882       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0883    };
0884 
0885    RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc);
0886    RColumnDescriptorIterable(const RNTupleDescriptor &ntuple);
0887 
0888    RIterator begin() { return RIterator(fNTuple, fColumns, 0); }
0889    RIterator end() { return RIterator(fNTuple, fColumns, fColumns.size()); }
0890    size_t size() { return fColumns.size(); }
0891 };
0892 
0893 // clang-format off
0894 /**
0895 \class ROOT::RNTupleDescriptor::RFieldDescriptorIterable
0896 \ingroup NTuple
0897 \brief Used to loop over a field's child fields
0898 */
0899 // clang-format on
0900 class RNTupleDescriptor::RFieldDescriptorIterable final {
0901 private:
0902    /// The associated RNTuple for this range.
0903    const RNTupleDescriptor &fNTuple;
0904    /// The descriptor IDs of the child fields. These may be sorted using
0905    /// a comparison function.
0906    std::vector<ROOT::DescriptorId_t> fFieldChildren = {};
0907 
0908 public:
0909    class RIterator final {
0910    private:
0911       /// The enclosing range's RNTuple.
0912       const RNTupleDescriptor &fNTuple;
0913       /// The enclosing range's descriptor id list.
0914       const std::vector<ROOT::DescriptorId_t> &fFieldChildren;
0915       std::size_t fIndex = 0;
0916 
0917    public:
0918       using iterator_category = std::forward_iterator_tag;
0919       using iterator = RIterator;
0920       using value_type = RFieldDescriptor;
0921       using difference_type = std::ptrdiff_t;
0922       using pointer = RFieldDescriptor *;
0923       using reference = const RFieldDescriptor &;
0924 
0925       RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &fieldChildren,
0926                 std::size_t index)
0927          : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index)
0928       {
0929       }
0930       iterator operator++()
0931       {
0932          ++fIndex;
0933          return *this;
0934       }
0935       reference operator*() { return fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
0936       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0937       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0938    };
0939    RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
0940       : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
0941    {
0942    }
0943    /// Sort the range using an arbitrary comparison function.
0944    RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field,
0945                             const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
0946       : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
0947    {
0948       std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
0949    }
0950    RIterator begin() { return RIterator(fNTuple, fFieldChildren, 0); }
0951    RIterator end() { return RIterator(fNTuple, fFieldChildren, fFieldChildren.size()); }
0952 };
0953 
0954 // clang-format off
0955 /**
0956 \class ROOT::RNTupleDescriptor::RClusterGroupDescriptorIterable
0957 \ingroup NTuple
0958 \brief Used to loop over all the cluster groups of an RNTuple (in unspecified order)
0959 
0960 Enumerate all cluster group IDs from the descriptor.  No specific order can be assumed.
0961 */
0962 // clang-format on
0963 class RNTupleDescriptor::RClusterGroupDescriptorIterable final {
0964 private:
0965    /// The associated RNTuple for this range.
0966    const RNTupleDescriptor &fNTuple;
0967 
0968 public:
0969    class RIterator final {
0970    private:
0971       /// The enclosing range's RNTuple.
0972       const RNTupleDescriptor &fNTuple;
0973       std::size_t fIndex = 0;
0974 
0975    public:
0976       using iterator_category = std::forward_iterator_tag;
0977       using iterator = RIterator;
0978       using value_type = RClusterGroupDescriptor;
0979       using difference_type = std::ptrdiff_t;
0980       using pointer = RClusterGroupDescriptor *;
0981       using reference = const RClusterGroupDescriptor &;
0982 
0983       RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
0984       iterator operator++()
0985       {
0986          ++fIndex;
0987          return *this;
0988       }
0989       reference operator*()
0990       {
0991          auto it = fNTuple.fClusterGroupDescriptors.begin();
0992          std::advance(it, fIndex);
0993          return it->second;
0994       }
0995       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0996       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0997    };
0998 
0999    RClusterGroupDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1000    RIterator begin() { return RIterator(fNTuple, 0); }
1001    RIterator end() { return RIterator(fNTuple, fNTuple.GetNClusterGroups()); }
1002 };
1003 
1004 // clang-format off
1005 /**
1006 \class ROOT::RNTupleDescriptor::RClusterDescriptorIterable
1007 \ingroup NTuple
1008 \brief Used to loop over all the clusters of an RNTuple (in unspecified order)
1009 
1010 Enumerate all cluster IDs from all cluster descriptors.  No specific order can be assumed, use
1011 RNTupleDescriptor::FindNextClusterId() and RNTupleDescriptor::FindPrevClusterId() to traverse
1012 clusters by entry number.
1013 */
1014 // clang-format on
1015 class RNTupleDescriptor::RClusterDescriptorIterable final {
1016 private:
1017    /// The associated RNTuple for this range.
1018    const RNTupleDescriptor &fNTuple;
1019 
1020 public:
1021    class RIterator final {
1022    private:
1023       /// The enclosing range's RNTuple.
1024       const RNTupleDescriptor &fNTuple;
1025       std::size_t fIndex = 0;
1026 
1027    public:
1028       using iterator_category = std::forward_iterator_tag;
1029       using iterator = RIterator;
1030       using value_type = RClusterDescriptor;
1031       using difference_type = std::ptrdiff_t;
1032       using pointer = RClusterDescriptor *;
1033       using reference = const RClusterDescriptor &;
1034 
1035       RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
1036       iterator operator++()
1037       {
1038          ++fIndex;
1039          return *this;
1040       }
1041       reference operator*()
1042       {
1043          auto it = fNTuple.fClusterDescriptors.begin();
1044          std::advance(it, fIndex);
1045          return it->second;
1046       }
1047       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1048       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1049    };
1050 
1051    RClusterDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1052    RIterator begin() { return RIterator(fNTuple, 0); }
1053    RIterator end() { return RIterator(fNTuple, fNTuple.GetNActiveClusters()); }
1054 };
1055 
1056 // clang-format off
1057 /**
1058 \class ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
1059 \ingroup NTuple
1060 \brief Used to loop over all the extra type info records of an RNTuple (in unspecified order)
1061 */
1062 // clang-format on
1063 class RNTupleDescriptor::RExtraTypeInfoDescriptorIterable final {
1064 private:
1065    /// The associated RNTuple for this range.
1066    const RNTupleDescriptor &fNTuple;
1067 
1068 public:
1069    class RIterator final {
1070    private:
1071       /// The enclosing range's RNTuple.
1072       const RNTupleDescriptor &fNTuple;
1073       std::size_t fIndex = 0;
1074 
1075    public:
1076       using iterator_category = std::forward_iterator_tag;
1077       using iterator = RIterator;
1078       using value_type = RExtraTypeInfoDescriptor;
1079       using difference_type = std::ptrdiff_t;
1080       using pointer = RExtraTypeInfoDescriptor *;
1081       using reference = const RExtraTypeInfoDescriptor &;
1082 
1083       RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
1084       iterator operator++()
1085       {
1086          ++fIndex;
1087          return *this;
1088       }
1089       reference operator*()
1090       {
1091          auto it = fNTuple.fExtraTypeInfoDescriptors.begin();
1092          std::advance(it, fIndex);
1093          return *it;
1094       }
1095       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1096       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1097    };
1098 
1099    RExtraTypeInfoDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1100    RIterator begin() { return RIterator(fNTuple, 0); }
1101    RIterator end() { return RIterator(fNTuple, fNTuple.GetNExtraTypeInfos()); }
1102 };
1103 
1104 // clang-format off
1105 /**
1106 \class ROOT::RNTupleDescriptor::RHeaderExtension
1107 \ingroup NTuple
1108 \brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
1109 */
1110 // clang-format on
1111 class RNTupleDescriptor::RHeaderExtension final {
1112    friend class Internal::RNTupleDescriptorBuilder;
1113 
1114 private:
1115    /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize the
1116    /// the fields in that order.
1117    std::vector<ROOT::DescriptorId_t> fFieldIdsOrder;
1118    /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
1119    /// header, this enables us to determine if the column belongs to a field of the header extension of if it
1120    /// belongs to a field of the regular header that gets extended by additional column representations.
1121    std::unordered_set<ROOT::DescriptorId_t> fFieldIdsLookup;
1122    /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
1123    /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
1124    /// these columns need to be serialized in the extension header without re-serializing the field.
1125    std::vector<ROOT::DescriptorId_t> fExtendedColumnRepresentations;
1126    /// Number of logical and physical columns; updated by the descriptor builder when columns are added
1127    std::uint32_t fNLogicalColumns = 0;
1128    std::uint32_t fNPhysicalColumns = 0;
1129 
1130    /// Marks `fieldDesc` as an extended field, i.e. a field that appears in the Header Extension (e.g. having been added
1131    /// through late model extension). Note that the field descriptor should also have been added to the RNTuple
1132    /// Descriptor alongside non-extended fields.
1133    void MarkExtendedField(const RFieldDescriptor &fieldDesc)
1134    {
1135       fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1136       fFieldIdsLookup.insert(fieldDesc.GetId());
1137    }
1138 
1139    /// Marks `columnDesc` as an extended column, i.e. a column that appears in the Header Extension (e.g. having been
1140    /// added through late model extension as an additional representation of an existing column). Note that the column
1141    /// descriptor should also have been added to the RNTuple Descriptor alongside non-extended columns.
1142    void MarkExtendedColumn(const RColumnDescriptor &columnDesc)
1143    {
1144       fNLogicalColumns++;
1145       if (!columnDesc.IsAliasColumn())
1146          fNPhysicalColumns++;
1147       if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1148          fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1149       }
1150    }
1151 
1152 public:
1153    std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
1154    std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
1155    std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1156    const std::vector<ROOT::DescriptorId_t> &GetExtendedColumnRepresentations() const
1157    {
1158       return fExtendedColumnRepresentations;
1159    }
1160    /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1161    /// of their addition.
1162    /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1163    /// the field is not yet linked into the schema tree.
1164    std::vector<ROOT::DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
1165 
1166    bool ContainsField(ROOT::DescriptorId_t fieldId) const
1167    {
1168       return fFieldIdsLookup.find(fieldId) != fFieldIdsLookup.end();
1169    }
1170    bool ContainsExtendedColumnRepresentation(ROOT::DescriptorId_t columnId) const
1171    {
1172       return std::find(fExtendedColumnRepresentations.begin(), fExtendedColumnRepresentations.end(), columnId) !=
1173              fExtendedColumnRepresentations.end();
1174    }
1175 };
1176 
1177 namespace Internal {
1178 
1179 // clang-format off
1180 /**
1181 \class ROOT::Internal::RColumnDescriptorBuilder
1182 \ingroup NTuple
1183 \brief A helper class for piece-wise construction of an RColumnDescriptor
1184 
1185 Dangling column descriptors can become actual descriptors when added to an
1186 RNTupleDescriptorBuilder instance and then linked to their fields.
1187 */
1188 // clang-format on
1189 class RColumnDescriptorBuilder final {
1190 private:
1191    RColumnDescriptor fColumn = RColumnDescriptor();
1192 
1193 public:
1194    /// Make an empty column descriptor builder.
1195    RColumnDescriptorBuilder() = default;
1196 
1197    RColumnDescriptorBuilder &LogicalColumnId(ROOT::DescriptorId_t logicalColumnId)
1198    {
1199       fColumn.fLogicalColumnId = logicalColumnId;
1200       return *this;
1201    }
1202    RColumnDescriptorBuilder &PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId)
1203    {
1204       fColumn.fPhysicalColumnId = physicalColumnId;
1205       return *this;
1206    }
1207    RColumnDescriptorBuilder &BitsOnStorage(std::uint16_t bitsOnStorage)
1208    {
1209       fColumn.fBitsOnStorage = bitsOnStorage;
1210       return *this;
1211    }
1212    RColumnDescriptorBuilder &Type(ROOT::ENTupleColumnType type)
1213    {
1214       fColumn.fType = type;
1215       return *this;
1216    }
1217    RColumnDescriptorBuilder &FieldId(ROOT::DescriptorId_t fieldId)
1218    {
1219       fColumn.fFieldId = fieldId;
1220       return *this;
1221    }
1222    RColumnDescriptorBuilder &Index(std::uint32_t index)
1223    {
1224       fColumn.fIndex = index;
1225       return *this;
1226    }
1227    RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
1228    {
1229       fColumn.fFirstElementIndex = firstElementIdx;
1230       return *this;
1231    }
1232    RColumnDescriptorBuilder &SetSuppressedDeferred()
1233    {
1234       R__ASSERT(fColumn.fFirstElementIndex != 0);
1235       if (fColumn.fFirstElementIndex > 0)
1236          fColumn.fFirstElementIndex = -fColumn.fFirstElementIndex;
1237       return *this;
1238    }
1239    RColumnDescriptorBuilder &RepresentationIndex(std::uint16_t representationIndex)
1240    {
1241       fColumn.fRepresentationIndex = representationIndex;
1242       return *this;
1243    }
1244    RColumnDescriptorBuilder &ValueRange(double min, double max)
1245    {
1246       fColumn.fValueRange = {min, max};
1247       return *this;
1248    }
1249    RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1250    {
1251       fColumn.fValueRange = valueRange;
1252       return *this;
1253    }
1254    ROOT::DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
1255    ROOT::DescriptorId_t GetRepresentationIndex() const { return fColumn.fRepresentationIndex; }
1256    /// Attempt to make a column descriptor. This may fail if the column
1257    /// was not given enough information to make a proper descriptor.
1258    RResult<RColumnDescriptor> MakeDescriptor() const;
1259 };
1260 
1261 // clang-format off
1262 /**
1263 \class ROOT::Internal::RFieldDescriptorBuilder
1264 \ingroup NTuple
1265 \brief A helper class for piece-wise construction of an RFieldDescriptor
1266 
1267 Dangling field descriptors describe a single field in isolation. They are
1268 missing the necessary relationship information (parent field, any child fields)
1269 required to describe a real RNTuple field.
1270 
1271 Dangling field descriptors can only become actual descriptors when added to an
1272 RNTupleDescriptorBuilder instance and then linked to other fields.
1273 */
1274 // clang-format on
1275 class RFieldDescriptorBuilder final {
1276 private:
1277    RFieldDescriptor fField = RFieldDescriptor();
1278 
1279 public:
1280    /// Make an empty dangling field descriptor.
1281    RFieldDescriptorBuilder() = default;
1282    /// Make a new RFieldDescriptorBuilder based off an existing descriptor.
1283    /// Relationship information is lost during the conversion to a
1284    /// dangling descriptor:
1285    /// * Parent id is reset to an invalid id.
1286    /// * Field children ids are forgotten.
1287    ///
1288    /// These properties must be set using RNTupleDescriptorBuilder::AddFieldLink().
1289    explicit RFieldDescriptorBuilder(const RFieldDescriptor &fieldDesc);
1290 
1291    /// Make a new RFieldDescriptorBuilder based off a live RNTuple field.
1292    static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field);
1293 
1294    RFieldDescriptorBuilder &FieldId(ROOT::DescriptorId_t fieldId)
1295    {
1296       fField.fFieldId = fieldId;
1297       return *this;
1298    }
1299    RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
1300    {
1301       fField.fFieldVersion = fieldVersion;
1302       return *this;
1303    }
1304    RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1305    {
1306       fField.fTypeVersion = typeVersion;
1307       return *this;
1308    }
1309    RFieldDescriptorBuilder &ParentId(ROOT::DescriptorId_t id)
1310    {
1311       fField.fParentId = id;
1312       return *this;
1313    }
1314    RFieldDescriptorBuilder &ProjectionSourceId(ROOT::DescriptorId_t id)
1315    {
1316       fField.fProjectionSourceId = id;
1317       return *this;
1318    }
1319    RFieldDescriptorBuilder &FieldName(const std::string &fieldName)
1320    {
1321       fField.fFieldName = fieldName;
1322       return *this;
1323    }
1324    RFieldDescriptorBuilder &FieldDescription(const std::string &fieldDescription)
1325    {
1326       fField.fFieldDescription = fieldDescription;
1327       return *this;
1328    }
1329    RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1330    {
1331       fField.fTypeName = typeName;
1332       return *this;
1333    }
1334    RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
1335    {
1336       fField.fTypeAlias = typeAlias;
1337       return *this;
1338    }
1339    RFieldDescriptorBuilder &NRepetitions(std::uint64_t nRepetitions)
1340    {
1341       fField.fNRepetitions = nRepetitions;
1342       return *this;
1343    }
1344    RFieldDescriptorBuilder &Structure(const ROOT::ENTupleStructure &structure)
1345    {
1346       fField.fStructure = structure;
1347       return *this;
1348    }
1349    RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1350    {
1351       fField.fTypeChecksum = typeChecksum;
1352       return *this;
1353    }
1354    ROOT::DescriptorId_t GetParentId() const { return fField.fParentId; }
1355    /// Attempt to make a field descriptor. This may fail if the dangling field
1356    /// was not given enough information to make a proper descriptor.
1357    RResult<RFieldDescriptor> MakeDescriptor() const;
1358 };
1359 
1360 // clang-format off
1361 /**
1362 \class ROOT::Internal::RClusterDescriptorBuilder
1363 \ingroup NTuple
1364 \brief A helper class for piece-wise construction of an RClusterDescriptor
1365 
1366 The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1367 piecewise addition of page locations.
1368 */
1369 // clang-format on
1370 class RClusterDescriptorBuilder final {
1371 private:
1372    RClusterDescriptor fCluster;
1373 
1374 public:
1375    RClusterDescriptorBuilder &ClusterId(ROOT::DescriptorId_t clusterId)
1376    {
1377       fCluster.fClusterId = clusterId;
1378       return *this;
1379    }
1380 
1381    RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
1382    {
1383       fCluster.fFirstEntryIndex = firstEntryIndex;
1384       return *this;
1385    }
1386 
1387    RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
1388    {
1389       fCluster.fNEntries = nEntries;
1390       return *this;
1391    }
1392 
1393    RResult<void> CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1394                                    std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
1395 
1396    /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1397    /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1398    /// MarkSuppressedColumnRange() took place.
1399    RResult<void> MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId);
1400 
1401    /// Sets the first element index and number of elements for all the suppressed column ranges.
1402    /// The information is taken from the corresponding columns from the primary representation.
1403    /// Needs to be called when all the columns (suppressed and regular) where added.
1404    RResult<void> CommitSuppressedColumnRanges(const RNTupleDescriptor &desc);
1405 
1406    /// Add column and page ranges for columns created during late model extension missing in this cluster.  The locator
1407    /// type for the synthesized page ranges is `kTypePageZero`.  All the page sources must be able to populate the
1408    /// 'zero' page from such locator. Any call to CommitColumnRange() and CommitSuppressedColumnRanges()
1409    /// should happen before calling this function.
1410    RClusterDescriptorBuilder &AddExtendedColumnRanges(const RNTupleDescriptor &desc);
1411 
1412    const RClusterDescriptor::RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId)
1413    {
1414       return fCluster.GetColumnRange(physicalId);
1415    }
1416 
1417    /// Move out the full cluster descriptor including page locations
1418    RResult<RClusterDescriptor> MoveDescriptor();
1419 };
1420 
1421 // clang-format off
1422 /**
1423 \class ROOT::Internal::RClusterGroupDescriptorBuilder
1424 \ingroup NTuple
1425 \brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1426 */
1427 // clang-format on
1428 class RClusterGroupDescriptorBuilder final {
1429 private:
1430    RClusterGroupDescriptor fClusterGroup;
1431 
1432 public:
1433    RClusterGroupDescriptorBuilder() = default;
1434    static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc);
1435 
1436    RClusterGroupDescriptorBuilder &ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
1437    {
1438       fClusterGroup.fClusterGroupId = clusterGroupId;
1439       return *this;
1440    }
1441    RClusterGroupDescriptorBuilder &PageListLocator(const RNTupleLocator &pageListLocator)
1442    {
1443       fClusterGroup.fPageListLocator = pageListLocator;
1444       return *this;
1445    }
1446    RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1447    {
1448       fClusterGroup.fPageListLength = pageListLength;
1449       return *this;
1450    }
1451    RClusterGroupDescriptorBuilder &MinEntry(std::uint64_t minEntry)
1452    {
1453       fClusterGroup.fMinEntry = minEntry;
1454       return *this;
1455    }
1456    RClusterGroupDescriptorBuilder &EntrySpan(std::uint64_t entrySpan)
1457    {
1458       fClusterGroup.fEntrySpan = entrySpan;
1459       return *this;
1460    }
1461    RClusterGroupDescriptorBuilder &NClusters(std::uint32_t nClusters)
1462    {
1463       fClusterGroup.fNClusters = nClusters;
1464       return *this;
1465    }
1466    void AddSortedClusters(const std::vector<ROOT::DescriptorId_t> &clusterIds)
1467    {
1468       if (clusterIds.size() != fClusterGroup.GetNClusters())
1469          throw RException(R__FAIL("mismatch of number of clusters"));
1470       fClusterGroup.fClusterIds = clusterIds;
1471    }
1472 
1473    RResult<RClusterGroupDescriptor> MoveDescriptor();
1474 };
1475 
1476 // clang-format off
1477 /**
1478 \class ROOT::Internal::RExtraTypeInfoDescriptorBuilder
1479 \ingroup NTuple
1480 \brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1481 */
1482 // clang-format on
1483 class RExtraTypeInfoDescriptorBuilder final {
1484 private:
1485    RExtraTypeInfoDescriptor fExtraTypeInfo;
1486 
1487 public:
1488    RExtraTypeInfoDescriptorBuilder() = default;
1489 
1490    RExtraTypeInfoDescriptorBuilder &ContentId(EExtraTypeInfoIds contentId)
1491    {
1492       fExtraTypeInfo.fContentId = contentId;
1493       return *this;
1494    }
1495    RExtraTypeInfoDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1496    {
1497       fExtraTypeInfo.fTypeVersion = typeVersion;
1498       return *this;
1499    }
1500    RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1501    {
1502       fExtraTypeInfo.fTypeName = typeName;
1503       return *this;
1504    }
1505    RExtraTypeInfoDescriptorBuilder &Content(const std::string &content)
1506    {
1507       fExtraTypeInfo.fContent = content;
1508       return *this;
1509    }
1510 
1511    RResult<RExtraTypeInfoDescriptor> MoveDescriptor();
1512 };
1513 
1514 // clang-format off
1515 /**
1516 \class ROOT::Internal::RNTupleDescriptorBuilder
1517 \ingroup NTuple
1518 \brief A helper class for piece-wise construction of an RNTupleDescriptor
1519 
1520 Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1521 */
1522 // clang-format on
1523 class RNTupleDescriptorBuilder final {
1524 private:
1525    RNTupleDescriptor fDescriptor;
1526    RResult<void> EnsureFieldExists(ROOT::DescriptorId_t fieldId) const;
1527 
1528 public:
1529    /// Checks whether invariants hold:
1530    /// * RNTuple name is valid
1531    /// * Fields have valid parents
1532    /// * Number of columns is constant across column representations
1533    RResult<void> EnsureValidDescriptor() const;
1534    const RNTupleDescriptor &GetDescriptor() const { return fDescriptor; }
1535    RNTupleDescriptor MoveDescriptor();
1536 
1537    /// Copies the "schema" part of `descriptor` into the builder's descriptor.
1538    /// This resets the builder's descriptor.
1539    void SetSchemaFromExisting(const RNTupleDescriptor &descriptor);
1540 
1541    void SetNTuple(const std::string_view name, const std::string_view description);
1542    void SetFeature(unsigned int flag);
1543 
1544    void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1545    void SetOnDiskHeaderSize(std::uint64_t size) { fDescriptor.fOnDiskHeaderSize = size; }
1546    /// The real footer size also include the page list envelopes
1547    void AddToOnDiskFooterSize(std::uint64_t size) { fDescriptor.fOnDiskFooterSize += size; }
1548 
1549    void AddField(const RFieldDescriptor &fieldDesc);
1550    RResult<void> AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId);
1551    RResult<void> AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId);
1552 
1553    // The field that the column belongs to has to be already available. For fields with multiple columns,
1554    // the columns need to be added in order of the column index
1555    RResult<void> AddColumn(RColumnDescriptor &&columnDesc);
1556 
1557    RResult<void> AddClusterGroup(RClusterGroupDescriptor &&clusterGroup);
1558    RResult<void> AddCluster(RClusterDescriptor &&clusterDesc);
1559 
1560    RResult<void> AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1561    void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1562 
1563    /// Clears so-far stored clusters, fields, and columns and return to a pristine RNTupleDescriptor
1564    void Reset();
1565 
1566    /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1567    /// annotated as begin part of the header extension.
1568    void BeginHeaderExtension();
1569 
1570    /// \brief Shift column IDs of alias columns by `offset`
1571    ///
1572    /// If the descriptor is constructed in pieces consisting of physical and alias columns
1573    /// (regular and projected fields), the natural column order would be
1574    ///   - Physical and alias columns of piece one
1575    ///   - Physical and alias columns of piece two
1576    ///   - etc.
1577    /// What we want, however, are first all physical column IDs and then all alias column IDs.
1578    /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1579    /// column IDs in the projected field descriptors.  In this way, a new piece of physical and alias columns can
1580    /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1581    ///   - Physical columns of piece one
1582    ///   - Physical columns of piece two
1583    ///   - ...
1584    //    - Logical columns of piece one
1585    ///   - Logical columns of piece two
1586    ///   - ...
1587    void ShiftAliasColumns(std::uint32_t offset);
1588 
1589    /// Get the streamer info records for custom classes. Currently requires the corresponding dictionaries to be loaded.
1590    ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t BuildStreamerInfos() const;
1591 };
1592 
1593 inline RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
1594 {
1595    return desc.CloneSchema();
1596 }
1597 
1598 } // namespace Internal
1599 } // namespace ROOT
1600 
1601 #endif // ROOT_RNTupleDescriptor