Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-17 10:26:20

0001 /// \file ROOT/RNTupleDescriptor.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
0005 /// \date 2018-07-19
0006 
0007 /*************************************************************************
0008  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0009  * All rights reserved.                                                  *
0010  *                                                                       *
0011  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0012  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0013  *************************************************************************/
0014 
0015 #ifndef ROOT_RNTupleDescriptor
0016 #define ROOT_RNTupleDescriptor
0017 
0018 #include <ROOT/RCreateFieldOptions.hxx>
0019 #include <ROOT/RError.hxx>
0020 #include <ROOT/RNTupleSerialize.hxx>
0021 #include <ROOT/RNTupleTypes.hxx>
0022 #include <ROOT/RSpan.hxx>
0023 
0024 #include <TError.h>
0025 
0026 #include <algorithm>
0027 #include <chrono>
0028 #include <cmath>
0029 #include <functional>
0030 #include <iterator>
0031 #include <map>
0032 #include <memory>
0033 #include <optional>
0034 #include <ostream>
0035 #include <vector>
0036 #include <set>
0037 #include <string>
0038 #include <string_view>
0039 #include <unordered_map>
0040 #include <unordered_set>
0041 
0042 namespace ROOT {
0043 
0044 class RFieldBase;
0045 class RNTupleModel;
0046 
0047 namespace Internal {
0048 class RColumnElementBase;
0049 }
0050 
0051 class RNTupleDescriptor;
0052 
0053 namespace Internal {
0054 class RColumnDescriptorBuilder;
0055 class RClusterDescriptorBuilder;
0056 class RClusterGroupDescriptorBuilder;
0057 class RExtraTypeInfoDescriptorBuilder;
0058 class RFieldDescriptorBuilder;
0059 class RNTupleDescriptorBuilder;
0060 
0061 RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc);
0062 struct RNTupleClusterBoundaries {
        // Pair of entry indexes delimiting a range; GetClusterBoundaries() returns a vector of these for a descriptor.
        // NOTE(review): judging by the member names, the range is half-open, i.e. [fFirstEntry, fLastEntryPlusOne).
        /// Index of the first entry of the range; kInvalidNTupleIndex unless set
0063    ROOT::NTupleSize_t fFirstEntry = kInvalidNTupleIndex;
        /// Index one past the last entry of the range (as the name indicates); kInvalidNTupleIndex unless set
0064    ROOT::NTupleSize_t fLastEntryPlusOne = kInvalidNTupleIndex;
0065 };
0066 
0067 std::vector<ROOT::Internal::RNTupleClusterBoundaries> GetClusterBoundaries(const RNTupleDescriptor &desc);
0068 } // namespace Internal
0069 
0070 namespace Experimental {
0071 
0072 // clang-format off
0073 /**
0074 \class ROOT::Experimental::RNTupleAttrSetDescriptor
0075 \ingroup NTuple
0076 \brief Metadata stored for every Attribute Set linked to an RNTuple.

     The descriptor records the schema version (major and minor), the name of the attribute set, and the uncompressed
     length and locator of its anchor. The class is move-only: copy construction and copy assignment are deleted.
     Clone() is declared here and defined elsewhere; presumably it is the way to obtain an explicit copy.
0077 */
0078 // clang-format on
0079 class RNTupleAttrSetDescriptor final {
        // The builder is granted access to the private members below.
0080    friend class Experimental::Internal::RNTupleAttrSetDescriptorBuilder;
0081
        /// Major component of the schema version
0082    std::uint16_t fSchemaVersionMajor = 0;
        /// Minor component of the schema version
0083    std::uint16_t fSchemaVersionMinor = 0;
0084    std::uint32_t fAnchorLength = 0; ///< uncompressed size of the linked anchor
0085    // The locator of the AttributeSet anchor.
0086    // In case of kTypeFile, it points to the beginning of the Anchor's payload.
0087    // NOTE: Only kTypeFile is supported at the moment.
0088    RNTupleLocator fAnchorLocator;
        /// The name of the attribute set
0089    std::string fName;
0090
0091 public:
0092    RNTupleAttrSetDescriptor() = default;
        // Copying is disabled; instances can only be moved.
0093    RNTupleAttrSetDescriptor(const RNTupleAttrSetDescriptor &other) = delete;
0094    RNTupleAttrSetDescriptor &operator=(const RNTupleAttrSetDescriptor &other) = delete;
0095    RNTupleAttrSetDescriptor(RNTupleAttrSetDescriptor &&other) = default;
0096    RNTupleAttrSetDescriptor &operator=(RNTupleAttrSetDescriptor &&other) = default;
0097
        /// Equality comparison; implemented out of line.
0098    bool operator==(const RNTupleAttrSetDescriptor &other) const;
        /// Defined as the negation of operator==.
0099    bool operator!=(const RNTupleAttrSetDescriptor &other) const { return !(*this == other); }
0100
        // Read-only accessors for the members documented above.
0101    const std::string &GetName() const { return fName; }
0102    std::uint16_t GetSchemaVersionMajor() const { return fSchemaVersionMajor; }
0103    std::uint16_t GetSchemaVersionMinor() const { return fSchemaVersionMinor; }
0104    std::uint32_t GetAnchorLength() const { return fAnchorLength; }
0105    const RNTupleLocator &GetAnchorLocator() const { return fAnchorLocator; }
0106
        /// Returns a new descriptor by value; implemented out of line (presumably a copy of this one).
0107    RNTupleAttrSetDescriptor Clone() const;
0108 };
0109 
0110 class RNTupleAttrSetDescriptorIterable;
0111 
0112 } // namespace Experimental
0113 
0114 // clang-format off
0115 /**
0116 \class ROOT::RFieldDescriptor
0117 \ingroup NTuple
0118 \brief Metadata stored for every field of an RNTuple

     The class is move-only: copy construction and copy assignment are deleted, and Clone() provides an explicit copy.
     The private members are accessible to the RNTupleDescriptorBuilder and RFieldDescriptorBuilder friend classes.
0119 */
0120 // clang-format on
0121 class RFieldDescriptor final {
0122    friend class Internal::RNTupleDescriptorBuilder;
0123    friend class Internal::RFieldDescriptorBuilder;
0124
0125 private:
        /// The ID of this field; kInvalidDescriptorId unless set
0126    ROOT::DescriptorId_t fFieldId = ROOT::kInvalidDescriptorId;
0127    /// The version of the C++-type-to-column translation mechanics
0128    std::uint32_t fFieldVersion = 0;
0129    /// The version of the C++ type itself
0130    std::uint32_t fTypeVersion = 0;
0131    /// The leaf name, not including parent fields
0132    std::string fFieldName;
0133    /// Free text set by the user
0134    std::string fFieldDescription;
0135    /// The C++ type that was used when writing the field
0136    std::string fTypeName;
0137    /// A typedef or using directive that resolved to the type name during field creation
0138    std::string fTypeAlias;
0139    /// The number of elements per entry for fixed-size arrays
0140    std::uint64_t fNRepetitions = 0;
0141    /// The structural information carried by this field in the data model tree
0142    ROOT::ENTupleStructure fStructure = ROOT::ENTupleStructure::kInvalid;
0143    /// Establishes sub field relationships, such as classes and collections
0144    ROOT::DescriptorId_t fParentId = ROOT::kInvalidDescriptorId;
0145    /// For projected fields, the source field ID
0146    ROOT::DescriptorId_t fProjectionSourceId = ROOT::kInvalidDescriptorId;
0147    /// The pointers in the other direction from parent to children. They are serialized, too, to keep the
0148    /// order of sub fields.
0149    std::vector<ROOT::DescriptorId_t> fLinkIds;
0150    /// The number of columns in the column representations of the field. The column cardinality helps to navigate the
0151    /// list of logical column ids. For example, the second column of the third column representation is
0152    /// fLogicalColumnIds[2 * fColumnCardinality + 1]
0153    std::uint32_t fColumnCardinality = 0;
0154    /// The ordered list of columns attached to this field: first by representation index then by column index.
0155    std::vector<ROOT::DescriptorId_t> fLogicalColumnIds;
0156    /// For custom classes, we store the ROOT TClass reported checksum to facilitate the use of I/O rules that
0157    /// identify types by their checksum
0158    std::optional<std::uint32_t> fTypeChecksum;
0159
0160 public:
0161    RFieldDescriptor() = default;
        // Copying is disabled; use Clone() for an explicit copy.
0162    RFieldDescriptor(const RFieldDescriptor &other) = delete;
0163    RFieldDescriptor &operator=(const RFieldDescriptor &other) = delete;
0164    RFieldDescriptor(RFieldDescriptor &&other) = default;
0165    RFieldDescriptor &operator=(RFieldDescriptor &&other) = default;
0166
        /// Equality comparison; implemented out of line.
0167    bool operator==(const RFieldDescriptor &other) const;
0168    /// Get a copy of the descriptor
0169    RFieldDescriptor Clone() const;
0170
0171    /// In general, we create a field simply from the C++ type name. For untyped fields, however, we potentially need
0172    /// access to sub fields, which is provided by the RNTupleDescriptor argument.
0173    std::unique_ptr<ROOT::RFieldBase>
0174    CreateField(const RNTupleDescriptor &ntplDesc, const ROOT::RCreateFieldOptions &options = {}) const;
0175
        // Read-only accessors for the members documented above.
0176    ROOT::DescriptorId_t GetId() const { return fFieldId; }
0177    std::uint32_t GetFieldVersion() const { return fFieldVersion; }
0178    std::uint32_t GetTypeVersion() const { return fTypeVersion; }
0179    const std::string &GetFieldName() const { return fFieldName; }
0180    const std::string &GetFieldDescription() const { return fFieldDescription; }
0181    const std::string &GetTypeName() const { return fTypeName; }
0182    const std::string &GetTypeAlias() const { return fTypeAlias; }
0183    std::uint64_t GetNRepetitions() const { return fNRepetitions; }
0184    ROOT::ENTupleStructure GetStructure() const { return fStructure; }
0185    ROOT::DescriptorId_t GetParentId() const { return fParentId; }
0186    ROOT::DescriptorId_t GetProjectionSourceId() const { return fProjectionSourceId; }
0187    const std::vector<ROOT::DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
0188    const std::vector<ROOT::DescriptorId_t> &GetLogicalColumnIds() const { return fLogicalColumnIds; }
0189    std::uint32_t GetColumnCardinality() const { return fColumnCardinality; }
0190    std::optional<std::uint32_t> GetTypeChecksum() const { return fTypeChecksum; }
        /// A field counts as projected if its projection source ID is set (i.e. differs from kInvalidDescriptorId).
0191    bool IsProjectedField() const { return fProjectionSourceId != ROOT::kInvalidDescriptorId; }
0192    /// Tells if the field describes a user-defined class rather than a fundamental type, a collection, or one of the
0193    /// natively supported stdlib classes.
0194    /// The dictionary does not need to be available for this method.
0195    bool IsCustomClass() const;
0196    /// Tells if the field describes a user-defined enum type.
0197    /// The dictionary does not need to be available for this method.
0198    /// Needs the full descriptor to look up sub fields.
0199    bool IsCustomEnum(const RNTupleDescriptor &desc) const;
        /// NOTE(review): presumably tells if the field describes a std::atomic type; implemented out of line -- confirm.
0200    bool IsStdAtomic() const;
0201 };
0202 
0203 // clang-format off
0204 /**
0205 \class ROOT::RColumnDescriptor
0206 \ingroup NTuple
0207 \brief Metadata stored for every column of an RNTuple

     The class is move-only: copy construction and copy assignment are deleted, and Clone() provides an explicit copy.
     The private members are accessible to the RColumnDescriptorBuilder and RNTupleDescriptorBuilder friend classes.
0208 */
0209 // clang-format on
0210 class RColumnDescriptor final {
0211    friend class Internal::RColumnDescriptorBuilder;
0212    friend class Internal::RNTupleDescriptorBuilder;
0213
0214 public:
        /// A [min, max] pair of doubles; see fValueRange for its use.
0215    struct RValueRange {
0216       double fMin = 0, fMax = 0;
0217
0218       RValueRange() = default;
0219       RValueRange(double min, double max) : fMin(min), fMax(max) {}
           /// Implicit conversion from a (min, max) pair
0220       RValueRange(std::pair<double, double> range) : fMin(range.first), fMax(range.second) {}
0221
           /// Exact (bitwise-value) comparison of both bounds
0222       bool operator==(RValueRange other) const { return fMin == other.fMin && fMax == other.fMax; }
0223       bool operator!=(RValueRange other) const { return !(*this == other); }
0224    };
0225
0226 private:
0227    /// The actual column identifier, which is the link to the corresponding field
0228    ROOT::DescriptorId_t fLogicalColumnId = ROOT::kInvalidDescriptorId;
0229    /// Usually identical to the logical column ID, except for alias columns where it references the shadowed column
0230    ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
0231    /// Every column belongs to one and only one field
0232    ROOT::DescriptorId_t fFieldId = ROOT::kInvalidDescriptorId;
0233    /// The absolute value specifies the index for the first stored element for this column.
0234    /// For deferred columns the absolute value is larger than zero.
0235    /// Negative values specify a suppressed and deferred column.
0236    std::int64_t fFirstElementIndex = 0;
0237    /// A field can be serialized into several columns, which are numbered from zero to $n$
0238    std::uint32_t fIndex = 0;
0239    /// A field may use multiple column representations, which are numbered from zero to $m$.
0240    /// Every representation has the same number of columns.
0241    std::uint16_t fRepresentationIndex = 0;
0242    /// The size in bits of elements of this column. Most columns have the size fixed by their type
0243    /// but low-precision float columns have variable bit widths.
0244    std::uint16_t fBitsOnStorage = 0;
0245    /// The on-disk column type
0246    ROOT::ENTupleColumnType fType = ROOT::ENTupleColumnType::kUnknown;
0247    /// Optional value range (used e.g. by quantized real fields)
0248    std::optional<RValueRange> fValueRange;
0249
0250 public:
0251    RColumnDescriptor() = default;
        // Copying is disabled; use Clone() for an explicit copy.
0252    RColumnDescriptor(const RColumnDescriptor &other) = delete;
0253    RColumnDescriptor &operator=(const RColumnDescriptor &other) = delete;
0254    RColumnDescriptor(RColumnDescriptor &&other) = default;
0255    RColumnDescriptor &operator=(RColumnDescriptor &&other) = default;
0256
        /// Equality comparison; implemented out of line.
0257    bool operator==(const RColumnDescriptor &other) const;
0258    /// Get a copy of the descriptor
0259    RColumnDescriptor Clone() const;
0260
0261    ROOT::DescriptorId_t GetLogicalId() const { return fLogicalColumnId; }
0262    ROOT::DescriptorId_t GetPhysicalId() const { return fPhysicalColumnId; }
0263    ROOT::DescriptorId_t GetFieldId() const { return fFieldId; }
0264    std::uint32_t GetIndex() const { return fIndex; }
0265    std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
        /// Returns the absolute value of the stored index; the sign only encodes the "suppressed and deferred" state.
0266    std::uint64_t GetFirstElementIndex() const { return std::abs(fFirstElementIndex); }
0267    std::uint16_t GetBitsOnStorage() const { return fBitsOnStorage; }
0268    ROOT::ENTupleColumnType GetType() const { return fType; }
0269    std::optional<RValueRange> GetValueRange() const { return fValueRange; }
        /// An alias column is one whose physical column ID differs from its logical column ID.
0270    bool IsAliasColumn() const { return fPhysicalColumnId != fLogicalColumnId; }
        /// A column is deferred if its stored first element index is non-zero (positive or negative).
0271    bool IsDeferredColumn() const { return fFirstElementIndex != 0; }
        /// A negative stored first element index marks a suppressed and deferred column.
0272    bool IsSuppressedDeferredColumn() const { return fFirstElementIndex < 0; }
0273 };
0274 
0275 // clang-format off
0276 /**
0277 \class ROOT::RClusterDescriptor
0278 \ingroup NTuple
0279 \brief Metadata for RNTuple clusters
0280
0281 The cluster descriptor is built in two phases.  In a first phase, the descriptor has only an ID.
0282 In a second phase, the event range, column group, page locations and column ranges are added.
0283 Both phases are populated by the RClusterDescriptorBuilder.
0284 Clusters span across all available columns in the RNTuple.

     The class is move-only: copy construction and copy assignment are deleted, and Clone() is declared for
     obtaining another instance.
0285 */
0286 // clang-format on
0287 class RClusterDescriptor final {
0288    friend class Internal::RClusterDescriptorBuilder;
0289
0290 public:
0291    // clang-format off
0292    /**
0293    \class ROOT::RClusterDescriptor::RColumnRange
0294    \ingroup NTuple
0295    \brief The window of element indexes of a particular column in a particular cluster
0296    */
0297    // clang-format on
0298    class RColumnRange final {
           /// The physical ID of the column this range refers to
0299       ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
0300       /// The global index of the first column element in the cluster
0301       ROOT::NTupleSize_t fFirstElementIndex = ROOT::kInvalidNTupleIndex;
0302       /// The number of column elements in the cluster
0303       ROOT::NTupleSize_t fNElements = ROOT::kInvalidNTupleIndex;
0304       /// The usual format for ROOT compression settings (see Compression.h).
0305       /// The pages of a particular column in a particular cluster are all compressed with the same settings.
0306       /// If unset, the compression settings are undefined (deferred columns, suppressed columns).
0307       std::optional<std::uint32_t> fCompressionSettings;
0308       /// Suppressed columns have an empty page range and unknown compression settings.
0309       /// Their element index range, however, is aligned with the corresponding column of the
0310       /// primary column representation (see Section "Suppressed Columns" in the specification)
0311       bool fIsSuppressed = false;
0312
0313       // TODO(jblomer): we perhaps want to store summary information, such as average, min/max, etc.
0314       // Should this be done on the field level?
0315
0316    public:
0317       RColumnRange() = default;
0318
           /// Initializes all members from the given values; `suppressed` defaults to false.
0319       RColumnRange(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t firstElementIndex,
0320                    ROOT::NTupleSize_t nElements, std::optional<std::uint32_t> compressionSettings,
0321                    bool suppressed = false)
0322          : fPhysicalColumnId(physicalColumnId),
0323            fFirstElementIndex(firstElementIndex),
0324            fNElements(nElements),
0325            fCompressionSettings(compressionSettings),
0326            fIsSuppressed(suppressed)
0327       {
0328       }
0329
0330       ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
0331       void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
0332
0333       ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
0334       void SetFirstElementIndex(ROOT::NTupleSize_t idx) { fFirstElementIndex = idx; }
           /// Shifts the first element index forward by `by`
0335       void IncrementFirstElementIndex(ROOT::NTupleSize_t by) { fFirstElementIndex += by; }
0336
0337       ROOT::NTupleSize_t GetNElements() const { return fNElements; }
0338       void SetNElements(ROOT::NTupleSize_t n) { fNElements = n; }
           /// Grows the number of elements by `by`
0339       void IncrementNElements(ROOT::NTupleSize_t by) { fNElements += by; }
0340
0341       std::optional<std::uint32_t> GetCompressionSettings() const { return fCompressionSettings; }
0342       void SetCompressionSettings(std::optional<std::uint32_t> comp) { fCompressionSettings = comp; }
0343
0344       bool IsSuppressed() const { return fIsSuppressed; }
0345       void SetIsSuppressed(bool suppressed) { fIsSuppressed = suppressed; }
0346
           /// Member-wise comparison of all five data members
0347       bool operator==(const RColumnRange &other) const
0348       {
0349          return fPhysicalColumnId == other.fPhysicalColumnId && fFirstElementIndex == other.fFirstElementIndex &&
0350                 fNElements == other.fNElements && fCompressionSettings == other.fCompressionSettings &&
0351                 fIsSuppressed == other.fIsSuppressed;
0352       }
0353
           /// Tells if `index` lies in the half-open window [fFirstElementIndex, fFirstElementIndex + fNElements).
0354       bool Contains(ROOT::NTupleSize_t index) const
0355       {
0356          return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
0357       }
0358    };
0359
0360    // clang-format off
0361    /**
0362    \class ROOT::RClusterDescriptor::RPageInfo
0363    \ingroup NTuple
0364    \brief Information about a single page in the context of a cluster's page range.
0365    */
0366    // clang-format on
0367    // NOTE: We do not need to store the element size / uncompressed page size because we know to which column
0368    // the page belongs
0369    struct RPageInfo {
0370    private:
0371       /// The sum of the elements of all the pages must match the corresponding `fNElements` field in `fColumnRanges`
0372       std::uint32_t fNElements = std::uint32_t(-1);
0373       /// The meaning of `fLocator` depends on the storage backend.
0374       RNTupleLocator fLocator;
0375       /// If true, the 8 bytes following the serialized page are an xxhash of the on-disk page data
0376       bool fHasChecksum = false;
0377
0378    public:
0379       RPageInfo() = default;
0380       RPageInfo(std::uint32_t nElements, const RNTupleLocator &locator, bool hasChecksum)
0381          : fNElements(nElements), fLocator(locator), fHasChecksum(hasChecksum)
0382       {
0383       }
0384
           /// Compares the number of elements and the locator.
           /// NOTE(review): `fHasChecksum` does not take part in the comparison -- confirm that this is intended.
0385       bool operator==(const RPageInfo &other) const
0386       {
0387          return fNElements == other.fNElements && fLocator == other.fLocator;
0388       }
0389
0390       std::uint32_t GetNElements() const { return fNElements; }
0391       void SetNElements(std::uint32_t n) { fNElements = n; }
0392
0393       const RNTupleLocator &GetLocator() const { return fLocator; }
           /// Mutable access to the stored locator
0394       RNTupleLocator &GetLocator() { return fLocator; }
0395       void SetLocator(const RNTupleLocator &locator) { fLocator = locator; }
0396
0397       bool HasChecksum() const { return fHasChecksum; }
0398       void SetHasChecksum(bool hasChecksum) { fHasChecksum = hasChecksum; }
0399    };
0400
0401    // clang-format off
0402    /**
0403    \class ROOT::RClusterDescriptor::RPageInfoExtended
0404    \ingroup NTuple
0405    \brief Additional information about a page in an in-memory RPageRange.
0406
0407    Used by RPageRange::Find() to return information relative to the RPageRange.  This information is not stored on disk
0408    and we don't need to keep it in memory because it can be easily recomputed.
0409    */
0410    // clang-format on
0411    struct RPageInfoExtended final : RPageInfo {
0412    private:
0413       /// Index (in cluster) of the first element in page.
0414       ROOT::NTupleSize_t fFirstElementIndex = 0;
0415       /// Page number in the corresponding RPageRange.
0416       ROOT::NTupleSize_t fPageNumber = 0;
0417
0418    public:
0419       RPageInfoExtended() = default;
           /// Copies `pageInfo` into the base sub-object and stores the two additional values.
0420       RPageInfoExtended(const RPageInfo &pageInfo, ROOT::NTupleSize_t firstElementIndex, ROOT::NTupleSize_t pageNumber)
0421          : RPageInfo(pageInfo), fFirstElementIndex(firstElementIndex), fPageNumber(pageNumber)
0422       {
0423       }
0424
0425       ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
0426       void SetFirstElementIndex(ROOT::NTupleSize_t firstInPage) { fFirstElementIndex = firstInPage; }
0427
0428       ROOT::NTupleSize_t GetPageNumber() const { return fPageNumber; }
0429       void SetPageNumber(ROOT::NTupleSize_t pageNumber) { fPageNumber = pageNumber; }
0430    };
0431
0432    // clang-format off
0433    /**
0434    \class ROOT::RClusterDescriptor::RPageRange
0435    \ingroup NTuple
0436    \brief Records the partition of data into pages for a particular column in a particular cluster
0437    */
0438    // clang-format on
0439    class RPageRange final {
0440       friend class Internal::RClusterDescriptorBuilder;
0441
0442    private:
0443       /// \brief Extend this RPageRange to fit the given RColumnRange.
0444       ///
0445       /// To do so, prepend as many synthetic RPageInfos as needed to cover the range in `columnRange`.
0446       /// RPageInfos are constructed to contain as many elements of type `element` given a page size
0447       /// limit of `pageSize` (in bytes); the locator for the referenced pages is `kTypePageZero`.
0448       /// This function is used to make up RPageRanges for clusters that contain deferred columns.
0449       /// \return The number of column elements covered by the synthesized RPageInfos
0450       std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange,
0451                                          const ROOT::Internal::RColumnElementBase &element, std::size_t pageSize);
0452
0453       /// Has the same length as fPageInfos and stores the sum of the number of elements of all the pages
0454       /// up to and including a given index. Used for binary search in Find().
0455       std::vector<ROOT::NTupleSize_t> fCumulativeNElements;
0456
           /// The physical ID of the column whose pages are recorded here
0457       ROOT::DescriptorId_t fPhysicalColumnId = ROOT::kInvalidDescriptorId;
           /// The pages of the column in this cluster
0458       std::vector<RPageInfo> fPageInfos;
0459
0460    public:
0461       RPageRange() = default;
           // Copying is disabled; use Clone() for an explicit copy.
0462       RPageRange(const RPageRange &other) = delete;
0463       RPageRange &operator=(const RPageRange &other) = delete;
0464       RPageRange(RPageRange &&other) = default;
0465       RPageRange &operator=(RPageRange &&other) = default;
0466
           /// Returns a copy of this page range: column ID, page infos and cumulative element counts are all copied.
0467       RPageRange Clone() const
0468       {
0469          RPageRange clone;
0470          clone.fPhysicalColumnId = fPhysicalColumnId;
0471          clone.fPageInfos = fPageInfos;
0472          clone.fCumulativeNElements = fCumulativeNElements;
0473          return clone;
0474       }
0475
0476       /// Find the page in the RPageRange that contains the given element. The element must exist.
0477       RPageInfoExtended Find(ROOT::NTupleSize_t idxInCluster) const;
0478
0479       ROOT::DescriptorId_t GetPhysicalColumnId() const { return fPhysicalColumnId; }
0480       void SetPhysicalColumnId(ROOT::DescriptorId_t id) { fPhysicalColumnId = id; }
0481
0482       const std::vector<RPageInfo> &GetPageInfos() const { return fPageInfos; }
           /// Mutable access to the page list. Nothing in this accessor updates fCumulativeNElements.
0483       std::vector<RPageInfo> &GetPageInfos() { return fPageInfos; }
0484
           /// Compares the column ID and the page infos; fCumulativeNElements is not compared.
0485       bool operator==(const RPageRange &other) const
0486       {
0487          return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
0488       }
0489    };
0490
0491 private:
        /// The ID of this cluster; kInvalidDescriptorId unless set
0492    ROOT::DescriptorId_t fClusterId = ROOT::kInvalidDescriptorId;
0493    /// Clusters can be swapped by adjusting the entry offsets of the cluster and all ranges
0494    ROOT::NTupleSize_t fFirstEntryIndex = ROOT::kInvalidNTupleIndex;
        /// The number of entries in this cluster; kInvalidNTupleIndex unless set
0495    ROOT::NTupleSize_t fNEntries = ROOT::kInvalidNTupleIndex;
0496
        /// Column ranges, looked up by physical column ID (see GetColumnRange())
0497    std::unordered_map<ROOT::DescriptorId_t, RColumnRange> fColumnRanges;
        /// Page ranges, looked up by physical column ID (see GetPageRange())
0498    std::unordered_map<ROOT::DescriptorId_t, RPageRange> fPageRanges;
0499
0500 public:
0501    class RColumnRangeIterable;
0502
0503    RClusterDescriptor() = default;
        // Copying is disabled; only moves are allowed. Clone() is declared below.
0504    RClusterDescriptor(const RClusterDescriptor &other) = delete;
0505    RClusterDescriptor &operator=(const RClusterDescriptor &other) = delete;
0506    RClusterDescriptor(RClusterDescriptor &&other) = default;
0507    RClusterDescriptor &operator=(RClusterDescriptor &&other) = default;
0508
        /// Returns a new descriptor by value; implemented out of line (presumably a copy of this one).
0509    RClusterDescriptor Clone() const;
0510
        /// Equality comparison; implemented out of line.
0511    bool operator==(const RClusterDescriptor &other) const;
0512
0513    ROOT::DescriptorId_t GetId() const { return fClusterId; }
0514    ROOT::NTupleSize_t GetFirstEntryIndex() const { return fFirstEntryIndex; }
0515    ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
        /// Looks up the range with unordered_map::at(), which throws std::out_of_range for an unknown `physicalId`.
0516    const RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
        /// Looks up the range with unordered_map::at(), which throws std::out_of_range for an unknown `physicalId`.
0517    const RPageRange &GetPageRange(ROOT::DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
0518    /// Returns an iterator over pairs { columnId, columnRange }. The iteration order is unspecified.
0519    RColumnRangeIterable GetColumnRangeIterable() const;
        /// Tells if a column range is registered for `physicalId`; a non-throwing check to use before GetColumnRange().
0520    bool ContainsColumn(ROOT::DescriptorId_t physicalId) const
0521    {
0522       return fColumnRanges.find(physicalId) != fColumnRanges.end();
0523    }
        /// Implemented out of line.
        /// NOTE(review): presumably the total on-storage size of the cluster's pages in bytes -- confirm.
0524    std::uint64_t GetNBytesOnStorage() const;
0525 };
0526 
0527 class RClusterDescriptor::RColumnRangeIterable final {
     // Range adaptor over the column ranges of a cluster descriptor. The iteration follows the order of the
     // underlying unordered_map and yields the mapped RColumnRange values only.
0528 private:
        /// The cluster descriptor whose column ranges are iterated. Held by reference, so the descriptor must
        /// outlive the iterable and all iterators obtained from it.
0529    const RClusterDescriptor &fDesc;
0530
0531 public:
        /// Forward iterator that wraps a const map iterator and dereferences to the mapped RColumnRange.
0532    class RIterator final {
0533    private:
0534       using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RColumnRange>::const_iterator;
0535       /// The wrapped map iterator
0536       Iter_t fIter;
0537
0538    public:
0539       using iterator_category = std::forward_iterator_tag;
0540       using iterator = RIterator;
0541       using value_type = RColumnRange;
0542       using difference_type = std::ptrdiff_t;
0543       using pointer = const RColumnRange *;
0544       using reference = const RColumnRange &;
0545
0546       RIterator(Iter_t iter) : fIter(iter) {}
0547       iterator &operator++() /* prefix */
0548       {
0549          ++fIter;
0550          return *this;
0551       }
0552       iterator operator++(int) /* postfix */
0553       {
0554          auto old = *this;
0555          operator++();
0556          return old;
0557       }
           // Dereferencing skips the map key and exposes only the column range.
0558       reference operator*() const { return fIter->second; }
0559       pointer operator->() const { return &fIter->second; }
0560       bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
0561       bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
0562    };
0563
0564    explicit RColumnRangeIterable(const RClusterDescriptor &desc) : fDesc(desc) {}
0565
        // The accessors only read through the const reference, hence they are const-qualified so that a
        // const iterable can be used in range-based for loops and standard algorithms.
        /// Iterator to the first column range
0566    RIterator begin() const { return RIterator{fDesc.fColumnRanges.cbegin()}; }
        /// Past-the-end iterator
0567    RIterator end() const { return RIterator{fDesc.fColumnRanges.cend()}; }
        /// The number of column ranges in the cluster descriptor
0568    std::size_t size() const { return fDesc.fColumnRanges.size(); }
0569 };
0570 
0571 // clang-format off
0572 /**
0573 \class ROOT::RClusterGroupDescriptor
0574 \ingroup NTuple
0575 \brief Clusters are bundled in cluster groups.
0576
0577 Very large RNTuples can contain multiple cluster groups to organize cluster metadata.
0578 Every RNTuple has at least one cluster group.  The clusters in a cluster group are ordered
0579 corresponding to their first entry number.

     The class is move-only: copy construction and copy assignment are deleted. Clone() and CloneSummary() are
     declared for obtaining another instance.
0580 */
0581 // clang-format on
0582 class RClusterGroupDescriptor final {
0583    friend class Internal::RClusterGroupDescriptorBuilder;
0584
0585 private:
        /// The ID of this cluster group; kInvalidDescriptorId unless set
0586    ROOT::DescriptorId_t fClusterGroupId = ROOT::kInvalidDescriptorId;
0587    /// The cluster IDs can be empty if the corresponding page list is not loaded.
0588    /// Otherwise, cluster ids are sorted by first entry number.
0589    std::vector<ROOT::DescriptorId_t> fClusterIds;
0590    /// The page list that corresponds to the cluster group
0591    RNTupleLocator fPageListLocator;
0592    /// Uncompressed size of the page list
0593    std::uint64_t fPageListLength = 0;
0594    /// The minimum first entry number of the clusters in the cluster group
0595    std::uint64_t fMinEntry = 0;
0596    /// Number of entries that are (partially for sharded clusters) covered by this cluster group.
0597    std::uint64_t fEntrySpan = 0;
0598    /// Number of clusters is always known even if the cluster IDs are not (yet) populated
0599    std::uint32_t fNClusters = 0;
0600
0601 public:
0602    RClusterGroupDescriptor() = default;
        // Copying is disabled; only moves are allowed.
0603    RClusterGroupDescriptor(const RClusterGroupDescriptor &other) = delete;
0604    RClusterGroupDescriptor &operator=(const RClusterGroupDescriptor &other) = delete;
0605    RClusterGroupDescriptor(RClusterGroupDescriptor &&other) = default;
0606    RClusterGroupDescriptor &operator=(RClusterGroupDescriptor &&other) = default;
0607
        /// Returns a new descriptor by value; implemented out of line (presumably a full copy including cluster IDs).
0608    RClusterGroupDescriptor Clone() const;
0609    /// Creates a clone without the cluster IDs
0610    RClusterGroupDescriptor CloneSummary() const;
0611
        /// Equality comparison; implemented out of line.
0612    bool operator==(const RClusterGroupDescriptor &other) const;
0613
0614    ROOT::DescriptorId_t GetId() const { return fClusterGroupId; }
0615    std::uint32_t GetNClusters() const { return fNClusters; }
        /// Returns the page list locator by value, i.e. the caller receives a copy.
0616    RNTupleLocator GetPageListLocator() const { return fPageListLocator; }
0617    std::uint64_t GetPageListLength() const { return fPageListLength; }
0618    const std::vector<ROOT::DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
0619    std::uint64_t GetMinEntry() const { return fMinEntry; }
0620    std::uint64_t GetEntrySpan() const { return fEntrySpan; }
0621    /// A cluster group is loaded in two stages. Stage one loads only the summary information.
0622    /// Stage two loads the list of cluster IDs.
        /// The check is based solely on the list of cluster IDs being non-empty.
0623    bool HasClusterDetails() const { return !fClusterIds.empty(); }
0624 };
0625 
0626 /// Used in RExtraTypeInfoDescriptor to specify the meaning of the stored extra type information
0627 enum class EExtraTypeInfoIds {
        /// Initial value of a default-constructed RExtraTypeInfoDescriptor
0628    kInvalid,
        /// NOTE(review): presumably marks the content as streamer info records -- confirm against the serialization code
0629    kStreamerInfo
0630 };
0631 
0632 // clang-format off
0633 /**
0634 \class ROOT::RExtraTypeInfoDescriptor
0635 \ingroup NTuple
0636 \brief Field specific extra type information from the header / extension header
0637
0638 Currently only used by streamer fields to store RNTuple-wide list of streamer info records.

     The class is move-only: copy construction and copy assignment are deleted. Clone() is declared for obtaining
     another instance.
0639 */
0640 // clang-format on
0641 class RExtraTypeInfoDescriptor final {
0642    friend class Internal::RExtraTypeInfoDescriptorBuilder;
0643
0644 private:
0645    /// Specifies the meaning of the extra information
0646    EExtraTypeInfoIds fContentId = EExtraTypeInfoIds::kInvalid;
0647    /// Type version the extra type information is bound to
0648    std::uint32_t fTypeVersion = 0;
0649    /// The type name the extra information refers to; empty for RNTuple-wide extra information
0650    std::string fTypeName;
0651    /// The content format depends on the content ID and may be binary
0652    std::string fContent;
0653
0654 public:
0655    RExtraTypeInfoDescriptor() = default;
        // Copying is disabled; only moves are allowed.
0656    RExtraTypeInfoDescriptor(const RExtraTypeInfoDescriptor &other) = delete;
0657    RExtraTypeInfoDescriptor &operator=(const RExtraTypeInfoDescriptor &other) = delete;
0658    RExtraTypeInfoDescriptor(RExtraTypeInfoDescriptor &&other) = default;
0659    RExtraTypeInfoDescriptor &operator=(RExtraTypeInfoDescriptor &&other) = default;
0660
        /// Equality comparison; implemented out of line.
0661    bool operator==(const RExtraTypeInfoDescriptor &other) const;
0662
        /// Returns a new descriptor by value; implemented out of line (presumably a copy of this one).
0663    RExtraTypeInfoDescriptor Clone() const;
0664
        // Read-only accessors for the members documented above. Note that GetContent() may return binary data.
0665    EExtraTypeInfoIds GetContentId() const { return fContentId; }
0666    std::uint32_t GetTypeVersion() const { return fTypeVersion; }
0667    const std::string &GetTypeName() const { return fTypeName; }
0668    const std::string &GetContent() const { return fContent; }
0669 };
0670 
0671 // clang-format off
0672 /**
0673 \class ROOT::RNTupleDescriptor
0674 \ingroup NTuple
0675 \brief The on-storage metadata of an RNTuple
0676 
0677 Represents the on-disk (on storage) information about an RNTuple. The metadata consists of a header, a footer, and
0678 potentially multiple page lists.
0679 The header carries the RNTuple schema, i.e. the fields and the associated columns and their relationships.
0680 The footer carries information about one or several cluster groups and links to their page lists.
0681 For every cluster group, a page list envelope stores cluster summaries and page locations.
0682 For every cluster, it stores for every column the range of element indexes as well as a list of pages and page
0683 locations.
0684 
0685 The descriptor provides machine-independent (de-)serialization of headers and footers, and it provides lookup routines
0686 for RNTuple objects (pages, clusters, ...).  It is supposed to be usable by all RPageStorage implementations.
0687 
0688 The serialization does not use standard ROOT streamers in order to not let it depend on libCore. The serialization uses
0689 the concept of envelopes and frames: header, footer, and page list envelopes have a preamble with a type ID and length.
0690 Substructures are serialized in frames and have a size and number of items (for list frames). This allows for forward
0691 and backward compatibility when the metadata evolves.
0692 */
0693 // clang-format on
0694 class RNTupleDescriptor final {
0695    friend class Internal::RNTupleDescriptorBuilder;
0696    friend RNTupleDescriptor Internal::CloneDescriptorSchema(const RNTupleDescriptor &desc);
0697 
0698 public:
0699    class RHeaderExtension;
0700 
0701 private:
0702    /// The RNTuple name needs to be unique in a given storage location (file)
0703    std::string fName;
0704    /// Free text from the user
0705    std::string fDescription;
0706 
0707    ROOT::DescriptorId_t fFieldZeroId = ROOT::kInvalidDescriptorId; ///< Set by the descriptor builder
0708 
0709    std::uint64_t fNPhysicalColumns = 0; ///< Updated by the descriptor builder when columns are added
0710 
0711    std::set<unsigned int> fFeatureFlags;
0712    std::unordered_map<ROOT::DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
0713    std::unordered_map<ROOT::DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
0714 
0715    std::vector<RExtraTypeInfoDescriptor> fExtraTypeInfoDescriptors;
0716    std::unique_ptr<RHeaderExtension> fHeaderExtension;
0717 
0718    //// All fields above are part of the schema and are cloned when creating a new descriptor from a given one
0719    //// (see CloneSchema())
0720 
0721    std::uint16_t fVersionEpoch = 0; ///< Set by the descriptor builder when deserialized
0722    std::uint16_t fVersionMajor = 0; ///< Set by the descriptor builder when deserialized
0723    std::uint16_t fVersionMinor = 0; ///< Set by the descriptor builder when deserialized
0724    std::uint16_t fVersionPatch = 0; ///< Set by the descriptor builder when deserialized
0725 
0726    std::uint64_t fOnDiskHeaderSize = 0;    ///< Set by the descriptor builder when deserialized
0727    std::uint64_t fOnDiskHeaderXxHash3 = 0; ///< Set by the descriptor builder when deserialized
0728    std::uint64_t fOnDiskFooterSize = 0; ///< Like fOnDiskHeaderSize, contains both cluster summaries and page locations
0729 
0730    std::uint64_t fNEntries = 0;  ///< Updated by the descriptor builder when the cluster groups are added
0731    std::uint64_t fNClusters = 0; ///< Updated by the descriptor builder when the cluster groups are added
0732 
0733    /// \brief The generation of the descriptor
0734    ///
0735    /// Once constructed by an RNTupleDescriptorBuilder, the descriptor is mostly immutable except for the set of
0736    /// active page locations.  During the lifetime of the descriptor, page location information for clusters
0737    /// can be added or removed.  When this happens, the generation should be increased, so that users of the
0738    /// descriptor know that the information changed.  The generation is increased, e.g., by the page source's
0739    /// exclusive lock guard around the descriptor.  It is used, e.g., by the descriptor cache in RNTupleReader.
0740    std::uint64_t fGeneration = 0;
0741 
0742    std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
0743    /// References cluster groups sorted by entry range and thus allows for binary search.
0744    /// Note that this list is empty during the descriptor building process and will only be
0745    /// created when the final descriptor is extracted from the builder.
0746    std::vector<ROOT::DescriptorId_t> fSortedClusterGroupIds;
0747    /// Potentially a subset of all the available clusters
0748    std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
0749    /// List of AttributeSets linked to this RNTuple
0750    std::vector<Experimental::RNTupleAttrSetDescriptor> fAttributeSets;
0751 
0752    // We don't expose this publicly because when we add sharded clusters, this interface does not make sense anymore
0753    ROOT::DescriptorId_t FindClusterId(ROOT::NTupleSize_t entryIdx) const;
0754 
0755    /// Creates a descriptor containing only the schema information about this RNTuple, i.e. all the information needed
0756    /// to create a new RNTuple with the same schema as this one but not necessarily the same clustering. This is used
0757    /// when merging two RNTuples.
0758    RNTupleDescriptor CloneSchema() const;
0759 
0760 public:
0761    static constexpr unsigned int kFeatureFlagTest = 137; // Bit reserved for forward-compatibility testing
0762 
0763    class RColumnDescriptorIterable;
0764    class RFieldDescriptorIterable;
0765    class RClusterGroupDescriptorIterable;
0766    class RClusterDescriptorIterable;
0767    class RExtraTypeInfoDescriptorIterable;
0768    friend class Experimental::RNTupleAttrSetDescriptorIterable;
0769 
0770    /// Modifiers passed to CreateModel()
0771    struct RCreateModelOptions {
0772    private:
0773       /// If set to true, projected fields will be reconstructed as such. This will prevent the model to be used
0774       /// with an RNTupleReader, but it is useful, e.g., to accurately merge data.
0775       bool fReconstructProjections = false;
0776       /// By default, creating a model will fail if any of the reconstructed fields contains an unknown column type
0777       /// or an unknown field structural role.
0778       /// If this option is enabled, the model will be created and all fields containing unknown data (directly
0779       /// or indirectly) will be skipped instead.
0780       bool fForwardCompatible = false;
0781       /// If true, the model will be created without a default entry (bare model).
0782       bool fCreateBare = false;
0783       /// If true, fields with a user defined type that have no available dictionaries will be reconstructed
0784       /// as record fields from the on-disk information; otherwise, they will cause an error.
0785       bool fEmulateUnknownTypes = false;
0786 
0787    public:
0788       RCreateModelOptions() {} // Work around compiler bug, see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88165
0789 
0790       void SetReconstructProjections(bool v) { fReconstructProjections = v; }
0791       bool GetReconstructProjections() const { return fReconstructProjections; }
0792 
0793       void SetForwardCompatible(bool v) { fForwardCompatible = v; }
0794       bool GetForwardCompatible() const { return fForwardCompatible; }
0795 
0796       void SetCreateBare(bool v) { fCreateBare = v; }
0797       bool GetCreateBare() const { return fCreateBare; }
0798 
0799       void SetEmulateUnknownTypes(bool v) { fEmulateUnknownTypes = v; }
0800       bool GetEmulateUnknownTypes() const { return fEmulateUnknownTypes; }
0801    };
0802 
0803    RNTupleDescriptor() = default;
0804    RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
0805    RNTupleDescriptor &operator=(const RNTupleDescriptor &other) = delete;
0806    RNTupleDescriptor(RNTupleDescriptor &&other) = default;
0807    RNTupleDescriptor &operator=(RNTupleDescriptor &&other) = default;
0808 
0809    RNTupleDescriptor Clone() const;
0810 
0811    bool operator==(const RNTupleDescriptor &other) const;
0812 
0813    std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
0814    std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
0815    std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
0816 
0817    const RFieldDescriptor &GetFieldDescriptor(ROOT::DescriptorId_t fieldId) const
0818    {
0819       return fFieldDescriptors.at(fieldId);
0820    }
0821    const RColumnDescriptor &GetColumnDescriptor(ROOT::DescriptorId_t columnId) const
0822    {
0823       return fColumnDescriptors.at(columnId);
0824    }
0825    const RClusterGroupDescriptor &GetClusterGroupDescriptor(ROOT::DescriptorId_t clusterGroupId) const
0826    {
0827       return fClusterGroupDescriptors.at(clusterGroupId);
0828    }
0829    const RClusterDescriptor &GetClusterDescriptor(ROOT::DescriptorId_t clusterId) const
0830    {
0831       return fClusterDescriptors.at(clusterId);
0832    }
0833 
0834    RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor &fieldDesc) const;
0835    RFieldDescriptorIterable
0836    GetFieldIterable(const RFieldDescriptor &fieldDesc,
0837                     const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
0838    RFieldDescriptorIterable GetFieldIterable(ROOT::DescriptorId_t fieldId) const;
0839    RFieldDescriptorIterable
0840    GetFieldIterable(ROOT::DescriptorId_t fieldId,
0841                     const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
0842 
0843    RFieldDescriptorIterable GetTopLevelFields() const;
0844    RFieldDescriptorIterable
0845    GetTopLevelFields(const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator) const;
0846 
0847    RColumnDescriptorIterable GetColumnIterable() const;
0848    RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const;
0849    RColumnDescriptorIterable GetColumnIterable(ROOT::DescriptorId_t fieldId) const;
0850 
0851    RClusterGroupDescriptorIterable GetClusterGroupIterable() const;
0852 
0853    RClusterDescriptorIterable GetClusterIterable() const;
0854 
0855    RExtraTypeInfoDescriptorIterable GetExtraTypeInfoIterable() const;
0856 
0857    ROOT::Experimental::RNTupleAttrSetDescriptorIterable GetAttrSetIterable() const;
0858 
0859    const std::string &GetName() const { return fName; }
0860    const std::string &GetDescription() const { return fDescription; }
0861 
0862    std::size_t GetNFields() const { return fFieldDescriptors.size(); }
0863    std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
0864    std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
0865    std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
0866    std::size_t GetNClusters() const { return fNClusters; }
0867    std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
0868    std::size_t GetNExtraTypeInfos() const { return fExtraTypeInfoDescriptors.size(); }
0869    std::size_t GetNAttributeSets() const { return fAttributeSets.size(); }
0870 
0871    /// We know the number of entries from adding the cluster summaries
0872    ROOT::NTupleSize_t GetNEntries() const { return fNEntries; }
0873    ROOT::NTupleSize_t GetNElements(ROOT::DescriptorId_t physicalColumnId) const;
0874 
0875    /// Returns the logical parent of all top-level RNTuple data fields.
0876    ROOT::DescriptorId_t GetFieldZeroId() const { return fFieldZeroId; }
0877    const RFieldDescriptor &GetFieldZero() const { return GetFieldDescriptor(GetFieldZeroId()); }
0878    ROOT::DescriptorId_t FindFieldId(std::string_view fieldName, ROOT::DescriptorId_t parentId) const;
0879    /// Searches for a top-level field
0880    ROOT::DescriptorId_t FindFieldId(std::string_view fieldName) const;
0881    ROOT::DescriptorId_t FindLogicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
0882                                             std::uint16_t representationIndex) const;
0883    ROOT::DescriptorId_t FindPhysicalColumnId(ROOT::DescriptorId_t fieldId, std::uint32_t columnIndex,
0884                                              std::uint16_t representationIndex) const;
0885    ROOT::DescriptorId_t FindClusterId(ROOT::DescriptorId_t physicalColumnId, ROOT::NTupleSize_t index) const;
0886    ROOT::DescriptorId_t FindNextClusterId(ROOT::DescriptorId_t clusterId) const;
0887    ROOT::DescriptorId_t FindPrevClusterId(ROOT::DescriptorId_t clusterId) const;
0888 
0889    /// Walks up the parents of the field ID and returns a field name of the form a.b.c.d
0890    /// In case of invalid field ID, an empty string is returned.
0891    std::string GetQualifiedFieldName(ROOT::DescriptorId_t fieldId) const;
0892 
0893    /// Adjust the type name of the passed RFieldDescriptor for comparison with another renormalized type name.
0894    std::string GetTypeNameForComparison(const RFieldDescriptor &fieldDesc) const;
0895 
0896    bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
0897    std::vector<std::uint64_t> GetFeatureFlags() const;
0898 
0899    /// Return header extension information; if the descriptor does not have a header extension, return `nullptr`
0900    const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
0901 
0902    /// Methods to load and drop cluster group details (cluster IDs and page locations)
0903    RResult<void>
0904    AddClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
0905    RResult<void> DropClusterGroupDetails(ROOT::DescriptorId_t clusterGroupId);
0906 
0907    std::uint64_t GetGeneration() const { return fGeneration; }
0908    void IncGeneration() { fGeneration++; }
0909 
0910    /// Re-create the C++ model from the stored metadata
0911    std::unique_ptr<ROOT::RNTupleModel> CreateModel(const RCreateModelOptions &options = RCreateModelOptions()) const;
0912    void PrintInfo(std::ostream &output) const;
0913 };
0914 
0915 // clang-format off
0916 /**
0917 \class ROOT::RNTupleDescriptor::RColumnDescriptorIterable
0918 \ingroup NTuple
0919 \brief Used to loop over a field's associated columns
0920 */
0921 // clang-format on
0922 class RNTupleDescriptor::RColumnDescriptorIterable final {
0923 private:
0924    /// The associated RNTuple for this range.
0925    const RNTupleDescriptor &fNTuple;
0926    /// The descriptor ids of the columns ordered by field, representation, and column index
0927    std::vector<ROOT::DescriptorId_t> fColumns = {};
0928 
0929 public:
0930    class RIterator final {
0931    private:
0932       /// The enclosing range's RNTuple.
0933       const RNTupleDescriptor &fNTuple;
0934       /// The enclosing range's descriptor id list.
0935       const std::vector<ROOT::DescriptorId_t> &fColumns;
0936       std::size_t fIndex = 0;
0937 
0938    public:
0939       using iterator_category = std::forward_iterator_tag;
0940       using iterator = RIterator;
0941       using value_type = RFieldDescriptor;
0942       using difference_type = std::ptrdiff_t;
0943       using pointer = const RColumnDescriptor *;
0944       using reference = const RColumnDescriptor &;
0945 
0946       RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &columns, std::size_t index)
0947          : fNTuple(ntuple), fColumns(columns), fIndex(index)
0948       {
0949       }
0950       iterator &operator++() /* prefix */
0951       {
0952          ++fIndex;
0953          return *this;
0954       }
0955       iterator operator++(int) /* postfix */
0956       {
0957          auto old = *this;
0958          operator++();
0959          return old;
0960       }
0961       reference operator*() const { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
0962       pointer operator->() const { return &fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
0963       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0964       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0965    };
0966 
0967    RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &fieldDesc);
0968    RColumnDescriptorIterable(const RNTupleDescriptor &ntuple);
0969 
0970    RIterator begin() { return RIterator(fNTuple, fColumns, 0); }
0971    RIterator end() { return RIterator(fNTuple, fColumns, fColumns.size()); }
0972    size_t size() { return fColumns.size(); }
0973 };
0974 
0975 // clang-format off
0976 /**
0977 \class ROOT::RNTupleDescriptor::RFieldDescriptorIterable
0978 \ingroup NTuple
0979 \brief Used to loop over a field's child fields
0980 */
0981 // clang-format on
0982 class RNTupleDescriptor::RFieldDescriptorIterable final {
0983 private:
0984    /// The associated RNTuple for this range.
0985    const RNTupleDescriptor &fNTuple;
0986    /// The descriptor IDs of the child fields. These may be sorted using
0987    /// a comparison function.
0988    std::vector<ROOT::DescriptorId_t> fFieldChildren = {};
0989 
0990 public:
0991    class RIterator final {
0992    private:
0993       /// The enclosing range's RNTuple.
0994       const RNTupleDescriptor &fNTuple;
0995       /// The enclosing range's descriptor id list.
0996       const std::vector<ROOT::DescriptorId_t> &fFieldChildren;
0997       std::size_t fIndex = 0;
0998 
0999    public:
1000       using iterator_category = std::forward_iterator_tag;
1001       using iterator = RIterator;
1002       using value_type = RFieldDescriptor;
1003       using difference_type = std::ptrdiff_t;
1004       using pointer = const RFieldDescriptor *;
1005       using reference = const RFieldDescriptor &;
1006 
1007       RIterator(const RNTupleDescriptor &ntuple, const std::vector<ROOT::DescriptorId_t> &fieldChildren,
1008                 std::size_t index)
1009          : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index)
1010       {
1011       }
1012       iterator &operator++() /* prefix */
1013       {
1014          ++fIndex;
1015          return *this;
1016       }
1017       iterator operator++(int) /* postfix */
1018       {
1019          auto old = *this;
1020          operator++();
1021          return old;
1022       }
1023       reference operator*() const { return fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
1024       pointer operator->() const { return &fNTuple.GetFieldDescriptor(fFieldChildren.at(fIndex)); }
1025       bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
1026       bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
1027    };
1028    RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field)
1029       : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1030    {
1031    }
1032    /// Sort the range using an arbitrary comparison function.
1033    RFieldDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field,
1034                             const std::function<bool(ROOT::DescriptorId_t, ROOT::DescriptorId_t)> &comparator)
1035       : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
1036    {
1037       std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
1038    }
1039    RIterator begin() { return RIterator(fNTuple, fFieldChildren, 0); }
1040    RIterator end() { return RIterator(fNTuple, fFieldChildren, fFieldChildren.size()); }
1041 };
1042 
1043 // clang-format off
1044 /**
1045 \class ROOT::RNTupleDescriptor::RClusterGroupDescriptorIterable
1046 \ingroup NTuple
1047 \brief Used to loop over all the cluster groups of an RNTuple (in unspecified order)
1048 
1049 Enumerate all cluster group IDs from the descriptor.  No specific order can be assumed.
1050 */
1051 // clang-format on
1052 class RNTupleDescriptor::RClusterGroupDescriptorIterable final {
1053 private:
1054    /// The associated RNTuple for this range.
1055    const RNTupleDescriptor &fNTuple;
1056 
1057 public:
1058    class RIterator final {
1059    private:
1060       using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterGroupDescriptor>::const_iterator;
1061       /// The wrapped map iterator
1062       Iter_t fIter;
1063 
1064    public:
1065       using iterator_category = std::forward_iterator_tag;
1066       using iterator = RIterator;
1067       using value_type = RClusterGroupDescriptor;
1068       using difference_type = std::ptrdiff_t;
1069       using pointer = const RClusterGroupDescriptor *;
1070       using reference = const RClusterGroupDescriptor &;
1071 
1072       RIterator(Iter_t iter) : fIter(iter) {}
1073       iterator &operator++() /* prefix */
1074       {
1075          ++fIter;
1076          return *this;
1077       }
1078       iterator operator++(int) /* postfix */
1079       {
1080          auto old = *this;
1081          operator++();
1082          return old;
1083       }
1084       reference operator*() const { return fIter->second; }
1085       pointer operator->() const { return &fIter->second; }
1086       bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1087       bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1088    };
1089 
1090    RClusterGroupDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1091    RIterator begin() { return RIterator(fNTuple.fClusterGroupDescriptors.cbegin()); }
1092    RIterator end() { return RIterator(fNTuple.fClusterGroupDescriptors.cend()); }
1093 };
1094 
1095 // clang-format off
1096 /**
1097 \class ROOT::RNTupleDescriptor::RClusterDescriptorIterable
1098 \ingroup NTuple
1099 \brief Used to loop over all the clusters of an RNTuple (in unspecified order)
1100 
1101 Enumerate all cluster IDs from all cluster descriptors.  No specific order can be assumed, use
1102 RNTupleDescriptor::FindNextClusterId() and RNTupleDescriptor::FindPrevClusterId() to traverse
1103 clusters by entry number.
1104 */
1105 // clang-format on
1106 class RNTupleDescriptor::RClusterDescriptorIterable final {
1107 private:
1108    /// The associated RNTuple for this range.
1109    const RNTupleDescriptor &fNTuple;
1110 
1111 public:
1112    class RIterator final {
1113    private:
1114       using Iter_t = std::unordered_map<ROOT::DescriptorId_t, RClusterDescriptor>::const_iterator;
1115       /// The wrapped map iterator
1116       Iter_t fIter;
1117 
1118    public:
1119       using iterator_category = std::forward_iterator_tag;
1120       using iterator = RIterator;
1121       using value_type = RClusterDescriptor;
1122       using difference_type = std::ptrdiff_t;
1123       using pointer = const RClusterDescriptor *;
1124       using reference = const RClusterDescriptor &;
1125 
1126       RIterator(Iter_t iter) : fIter(iter) {}
1127       iterator &operator++() /* prefix */
1128       {
1129          ++fIter;
1130          return *this;
1131       }
1132       iterator operator++(int) /* postfix */
1133       {
1134          auto old = *this;
1135          operator++();
1136          return old;
1137       }
1138       reference operator*() const { return fIter->second; }
1139       pointer operator->() const { return &fIter->second; }
1140       bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1141       bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1142    };
1143 
1144    RClusterDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1145    RIterator begin() { return RIterator(fNTuple.fClusterDescriptors.cbegin()); }
1146    RIterator end() { return RIterator(fNTuple.fClusterDescriptors.cend()); }
1147 };
1148 
1149 // clang-format off
1150 /**
1151 \class ROOT::RNTupleDescriptor::RExtraTypeInfoDescriptorIterable
1152 \ingroup NTuple
1153 \brief Used to loop over all the extra type info record of an RNTuple (in unspecified order)
1154 */
1155 // clang-format on
1156 class RNTupleDescriptor::RExtraTypeInfoDescriptorIterable final {
1157 private:
1158    /// The associated RNTuple for this range.
1159    const RNTupleDescriptor &fNTuple;
1160 
1161 public:
1162    class RIterator final {
1163    private:
1164       using Iter_t = std::vector<RExtraTypeInfoDescriptor>::const_iterator;
1165       /// The wrapped vector iterator
1166       Iter_t fIter;
1167 
1168    public:
1169       using iterator_category = std::forward_iterator_tag;
1170       using iterator = RIterator;
1171       using value_type = RExtraTypeInfoDescriptor;
1172       using difference_type = std::ptrdiff_t;
1173       using pointer = const RExtraTypeInfoDescriptor *;
1174       using reference = const RExtraTypeInfoDescriptor &;
1175 
1176       RIterator(Iter_t iter) : fIter(iter) {}
1177       iterator &operator++() /* prefix */
1178       {
1179          ++fIter;
1180          return *this;
1181       }
1182       iterator operator++(int) /* postfix */
1183       {
1184          auto old = *this;
1185          operator++();
1186          return old;
1187       }
1188       reference operator*() const { return *fIter; }
1189       pointer operator->() const { return &*fIter; }
1190       bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1191       bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1192    };
1193 
1194    RExtraTypeInfoDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1195    RIterator begin() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cbegin()); }
1196    RIterator end() { return RIterator(fNTuple.fExtraTypeInfoDescriptors.cend()); }
1197 };
1198 
1199 namespace Experimental {
1200 // clang-format off
1201 /**
1202 \class ROOT::Experimental::RNTupleAttrSetDescriptorIterable
1203 \ingroup NTuple
1204 \brief Used to loop over all the Attribute Sets linked to an RNTuple
1205 */
1206 // clang-format on
1207 // TODO: move this to RNTupleDescriptor::RNTupleAttrSetDescriptorIterable when it moves out of Experimental.
1208 class RNTupleAttrSetDescriptorIterable final {
1209 private:
1210    /// The associated RNTuple for this range.
1211    const RNTupleDescriptor &fNTuple;
1212 
1213 public:
1214    class RIterator final {
1215    private:
1216       using Iter_t = std::vector<RNTupleAttrSetDescriptor>::const_iterator;
1217       /// The wrapped vector iterator
1218       Iter_t fIter;
1219 
1220    public:
1221       using iterator_category = std::forward_iterator_tag;
1222       using iterator = RIterator;
1223       using value_type = RNTupleAttrSetDescriptor;
1224       using difference_type = std::ptrdiff_t;
1225       using pointer = const value_type *;
1226       using reference = const value_type &;
1227 
1228       RIterator(Iter_t iter) : fIter(iter) {}
1229       iterator &operator++() /* prefix */
1230       {
1231          ++fIter;
1232          return *this;
1233       }
1234       iterator operator++(int) /* postfix */
1235       {
1236          auto old = *this;
1237          operator++();
1238          return old;
1239       }
1240       reference operator*() const { return *fIter; }
1241       pointer operator->() const { return &*fIter; }
1242       bool operator!=(const iterator &rh) const { return fIter != rh.fIter; }
1243       bool operator==(const iterator &rh) const { return fIter == rh.fIter; }
1244    };
1245 
1246    RNTupleAttrSetDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
1247    RIterator begin() { return RIterator(fNTuple.fAttributeSets.cbegin()); }
1248    RIterator end() { return RIterator(fNTuple.fAttributeSets.cend()); }
1249 };
1250 } // namespace Experimental
1251 
1252 // clang-format off
1253 /**
1254 \class ROOT::RNTupleDescriptor::RHeaderExtension
1255 \ingroup NTuple
1256 \brief Summarizes information about fields and the corresponding columns that were added after the header has been serialized
1257 */
1258 // clang-format on
1259 class RNTupleDescriptor::RHeaderExtension final {
1260    friend class Internal::RNTupleDescriptorBuilder;
1261 
1262 private:
1263    /// All field IDs of late model extensions, in the order of field addition. This is necessary to serialize the
1264    /// the fields in that order.
1265    std::vector<ROOT::DescriptorId_t> fFieldIdsOrder;
1266    /// All field IDs of late model extensions for efficient lookup. When a column gets added to the extension
1267    /// header, this enables us to determine if the column belongs to a field of the header extension of if it
1268    /// belongs to a field of the regular header that gets extended by additional column representations.
1269    std::unordered_set<ROOT::DescriptorId_t> fFieldIdsLookup;
1270    /// All logical column IDs of columns that extend, with additional column representations, fields of the regular
1271    /// header. During serialization, these columns are not picked up as columns of `fFieldIdsOrder`. But instead
1272    /// these columns need to be serialized in the extension header without re-serializing the field.
1273    std::vector<ROOT::DescriptorId_t> fExtendedColumnRepresentations;
1274    /// Number of logical and physical columns; updated by the descriptor builder when columns are added
1275    std::uint32_t fNLogicalColumns = 0;
1276    std::uint32_t fNPhysicalColumns = 0;
1277 
1278    /// Marks `fieldDesc` as an extended field, i.e. a field that appears in the Header Extension (e.g. having been added
1279    /// through late model extension). Note that the field descriptor should also have been added to the RNTuple
1280    /// Descriptor alongside non-extended fields.
1281    void MarkExtendedField(const RFieldDescriptor &fieldDesc)
1282    {
1283       fFieldIdsOrder.emplace_back(fieldDesc.GetId());
1284       fFieldIdsLookup.insert(fieldDesc.GetId());
1285    }
1286 
1287    /// Marks `columnDesc` as an extended column, i.e. a column that appears in the Header Extension (e.g. having been
1288    /// added through late model extension as an additional representation of an existing column). Note that the column
1289    /// descriptor should also have been added to the RNTuple Descriptor alongside non-extended columns.
1290    void MarkExtendedColumn(const RColumnDescriptor &columnDesc)
1291    {
1292       fNLogicalColumns++;
1293       if (!columnDesc.IsAliasColumn())
1294          fNPhysicalColumns++;
1295       if (fFieldIdsLookup.count(columnDesc.GetFieldId()) == 0) {
1296          fExtendedColumnRepresentations.emplace_back(columnDesc.GetLogicalId());
1297       }
1298    }
1299 
1300 public:
1301    std::size_t GetNFields() const { return fFieldIdsOrder.size(); }
1302    std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
1303    std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
1304    const std::vector<ROOT::DescriptorId_t> &GetExtendedColumnRepresentations() const
1305    {
1306       return fExtendedColumnRepresentations;
1307    }
1308    /// Return a vector containing the IDs of the top-level fields defined in the extension header, in the order
1309    /// of their addition.
1310    /// We cannot create this vector when building the fFields because at the time when AddExtendedField is called,
1311    /// the field is not yet linked into the schema tree.
1312    std::vector<ROOT::DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
1313 
1314    bool ContainsField(ROOT::DescriptorId_t fieldId) const
1315    {
1316       return fFieldIdsLookup.find(fieldId) != fFieldIdsLookup.end();
1317    }
1318    bool ContainsExtendedColumnRepresentation(ROOT::DescriptorId_t columnId) const
1319    {
1320       return std::find(fExtendedColumnRepresentations.begin(), fExtendedColumnRepresentations.end(), columnId) !=
1321              fExtendedColumnRepresentations.end();
1322    }
1323 };
1324 
1325 namespace Experimental::Internal {
1326 class RNTupleAttrSetDescriptorBuilder final {
1327    ROOT::Experimental::RNTupleAttrSetDescriptor fDesc;
1328 
1329 public:
1330    RNTupleAttrSetDescriptorBuilder &Name(std::string_view name)
1331    {
1332       fDesc.fName = name;
1333       return *this;
1334    }
1335    RNTupleAttrSetDescriptorBuilder &SchemaVersion(std::uint16_t major, std::uint16_t minor)
1336    {
1337       fDesc.fSchemaVersionMajor = major;
1338       fDesc.fSchemaVersionMinor = minor;
1339       return *this;
1340    }
1341    RNTupleAttrSetDescriptorBuilder &AnchorLocator(const RNTupleLocator &loc)
1342    {
1343       fDesc.fAnchorLocator = loc;
1344       return *this;
1345    }
1346    RNTupleAttrSetDescriptorBuilder &AnchorLength(std::uint32_t length)
1347    {
1348       fDesc.fAnchorLength = length;
1349       return *this;
1350    }
1351 
1352    /// Attempt to make an AttributeSet descriptor. This may fail if the builder
1353    /// was not given enough information to make a proper descriptor.
1354    RResult<ROOT::Experimental::RNTupleAttrSetDescriptor> MoveDescriptor();
1355 };
1356 } // namespace Experimental::Internal
1357 
1358 namespace Internal {
1359 
1360 // clang-format off
1361 /**
1362 \class ROOT::Internal::RColumnDescriptorBuilder
1363 \ingroup NTuple
1364 \brief A helper class for piece-wise construction of an RColumnDescriptor
1365 
1366 Dangling column descriptors can become actual descriptors when added to an
1367 RNTupleDescriptorBuilder instance and then linked to their fields.
1368 */
1369 // clang-format on
1370 class RColumnDescriptorBuilder final {
1371 private:
1372    RColumnDescriptor fColumn = RColumnDescriptor();
1373 
1374 public:
1375    /// Make an empty column descriptor builder.
1376    RColumnDescriptorBuilder() = default;
1377 
1378    RColumnDescriptorBuilder &LogicalColumnId(ROOT::DescriptorId_t logicalColumnId)
1379    {
1380       fColumn.fLogicalColumnId = logicalColumnId;
1381       return *this;
1382    }
1383    RColumnDescriptorBuilder &PhysicalColumnId(ROOT::DescriptorId_t physicalColumnId)
1384    {
1385       fColumn.fPhysicalColumnId = physicalColumnId;
1386       return *this;
1387    }
1388    RColumnDescriptorBuilder &BitsOnStorage(std::uint16_t bitsOnStorage)
1389    {
1390       fColumn.fBitsOnStorage = bitsOnStorage;
1391       return *this;
1392    }
1393    RColumnDescriptorBuilder &Type(ROOT::ENTupleColumnType type)
1394    {
1395       fColumn.fType = type;
1396       return *this;
1397    }
1398    RColumnDescriptorBuilder &FieldId(ROOT::DescriptorId_t fieldId)
1399    {
1400       fColumn.fFieldId = fieldId;
1401       return *this;
1402    }
1403    RColumnDescriptorBuilder &Index(std::uint32_t index)
1404    {
1405       fColumn.fIndex = index;
1406       return *this;
1407    }
1408    RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
1409    {
1410       fColumn.fFirstElementIndex = firstElementIdx;
1411       return *this;
1412    }
1413    RColumnDescriptorBuilder &SetSuppressedDeferred()
1414    {
1415       R__ASSERT(fColumn.fFirstElementIndex != 0);
1416       if (fColumn.fFirstElementIndex > 0)
1417          fColumn.fFirstElementIndex = -fColumn.fFirstElementIndex;
1418       return *this;
1419    }
1420    RColumnDescriptorBuilder &RepresentationIndex(std::uint16_t representationIndex)
1421    {
1422       fColumn.fRepresentationIndex = representationIndex;
1423       return *this;
1424    }
1425    RColumnDescriptorBuilder &ValueRange(double min, double max)
1426    {
1427       fColumn.fValueRange = {min, max};
1428       return *this;
1429    }
1430    RColumnDescriptorBuilder &ValueRange(std::optional<RColumnDescriptor::RValueRange> valueRange)
1431    {
1432       fColumn.fValueRange = valueRange;
1433       return *this;
1434    }
1435    ROOT::DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
1436    ROOT::DescriptorId_t GetRepresentationIndex() const { return fColumn.fRepresentationIndex; }
1437    /// Attempt to make a column descriptor. This may fail if the column
1438    /// was not given enough information to make a proper descriptor.
1439    RResult<RColumnDescriptor> MakeDescriptor() const;
1440 };
1441 
1442 // clang-format off
1443 /**
1444 \class ROOT::Internal::RFieldDescriptorBuilder
1445 \ingroup NTuple
1446 \brief A helper class for piece-wise construction of an RFieldDescriptor
1447 
1448 Dangling field descriptors describe a single field in isolation. They are
1449 missing the necessary relationship information (parent field, any child fields)
1450 required to describe a real RNTuple field.
1451 
1452 Dangling field descriptors can only become actual descriptors when added to an
1453 RNTupleDescriptorBuilder instance and then linked to other fields.
1454 */
1455 // clang-format on
1456 class RFieldDescriptorBuilder final {
1457 private:
1458    RFieldDescriptor fField = RFieldDescriptor();
1459 
1460 public:
1461    /// Make an empty dangling field descriptor.
1462    RFieldDescriptorBuilder() = default;
1463 
1464    /// Make a new RFieldDescriptorBuilder based off a live RNTuple field.
1465    static RFieldDescriptorBuilder FromField(const ROOT::RFieldBase &field);
1466 
1467    RFieldDescriptorBuilder &FieldId(ROOT::DescriptorId_t fieldId)
1468    {
1469       fField.fFieldId = fieldId;
1470       return *this;
1471    }
1472    RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
1473    {
1474       fField.fFieldVersion = fieldVersion;
1475       return *this;
1476    }
1477    RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1478    {
1479       fField.fTypeVersion = typeVersion;
1480       return *this;
1481    }
1482    RFieldDescriptorBuilder &ParentId(ROOT::DescriptorId_t id)
1483    {
1484       fField.fParentId = id;
1485       return *this;
1486    }
1487    RFieldDescriptorBuilder &ProjectionSourceId(ROOT::DescriptorId_t id)
1488    {
1489       fField.fProjectionSourceId = id;
1490       return *this;
1491    }
1492    RFieldDescriptorBuilder &FieldName(const std::string &fieldName)
1493    {
1494       fField.fFieldName = fieldName;
1495       return *this;
1496    }
1497    RFieldDescriptorBuilder &FieldDescription(const std::string &fieldDescription)
1498    {
1499       fField.fFieldDescription = fieldDescription;
1500       return *this;
1501    }
1502    RFieldDescriptorBuilder &TypeName(const std::string &typeName)
1503    {
1504       fField.fTypeName = typeName;
1505       return *this;
1506    }
1507    RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
1508    {
1509       fField.fTypeAlias = typeAlias;
1510       return *this;
1511    }
1512    RFieldDescriptorBuilder &NRepetitions(std::uint64_t nRepetitions)
1513    {
1514       fField.fNRepetitions = nRepetitions;
1515       return *this;
1516    }
1517    RFieldDescriptorBuilder &Structure(const ROOT::ENTupleStructure &structure)
1518    {
1519       fField.fStructure = structure;
1520       return *this;
1521    }
1522    RFieldDescriptorBuilder &TypeChecksum(const std::optional<std::uint32_t> typeChecksum)
1523    {
1524       fField.fTypeChecksum = typeChecksum;
1525       return *this;
1526    }
1527    ROOT::DescriptorId_t GetParentId() const { return fField.fParentId; }
1528    /// Attempt to make a field descriptor. This may fail if the dangling field
1529    /// was not given enough information to make a proper descriptor.
1530    RResult<RFieldDescriptor> MakeDescriptor() const;
1531 };
1532 
1533 // clang-format off
1534 /**
1535 \class ROOT::Internal::RClusterDescriptorBuilder
1536 \ingroup NTuple
1537 \brief A helper class for piece-wise construction of an RClusterDescriptor
1538 
1539 The cluster descriptor builder starts from a summary-only cluster descriptor and allows for the
1540 piecewise addition of page locations.
1541 */
1542 // clang-format on
1543 class RClusterDescriptorBuilder final {
1544 private:
1545    RClusterDescriptor fCluster;
1546 
1547 public:
1548    RClusterDescriptorBuilder &ClusterId(ROOT::DescriptorId_t clusterId)
1549    {
1550       fCluster.fClusterId = clusterId;
1551       return *this;
1552    }
1553 
1554    RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
1555    {
1556       fCluster.fFirstEntryIndex = firstEntryIndex;
1557       return *this;
1558    }
1559 
1560    RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
1561    {
1562       fCluster.fNEntries = nEntries;
1563       return *this;
1564    }
1565 
1566    RResult<void> CommitColumnRange(ROOT::DescriptorId_t physicalId, std::uint64_t firstElementIndex,
1567                                    std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
1568 
1569    /// Books the given column ID as being suppressed in this cluster. The correct first element index and number of
1570    /// elements need to be set by CommitSuppressedColumnRanges() once all the calls to CommitColumnRange() and
1571    /// MarkSuppressedColumnRange() took place.
1572    RResult<void> MarkSuppressedColumnRange(ROOT::DescriptorId_t physicalId);
1573 
1574    /// Sets the first element index and number of elements for all the suppressed column ranges.
1575    /// The information is taken from the corresponding columns from the primary representation.
1576    /// Needs to be called when all the columns (suppressed and regular) where added.
1577    RResult<void> CommitSuppressedColumnRanges(const RNTupleDescriptor &desc);
1578 
1579    /// Add column and page ranges for columns created during late model extension missing in this cluster.  The locator
1580    /// type for the synthesized page ranges is `kTypePageZero`.  All the page sources must be able to populate the
1581    /// 'zero' page from such locator. Any call to CommitColumnRange() and CommitSuppressedColumnRanges()
1582    /// should happen before calling this function.
1583    RClusterDescriptorBuilder &AddExtendedColumnRanges(const RNTupleDescriptor &desc);
1584 
1585    const RClusterDescriptor::RColumnRange &GetColumnRange(ROOT::DescriptorId_t physicalId)
1586    {
1587       return fCluster.GetColumnRange(physicalId);
1588    }
1589 
1590    /// Move out the full cluster descriptor including page locations
1591    RResult<RClusterDescriptor> MoveDescriptor();
1592 };
1593 
1594 // clang-format off
1595 /**
1596 \class ROOT::Internal::RClusterGroupDescriptorBuilder
1597 \ingroup NTuple
1598 \brief A helper class for piece-wise construction of an RClusterGroupDescriptor
1599 */
1600 // clang-format on
1601 class RClusterGroupDescriptorBuilder final {
1602 private:
1603    RClusterGroupDescriptor fClusterGroup;
1604 
1605 public:
1606    RClusterGroupDescriptorBuilder() = default;
1607    static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc);
1608 
1609    RClusterGroupDescriptorBuilder &ClusterGroupId(ROOT::DescriptorId_t clusterGroupId)
1610    {
1611       fClusterGroup.fClusterGroupId = clusterGroupId;
1612       return *this;
1613    }
1614    RClusterGroupDescriptorBuilder &PageListLocator(const RNTupleLocator &pageListLocator)
1615    {
1616       fClusterGroup.fPageListLocator = pageListLocator;
1617       return *this;
1618    }
1619    RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1620    {
1621       fClusterGroup.fPageListLength = pageListLength;
1622       return *this;
1623    }
1624    RClusterGroupDescriptorBuilder &MinEntry(std::uint64_t minEntry)
1625    {
1626       fClusterGroup.fMinEntry = minEntry;
1627       return *this;
1628    }
1629    RClusterGroupDescriptorBuilder &EntrySpan(std::uint64_t entrySpan)
1630    {
1631       fClusterGroup.fEntrySpan = entrySpan;
1632       return *this;
1633    }
1634    RClusterGroupDescriptorBuilder &NClusters(std::uint32_t nClusters)
1635    {
1636       fClusterGroup.fNClusters = nClusters;
1637       return *this;
1638    }
1639    void AddSortedClusters(const std::vector<ROOT::DescriptorId_t> &clusterIds)
1640    {
1641       if (clusterIds.size() != fClusterGroup.GetNClusters())
1642          throw RException(R__FAIL("mismatch of number of clusters"));
1643       fClusterGroup.fClusterIds = clusterIds;
1644    }
1645 
1646    RResult<RClusterGroupDescriptor> MoveDescriptor();
1647 };
1648 
1649 // clang-format off
1650 /**
1651 \class ROOT::Internal::RExtraTypeInfoDescriptorBuilder
1652 \ingroup NTuple
1653 \brief A helper class for piece-wise construction of an RExtraTypeInfoDescriptor
1654 */
1655 // clang-format on
1656 class RExtraTypeInfoDescriptorBuilder final {
1657 private:
1658    RExtraTypeInfoDescriptor fExtraTypeInfo;
1659 
1660 public:
1661    RExtraTypeInfoDescriptorBuilder() = default;
1662 
1663    RExtraTypeInfoDescriptorBuilder &ContentId(EExtraTypeInfoIds contentId)
1664    {
1665       fExtraTypeInfo.fContentId = contentId;
1666       return *this;
1667    }
1668    RExtraTypeInfoDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
1669    {
1670       fExtraTypeInfo.fTypeVersion = typeVersion;
1671       return *this;
1672    }
1673    RExtraTypeInfoDescriptorBuilder &TypeName(const std::string &typeName)
1674    {
1675       fExtraTypeInfo.fTypeName = typeName;
1676       return *this;
1677    }
1678    RExtraTypeInfoDescriptorBuilder &Content(const std::string &content)
1679    {
1680       fExtraTypeInfo.fContent = content;
1681       return *this;
1682    }
1683 
1684    RResult<RExtraTypeInfoDescriptor> MoveDescriptor();
1685 };
1686 
1687 // clang-format off
1688 /**
1689 \class ROOT::Internal::RNTupleDescriptorBuilder
1690 \ingroup NTuple
1691 \brief A helper class for piece-wise construction of an RNTupleDescriptor
1692 
1693 Used by RPageStorage implementations in order to construct the RNTupleDescriptor from the various header parts.
1694 */
1695 // clang-format on
1696 class RNTupleDescriptorBuilder final {
1697 private:
1698    RNTupleDescriptor fDescriptor;
1699    RResult<void> EnsureFieldExists(ROOT::DescriptorId_t fieldId) const;
1700 
1701 public:
1702    /// Checks whether invariants hold:
1703    /// * RNTuple epoch is valid
1704    /// * RNTuple name is valid
1705    /// * Fields have valid parents
1706    /// * Number of columns is constant across column representations
1707    RResult<void> EnsureValidDescriptor() const;
1708    const RNTupleDescriptor &GetDescriptor() const { return fDescriptor; }
1709    RNTupleDescriptor MoveDescriptor();
1710 
1711    /// Copies the "schema" part of `descriptor` into the builder's descriptor.
1712    /// This resets the builder's descriptor.
1713    void SetSchemaFromExisting(const RNTupleDescriptor &descriptor);
1714 
1715    void SetVersion(std::uint16_t versionEpoch, std::uint16_t versionMajor, std::uint16_t versionMinor,
1716                    std::uint16_t versionPatch);
1717    void SetVersionForWriting();
1718 
1719    void SetNTuple(const std::string_view name, const std::string_view description);
1720    void SetFeature(unsigned int flag);
1721 
1722    void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1723    void SetOnDiskHeaderSize(std::uint64_t size) { fDescriptor.fOnDiskHeaderSize = size; }
1724    /// The real footer size also include the page list envelopes
1725    void AddToOnDiskFooterSize(std::uint64_t size) { fDescriptor.fOnDiskFooterSize += size; }
1726 
1727    void AddField(const RFieldDescriptor &fieldDesc);
1728    RResult<void> AddFieldLink(ROOT::DescriptorId_t fieldId, ROOT::DescriptorId_t linkId);
1729    RResult<void> AddFieldProjection(ROOT::DescriptorId_t sourceId, ROOT::DescriptorId_t targetId);
1730 
1731    // The field that the column belongs to has to be already available. For fields with multiple columns,
1732    // the columns need to be added in order of the column index
1733    RResult<void> AddColumn(RColumnDescriptor &&columnDesc);
1734 
1735    RResult<void> AddClusterGroup(RClusterGroupDescriptor &&clusterGroup);
1736    RResult<void> AddCluster(RClusterDescriptor &&clusterDesc);
1737 
1738    RResult<void> AddExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1739    void ReplaceExtraTypeInfo(RExtraTypeInfoDescriptor &&extraTypeInfoDesc);
1740 
1741    RResult<void> AddAttributeSet(Experimental::RNTupleAttrSetDescriptor &&attrSetDesc);
1742 
1743    /// Mark the beginning of the header extension; any fields and columns added after a call to this function are
1744    /// annotated as begin part of the header extension.
1745    void BeginHeaderExtension();
1746 
1747    /// \brief Shift column IDs of alias columns by `offset`
1748    ///
1749    /// If the descriptor is constructed in pieces consisting of physical and alias columns
1750    /// (regular and projected fields), the natural column order would be
1751    ///   - Physical and alias columns of piece one
1752    ///   - Physical and alias columns of piece two
1753    ///   - etc.
1754    /// What we want, however, are first all physical column IDs and then all alias column IDs.
1755    /// This method adds `offset` to the logical column IDs of all alias columns and fixes up the corresponding
1756    /// column IDs in the projected field descriptors.  In this way, a new piece of physical and alias columns can
1757    /// first shift the existing alias columns by the number of new physical columns, resulting in the following order
1758    ///   - Physical columns of piece one
1759    ///   - Physical columns of piece two
1760    ///   - ...
1761    //    - Logical columns of piece one
1762    ///   - Logical columns of piece two
1763    ///   - ...
1764    void ShiftAliasColumns(std::uint32_t offset);
1765 };
1766 
1767 inline RNTupleDescriptor CloneDescriptorSchema(const RNTupleDescriptor &desc)
1768 {
1769    return desc.CloneSchema();
1770 }
1771 
1772 } // namespace Internal
1773 
1774 } // namespace ROOT
1775 
1776 #endif // ROOT_RNTupleDescriptor