Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-16 09:08:34

0001 /// \file ROOT/RNTupleUtil.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2018-10-04
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2020, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RNTupleUtil
0015 #define ROOT_RNTupleUtil
0016 
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <variant>
0022 
0023 #include <ROOT/RError.hxx>
0024 #include <ROOT/RLogger.hxx>
0025 
0026 namespace ROOT {
0027 
0028 class RLogChannel;
0029 namespace Internal {
0030 /// Log channel for RNTuple diagnostics.
0031 ROOT::RLogChannel &NTupleLog();
0032 } // namespace Internal
0033 
/// Wrapper type that allows an offset column to be presented as an array of collection sizes.
/// See RField<RNTupleCardinality<SizeT>> for details.
///
/// \tparam SizeT Integer type of the wrapped size; must be std::uint32_t or std::uint64_t.
template <typename SizeT>
struct RNTupleCardinality {
   using ValueType = SizeT;

   static_assert(std::is_same_v<ValueType, std::uint32_t> || std::is_same_v<ValueType, std::uint64_t>,
                 "RNTupleCardinality is only supported with std::uint32_t or std::uint64_t template parameters");

   /// The wrapped size; publicly accessible.
   ValueType fValue;

   /// Creates a cardinality of zero.
   RNTupleCardinality() : fValue{0} {}
   /// Wraps the given value. Explicit, so plain integers do not silently convert to a cardinality.
   explicit constexpr RNTupleCardinality(ValueType value) : fValue{value} {}

   /// Replaces the stored value with the given plain integer.
   RNTupleCardinality &operator=(const ValueType value)
   {
      this->fValue = value;
      return *this;
   }

   /// Implicit read access to the stored value as the underlying integer type.
   operator ValueType() const { return fValue; }
};
0054 
0055 // clang-format off
0056 /**
0057 \class ROOT::ENTupleColumnType
0058 \ingroup NTuple
0059 \brief The available trivial, native content types of a column
0060 
0061 More complex types, such as classes, get translated into columns of such simple types by the RField.
0062 When changed, remember to update
0063   - RColumnElement::Generate()
0064   - RColumnElement::GetTypeName()
0065   - RColumnElement::GetValidBitRange()
0066   - RColumnElement template specializations / packing & unpacking
0067   - If necessary, endianness handling for the packing + unit test in ntuple_endian
0068   - RNTupleSerializer::[Des|S]erializeColumnType
0069 */
0070 // clang-format on
// The numeric values are spelled out explicitly; they are identical to the values obtained by
// counting up from kUnknown = 0 in declaration order. New enumerators must be inserted before kMax.
enum class ENTupleColumnType {
   kUnknown = 0,

   // type for root columns of (nested) collections; offsets are relative to the current cluster
   kIndex64 = 1,
   kIndex32 = 2,

   // 96 bit column that is a pair of a kIndex64 and a 32bit dispatch tag to a column ID;
   // used to serialize std::variant.
   kSwitch = 3,

   kByte = 4,
   kChar = 5,
   kBit = 6,

   kReal64 = 7,
   kReal32 = 8,
   kReal16 = 9,

   kInt64 = 10,
   kUInt64 = 11,
   kInt32 = 12,
   kUInt32 = 13,
   kInt16 = 14,
   kUInt16 = 15,
   kInt8 = 16,
   kUInt8 = 17,

   kSplitIndex64 = 18,
   kSplitIndex32 = 19,
   kSplitReal64 = 20,
   kSplitReal32 = 21,
   kSplitInt64 = 22,
   kSplitUInt64 = 23,
   kSplitInt32 = 24,
   kSplitUInt32 = 25,
   kSplitInt16 = 26,
   kSplitUInt16 = 27,

   kReal32Trunc = 28,
   kReal32Quant = 29,

   // Always the last enumerator
   kMax = 30,
};
0107 
/// The fields in the ntuple model tree can carry different structural information about the type system.
/// Leaf fields contain just data, collection fields resolve to offset columns, record fields have no
/// materialization on the primitive column layer.
///
/// The numeric values are spelled out explicitly; they equal the values obtained by counting up
/// from zero in declaration order.
enum class ENTupleStructure : std::uint16_t {
   kInvalid = 0,
   kLeaf = 1,
   kCollection = 2,
   kRecord = 3,
   kVariant = 4,
   kStreamer = 5,
   kUnknown = 6
};
0120 
/// Integer type long enough to hold the maximum number of entries in a column
using NTupleSize_t = std::uint64_t;
/// Marker for an invalid index: all bits set, i.e. the largest value of NTupleSize_t
constexpr NTupleSize_t kInvalidNTupleIndex = static_cast<NTupleSize_t>(-1);

/// Distinguishes elements of the same type within a descriptor, e.g. different fields
using DescriptorId_t = std::uint64_t;
/// Marker for an invalid descriptor ID: all bits set, i.e. the largest value of DescriptorId_t
constexpr DescriptorId_t kInvalidDescriptorId = static_cast<DescriptorId_t>(-1);
0128 
0129 /// Addresses a column element or field item relative to a particular cluster, instead of a global NTupleSize_t index
0130 class RNTupleLocalIndex {
0131 private:
0132    ROOT::DescriptorId_t fClusterId = ROOT::kInvalidDescriptorId;
0133    ROOT::NTupleSize_t fIndexInCluster = ROOT::kInvalidNTupleIndex;
0134 
0135 public:
0136    RNTupleLocalIndex() = default;
0137    RNTupleLocalIndex(const RNTupleLocalIndex &other) = default;
0138    RNTupleLocalIndex &operator=(const RNTupleLocalIndex &other) = default;
0139    constexpr RNTupleLocalIndex(ROOT::DescriptorId_t clusterId, ROOT::NTupleSize_t indexInCluster)
0140       : fClusterId(clusterId), fIndexInCluster(indexInCluster)
0141    {
0142    }
0143 
0144    RNTupleLocalIndex operator+(ROOT::NTupleSize_t off) const
0145    {
0146       return RNTupleLocalIndex(fClusterId, fIndexInCluster + off);
0147    }
0148 
0149    RNTupleLocalIndex operator-(ROOT::NTupleSize_t off) const
0150    {
0151       return RNTupleLocalIndex(fClusterId, fIndexInCluster - off);
0152    }
0153 
0154    RNTupleLocalIndex operator++(int) /* postfix */
0155    {
0156       auto r = *this;
0157       fIndexInCluster++;
0158       return r;
0159    }
0160 
0161    RNTupleLocalIndex &operator++() /* prefix */
0162    {
0163       ++fIndexInCluster;
0164       return *this;
0165    }
0166 
0167    bool operator==(RNTupleLocalIndex other) const
0168    {
0169       return fClusterId == other.fClusterId && fIndexInCluster == other.fIndexInCluster;
0170    }
0171 
0172    bool operator!=(RNTupleLocalIndex other) const { return !(*this == other); }
0173 
0174    ROOT::DescriptorId_t GetClusterId() const { return fClusterId; }
0175    ROOT::NTupleSize_t GetIndexInCluster() const { return fIndexInCluster; }
0176 };
0177 
/// RNTupleLocator payload that is common for object stores using 64bit location information.
/// This might not contain the full location of the content. In particular, for page locators this information may be
/// used in conjunction with the cluster and column ID.
class RNTupleLocatorObject64 {
public:
   /// Creates a payload with location 0.
   RNTupleLocatorObject64() = default;
   /// Wraps the given 64bit location; explicit, so plain integers do not convert implicitly.
   explicit RNTupleLocatorObject64(std::uint64_t location) : fLocation{location} {}

   std::uint64_t GetLocation() const { return fLocation; }

   /// Two payloads are equal if they carry the same location value.
   bool operator==(const RNTupleLocatorObject64 &other) const { return GetLocation() == other.GetLocation(); }

private:
   std::uint64_t fLocation = 0;
};
0191 
0192 /// Generic information about the physical location of data. Values depend on the concrete storage type.  E.g.,
0193 /// for a local file `fPosition` might be a 64bit file offset. Referenced objects on storage can be compressed
0194 /// and therefore we need to store their actual size.
0195 class RNTupleLocator {
0196 public:
0197    /// Values for the _Type_ field in non-disk locators.  Serializable types must have the MSb == 0; see
0198    /// `doc/BinaryFormatSpecification.md` for details
0199    enum ELocatorType : std::uint8_t {
0200       // The kTypeFile locator may translate to an on-disk standard locator (type 0x00) or a large locator (type 0x01),
0201       // if the size of the referenced data block is >2GB
0202       kTypeFile = 0x00,
0203       kTypeDAOS = 0x02,
0204 
0205       kLastSerializableType = 0x7f,
0206       kTypePageZero = kLastSerializableType + 1,
0207       kTypeUnknown,
0208    };
0209 
0210 private:
0211    std::uint64_t fNBytesOnStorage = 0;
0212    /// Simple on-disk locators consisting of a 64-bit offset use variant type `uint64_t`; extended locators have
0213    /// `fPosition.index()` > 0
0214    std::variant<std::uint64_t, RNTupleLocatorObject64> fPosition{};
0215    /// For non-disk locators, the value for the _Type_ field. This makes it possible to have different type values even
0216    /// if the payload structure is identical.
0217    ELocatorType fType = kTypeFile;
0218    /// Reserved for use by concrete storage backends
0219    std::uint8_t fReserved = 0;
0220 
0221 public:
0222    RNTupleLocator() = default;
0223 
0224    bool operator==(const RNTupleLocator &other) const
0225    {
0226       return fPosition == other.fPosition && fNBytesOnStorage == other.fNBytesOnStorage && fType == other.fType;
0227    }
0228 
0229    std::uint64_t GetNBytesOnStorage() const { return fNBytesOnStorage; }
0230    ELocatorType GetType() const { return fType; }
0231    std::uint8_t GetReserved() const { return fReserved; }
0232 
0233    void SetNBytesOnStorage(std::uint64_t nBytesOnStorage) { fNBytesOnStorage = nBytesOnStorage; }
0234    void SetType(ELocatorType type) { fType = type; }
0235    void SetReserved(std::uint8_t reserved) { fReserved = reserved; }
0236 
0237    template <typename T>
0238    T GetPosition() const
0239    {
0240       return std::get<T>(fPosition);
0241    }
0242 
0243    template <typename T>
0244    void SetPosition(T position)
0245    {
0246       fPosition = position;
0247    }
0248 };
0249 
0250 namespace Internal {
0251 
/// The in-memory representation of a 32bit or 64bit on-disk index column. Wraps the integer in a
/// named type so that templates can distinguish between integer data columns and index columns.
class RColumnIndex {
public:
   using ValueType = std::uint64_t;

   /// Creates an index with value 0.
   RColumnIndex() = default;
   /// Wraps the given value; explicit, so plain integers do not silently become index values.
   explicit constexpr RColumnIndex(ValueType value) : fValue{value} {}

   /// Replaces the stored value with the given plain integer.
   RColumnIndex &operator=(const ValueType value)
   {
      fValue = value;
      return *this;
   }

   /// Adds `value` to the stored value (unsigned arithmetic).
   RColumnIndex &operator+=(const ValueType value)
   {
      fValue = fValue + value;
      return *this;
   }

   /// Postfix increment: advances the stored value by one and returns the state before the increment.
   RColumnIndex operator++(int)
   {
      RColumnIndex previous(*this);
      ++fValue;
      return previous;
   }

   /// Implicit read access to the stored value as a plain integer.
   operator ValueType() const { return fValue; }

private:
   ValueType fValue = 0;
};
0282 
0283 /// Holds the index and the tag of a kSwitch column
0284 class RColumnSwitch {
0285 private:
0286    ROOT::NTupleSize_t fIndex;
0287    std::uint32_t fTag = 0;
0288 
0289 public:
0290    RColumnSwitch() = default;
0291    RColumnSwitch(ROOT::NTupleSize_t index, std::uint32_t tag) : fIndex(index), fTag(tag) {}
0292    ROOT::NTupleSize_t GetIndex() const { return fIndex; }
0293    std::uint32_t GetTag() const { return fTag; }
0294 };
0295 
0296 } // namespace Internal
0297 
0298 namespace Internal {
0299 
/// Wraps `rawPtr` in a std::shared_ptr<T> without taking ownership of the pointee.
///
/// The result is built with the aliasing constructor of std::shared_ptr on top of an empty
/// (default-constructed) shared pointer: get() returns `rawPtr`, but nothing is deleted when the
/// returned pointer or its copies go away. The caller must keep the pointee alive while it is used.
template <typename T>
auto MakeAliasedSharedPtr(T *rawPtr)
{
   // One empty shared_ptr per type T; it owns nothing and only serves as the "owner" to alias.
   static const std::shared_ptr<T> emptyOwner;
   std::shared_ptr<T> aliased(emptyOwner, rawPtr);
   return aliased;
}
0306 
/// Allocates an array of `size` default-initialized elements and returns it as an owning unique_ptr.
/// This is useful for buffers that do not need to be initialized.
///
/// With C++20, this function can be replaced by std::make_unique_for_overwrite<T[]>.
template <typename T>
std::unique_ptr<T[]> MakeUninitArray(std::size_t size)
{
   // Deliberately a plain new[]: std::make_unique<T[]> would value-initialize every element.
   T *elements = new T[size];
   return std::unique_ptr<T[]>{elements};
}
0316 
0317 inline constexpr ENTupleColumnType kTestFutureColumnType =
0318    static_cast<ENTupleColumnType>(std::numeric_limits<std::underlying_type_t<ENTupleColumnType>>::max() - 1);
0319 
0320 inline constexpr ROOT::ENTupleStructure kTestFutureFieldStructure =
0321    static_cast<ROOT::ENTupleStructure>(std::numeric_limits<std::underlying_type_t<ROOT::ENTupleStructure>>::max() - 1);
0322 
0323 inline constexpr RNTupleLocator::ELocatorType kTestLocatorType = static_cast<RNTupleLocator::ELocatorType>(0x7e);
0324 static_assert(kTestLocatorType < RNTupleLocator::ELocatorType::kLastSerializableType);
0325 
0326 /// Check whether a given string is a valid name according to the RNTuple specification
0327 RResult<void> EnsureValidNameForRNTuple(std::string_view name, std::string_view where);
0328 
0329 } // namespace Internal
0330 } // namespace ROOT
0331 
0332 #endif