Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-05 08:50:58

0001 /// \file ROOT/RNTupleSerialize.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \author Javier Lopez-Gomez <javier.lopez.gomez@cern.ch>
0005 /// \date 2021-08-02
0006 
0007 /*************************************************************************
0008  * Copyright (C) 1995-2021, Rene Brun and Fons Rademakers.               *
0009  * All rights reserved.                                                  *
0010  *                                                                       *
0011  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0012  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0013  *************************************************************************/
0014 
0015 #ifndef ROOT_RNTupleSerialize
0016 #define ROOT_RNTupleSerialize
0017 
0018 #include <ROOT/RError.hxx>
0019 #include <ROOT/RNTupleTypes.hxx>
0020 #include <ROOT/RSpan.hxx>
0021 
0022 #include <Rtypes.h>
0023 
0024 #include <cstdint>
0025 #include <limits>
0026 #include <map>
0027 #include <string>
0028 #include <unordered_map>
0029 #include <vector>
0030 
0031 class TVirtualStreamerInfo;
0032 
0033 namespace ROOT {
0034 
0035 class RNTupleDescriptor;
0036 class RClusterDescriptor;
0037 enum class EExtraTypeInfoIds;
0038 
0039 namespace Experimental {
0040 class RNTupleAttrSetDescriptor;
0041 namespace Internal {
0042 class RNTupleAttrSetDescriptorBuilder;
0043 } // namespace Internal
0044 } // namespace Experimental
0045 
0046 namespace Internal {
0047 
0048 class RClusterDescriptorBuilder;
0049 class RNTupleDescriptorBuilder;
0050 
0051 // clang-format off
0052 /**
0053 \class ROOT::Internal::RNTupleSerializer
0054 \ingroup NTuple
0055 \brief A helper class for serializing and deserialization of the RNTuple binary format
0056 
0057 All serialization and deserialization routines return the number of bytes processed (written or read).
0058 
0059 The serialization routines can be called with a nullptr buffer, in which case only the size required to perform
0060 a serialization is returned. Deserialization routines must be called with a buffer that is sufficiently large.
0061 
0062 Deserialization errors throw exceptions. Only when indicated or when passed as a parameter is the buffer size checked.
0063 */
0064 // clang-format on
0065 class RNTupleSerializer {
0066    static RResult<std::vector<ROOT::Internal::RClusterDescriptorBuilder>>
0067    DeserializePageListRaw(const void *buffer, std::uint64_t bufSize, ROOT::DescriptorId_t clusterGroupId,
0068                           const RNTupleDescriptor &desc);
0069 
0070 public:
0071    static constexpr std::uint16_t kEnvelopeTypeHeader = 0x01;
0072    static constexpr std::uint16_t kEnvelopeTypeFooter = 0x02;
0073    static constexpr std::uint16_t kEnvelopeTypePageList = 0x03;
0074 
0075    static constexpr std::uint16_t kFlagRepetitiveField = 0x01;
0076    static constexpr std::uint16_t kFlagProjectedField = 0x02;
0077    static constexpr std::uint16_t kFlagHasTypeChecksum = 0x04;
0078 
0079    static constexpr std::uint16_t kFlagDeferredColumn = 0x01;
0080    static constexpr std::uint16_t kFlagHasValueRange = 0x02;
0081 
0082    static constexpr ROOT::DescriptorId_t kZeroFieldId = std::uint64_t(-2);
0083 
0084    static constexpr int64_t kSuppressedColumnMarker = std::numeric_limits<std::int64_t>::min();
0085 
0086    // In the page sink and the streamer field, the seen streamer infos are stored in a map
0087    // with the unique streamer info number being the key. Sorted by unique number.
0088    using StreamerInfoMap_t = std::map<Int_t, TVirtualStreamerInfo *>;
0089 
0090    struct REnvelopeLink {
0091       std::uint64_t fLength = 0;
0092       RNTupleLocator fLocator;
0093    };
0094 
0095    struct RClusterSummary {
0096       std::uint64_t fFirstEntry = 0;
0097       std::uint64_t fNEntries = 0;
0098       std::uint8_t fFlags = 0;
0099    };
0100 
0101    struct RClusterGroup {
0102       std::uint64_t fMinEntry = 0;
0103       std::uint64_t fEntrySpan = 0;
0104       std::uint32_t fNClusters = 0;
0105       REnvelopeLink fPageListEnvelopeLink;
0106    };
0107 
0108    /// The serialization context is used for the piecewise serialization of a descriptor.  During header serialization,
0109    /// the mapping of in-memory field and column IDs to on-disk IDs is built so that it can be used for the
0110    /// footer serialization in a second step.
0111    class RContext {
0112    private:
0113       std::uint64_t fHeaderSize = 0;
0114       std::uint64_t fHeaderXxHash3 = 0;
0115       std::map<ROOT::DescriptorId_t, ROOT::DescriptorId_t> fMem2OnDiskFieldIDs;
0116       std::map<ROOT::DescriptorId_t, ROOT::DescriptorId_t> fMem2OnDiskColumnIDs;
0117       std::map<ROOT::DescriptorId_t, ROOT::DescriptorId_t> fMem2OnDiskClusterIDs;
0118       std::map<ROOT::DescriptorId_t, ROOT::DescriptorId_t> fMem2OnDiskClusterGroupIDs;
0119       std::vector<ROOT::DescriptorId_t> fOnDisk2MemFieldIDs;
0120       std::vector<ROOT::DescriptorId_t> fOnDisk2MemColumnIDs;
0121       std::vector<ROOT::DescriptorId_t> fOnDisk2MemClusterIDs;
0122       std::vector<ROOT::DescriptorId_t> fOnDisk2MemClusterGroupIDs;
0123 
0124    public:
0125       void SetHeaderSize(std::uint64_t size) { fHeaderSize = size; }
0126       std::uint64_t GetHeaderSize() const { return fHeaderSize; }
0127       void SetHeaderXxHash3(std::uint64_t xxhash3) { fHeaderXxHash3 = xxhash3; }
0128       std::uint64_t GetHeaderXxHash3() const { return fHeaderXxHash3; }
0129       /// Map an in-memory field ID to its on-disk counterpart. It is allowed to call this function multiple times for
0130       /// the same `memId`, in which case the return value is the on-disk ID assigned on the first call.
0131       ROOT::DescriptorId_t MapFieldId(ROOT::DescriptorId_t memId)
0132       {
0133          auto onDiskId = fOnDisk2MemFieldIDs.size();
0134          const auto &p = fMem2OnDiskFieldIDs.try_emplace(memId, onDiskId);
0135          if (p.second)
0136             fOnDisk2MemFieldIDs.push_back(memId);
0137          return (*p.first).second;
0138       }
0139       /// Map an in-memory column ID to its on-disk counterpart. It is allowed to call this function multiple times for
0140       /// the same `memId`, in which case the return value is the on-disk ID assigned on the first call.
0141       /// Note that we only map physical column IDs.  Logical column IDs of alias columns are shifted before the
0142       /// serialization of the extension header.  Also, we only need to query physical column IDs for the page list
0143       /// serialization.
0144       ROOT::DescriptorId_t MapPhysicalColumnId(ROOT::DescriptorId_t memId)
0145       {
0146          auto onDiskId = fOnDisk2MemColumnIDs.size();
0147          const auto &p = fMem2OnDiskColumnIDs.try_emplace(memId, onDiskId);
0148          if (p.second)
0149             fOnDisk2MemColumnIDs.push_back(memId);
0150          return (*p.first).second;
0151       }
0152       ROOT::DescriptorId_t MapClusterId(ROOT::DescriptorId_t memId)
0153       {
0154          auto onDiskId = fOnDisk2MemClusterIDs.size();
0155          fMem2OnDiskClusterIDs[memId] = onDiskId;
0156          fOnDisk2MemClusterIDs.push_back(memId);
0157          return onDiskId;
0158       }
0159       ROOT::DescriptorId_t MapClusterGroupId(ROOT::DescriptorId_t memId)
0160       {
0161          auto onDiskId = fOnDisk2MemClusterGroupIDs.size();
0162          fMem2OnDiskClusterGroupIDs[memId] = onDiskId;
0163          fOnDisk2MemClusterGroupIDs.push_back(memId);
0164          return onDiskId;
0165       }
0166       /// Map in-memory field and column IDs to their on-disk counterparts. This function is unconditionally called
0167       /// during header serialization.  This function must be manually called after an incremental schema update as page
0168       /// list serialization requires all columns to be mapped.
0169       void MapSchema(const RNTupleDescriptor &desc, bool forHeaderExtension);
0170 
0171       ROOT::DescriptorId_t GetOnDiskFieldId(ROOT::DescriptorId_t memId) const { return fMem2OnDiskFieldIDs.at(memId); }
0172       ROOT::DescriptorId_t GetOnDiskColumnId(ROOT::DescriptorId_t memId) const
0173       {
0174          return fMem2OnDiskColumnIDs.at(memId);
0175       }
0176       ROOT::DescriptorId_t GetOnDiskClusterId(ROOT::DescriptorId_t memId) const
0177       {
0178          return fMem2OnDiskClusterIDs.at(memId);
0179       }
0180       ROOT::DescriptorId_t GetOnDiskClusterGroupId(ROOT::DescriptorId_t memId) const
0181       {
0182          return fMem2OnDiskClusterGroupIDs.at(memId);
0183       }
0184       ROOT::DescriptorId_t GetMemFieldId(ROOT::DescriptorId_t onDiskId) const { return fOnDisk2MemFieldIDs[onDiskId]; }
0185       ROOT::DescriptorId_t GetMemColumnId(ROOT::DescriptorId_t onDiskId) const
0186       {
0187          return fOnDisk2MemColumnIDs[onDiskId];
0188       }
0189       ROOT::DescriptorId_t GetMemClusterId(ROOT::DescriptorId_t onDiskId) const
0190       {
0191          return fOnDisk2MemClusterIDs[onDiskId];
0192       }
0193       ROOT::DescriptorId_t GetMemClusterGroupId(ROOT::DescriptorId_t onDiskId) const
0194       {
0195          return fOnDisk2MemClusterGroupIDs[onDiskId];
0196       }
0197 
0198       /// Return a vector containing the in-memory field ID for each on-disk counterpart, in order, i.e. the `i`-th
0199       /// value corresponds to the in-memory field ID for `i`-th on-disk ID
0200       const std::vector<ROOT::DescriptorId_t> &GetOnDiskFieldList() const { return fOnDisk2MemFieldIDs; }
0201    };
0202 
0203    /// Writes a XxHash-3 64bit checksum of the byte range given by data and length.
0204    static std::uint32_t
0205    SerializeXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3, void *buffer);
0206    /// Expects an xxhash3 checksum in the 8 bytes following data + length and verifies it.
0207    static RResult<void> VerifyXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3);
0208    static RResult<void> VerifyXxHash3(const unsigned char *data, std::uint64_t length);
0209 
0210    static std::uint32_t SerializeInt16(std::int16_t val, void *buffer);
0211    static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val);
0212    static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer);
0213    static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val);
0214 
0215    static std::uint32_t SerializeInt32(std::int32_t val, void *buffer);
0216    static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val);
0217    static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer);
0218    static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val);
0219 
0220    static std::uint32_t SerializeInt64(std::int64_t val, void *buffer);
0221    static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val);
0222    static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer);
0223    static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val);
0224 
0225    static std::uint32_t SerializeString(const std::string &val, void *buffer);
0226    static RResult<std::uint32_t> DeserializeString(const void *buffer, std::uint64_t bufSize, std::string &val);
0227 
0228    /// While we could just interpret the enums as ints, we make the translation explicit
0229    /// in order to avoid accidentally changing the on-disk numbers when adjusting the enum classes.
0230    static RResult<std::uint32_t> SerializeFieldStructure(ROOT::ENTupleStructure structure, void *buffer);
0231    static RResult<std::uint32_t> SerializeColumnType(ROOT::ENTupleColumnType type, void *buffer);
0232    static RResult<std::uint32_t> SerializeExtraTypeInfoId(ROOT::EExtraTypeInfoIds id, void *buffer);
0233    static RResult<std::uint32_t> DeserializeFieldStructure(const void *buffer, ROOT::ENTupleStructure &structure);
0234    static RResult<std::uint32_t> DeserializeColumnType(const void *buffer, ROOT::ENTupleColumnType &type);
0235    static RResult<std::uint32_t> DeserializeExtraTypeInfoId(const void *buffer, ROOT::EExtraTypeInfoIds &id);
0236 
0237    static std::uint32_t SerializeEnvelopePreamble(std::uint16_t envelopeType, void *buffer);
0238    static RResult<std::uint32_t> SerializeEnvelopePostscript(unsigned char *envelope, std::uint64_t size);
0239    static RResult<std::uint32_t>
0240    SerializeEnvelopePostscript(unsigned char *envelope, std::uint64_t size, std::uint64_t &xxhash3);
0241    // The bufSize must include the 8 bytes for the final xxhash3 checksum.
0242    static RResult<std::uint32_t>
0243    DeserializeEnvelope(const void *buffer, std::uint64_t bufSize, std::uint16_t expectedType);
0244    static RResult<std::uint32_t>
0245    DeserializeEnvelope(const void *buffer, std::uint64_t bufSize, std::uint16_t expectedType, std::uint64_t &xxhash3);
0246 
0247    static std::uint32_t SerializeRecordFramePreamble(void *buffer);
0248    static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer);
0249    static RResult<std::uint32_t> SerializeFramePostscript(void *frame, std::uint64_t size);
0250    static RResult<std::uint32_t>
0251    DeserializeFrameHeader(const void *buffer, std::uint64_t bufSize, std::uint64_t &frameSize, std::uint32_t &nitems);
0252    static RResult<std::uint32_t>
0253    DeserializeFrameHeader(const void *buffer, std::uint64_t bufSize, std::uint64_t &frameSize);
0254 
0255    // An empty flags vector will be serialized as a single, zero feature flag
0256    // The most significant bit in every flag is reserved and must _not_ be set
0257    static RResult<std::uint32_t> SerializeFeatureFlags(const std::vector<std::uint64_t> &flags, void *buffer);
0258    static RResult<std::uint32_t>
0259    DeserializeFeatureFlags(const void *buffer, std::uint64_t bufSize, std::vector<std::uint64_t> &flags);
0260 
0261    static RResult<std::uint32_t> SerializeLocator(const RNTupleLocator &locator, void *buffer);
0262    static RResult<std::uint32_t> SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer);
0263    static RResult<std::uint32_t> DeserializeLocator(const void *buffer, std::uint64_t bufSize, RNTupleLocator &locator);
0264    static RResult<std::uint32_t>
0265    DeserializeEnvelopeLink(const void *buffer, std::uint64_t bufSize, REnvelopeLink &envelopeLink);
0266 
0267    static RResult<std::uint32_t> SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer);
0268    static RResult<std::uint32_t> SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer);
0269    static RResult<std::uint32_t>
0270    DeserializeClusterSummary(const void *buffer, std::uint64_t bufSize, RClusterSummary &clusterSummary);
0271    static RResult<std::uint32_t>
0272    DeserializeClusterGroup(const void *buffer, std::uint64_t bufSize, RClusterGroup &clusterGroup);
0273 
0274    /// Serialize the schema description in `desc` into `buffer`. If `forHeaderExtension` is true, serialize only the
0275    /// fields and columns tagged as part of the header extension (see `RNTupleDescriptorBuilder::BeginHeaderExtension`).
0276    static RResult<std::uint32_t> SerializeSchemaDescription(void *buffer, const RNTupleDescriptor &desc,
0277                                                             const RContext &context, bool forHeaderExtension = false);
0278    static RResult<std::uint32_t> DeserializeSchemaDescription(const void *buffer, std::uint64_t bufSize,
0279                                                               ROOT::Internal::RNTupleDescriptorBuilder &descBuilder);
0280 
0281    static RResult<std::uint32_t>
0282    SerializeAttributeSet(const Experimental::RNTupleAttrSetDescriptor &attrSetDesc, void *buffer);
0283    static RResult<std::uint32_t>
0284    DeserializeAttributeSet(const void *buffer, std::uint64_t bufSize,
0285                            Experimental::Internal::RNTupleAttrSetDescriptorBuilder &attrSetDescBld);
0286 
0287    static RResult<RContext> SerializeHeader(void *buffer, const RNTupleDescriptor &desc);
0288    static RResult<std::uint32_t> SerializePageList(void *buffer, const RNTupleDescriptor &desc,
0289                                                    std::span<ROOT::DescriptorId_t> physClusterIDs,
0290                                                    const RContext &context);
0291    static RResult<std::uint32_t> SerializeFooter(void *buffer, const RNTupleDescriptor &desc, const RContext &context);
0292 
0293    static RResult<void>
0294    DeserializeHeader(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder);
0295    static RResult<void>
0296    DeserializeFooter(const void *buffer, std::uint64_t bufSize, ROOT::Internal::RNTupleDescriptorBuilder &descBuilder);
0297 
0298    enum class EDescriptorDeserializeMode {
0299       /// Deserializes the descriptor as-is without performing any additional fixup. The produced descriptor is
0300       /// unsuitable for reading or writing, but it's a faithful representation of the on-disk information.
0301       kRaw,
0302       /// Deserializes the descriptor and performs fixup on the suppressed column ranges. This produces a descriptor
0303       /// that is suitable for writing, but not reading.
0304       kForWriting,
0305       /// Deserializes the descriptor and performs fixup on the suppressed column ranges and on clusters, taking
0306       /// into account the header extension. This produces a descriptor that is suitable for reading.
0307       kForReading,
0308    };
0309    // The clusters vector must be initialized with the cluster summaries corresponding to the page list
0310    static RResult<void> DeserializePageList(const void *buffer, std::uint64_t bufSize,
0311                                             ROOT::DescriptorId_t clusterGroupId, RNTupleDescriptor &desc,
0312                                             EDescriptorDeserializeMode mode);
0313 
0314    // Helper functions to (de-)serialize the streamer info type extra information
0315    static std::string SerializeStreamerInfos(const StreamerInfoMap_t &infos);
0316    static RResult<StreamerInfoMap_t> DeserializeStreamerInfos(const std::string &extraTypeInfoContent);
0317 }; // class RNTupleSerializer
0318 
0319 } // namespace Internal
0320 } // namespace ROOT
0321 
0322 #endif // ROOT_RNTupleSerialize