File indexing completed on 2025-01-18 10:10:46
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #ifndef ROOT7_RNTupleSerialize
0018 #define ROOT7_RNTupleSerialize
0019
0020 #include <ROOT/RError.hxx>
0021 #include <ROOT/RNTupleUtil.hxx>
0022 #include <ROOT/RSpan.hxx>
0023
0024 #include <cstdint>
0025 #include <map>
0026 #include <string>
0027 #include <vector>
0028
0029 namespace ROOT {
0030 namespace Experimental {
0031
0032 enum class EColumnType;
0033 class RClusterDescriptor;
0034 class RNTupleDescriptor;
0035
0036 namespace Internal {
0037
0038 class RClusterDescriptorBuilder;
0039 class RNTupleDescriptorBuilder;
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055 class RNTupleSerializer {
0056 public:
0057 static constexpr std::uint16_t kEnvelopeTypeHeader = 0x01;
0058 static constexpr std::uint16_t kEnvelopeTypeFooter = 0x02;
0059 static constexpr std::uint16_t kEnvelopeTypePageList = 0x03;
0060
0061 static constexpr std::uint16_t kFlagRepetitiveField = 0x01;
0062
0063 static constexpr std::uint32_t kFlagSortAscColumn = 0x01;
0064 static constexpr std::uint32_t kFlagSortDesColumn = 0x02;
0065 static constexpr std::uint32_t kFlagNonNegativeColumn = 0x04;
0066 static constexpr std::uint32_t kFlagDeferredColumn = 0x08;
0067
0068 static constexpr DescriptorId_t kZeroFieldId = std::uint64_t(-2);
0069
0070 struct REnvelopeLink {
0071 std::uint64_t fLength = 0;
0072 RNTupleLocator fLocator;
0073 };
0074
0075 struct RClusterSummary {
0076 std::uint64_t fFirstEntry = 0;
0077 std::uint64_t fNEntries = 0;
0078
0079 std::int32_t fColumnGroupID = -1;
0080 };
0081
0082 struct RClusterGroup {
0083 std::uint64_t fMinEntry = 0;
0084 std::uint64_t fEntrySpan = 0;
0085 std::uint32_t fNClusters = 0;
0086 REnvelopeLink fPageListEnvelopeLink;
0087 };
0088
0089
0090
0091
0092 class RContext {
0093 private:
0094 std::uint64_t fHeaderSize = 0;
0095 std::uint64_t fHeaderXxHash3 = 0;
0096 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskFieldIDs;
0097 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskColumnIDs;
0098 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskClusterIDs;
0099 std::map<DescriptorId_t, DescriptorId_t> fMem2OnDiskClusterGroupIDs;
0100 std::vector<DescriptorId_t> fOnDisk2MemFieldIDs;
0101 std::vector<DescriptorId_t> fOnDisk2MemColumnIDs;
0102 std::vector<DescriptorId_t> fOnDisk2MemClusterIDs;
0103 std::vector<DescriptorId_t> fOnDisk2MemClusterGroupIDs;
0104 std::size_t fHeaderExtensionOffset = -1U;
0105
0106 public:
0107 void SetHeaderSize(std::uint64_t size) { fHeaderSize = size; }
0108 std::uint64_t GetHeaderSize() const { return fHeaderSize; }
0109 void SetHeaderXxHash3(std::uint64_t xxhash3) { fHeaderXxHash3 = xxhash3; }
0110 std::uint64_t GetHeaderXxHash3() const { return fHeaderXxHash3; }
0111
0112
0113 DescriptorId_t MapFieldId(DescriptorId_t memId) {
0114 auto onDiskId = fOnDisk2MemFieldIDs.size();
0115 const auto &p = fMem2OnDiskFieldIDs.try_emplace(memId, onDiskId);
0116 if (p.second)
0117 fOnDisk2MemFieldIDs.push_back(memId);
0118 return (*p.first).second;
0119 }
0120
0121
0122 DescriptorId_t MapColumnId(DescriptorId_t memId) {
0123 auto onDiskId = fOnDisk2MemColumnIDs.size();
0124 const auto &p = fMem2OnDiskColumnIDs.try_emplace(memId, onDiskId);
0125 if (p.second)
0126 fOnDisk2MemColumnIDs.push_back(memId);
0127 return (*p.first).second;
0128 }
0129 DescriptorId_t MapClusterId(DescriptorId_t memId) {
0130 auto onDiskId = fOnDisk2MemClusterIDs.size();
0131 fMem2OnDiskClusterIDs[memId] = onDiskId;
0132 fOnDisk2MemClusterIDs.push_back(memId);
0133 return onDiskId;
0134 }
0135 DescriptorId_t MapClusterGroupId(DescriptorId_t memId)
0136 {
0137 auto onDiskId = fOnDisk2MemClusterGroupIDs.size();
0138 fMem2OnDiskClusterGroupIDs[memId] = onDiskId;
0139 fOnDisk2MemClusterGroupIDs.push_back(memId);
0140 return onDiskId;
0141 }
0142
0143
0144
0145 void MapSchema(const RNTupleDescriptor &desc, bool forHeaderExtension);
0146
0147 DescriptorId_t GetOnDiskFieldId(DescriptorId_t memId) const { return fMem2OnDiskFieldIDs.at(memId); }
0148 DescriptorId_t GetOnDiskColumnId(DescriptorId_t memId) const { return fMem2OnDiskColumnIDs.at(memId); }
0149 DescriptorId_t GetOnDiskClusterId(DescriptorId_t memId) const { return fMem2OnDiskClusterIDs.at(memId); }
0150 DescriptorId_t GetOnDiskClusterGroupId(DescriptorId_t memId) const
0151 {
0152 return fMem2OnDiskClusterGroupIDs.at(memId);
0153 }
0154 DescriptorId_t GetMemFieldId(DescriptorId_t onDiskId) const { return fOnDisk2MemFieldIDs[onDiskId]; }
0155 DescriptorId_t GetMemColumnId(DescriptorId_t onDiskId) const { return fOnDisk2MemColumnIDs[onDiskId]; }
0156 DescriptorId_t GetMemClusterId(DescriptorId_t onDiskId) const { return fOnDisk2MemClusterIDs[onDiskId]; }
0157 DescriptorId_t GetMemClusterGroupId(DescriptorId_t onDiskId) const
0158 {
0159 return fOnDisk2MemClusterGroupIDs[onDiskId];
0160 }
0161
0162
0163
0164 const std::vector<DescriptorId_t> &GetOnDiskFieldList() const { return fOnDisk2MemFieldIDs; }
0165
0166 void BeginHeaderExtension() { fHeaderExtensionOffset = fOnDisk2MemFieldIDs.size(); }
0167
0168 std::size_t GetHeaderExtensionOffset() const { return fHeaderExtensionOffset; }
0169 };
0170
0171
0172 static std::uint32_t
0173 SerializeXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3, void *buffer);
0174
0175 static RResult<void> VerifyXxHash3(const unsigned char *data, std::uint64_t length, std::uint64_t &xxhash3);
0176 static RResult<void> VerifyXxHash3(const unsigned char *data, std::uint64_t length);
0177
0178 static std::uint32_t SerializeInt16(std::int16_t val, void *buffer);
0179 static std::uint32_t DeserializeInt16(const void *buffer, std::int16_t &val);
0180 static std::uint32_t SerializeUInt16(std::uint16_t val, void *buffer);
0181 static std::uint32_t DeserializeUInt16(const void *buffer, std::uint16_t &val);
0182
0183 static std::uint32_t SerializeInt32(std::int32_t val, void *buffer);
0184 static std::uint32_t DeserializeInt32(const void *buffer, std::int32_t &val);
0185 static std::uint32_t SerializeUInt32(std::uint32_t val, void *buffer);
0186 static std::uint32_t DeserializeUInt32(const void *buffer, std::uint32_t &val);
0187
0188 static std::uint32_t SerializeInt64(std::int64_t val, void *buffer);
0189 static std::uint32_t DeserializeInt64(const void *buffer, std::int64_t &val);
0190 static std::uint32_t SerializeUInt64(std::uint64_t val, void *buffer);
0191 static std::uint32_t DeserializeUInt64(const void *buffer, std::uint64_t &val);
0192
0193 static std::uint32_t SerializeString(const std::string &val, void *buffer);
0194 static RResult<std::uint32_t> DeserializeString(const void *buffer, std::uint64_t bufSize, std::string &val);
0195
0196
0197
0198 static std::uint16_t SerializeFieldStructure(ROOT::Experimental::ENTupleStructure structure, void *buffer);
0199 static std::uint16_t SerializeColumnType(ROOT::Experimental::EColumnType type, void *buffer);
0200 static RResult<std::uint16_t> DeserializeFieldStructure(const void *buffer, ROOT::Experimental::ENTupleStructure &structure);
0201 static RResult<std::uint16_t> DeserializeColumnType(const void *buffer, ROOT::Experimental::EColumnType &type);
0202
0203 static std::uint32_t SerializeEnvelopePreamble(std::uint16_t envelopeType, void *buffer);
0204 static std::uint32_t SerializeEnvelopePostscript(unsigned char *envelope, std::uint64_t size);
0205 static std::uint32_t
0206 SerializeEnvelopePostscript(unsigned char *envelope, std::uint64_t size, std::uint64_t &xxhash3);
0207
0208 static RResult<std::uint32_t>
0209 DeserializeEnvelope(const void *buffer, std::uint64_t bufSize, std::uint16_t expectedType);
0210 static RResult<std::uint32_t>
0211 DeserializeEnvelope(const void *buffer, std::uint64_t bufSize, std::uint16_t expectedType, std::uint64_t &xxhash3);
0212
0213 static std::uint32_t SerializeRecordFramePreamble(void *buffer);
0214 static std::uint32_t SerializeListFramePreamble(std::uint32_t nitems, void *buffer);
0215 static std::uint32_t SerializeFramePostscript(void *frame, std::uint64_t size);
0216 static RResult<std::uint32_t>
0217 DeserializeFrameHeader(const void *buffer, std::uint64_t bufSize, std::uint64_t &frameSize, std::uint32_t &nitems);
0218 static RResult<std::uint32_t>
0219 DeserializeFrameHeader(const void *buffer, std::uint64_t bufSize, std::uint64_t &frameSize);
0220
0221
0222
0223 static std::uint32_t SerializeFeatureFlags(const std::vector<std::uint64_t> &flags, void *buffer);
0224 static RResult<std::uint32_t>
0225 DeserializeFeatureFlags(const void *buffer, std::uint64_t bufSize, std::vector<std::uint64_t> &flags);
0226
0227 static std::uint32_t SerializeLocator(const RNTupleLocator &locator, void *buffer);
0228 static std::uint32_t SerializeEnvelopeLink(const REnvelopeLink &envelopeLink, void *buffer);
0229 static RResult<std::uint32_t> DeserializeLocator(const void *buffer, std::uint64_t bufSize, RNTupleLocator &locator);
0230 static RResult<std::uint32_t>
0231 DeserializeEnvelopeLink(const void *buffer, std::uint64_t bufSize, REnvelopeLink &envelopeLink);
0232
0233 static std::uint32_t SerializeClusterSummary(const RClusterSummary &clusterSummary, void *buffer);
0234 static std::uint32_t SerializeClusterGroup(const RClusterGroup &clusterGroup, void *buffer);
0235 static RResult<std::uint32_t>
0236 DeserializeClusterSummary(const void *buffer, std::uint64_t bufSize, RClusterSummary &clusterSummary);
0237 static RResult<std::uint32_t>
0238 DeserializeClusterGroup(const void *buffer, std::uint64_t bufSize, RClusterGroup &clusterGroup);
0239
0240
0241
0242 static std::uint32_t SerializeSchemaDescription(void *buffer, const RNTupleDescriptor &desc, const RContext &context,
0243 bool forHeaderExtension = false);
0244 static RResult<std::uint32_t>
0245 DeserializeSchemaDescription(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder);
0246
0247 static RContext SerializeHeader(void *buffer, const RNTupleDescriptor &desc);
0248 static std::uint32_t SerializePageList(void *buffer, const RNTupleDescriptor &desc,
0249 std::span<DescriptorId_t> physClusterIDs, const RContext &context);
0250 static std::uint32_t SerializeFooter(void *buffer, const RNTupleDescriptor &desc, const RContext &context);
0251
0252 static RResult<void>
0253 DeserializeHeader(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder);
0254 static RResult<void>
0255 DeserializeFooter(const void *buffer, std::uint64_t bufSize, RNTupleDescriptorBuilder &descBuilder);
0256
0257 static RResult<void> DeserializePageList(const void *buffer, std::uint64_t bufSize, DescriptorId_t clusterGroupId,
0258 RNTupleDescriptor &desc);
0259 };
0260
0261 }
0262 }
0263 }
0264
0265 #endif