File indexing completed on 2025-01-18 10:10:45
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #ifndef ROOT7_RNTupleDescriptor
0018 #define ROOT7_RNTupleDescriptor
0019
0020 #include <ROOT/RColumnModel.hxx>
0021 #include <ROOT/RError.hxx>
0022 #include <ROOT/RNTupleSerialize.hxx>
0023 #include <ROOT/RNTupleUtil.hxx>
0024 #include <ROOT/RSpan.hxx>
0025 #include <string_view>
0026
0027 #include <algorithm>
0028 #include <chrono>
0029 #include <functional>
0030 #include <iterator>
0031 #include <map>
0032 #include <memory>
0033 #include <ostream>
0034 #include <vector>
0035 #include <set>
0036 #include <string>
0037 #include <unordered_map>
0038 #include <unordered_set>
0039
0040 namespace ROOT {
0041 namespace Experimental {
0042
// Forward declarations of classes that this header only refers to by name, pointer or reference.
class RFieldBase;
class RNTupleDescriptor;
class RNTupleModel;

namespace Internal {
// Column element type, used by RClusterDescriptor::RPageRange
class RColumnElementBase;

// Builder classes; they are befriended by the descriptor classes and defined further below
class RClusterDescriptorBuilder;
class RClusterGroupDescriptorBuilder;
class RColumnDescriptorBuilder;
class RColumnGroupDescriptorBuilder;
class RFieldDescriptorBuilder;
class RNTupleDescriptorBuilder;
} // namespace Internal
0059
0060
0061
0062
0063
0064
0065
0066
0067 class RFieldDescriptor {
0068 friend class Internal::RNTupleDescriptorBuilder;
0069 friend class Internal::RFieldDescriptorBuilder;
0070
0071 private:
0072 DescriptorId_t fFieldId = kInvalidDescriptorId;
0073
0074 std::uint32_t fFieldVersion = 0;
0075
0076 std::uint32_t fTypeVersion = 0;
0077
0078 std::string fFieldName;
0079
0080 std::string fFieldDescription;
0081
0082 std::string fTypeName;
0083
0084 std::string fTypeAlias;
0085
0086 std::uint64_t fNRepetitions = 0;
0087
0088 ENTupleStructure fStructure = ENTupleStructure::kInvalid;
0089
0090 DescriptorId_t fParentId = kInvalidDescriptorId;
0091
0092
0093 std::vector<DescriptorId_t> fLinkIds;
0094
0095 public:
0096 RFieldDescriptor() = default;
0097 RFieldDescriptor(const RFieldDescriptor &other) = delete;
0098 RFieldDescriptor &operator =(const RFieldDescriptor &other) = delete;
0099 RFieldDescriptor(RFieldDescriptor &&other) = default;
0100 RFieldDescriptor &operator =(RFieldDescriptor &&other) = default;
0101
0102 bool operator==(const RFieldDescriptor &other) const;
0103
0104 RFieldDescriptor Clone() const;
0105
0106
0107 std::unique_ptr<RFieldBase> CreateField(const RNTupleDescriptor &ntplDesc) const;
0108
0109 DescriptorId_t GetId() const { return fFieldId; }
0110 std::uint32_t GetFieldVersion() const { return fFieldVersion; }
0111 std::uint32_t GetTypeVersion() const { return fTypeVersion; }
0112 std::string GetFieldName() const { return fFieldName; }
0113 std::string GetFieldDescription() const { return fFieldDescription; }
0114 std::string GetTypeName() const { return fTypeName; }
0115 std::string GetTypeAlias() const { return fTypeAlias; }
0116 std::uint64_t GetNRepetitions() const { return fNRepetitions; }
0117 ENTupleStructure GetStructure() const { return fStructure; }
0118 DescriptorId_t GetParentId() const { return fParentId; }
0119 const std::vector<DescriptorId_t> &GetLinkIds() const { return fLinkIds; }
0120 };
0121
0122
0123
0124
0125
0126
0127
0128
0129
0130 class RColumnDescriptor {
0131 friend class Internal::RColumnDescriptorBuilder;
0132 friend class Internal::RNTupleDescriptorBuilder;
0133
0134 private:
0135
0136 DescriptorId_t fLogicalColumnId = kInvalidDescriptorId;
0137
0138 DescriptorId_t fPhysicalColumnId = kInvalidDescriptorId;
0139
0140 RColumnModel fModel;
0141
0142 DescriptorId_t fFieldId = kInvalidDescriptorId;
0143
0144 std::uint32_t fIndex;
0145
0146
0147 std::uint64_t fFirstElementIndex = 0U;
0148
0149 public:
0150 RColumnDescriptor() = default;
0151 RColumnDescriptor(const RColumnDescriptor &other) = delete;
0152 RColumnDescriptor &operator =(const RColumnDescriptor &other) = delete;
0153 RColumnDescriptor(RColumnDescriptor &&other) = default;
0154 RColumnDescriptor &operator =(RColumnDescriptor &&other) = default;
0155
0156 bool operator==(const RColumnDescriptor &other) const;
0157
0158 RColumnDescriptor Clone() const;
0159
0160 DescriptorId_t GetLogicalId() const { return fLogicalColumnId; }
0161 DescriptorId_t GetPhysicalId() const { return fPhysicalColumnId; }
0162 RColumnModel GetModel() const { return fModel; }
0163 std::uint32_t GetIndex() const { return fIndex; }
0164 DescriptorId_t GetFieldId() const { return fFieldId; }
0165 bool IsAliasColumn() const { return fPhysicalColumnId != fLogicalColumnId; }
0166 std::uint64_t GetFirstElementIndex() const { return fFirstElementIndex; }
0167 bool IsDeferredColumn() const { return fFirstElementIndex > 0; }
0168 };
0169
0170
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180 class RColumnGroupDescriptor {
0181 friend class Internal::RColumnGroupDescriptorBuilder;
0182
0183 private:
0184 DescriptorId_t fColumnGroupId = kInvalidDescriptorId;
0185 std::unordered_set<DescriptorId_t> fPhysicalColumnIds;
0186
0187 public:
0188 RColumnGroupDescriptor() = default;
0189 RColumnGroupDescriptor(const RColumnGroupDescriptor &other) = delete;
0190 RColumnGroupDescriptor &operator=(const RColumnGroupDescriptor &other) = delete;
0191 RColumnGroupDescriptor(RColumnGroupDescriptor &&other) = default;
0192 RColumnGroupDescriptor &operator=(RColumnGroupDescriptor &&other) = default;
0193
0194 bool operator==(const RColumnGroupDescriptor &other) const;
0195
0196 DescriptorId_t GetId() const { return fColumnGroupId; }
0197 const std::unordered_set<DescriptorId_t> &GetPhysicalColumnIds() const { return fPhysicalColumnIds; }
0198 bool Contains(DescriptorId_t physicalId) const
0199 {
0200 return fPhysicalColumnIds.empty() || fPhysicalColumnIds.count(physicalId) > 0;
0201 }
0202 bool HasAllColumns() const { return fPhysicalColumnIds.empty(); }
0203 };
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218 class RClusterDescriptor {
0219 friend class Internal::RClusterDescriptorBuilder;
0220
0221 public:
0222
0223 struct RColumnRange {
0224 DescriptorId_t fPhysicalColumnId = kInvalidDescriptorId;
0225
0226 NTupleSize_t fFirstElementIndex = kInvalidNTupleIndex;
0227
0228 ClusterSize_t fNElements = kInvalidClusterIndex;
0229
0230
0231 int fCompressionSettings = kUnknownCompressionSettings;
0232
0233
0234
0235
0236 bool operator==(const RColumnRange &other) const {
0237 return fPhysicalColumnId == other.fPhysicalColumnId && fFirstElementIndex == other.fFirstElementIndex &&
0238 fNElements == other.fNElements && fCompressionSettings == other.fCompressionSettings;
0239 }
0240
0241 bool Contains(NTupleSize_t index) const {
0242 return (fFirstElementIndex <= index && (fFirstElementIndex + fNElements) > index);
0243 }
0244 };
0245
0246
0247 class RPageRange {
0248 friend class Internal::RClusterDescriptorBuilder;
0249
0250
0251
0252
0253
0254 std::size_t ExtendToFitColumnRange(const RColumnRange &columnRange, const Internal::RColumnElementBase &element,
0255 std::size_t pageSize);
0256
0257 public:
0258
0259
0260 struct RPageInfo {
0261
0262 std::uint32_t fNElements = std::uint32_t(-1);
0263
0264 RNTupleLocator fLocator;
0265
0266 bool operator==(const RPageInfo &other) const {
0267 return fNElements == other.fNElements && fLocator == other.fLocator;
0268 }
0269 };
0270 struct RPageInfoExtended : RPageInfo {
0271
0272 ClusterSize_t::ValueType fFirstInPage = 0;
0273
0274 NTupleSize_t fPageNo = 0;
0275
0276 RPageInfoExtended() = default;
0277 RPageInfoExtended(const RPageInfo &pi, ClusterSize_t::ValueType i, NTupleSize_t n)
0278 : RPageInfo(pi), fFirstInPage(i), fPageNo(n)
0279 {
0280 }
0281 };
0282
0283 RPageRange() = default;
0284 RPageRange(const RPageRange &other) = delete;
0285 RPageRange &operator =(const RPageRange &other) = delete;
0286 RPageRange(RPageRange &&other) = default;
0287 RPageRange &operator =(RPageRange &&other) = default;
0288
0289 RPageRange Clone() const {
0290 RPageRange clone;
0291 clone.fPhysicalColumnId = fPhysicalColumnId;
0292 clone.fPageInfos = fPageInfos;
0293 return clone;
0294 }
0295
0296
0297 RPageInfoExtended Find(ClusterSize_t::ValueType idxInCluster) const;
0298
0299 DescriptorId_t fPhysicalColumnId = kInvalidDescriptorId;
0300 std::vector<RPageInfo> fPageInfos;
0301
0302 bool operator==(const RPageRange &other) const {
0303 return fPhysicalColumnId == other.fPhysicalColumnId && fPageInfos == other.fPageInfos;
0304 }
0305 };
0306
0307 private:
0308 DescriptorId_t fClusterId = kInvalidDescriptorId;
0309
0310 NTupleSize_t fFirstEntryIndex = kInvalidNTupleIndex;
0311
0312 ClusterSize_t fNEntries = kInvalidClusterIndex;
0313
0314 std::unordered_map<DescriptorId_t, RColumnRange> fColumnRanges;
0315 std::unordered_map<DescriptorId_t, RPageRange> fPageRanges;
0316
0317 public:
0318 RClusterDescriptor() = default;
0319 RClusterDescriptor(const RClusterDescriptor &other) = delete;
0320 RClusterDescriptor &operator =(const RClusterDescriptor &other) = delete;
0321 RClusterDescriptor(RClusterDescriptor &&other) = default;
0322 RClusterDescriptor &operator =(RClusterDescriptor &&other) = default;
0323
0324 RClusterDescriptor Clone() const;
0325
0326 bool operator==(const RClusterDescriptor &other) const;
0327
0328 DescriptorId_t GetId() const { return fClusterId; }
0329 NTupleSize_t GetFirstEntryIndex() const { return fFirstEntryIndex; }
0330 ClusterSize_t GetNEntries() const { return fNEntries; }
0331 const RColumnRange &GetColumnRange(DescriptorId_t physicalId) const { return fColumnRanges.at(physicalId); }
0332 const RPageRange &GetPageRange(DescriptorId_t physicalId) const { return fPageRanges.at(physicalId); }
0333 bool ContainsColumn(DescriptorId_t physicalId) const
0334 {
0335 return fColumnRanges.find(physicalId) != fColumnRanges.end();
0336 }
0337 std::unordered_set<DescriptorId_t> GetColumnIds() const;
0338 std::uint64_t GetBytesOnStorage() const;
0339 };
0340
0341
0342
0343
0344
0345
0346
0347
0348
0349
0350
0351
0352
0353 class RClusterGroupDescriptor {
0354 friend class Internal::RClusterGroupDescriptorBuilder;
0355
0356 private:
0357 DescriptorId_t fClusterGroupId = kInvalidDescriptorId;
0358
0359 std::vector<DescriptorId_t> fClusterIds;
0360
0361 RNTupleLocator fPageListLocator;
0362
0363 std::uint64_t fPageListLength = 0;
0364
0365 std::uint64_t fMinEntry = 0;
0366
0367 std::uint64_t fEntrySpan = 0;
0368
0369 std::uint32_t fNClusters = 0;
0370
0371 public:
0372 RClusterGroupDescriptor() = default;
0373 RClusterGroupDescriptor(const RClusterGroupDescriptor &other) = delete;
0374 RClusterGroupDescriptor &operator=(const RClusterGroupDescriptor &other) = delete;
0375 RClusterGroupDescriptor(RClusterGroupDescriptor &&other) = default;
0376 RClusterGroupDescriptor &operator=(RClusterGroupDescriptor &&other) = default;
0377
0378 RClusterGroupDescriptor Clone() const;
0379
0380 RClusterGroupDescriptor CloneSummary() const;
0381
0382 bool operator==(const RClusterGroupDescriptor &other) const;
0383
0384 DescriptorId_t GetId() const { return fClusterGroupId; }
0385 std::uint32_t GetNClusters() const { return fNClusters; }
0386 RNTupleLocator GetPageListLocator() const { return fPageListLocator; }
0387 std::uint64_t GetPageListLength() const { return fPageListLength; }
0388 const std::vector<DescriptorId_t> &GetClusterIds() const { return fClusterIds; }
0389 std::uint64_t GetMinEntry() const { return fMinEntry; }
0390 std::uint64_t GetEntrySpan() const { return fEntrySpan; }
0391
0392
0393 bool HasClusterDetails() const { return !fClusterIds.empty(); }
0394 };
0395
0396
0397
0398
0399
0400
0401
0402
0403
0404
0405
0406
0407
0408
0409
0410
0411
0412
0413
0414
0415
0416 class RNTupleDescriptor {
0417 friend class Internal::RNTupleDescriptorBuilder;
0418
0419 public:
0420 class RHeaderExtension;
0421
0422 private:
0423
0424 std::string fName;
0425
0426 std::string fDescription;
0427
0428 std::uint64_t fOnDiskHeaderXxHash3 = 0;
0429 std::uint64_t fOnDiskHeaderSize = 0;
0430 std::uint64_t fOnDiskFooterSize = 0;
0431
0432 std::uint64_t fNEntries = 0;
0433 std::uint64_t fNClusters = 0;
0434 std::uint64_t fNPhysicalColumns = 0;
0435
0436
0437
0438
0439
0440
0441
0442
0443 std::uint64_t fGeneration = 0;
0444
0445 std::set<unsigned int> fFeatureFlags;
0446 std::unordered_map<DescriptorId_t, RFieldDescriptor> fFieldDescriptors;
0447 std::unordered_map<DescriptorId_t, RColumnDescriptor> fColumnDescriptors;
0448 std::unordered_map<DescriptorId_t, RClusterGroupDescriptor> fClusterGroupDescriptors;
0449
0450
0451 std::unordered_map<DescriptorId_t, RClusterDescriptor> fClusterDescriptors;
0452 std::unique_ptr<RHeaderExtension> fHeaderExtension;
0453
0454 public:
0455 static constexpr unsigned int kFeatureFlagTest = 137;
0456
0457
0458
0459
0460
0461
0462
0463
0464 class RHeaderExtension {
0465 friend class Internal::RNTupleDescriptorBuilder;
0466
0467 private:
0468
0469
0470 std::vector<DescriptorId_t> fFields;
0471
0472 std::uint64_t fNLogicalColumns = 0;
0473 std::uint64_t fNPhysicalColumns = 0;
0474
0475 void AddFieldId(DescriptorId_t id) { fFields.push_back(id); }
0476 void AddColumn(bool isAliasColumn)
0477 {
0478 fNLogicalColumns++;
0479 if (!isAliasColumn)
0480 fNPhysicalColumns++;
0481 }
0482
0483 public:
0484 std::size_t GetNFields() const { return fFields.size(); }
0485 std::size_t GetNLogicalColumns() const { return fNLogicalColumns; }
0486 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
0487
0488 std::vector<DescriptorId_t> GetTopLevelFields(const RNTupleDescriptor &desc) const;
0489 };
0490
0491
0492
0493
0494
0495
0496
0497
0498 class RColumnDescriptorIterable {
0499 private:
0500
0501 const RNTupleDescriptor &fNTuple;
0502
0503 std::vector<DescriptorId_t> fColumns = {};
0504
0505 void CollectColumnIds(DescriptorId_t fieldId);
0506 public:
0507 class RIterator {
0508 private:
0509
0510 const RNTupleDescriptor &fNTuple;
0511
0512 const std::vector<DescriptorId_t> &fColumns;
0513 std::size_t fIndex = 0;
0514 public:
0515 using iterator_category = std::forward_iterator_tag;
0516 using iterator = RIterator;
0517 using value_type = RFieldDescriptor;
0518 using difference_type = std::ptrdiff_t;
0519 using pointer = RColumnDescriptor *;
0520 using reference = const RColumnDescriptor &;
0521
0522 RIterator(const RNTupleDescriptor &ntuple, const std::vector<DescriptorId_t> &columns, std::size_t index)
0523 : fNTuple(ntuple), fColumns(columns), fIndex(index) {}
0524 iterator operator++() { ++fIndex; return *this; }
0525 reference operator*() { return fNTuple.GetColumnDescriptor(fColumns.at(fIndex)); }
0526 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0527 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0528 };
0529
0530 RColumnDescriptorIterable(const RNTupleDescriptor &ntuple, const RFieldDescriptor &field);
0531 RColumnDescriptorIterable(const RNTupleDescriptor &ntuple);
0532
0533 RIterator begin() { return RIterator(fNTuple, fColumns, 0); }
0534 RIterator end() { return RIterator(fNTuple, fColumns, fColumns.size()); }
0535 };
0536
0537
0538
0539
0540
0541
0542
0543
0544 class RFieldDescriptorIterable {
0545 private:
0546
0547 const RNTupleDescriptor& fNTuple;
0548
0549
0550 std::vector<DescriptorId_t> fFieldChildren = {};
0551
0552 public:
0553 class RIterator {
0554 private:
0555
0556 const RNTupleDescriptor& fNTuple;
0557
0558 const std::vector<DescriptorId_t>& fFieldChildren;
0559 std::size_t fIndex = 0;
0560 public:
0561 using iterator_category = std::forward_iterator_tag;
0562 using iterator = RIterator;
0563 using value_type = RFieldDescriptor;
0564 using difference_type = std::ptrdiff_t;
0565 using pointer = RFieldDescriptor*;
0566 using reference = const RFieldDescriptor&;
0567
0568 RIterator(const RNTupleDescriptor& ntuple, const std::vector<DescriptorId_t>& fieldChildren,
0569 std::size_t index) : fNTuple(ntuple), fFieldChildren(fieldChildren), fIndex(index) {}
0570 iterator operator++() { ++fIndex; return *this; }
0571 reference operator*() {
0572 return fNTuple.GetFieldDescriptor(
0573 fFieldChildren.at(fIndex)
0574 );
0575 }
0576 bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
0577 bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
0578 };
0579 RFieldDescriptorIterable(const RNTupleDescriptor& ntuple, const RFieldDescriptor& field)
0580 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds()) {}
0581
0582 RFieldDescriptorIterable(const RNTupleDescriptor& ntuple, const RFieldDescriptor& field,
0583 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator)
0584 : fNTuple(ntuple), fFieldChildren(field.GetLinkIds())
0585 {
0586 std::sort(fFieldChildren.begin(), fFieldChildren.end(), comparator);
0587 }
0588 RIterator begin() {
0589 return RIterator(fNTuple, fFieldChildren, 0);
0590 }
0591 RIterator end() {
0592 return RIterator(fNTuple, fFieldChildren, fFieldChildren.size());
0593 }
0594 };
0595
0596
0597
0598
0599
0600
0601
0602
0603
0604
0605
0606 class RClusterGroupDescriptorIterable {
0607 private:
0608
0609 const RNTupleDescriptor &fNTuple;
0610
0611 public:
0612 class RIterator {
0613 private:
0614
0615 const RNTupleDescriptor &fNTuple;
0616 std::size_t fIndex = 0;
0617
0618 public:
0619 using iterator_category = std::forward_iterator_tag;
0620 using iterator = RIterator;
0621 using value_type = RClusterGroupDescriptor;
0622 using difference_type = std::ptrdiff_t;
0623 using pointer = RClusterGroupDescriptor *;
0624 using reference = const RClusterGroupDescriptor &;
0625
0626 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
0627 iterator operator++()
0628 {
0629 ++fIndex;
0630 return *this;
0631 }
0632 reference operator*()
0633 {
0634 auto it = fNTuple.fClusterGroupDescriptors.begin();
0635 std::advance(it, fIndex);
0636 return it->second;
0637 }
0638 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0639 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0640 };
0641
0642 RClusterGroupDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) {}
0643 RIterator begin() { return RIterator(fNTuple, 0); }
0644 RIterator end() { return RIterator(fNTuple, fNTuple.GetNClusterGroups()); }
0645 };
0646
0647
0648
0649
0650
0651
0652
0653
0654
0655
0656
0657 class RClusterDescriptorIterable {
0658 private:
0659
0660 const RNTupleDescriptor &fNTuple;
0661 public:
0662 class RIterator {
0663 private:
0664
0665 const RNTupleDescriptor &fNTuple;
0666 std::size_t fIndex = 0;
0667 public:
0668 using iterator_category = std::forward_iterator_tag;
0669 using iterator = RIterator;
0670 using value_type = RClusterDescriptor;
0671 using difference_type = std::ptrdiff_t;
0672 using pointer = RClusterDescriptor *;
0673 using reference = const RClusterDescriptor &;
0674
0675 RIterator(const RNTupleDescriptor &ntuple, std::size_t index) : fNTuple(ntuple), fIndex(index) {}
0676 iterator operator++() { ++fIndex; return *this; }
0677 reference operator*() {
0678 auto it = fNTuple.fClusterDescriptors.begin();
0679 std::advance(it, fIndex);
0680 return it->second;
0681 }
0682 bool operator!=(const iterator &rh) const { return fIndex != rh.fIndex; }
0683 bool operator==(const iterator &rh) const { return fIndex == rh.fIndex; }
0684 };
0685
0686 RClusterDescriptorIterable(const RNTupleDescriptor &ntuple) : fNTuple(ntuple) { }
0687 RIterator begin() { return RIterator(fNTuple, 0); }
0688 RIterator end() { return RIterator(fNTuple, fNTuple.GetNActiveClusters()); }
0689 };
0690
0691 RNTupleDescriptor() = default;
0692 RNTupleDescriptor(const RNTupleDescriptor &other) = delete;
0693 RNTupleDescriptor &operator=(const RNTupleDescriptor &other) = delete;
0694 RNTupleDescriptor(RNTupleDescriptor &&other) = default;
0695 RNTupleDescriptor &operator=(RNTupleDescriptor &&other) = default;
0696
0697 std::unique_ptr<RNTupleDescriptor> Clone() const;
0698
0699 bool operator ==(const RNTupleDescriptor &other) const;
0700
0701 std::uint64_t GetOnDiskHeaderXxHash3() const { return fOnDiskHeaderXxHash3; }
0702 std::uint64_t GetOnDiskHeaderSize() const { return fOnDiskHeaderSize; }
0703 std::uint64_t GetOnDiskFooterSize() const { return fOnDiskFooterSize; }
0704
0705 const RFieldDescriptor& GetFieldDescriptor(DescriptorId_t fieldId) const {
0706 return fFieldDescriptors.at(fieldId);
0707 }
0708 const RColumnDescriptor& GetColumnDescriptor(DescriptorId_t columnId) const {
0709 return fColumnDescriptors.at(columnId);
0710 }
0711 const RClusterGroupDescriptor &GetClusterGroupDescriptor(DescriptorId_t clusterGroupId) const
0712 {
0713 return fClusterGroupDescriptors.at(clusterGroupId);
0714 }
0715 const RClusterDescriptor& GetClusterDescriptor(DescriptorId_t clusterId) const {
0716 return fClusterDescriptors.at(clusterId);
0717 }
0718
0719 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor& fieldDesc) const {
0720 return RFieldDescriptorIterable(*this, fieldDesc);
0721 }
0722 RFieldDescriptorIterable GetFieldIterable(const RFieldDescriptor& fieldDesc,
0723 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
0724 {
0725 return RFieldDescriptorIterable(*this, fieldDesc, comparator);
0726 }
0727 RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId) const {
0728 return GetFieldIterable(GetFieldDescriptor(fieldId));
0729 }
0730 RFieldDescriptorIterable GetFieldIterable(DescriptorId_t fieldId,
0731 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
0732 {
0733 return GetFieldIterable(GetFieldDescriptor(fieldId), comparator);
0734 }
0735 RFieldDescriptorIterable GetTopLevelFields() const {
0736 return GetFieldIterable(GetFieldZeroId());
0737 }
0738 RFieldDescriptorIterable GetTopLevelFields(
0739 const std::function<bool(DescriptorId_t, DescriptorId_t)>& comparator) const
0740 {
0741 return GetFieldIterable(GetFieldZeroId(), comparator);
0742 }
0743
0744 RColumnDescriptorIterable GetColumnIterable() const
0745 {
0746 return RColumnDescriptorIterable(*this);
0747 }
0748 RColumnDescriptorIterable GetColumnIterable(const RFieldDescriptor &fieldDesc) const
0749 {
0750 return RColumnDescriptorIterable(*this, fieldDesc);
0751 }
0752 RColumnDescriptorIterable GetColumnIterable(DescriptorId_t fieldId) const
0753 {
0754 return RColumnDescriptorIterable(*this, GetFieldDescriptor(fieldId));
0755 }
0756
0757 RClusterGroupDescriptorIterable GetClusterGroupIterable() const { return RClusterGroupDescriptorIterable(*this); }
0758
0759 RClusterDescriptorIterable GetClusterIterable() const
0760 {
0761 return RClusterDescriptorIterable(*this);
0762 }
0763
0764 std::string GetName() const { return fName; }
0765 std::string GetDescription() const { return fDescription; }
0766
0767 std::size_t GetNFields() const { return fFieldDescriptors.size(); }
0768 std::size_t GetNLogicalColumns() const { return fColumnDescriptors.size(); }
0769 std::size_t GetNPhysicalColumns() const { return fNPhysicalColumns; }
0770 std::size_t GetNClusterGroups() const { return fClusterGroupDescriptors.size(); }
0771 std::size_t GetNClusters() const { return fNClusters; }
0772 std::size_t GetNActiveClusters() const { return fClusterDescriptors.size(); }
0773
0774
0775 NTupleSize_t GetNEntries() const { return fNEntries; }
0776 NTupleSize_t GetNElements(DescriptorId_t physicalColumnId) const;
0777
0778
0779 DescriptorId_t GetFieldZeroId() const;
0780 const RFieldDescriptor &GetFieldZero() const { return GetFieldDescriptor(GetFieldZeroId()); }
0781 DescriptorId_t FindFieldId(std::string_view fieldName, DescriptorId_t parentId) const;
0782
0783 DescriptorId_t FindFieldId(std::string_view fieldName) const;
0784 DescriptorId_t FindLogicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
0785 DescriptorId_t FindPhysicalColumnId(DescriptorId_t fieldId, std::uint32_t columnIndex) const;
0786 DescriptorId_t FindClusterId(DescriptorId_t physicalColumnId, NTupleSize_t index) const;
0787 DescriptorId_t FindNextClusterId(DescriptorId_t clusterId) const;
0788 DescriptorId_t FindPrevClusterId(DescriptorId_t clusterId) const;
0789
0790
0791
0792 std::string GetQualifiedFieldName(DescriptorId_t fieldId) const;
0793
0794 bool HasFeature(unsigned int flag) const { return fFeatureFlags.count(flag) > 0; }
0795 std::vector<std::uint64_t> GetFeatureFlags() const;
0796
0797
0798 const RHeaderExtension *GetHeaderExtension() const { return fHeaderExtension.get(); }
0799
0800
0801 RResult<void> AddClusterGroupDetails(DescriptorId_t clusterGroupId, std::vector<RClusterDescriptor> &clusterDescs);
0802 RResult<void> DropClusterGroupDetails(DescriptorId_t clusterGroupId);
0803
0804 std::uint64_t GetGeneration() const { return fGeneration; }
0805 void IncGeneration() { fGeneration++; }
0806
0807
0808 std::unique_ptr<RNTupleModel> CreateModel() const;
0809 void PrintInfo(std::ostream &output) const;
0810 };
0811
0812 namespace Internal {
0813
0814
0815
0816
0817
0818
0819
0820
0821
0822
0823
0824 class RColumnDescriptorBuilder {
0825 private:
0826 RColumnDescriptor fColumn = RColumnDescriptor();
0827 public:
0828
0829 RColumnDescriptorBuilder() = default;
0830
0831 RColumnDescriptorBuilder &LogicalColumnId(DescriptorId_t logicalColumnId)
0832 {
0833 fColumn.fLogicalColumnId = logicalColumnId;
0834 return *this;
0835 }
0836 RColumnDescriptorBuilder &PhysicalColumnId(DescriptorId_t physicalColumnId)
0837 {
0838 fColumn.fPhysicalColumnId = physicalColumnId;
0839 return *this;
0840 }
0841 RColumnDescriptorBuilder& Model(const RColumnModel &model) {
0842 fColumn.fModel = model;
0843 return *this;
0844 }
0845 RColumnDescriptorBuilder& FieldId(DescriptorId_t fieldId) {
0846 fColumn.fFieldId = fieldId;
0847 return *this;
0848 }
0849 RColumnDescriptorBuilder& Index(std::uint32_t index) {
0850 fColumn.fIndex = index;
0851 return *this;
0852 }
0853 RColumnDescriptorBuilder &FirstElementIndex(std::uint64_t firstElementIdx)
0854 {
0855 fColumn.fFirstElementIndex = firstElementIdx;
0856 return *this;
0857 }
0858 DescriptorId_t GetFieldId() const { return fColumn.fFieldId; }
0859
0860
0861 RResult<RColumnDescriptor> MakeDescriptor() const;
0862 };
0863
0864
0865
0866
0867
0868
0869
0870
0871
0872
0873
0874
0875
0876
0877
0878
0879 class RFieldDescriptorBuilder {
0880 private:
0881 RFieldDescriptor fField = RFieldDescriptor();
0882 public:
0883
0884 RFieldDescriptorBuilder() = default;
0885
0886
0887
0888
0889
0890
0891
0892 explicit RFieldDescriptorBuilder(const RFieldDescriptor& fieldDesc);
0893
0894
0895 static RFieldDescriptorBuilder FromField(const RFieldBase &field);
0896
0897 RFieldDescriptorBuilder& FieldId(DescriptorId_t fieldId) {
0898 fField.fFieldId = fieldId;
0899 return *this;
0900 }
0901 RFieldDescriptorBuilder &FieldVersion(std::uint32_t fieldVersion)
0902 {
0903 fField.fFieldVersion = fieldVersion;
0904 return *this;
0905 }
0906 RFieldDescriptorBuilder &TypeVersion(std::uint32_t typeVersion)
0907 {
0908 fField.fTypeVersion = typeVersion;
0909 return *this;
0910 }
0911 RFieldDescriptorBuilder& ParentId(DescriptorId_t id) {
0912 fField.fParentId = id;
0913 return *this;
0914 }
0915 RFieldDescriptorBuilder& FieldName(const std::string& fieldName) {
0916 fField.fFieldName = fieldName;
0917 return *this;
0918 }
0919 RFieldDescriptorBuilder& FieldDescription(const std::string& fieldDescription) {
0920 fField.fFieldDescription = fieldDescription;
0921 return *this;
0922 }
0923 RFieldDescriptorBuilder& TypeName(const std::string& typeName) {
0924 fField.fTypeName = typeName;
0925 return *this;
0926 }
0927 RFieldDescriptorBuilder &TypeAlias(const std::string &typeAlias)
0928 {
0929 fField.fTypeAlias = typeAlias;
0930 return *this;
0931 }
0932 RFieldDescriptorBuilder& NRepetitions(std::uint64_t nRepetitions) {
0933 fField.fNRepetitions = nRepetitions;
0934 return *this;
0935 }
0936 RFieldDescriptorBuilder& Structure(const ENTupleStructure& structure) {
0937 fField.fStructure = structure;
0938 return *this;
0939 }
0940 DescriptorId_t GetParentId() const { return fField.fParentId; }
0941
0942
0943 RResult<RFieldDescriptor> MakeDescriptor() const;
0944 };
0945
0946
0947
0948
0949
0950
0951
0952
0953
0954
0955
0956
0957 class RClusterDescriptorBuilder {
0958 private:
0959 RClusterDescriptor fCluster;
0960
0961 public:
0962 RClusterDescriptorBuilder &ClusterId(DescriptorId_t clusterId)
0963 {
0964 fCluster.fClusterId = clusterId;
0965 return *this;
0966 }
0967
0968 RClusterDescriptorBuilder &FirstEntryIndex(std::uint64_t firstEntryIndex)
0969 {
0970 fCluster.fFirstEntryIndex = firstEntryIndex;
0971 return *this;
0972 }
0973
0974 RClusterDescriptorBuilder &NEntries(std::uint64_t nEntries)
0975 {
0976 fCluster.fNEntries = nEntries;
0977 return *this;
0978 }
0979
0980 RResult<void> CommitColumnRange(DescriptorId_t physicalId, std::uint64_t firstElementIndex,
0981 std::uint32_t compressionSettings, const RClusterDescriptor::RPageRange &pageRange);
0982
0983
0984
0985
0986 RClusterDescriptorBuilder &AddDeferredColumnRanges(const RNTupleDescriptor &desc);
0987
0988
0989 RResult<RClusterDescriptor> MoveDescriptor();
0990 };
0991
0992
0993
0994
0995
0996
0997
0998
0999 class RClusterGroupDescriptorBuilder {
1000 private:
1001 RClusterGroupDescriptor fClusterGroup;
1002
1003 public:
1004 RClusterGroupDescriptorBuilder() = default;
1005 static RClusterGroupDescriptorBuilder FromSummary(const RClusterGroupDescriptor &clusterGroupDesc);
1006
1007 RClusterGroupDescriptorBuilder &ClusterGroupId(DescriptorId_t clusterGroupId)
1008 {
1009 fClusterGroup.fClusterGroupId = clusterGroupId;
1010 return *this;
1011 }
1012 RClusterGroupDescriptorBuilder &PageListLocator(const RNTupleLocator &pageListLocator)
1013 {
1014 fClusterGroup.fPageListLocator = pageListLocator;
1015 return *this;
1016 }
1017 RClusterGroupDescriptorBuilder &PageListLength(std::uint64_t pageListLength)
1018 {
1019 fClusterGroup.fPageListLength = pageListLength;
1020 return *this;
1021 }
1022 RClusterGroupDescriptorBuilder &MinEntry(std::uint64_t minEntry)
1023 {
1024 fClusterGroup.fMinEntry = minEntry;
1025 return *this;
1026 }
1027 RClusterGroupDescriptorBuilder &EntrySpan(std::uint64_t entrySpan)
1028 {
1029 fClusterGroup.fEntrySpan = entrySpan;
1030 return *this;
1031 }
1032 RClusterGroupDescriptorBuilder &NClusters(std::uint32_t nClusters)
1033 {
1034 fClusterGroup.fNClusters = nClusters;
1035 return *this;
1036 }
1037 void AddClusters(const std::vector<DescriptorId_t> &clusterIds)
1038 {
1039 if (clusterIds.size() != fClusterGroup.GetNClusters())
1040 throw RException(R__FAIL("mismatch of number of clusters"));
1041 fClusterGroup.fClusterIds = clusterIds;
1042 }
1043
1044 RResult<RClusterGroupDescriptor> MoveDescriptor();
1045 };
1046
1047
1048
1049
1050
1051
1052
1053
1054 class RColumnGroupDescriptorBuilder {
1055 private:
1056 RColumnGroupDescriptor fColumnGroup;
1057
1058 public:
1059 RColumnGroupDescriptorBuilder() = default;
1060
1061 RColumnGroupDescriptorBuilder &ColumnGroupId(DescriptorId_t columnGroupId)
1062 {
1063 fColumnGroup.fColumnGroupId = columnGroupId;
1064 return *this;
1065 }
1066 void AddColumn(DescriptorId_t physicalId) { fColumnGroup.fPhysicalColumnIds.insert(physicalId); }
1067
1068 RResult<RColumnGroupDescriptor> MoveDescriptor();
1069 };
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080 class RNTupleDescriptorBuilder {
1081 private:
1082 RNTupleDescriptor fDescriptor;
1083 RResult<void> EnsureFieldExists(DescriptorId_t fieldId) const;
1084 public:
1085
1086
1087
1088 RResult<void> EnsureValidDescriptor() const;
1089 const RNTupleDescriptor& GetDescriptor() const { return fDescriptor; }
1090 RNTupleDescriptor MoveDescriptor();
1091
1092 void SetNTuple(const std::string_view name, const std::string_view description);
1093 void SetFeature(unsigned int flag);
1094
1095 void SetOnDiskHeaderXxHash3(std::uint64_t xxhash3) { fDescriptor.fOnDiskHeaderXxHash3 = xxhash3; }
1096 void SetOnDiskHeaderSize(std::uint64_t size) { fDescriptor.fOnDiskHeaderSize = size; }
1097
1098 void AddToOnDiskFooterSize(std::uint64_t size) { fDescriptor.fOnDiskFooterSize += size; }
1099
1100 void AddField(const RFieldDescriptor& fieldDesc);
1101 RResult<void> AddFieldLink(DescriptorId_t fieldId, DescriptorId_t linkId);
1102
1103 void AddColumn(DescriptorId_t logicalId, DescriptorId_t physicalId, DescriptorId_t fieldId,
1104 const RColumnModel &model, std::uint32_t index, std::uint64_t firstElementIdx = 0U);
1105 RResult<void> AddColumn(RColumnDescriptor &&columnDesc);
1106
1107 RResult<void> AddClusterGroup(RClusterGroupDescriptor &&clusterGroup);
1108 RResult<void> AddCluster(RClusterDescriptor &&clusterDesc);
1109
1110
1111 void Reset();
1112
1113
1114
1115 void BeginHeaderExtension();
1116 };
1117
1118 }
1119 }
1120 }
1121
1122 #endif