Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-16 09:08:27

0001 /// \file ROOT/RColumn.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2018-10-09
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RColumn
0015 #define ROOT_RColumn
0016 
0017 #include <ROOT/RConfig.hxx> // for R__likely
0018 #include <ROOT/RColumnElementBase.hxx>
0019 #include <ROOT/RNTupleUtil.hxx>
0020 #include <ROOT/RPage.hxx>
0021 #include <ROOT/RPageStorage.hxx>
0022 
0023 #include <TError.h>
0024 
0025 #include <cstring> // for memcpy
0026 #include <memory>
0027 #include <utility>
0028 
0029 namespace ROOT::Internal {
0030 
0031 // clang-format off
0032 /**
0033 \class ROOT::Internal::RColumn
0034 \ingroup NTuple
0035 \brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
0036 */
0037 // clang-format on
class RColumn {
private:
   /// On-disk column type (ROOT::ENTupleColumnType), fixed at construction
   ROOT::ENTupleColumnType fType;
   /// Columns belonging to the same field are distinguished by their order.  E.g. for an std::string field, there is
   /// the offset column with index 0 and the character value column with index 1.
   std::uint32_t fIndex;
   /// Fields can have multiple column representations, distinguished by representation index
   std::uint16_t fRepresentationIndex;
   /// Non-owning pointer to the page sink used for writing; see ConnectPageSink()
   ROOT::Internal::RPageSink *fPageSink = nullptr;
   /// Non-owning pointer to the page source used for reading; see ConnectPageSource()
   ROOT::Internal::RPageSource *fPageSource = nullptr;
   /// Handle identifying this column at the connected page sink
   ROOT::Internal::RPageStorage::ColumnHandle_t fHandleSink;
   /// Handle identifying this column at the connected page source
   ROOT::Internal::RPageStorage::ColumnHandle_t fHandleSource;
   /// The page into which new elements are being written. The page will initially be small
   /// (RNTupleWriteOptions::fInitialUnzippedPageSize, which corresponds to fInitialElements) and expand as needed and
   /// as memory for page buffers is still available (RNTupleWriteOptions::fPageBufferBudget) or the maximum page
   /// size is reached (RNTupleWriteOptions::fMaxUnzippedPageSize).
   ROOT::Internal::RPage fWritePage;
   /// The initial number of elements in a page
   ROOT::NTupleSize_t fInitialNElements = 1;
   /// The number of elements written resp. available in the column
   ROOT::NTupleSize_t fNElements = 0;
   /// The currently mapped page for reading
   ROOT::Internal::RPageRef fReadPageRef;
   /// The column id in the column descriptor, once connected to a sink or source
   ROOT::DescriptorId_t fOnDiskId = ROOT::kInvalidDescriptorId;
   /// Global index of the first element in this column; usually == 0, unless it is a deferred column
   ROOT::NTupleSize_t fFirstElementIndex = 0;
   /// Used to pack and unpack pages on writing/reading
   std::unique_ptr<ROOT::Internal::RColumnElementBase> fElement;
   /// The column team is a set of columns that serve the same column index for different representation IDs.
   /// Initially, the team has only one member, the very column it belongs to. Through MergeTeams(), two columns
   /// can join forces. The team is used to react to suppressed columns: if the current team member has a suppressed
   /// column for a MapPage() call, it gets the page from the active column in the corresponding cluster.
   std::vector<RColumn *> fTeam;
   /// Points into fTeam to the column that successfully returned the last page.
   std::size_t fLastGoodTeamIdx = 0;

   /// Private: instances are created through the static Create<CppT>() factory below
   RColumn(ROOT::ENTupleColumnType type, std::uint32_t columnIndex, std::uint16_t representationIndex);

   /// Used when trying to append to a full write page. If possible, expand the page. Otherwise, flush and reset
   /// to the minimal size.
   void HandleWritePageIfFull()
   {
      // Tentatively double the page, but never beyond the configured maximum unzipped page size
      auto newMaxElements = fWritePage.GetMaxElements() * 2;
      if (newMaxElements * fElement->GetSize() > fPageSink->GetWriteOptions().GetMaxUnzippedPageSize()) {
         newMaxElements = fPageSink->GetWriteOptions().GetMaxUnzippedPageSize() / fElement->GetSize();
      }

      if (newMaxElements == fWritePage.GetMaxElements()) {
         // Maximum page size reached, flush and reset
         Flush();
      } else {
         auto expandedPage = fPageSink->ReservePage(fHandleSink, newMaxElements);
         if (expandedPage.IsNull()) {
            // The sink could not provide a larger buffer (e.g. page buffer budget exhausted);
            // fall back to flushing the current page
            Flush();
         } else {
            // Copy the elements written so far into the larger buffer and swap it in
            memcpy(expandedPage.GetBuffer(), fWritePage.GetBuffer(), fWritePage.GetNBytes());
            expandedPage.Reset(fNElements);
            expandedPage.GrowUnchecked(fWritePage.GetNElements());
            fWritePage = std::move(expandedPage);
         }
      }

      // Either way, there must now be room for at least one more element in the write page
      assert(fWritePage.GetNElements() < fWritePage.GetMaxElements());
   }

public:
   /// Create a column for elements with in-memory type CppT, stored on disk as `type`.
   /// Also generates the matching packing/unpacking element (fElement).
   template <typename CppT>
   static std::unique_ptr<RColumn>
   Create(ROOT::ENTupleColumnType type, std::uint32_t columnIdx, std::uint16_t representationIdx)
   {
      auto column = std::unique_ptr<RColumn>(new RColumn(type, columnIdx, representationIdx));
      column->fElement = ROOT::Internal::RColumnElementBase::Generate<CppT>(type);
      return column;
   }

   RColumn(const RColumn &) = delete;
   RColumn &operator=(const RColumn &) = delete;
   ~RColumn();

   /// Connect the column to a page sink.  `firstElementIndex` can be used to specify the first column element index
   /// with backing storage for this column.  On read back, elements before `firstElementIndex` will cause the zero page
   /// to be mapped.
   void ConnectPageSink(ROOT::DescriptorId_t fieldId, ROOT::Internal::RPageSink &pageSink,
                        ROOT::NTupleSize_t firstElementIndex = 0U);
   /// Connect the column to a page source.
   void ConnectPageSource(ROOT::DescriptorId_t fieldId, ROOT::Internal::RPageSource &pageSource);

   /// Append a single element (fElement->GetSize() bytes read from `from`) to the write page.
   /// Expands or flushes the write page first if it is full.
   void Append(const void *from)
   {
      if (fWritePage.GetNElements() == fWritePage.GetMaxElements()) {
         HandleWritePageIfFull();
      }

      void *dst = fWritePage.GrowUnchecked(1);

      std::memcpy(dst, from, fElement->GetSize());
      fNElements++;
   }

   /// Append `count` consecutive elements from `from`, possibly spread over several write pages.
   void AppendV(const void *from, std::size_t count)
   {
      auto src = reinterpret_cast<const unsigned char *>(from);
      // TODO(jblomer): A future optimization should grow the page in one go, up to the maximum unzipped page size
      while (count > 0) {
         std::size_t nElementsRemaining = fWritePage.GetMaxElements() - fWritePage.GetNElements();
         if (nElementsRemaining == 0) {
            HandleWritePageIfFull();
            // The write page was expanded or flushed; recompute the available space
            nElementsRemaining = fWritePage.GetMaxElements() - fWritePage.GetNElements();
         }

         assert(nElementsRemaining > 0);
         // Copy as many elements as fit into the current write page
         auto nBatch = std::min(count, nElementsRemaining);

         void *dst = fWritePage.GrowUnchecked(nBatch);
         std::memcpy(dst, src, nBatch * fElement->GetSize());
         src += nBatch * fElement->GetSize();
         count -= nBatch;
         fNElements += nBatch;
      }
   }

   /// Read the element at `globalIndex` into `to` (fElement->GetSize() bytes).
   /// Maps the page containing the element first if necessary.
   void Read(const ROOT::NTupleSize_t globalIndex, void *to)
   {
      if (!fReadPageRef.Get().Contains(globalIndex)) {
         MapPage(globalIndex);
      }
      const auto elemSize = fElement->GetSize();
      void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
                   (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * elemSize;
      std::memcpy(to, from, elemSize);
   }

   /// Read the element at the cluster-local `localIndex` into `to`.
   void Read(RNTupleLocalIndex localIndex, void *to)
   {
      if (!fReadPageRef.Get().Contains(localIndex)) {
         MapPage(localIndex);
      }
      const auto elemSize = fElement->GetSize();
      void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
                   (localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst()) * elemSize;
      std::memcpy(to, from, elemSize);
   }

   /// Read `count` consecutive elements starting at `globalIndex` into `to`,
   /// crossing page boundaries as needed.
   void ReadV(ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t count, void *to)
   {
      const auto elemSize = fElement->GetSize();
      auto tail = static_cast<unsigned char *>(to);

      while (count > 0) {
         if (!fReadPageRef.Get().Contains(globalIndex)) {
            MapPage(globalIndex);
         }
         const ROOT::NTupleSize_t idxInPage = globalIndex - fReadPageRef.Get().GetGlobalRangeFirst();

         const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
         // Copy up to the end of the currently mapped page, but no more than requested
         const ROOT::NTupleSize_t nBatch = std::min(fReadPageRef.Get().GetNElements() - idxInPage, count);

         std::memcpy(tail, from, elemSize * nBatch);

         tail += nBatch * elemSize;
         count -= nBatch;
         globalIndex += nBatch;
      }
   }

   /// Read `count` consecutive elements starting at the cluster-local `localIndex` into `to`,
   /// crossing page boundaries as needed.
   void ReadV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t count, void *to)
   {
      const auto elemSize = fElement->GetSize();
      auto tail = static_cast<unsigned char *>(to);

      while (count > 0) {
         if (!fReadPageRef.Get().Contains(localIndex)) {
            MapPage(localIndex);
         }
         ROOT::NTupleSize_t idxInPage = localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst();

         const void *from = static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) + idxInPage * elemSize;
         // Copy up to the end of the currently mapped page, but no more than requested
         const ROOT::NTupleSize_t nBatch = std::min(count, fReadPageRef.Get().GetNElements() - idxInPage);

         std::memcpy(tail, from, elemSize * nBatch);

         tail += nBatch * elemSize;
         count -= nBatch;
         // Advance within the same cluster; Contains() triggers a remap once we run off the page
         localIndex = RNTupleLocalIndex(localIndex.GetClusterId(), localIndex.GetIndexInCluster() + nBatch);
      }
   }

   /// Map the element at `globalIndex` and return a pointer to it inside the read page.
   /// NOTE(review): the pointer points into the currently mapped read page and is presumably
   /// invalidated when a different page gets mapped — treat it as short-lived.
   template <typename CppT>
   CppT *Map(const ROOT::NTupleSize_t globalIndex)
   {
      ROOT::NTupleSize_t nItems;
      return MapV<CppT>(globalIndex, nItems);
   }

   /// Map the element at the cluster-local `localIndex`; same lifetime caveat as the global overload.
   template <typename CppT>
   CppT *Map(RNTupleLocalIndex localIndex)
   {
      ROOT::NTupleSize_t nItems;
      return MapV<CppT>(localIndex, nItems);
   }

   /// Map the element at `globalIndex` and report in `nItems` how many consecutive elements are
   /// addressable from the returned pointer (up to the end of the mapped page).
   /// The pointer arithmetic uses sizeof(CppT), so CppT must be the column's in-memory element type.
   template <typename CppT>
   CppT *MapV(const ROOT::NTupleSize_t globalIndex, ROOT::NTupleSize_t &nItems)
   {
      if (R__unlikely(!fReadPageRef.Get().Contains(globalIndex))) {
         MapPage(globalIndex);
      }
      // +1 to go from 0-based indexing to 1-based number of items
      nItems = fReadPageRef.Get().GetGlobalRangeLast() - globalIndex + 1;
      return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
                                      (globalIndex - fReadPageRef.Get().GetGlobalRangeFirst()) * sizeof(CppT));
   }

   /// Cluster-local counterpart of the global MapV() overload.
   template <typename CppT>
   CppT *MapV(RNTupleLocalIndex localIndex, ROOT::NTupleSize_t &nItems)
   {
      if (!fReadPageRef.Get().Contains(localIndex)) {
         MapPage(localIndex);
      }
      // +1 to go from 0-based indexing to 1-based number of items
      nItems = fReadPageRef.Get().GetLocalRangeLast() - localIndex.GetIndexInCluster() + 1;
      return reinterpret_cast<CppT *>(static_cast<unsigned char *>(fReadPageRef.Get().GetBuffer()) +
                                      (localIndex.GetIndexInCluster() - fReadPageRef.Get().GetLocalRangeFirst()) *
                                         sizeof(CppT));
   }

   /// Translate a cluster-local index into a global element index, using the cluster info
   /// of the page that contains the element (maps it if necessary).
   ROOT::NTupleSize_t GetGlobalIndex(RNTupleLocalIndex clusterIndex)
   {
      if (!fReadPageRef.Get().Contains(clusterIndex)) {
         MapPage(clusterIndex);
      }
      return fReadPageRef.Get().GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndexInCluster();
   }

   /// Translate a global element index into a (cluster id, index-in-cluster) pair.
   RNTupleLocalIndex GetClusterIndex(ROOT::NTupleSize_t globalIndex)
   {
      if (!fReadPageRef.Get().Contains(globalIndex)) {
         MapPage(globalIndex);
      }
      return RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(),
                               globalIndex - fReadPageRef.Get().GetClusterInfo().GetIndexOffset());
   }

   /// For offset columns only, look at the two adjacent values that define a collection's coordinates
   void GetCollectionInfo(const ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *collectionStart,
                          ROOT::NTupleSize_t *collectionSize)
   {
      ROOT::NTupleSize_t idxStart = 0;
      ROOT::NTupleSize_t idxEnd;
      // Try to avoid jumping back to the previous page and jumping back to the previous cluster
      if (R__likely(globalIndex > 0)) {
         if (R__likely(fReadPageRef.Get().Contains(globalIndex - 1))) {
            // Fast path: both offsets live on the already-mapped page
            idxStart = *Map<ROOT::Internal::RColumnIndex>(globalIndex - 1);
            idxEnd = *Map<ROOT::Internal::RColumnIndex>(globalIndex);
            // If globalIndex is the first element of its cluster, the previous offset belongs to
            // the previous cluster; offsets are cluster-local, so the start is 0
            if (R__unlikely(fReadPageRef.Get().GetClusterInfo().GetIndexOffset() == globalIndex))
               idxStart = 0;
         } else {
            // Map the page of globalIndex first, then decide whether the previous offset is needed
            idxEnd = *Map<ROOT::Internal::RColumnIndex>(globalIndex);
            auto selfOffset = fReadPageRef.Get().GetClusterInfo().GetIndexOffset();
            idxStart = (globalIndex == selfOffset) ? 0 : *Map<ROOT::Internal::RColumnIndex>(globalIndex - 1);
         }
      } else {
         // Very first element of the column: the collection starts at offset 0
         idxEnd = *Map<ROOT::Internal::RColumnIndex>(globalIndex);
      }
      *collectionSize = idxEnd - idxStart;
      *collectionStart = RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(), idxStart);
   }

   /// Cluster-local counterpart of the global GetCollectionInfo() overload (offset columns only).
   void GetCollectionInfo(RNTupleLocalIndex localIndex, RNTupleLocalIndex *collectionStart,
                          ROOT::NTupleSize_t *collectionSize)
   {
      auto index = localIndex.GetIndexInCluster();
      // The first collection of a cluster starts at offset 0; otherwise read the preceding offset
      auto idxStart = (index == 0) ? 0 : *Map<ROOT::Internal::RColumnIndex>(localIndex - 1);
      auto idxEnd = *Map<ROOT::Internal::RColumnIndex>(localIndex);
      *collectionSize = idxEnd - idxStart;
      *collectionStart = RNTupleLocalIndex(localIndex.GetClusterId(), idxStart);
   }

   /// For switch columns only: read the switch element at `globalIndex` and return the variant's
   /// cluster-local index (`varIndex`) and its tag (`tag`).
   void GetSwitchInfo(ROOT::NTupleSize_t globalIndex, RNTupleLocalIndex *varIndex, std::uint32_t *tag)
   {
      auto varSwitch = Map<ROOT::Internal::RColumnSwitch>(globalIndex);
      *varIndex = RNTupleLocalIndex(fReadPageRef.Get().GetClusterInfo().GetId(), varSwitch->GetIndex());
      *tag = varSwitch->GetTag();
   }

   /// Hand the current write page over to the page sink (implementation out of line)
   void Flush();
   /// Mark this column as suppressed for the current cluster (implementation out of line)
   void CommitSuppressed();

   // Map the page containing the given index; R__ASSERT aborts if no page can be mapped
   void MapPage(ROOT::NTupleSize_t globalIndex) { R__ASSERT(TryMapPage(globalIndex)); }
   void MapPage(RNTupleLocalIndex localIndex) { R__ASSERT(TryMapPage(localIndex)); }
   bool TryMapPage(ROOT::NTupleSize_t globalIndex);
   bool TryMapPage(RNTupleLocalIndex localIndex);

   // True if the currently mapped read page contains the given index
   bool ReadPageContains(ROOT::NTupleSize_t globalIndex) const { return fReadPageRef.Get().Contains(globalIndex); }
   bool ReadPageContains(RNTupleLocalIndex localIndex) const { return fReadPageRef.Get().Contains(localIndex); }

   /// Join this column's team with `other`'s team; see the fTeam member documentation
   void MergeTeams(RColumn &other);

   // Accessors
   ROOT::NTupleSize_t GetNElements() const { return fNElements; }
   ROOT::Internal::RColumnElementBase *GetElement() const { return fElement.get(); }
   ROOT::ENTupleColumnType GetType() const { return fType; }
   std::uint16_t GetBitsOnStorage() const
   {
      assert(fElement);
      return static_cast<std::uint16_t>(fElement->GetBitsOnStorage());
   }
   std::optional<std::pair<double, double>> GetValueRange() const
   {
      assert(fElement);
      return fElement->GetValueRange();
   }
   std::uint32_t GetIndex() const { return fIndex; }
   std::uint16_t GetRepresentationIndex() const { return fRepresentationIndex; }
   ROOT::DescriptorId_t GetOnDiskId() const { return fOnDiskId; }
   ROOT::NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
   ROOT::Internal::RPageSource *GetPageSource() const { return fPageSource; }
   ROOT::Internal::RPageSink *GetPageSink() const { return fPageSink; }
   ROOT::Internal::RPageStorage::ColumnHandle_t GetHandleSource() const { return fHandleSource; }
   ROOT::Internal::RPageStorage::ColumnHandle_t GetHandleSink() const { return fHandleSink; }

   void SetBitsOnStorage(std::size_t bits) { fElement->SetBitsOnStorage(bits); }
   std::size_t GetWritePageCapacity() const { return fWritePage.GetCapacity(); }
   void SetValueRange(double min, double max) { fElement->SetValueRange(min, max); }
}; // class RColumn
0364 
0365 } // namespace ROOT::Internal
0366 
0367 #endif