/// \file ROOT/RColumn.hxx
/// \ingroup NTuple ROOT7
/// \author Jakob Blomer <jblomer@cern.ch>
/// \date 2018-10-09
/// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
/// is welcome!

/*************************************************************************
 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef ROOT7_RColumn
#define ROOT7_RColumn

#include <ROOT/RConfig.hxx> // for R__likely
#include <ROOT/RColumnElement.hxx>
#include <ROOT/RColumnModel.hxx>
#include <ROOT/RNTupleUtil.hxx>
#include <ROOT/RPage.hxx>
#include <ROOT/RPageStorage.hxx>

#include <TError.h>

#include <cstring> // for memcpy
#include <memory>
#include <utility>

namespace ROOT {
namespace Experimental {
namespace Internal {

// clang-format off
/**
\class ROOT::Experimental::Internal::RColumn
\ingroup NTuple
\brief A column is a storage-backed array of a simple, fixed-size type, from which pages can be mapped into memory.
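
A minimal, illustrative write/read sketch. The RColumnModel constructor arguments and the `fieldId`, `pageSink`
and `pageSource` objects are assumptions made for the sake of the example; in practice, columns are managed by
the field layer rather than used directly:
~~~ {.cpp}
auto column = RColumn::Create<float>(RColumnModel(EColumnType::kReal32, false /*isSorted*/), 0);
column->ConnectPageSink(fieldId, pageSink);    // acquires a column handle and the write pages
float value = 3.14f;
column->Append(&value);                        // copies sizeof(float) bytes into the current write page
column->Flush();                               // commits the open write page(s)

column->ConnectPageSource(fieldId, pageSource);
float readBack;
column->Read(0, &readBack);                    // maps the page containing element 0 and copies it out
~~~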
*/
// clang-format on
class RColumn {
private:
   RColumnModel fModel;
   /// Columns belonging to the same field are distinguished by their order. E.g. for an std::string field, there is
   /// the offset column with index 0 and the character value column with index 1.
   std::uint32_t fIndex;
   RPageSink *fPageSink = nullptr;
   RPageSource *fPageSource = nullptr;
   RPageStorage::ColumnHandle_t fHandleSink;
   RPageStorage::ColumnHandle_t fHandleSource;
   /// A pair of open pages into which new elements are written, used in rotation. Each page is 50% bigger
   /// than the target size given by the write options. The current page is filled up to the target size,
   /// but it is only committed once the other write page is at least 50% full. If a flush occurs earlier,
   /// a single, slightly oversized page is committed instead.
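   /// Illustration with a target size of 1000 elements: page #0 receives elements until it holds 1000 of them,
   /// then the pages swap; once page #1 has received 500 further elements, the (full) page #0 is committed;
   /// page #1 keeps filling up to 1000 elements, the pages swap again, and the cycle repeats.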
   RPage fWritePage[2];
   /// Index of the current write page
   int fWritePageIdx = 0;
   /// For writing, the target number of elements per page, given by `fApproxNElementsPerPage` in the write
   /// options and the element size. We ensure this value is >= 2 in ConnectPageSink() so that the
   /// "page full" and "page half full" events during writing are meaningful.
   std::uint32_t fApproxNElementsPerPage = 0;
   /// The number of elements written to the column (writing) or available in the column (reading)
   NTupleSize_t fNElements = 0;
   /// The currently mapped page for reading
   RPage fReadPage;
   /// The column id is used to find matching pages with content when reading
   ColumnId_t fColumnIdSource = kInvalidColumnId;
   /// Global index of the first element in this column; usually == 0, unless it is a deferred column
   NTupleSize_t fFirstElementIndex = 0;
   /// Used to pack and unpack pages on writing/reading
   std::unique_ptr<RColumnElementBase> fElement;

   RColumn(const RColumnModel &model, std::uint32_t index);

   /// Used in Append() and AppendV() to switch pages when the main page has reached the target size.
   /// The other page was already flushed when the main page crossed the 50% fill level.
   void SwapWritePagesIfFull() {
      if (R__likely(fWritePage[fWritePageIdx].GetNElements() < fApproxNElementsPerPage))
         return;

      fWritePageIdx = 1 - fWritePageIdx; // == (fWritePageIdx + 1) % 2
      R__ASSERT(fWritePage[fWritePageIdx].IsEmpty());
      fWritePage[fWritePageIdx].Reset(fNElements);
   }

   /// When the main write page surpasses the 50% fill level, the (full) shadow write page gets flushed
   void FlushShadowWritePage() {
      auto otherIdx = 1 - fWritePageIdx;
      if (fWritePage[otherIdx].IsEmpty())
         return;
      fPageSink->CommitPage(fHandleSink, fWritePage[otherIdx]);
      // Mark the page as flushed; the rangeFirst is zero for now but will be reset to
      // fNElements in SwapWritePagesIfFull() when the pages swap
      fWritePage[otherIdx].Reset(0);
   }

public:
   template <typename CppT>
   static std::unique_ptr<RColumn> Create(const RColumnModel &model, std::uint32_t index)
   {
      auto column = std::unique_ptr<RColumn>(new RColumn(model, index));
      column->fElement = RColumnElementBase::Generate<CppT>(model.GetType());
      return column;
   }

   RColumn(const RColumn&) = delete;
   RColumn &operator =(const RColumn&) = delete;
   ~RColumn();

   /// Connect the column to a page sink.  `firstElementIndex` can be used to specify the first column element index
   /// with backing storage for this column.  On read back, elements before `firstElementIndex` will cause the zero page
   /// to be mapped.
   void ConnectPageSink(DescriptorId_t fieldId, RPageSink &pageSink, NTupleSize_t firstElementIndex = 0U);
   /// Connect the column to a page source.
   void ConnectPageSource(DescriptorId_t fieldId, RPageSource &pageSource);

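   /// Write a single element into the current write page. The element is copied byte-wise from `from`, which
   /// must point to a buffer of at least `GetElement()->GetSize()` bytes.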
   void Append(const void *from)
   {
      void *dst = fWritePage[fWritePageIdx].GrowUnchecked(1);

      if (fWritePage[fWritePageIdx].GetNElements() == fApproxNElementsPerPage / 2) {
         FlushShadowWritePage();
      }

      std::memcpy(dst, from, fElement->GetSize());
      fNElements++;

      SwapWritePagesIfFull();
   }

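   /// Write `count` consecutive elements from the buffer `from`. Falls back to element-by-element Append()
   /// calls if the current write page cannot hold all of them.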
   void AppendV(const void *from, std::size_t count)
   {
      // We might not have enough space in the current page. In this case, fall back to filling element by element.
      if (fWritePage[fWritePageIdx].GetNElements() + count > fApproxNElementsPerPage) {
         // TODO(jblomer): use (fewer) calls to AppendV to write the data page-by-page
         for (unsigned i = 0; i < count; ++i) {
            Append(static_cast<const unsigned char *>(from) + fElement->GetSize() * i);
         }
         return;
      }

      // The check for flushing the shadow page is more complicated than in the Append() case
      // because we don't necessarily fill up to exactly fApproxNElementsPerPage / 2 elements;
      // we might instead jump over the 50% fill level.
      // This check should be done before calling `RPage::GrowUnchecked()` as the latter affects the return value of
      // `RPage::GetNElements()`.
      if ((fWritePage[fWritePageIdx].GetNElements() < fApproxNElementsPerPage / 2) &&
          (fWritePage[fWritePageIdx].GetNElements() + count >= fApproxNElementsPerPage / 2))
      {
         FlushShadowWritePage();
      }

      void *dst = fWritePage[fWritePageIdx].GrowUnchecked(count);

      std::memcpy(dst, from, fElement->GetSize() * count);
      fNElements += count;

      // Note that, due to the check at the beginning of AppendV(), the page cannot hold more than
      // fApproxNElementsPerPage elements at this point.
      SwapWritePagesIfFull();
   }

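   /// Copy the element at the given global index into `to`, mapping the corresponding page first if necessary.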
   void Read(const NTupleSize_t globalIndex, void *to)
   {
      if (!fReadPage.Contains(globalIndex)) {
         MapPage(globalIndex);
      }
      const auto elemSize = fElement->GetSize();
      void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
                   (globalIndex - fReadPage.GetGlobalRangeFirst()) * elemSize;
      std::memcpy(to, from, elemSize);
   }

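   /// Copy the element at the given cluster-local index into `to`, mapping the corresponding page first if necessary.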
   void Read(RClusterIndex clusterIndex, void *to)
   {
      if (!fReadPage.Contains(clusterIndex)) {
         MapPage(clusterIndex);
      }
      const auto elemSize = fElement->GetSize();
      void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) +
                   (clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst()) * elemSize;
      std::memcpy(to, from, elemSize);
   }

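   /// Copy `count` consecutive elements starting at the given global index into `to`. If the requested range
   /// spans more than one page, the remainder is read recursively from the following page(s).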
   void ReadV(const NTupleSize_t globalIndex, const ClusterSize_t::ValueType count, void *to)
   {
      if (!fReadPage.Contains(globalIndex)) {
         MapPage(globalIndex);
      }
      NTupleSize_t idxInPage = globalIndex - fReadPage.GetGlobalRangeFirst();

      const auto elemSize = fElement->GetSize();
      const void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemSize;
      if (globalIndex + count <= fReadPage.GetGlobalRangeLast() + 1) {
         std::memcpy(to, from, elemSize * count);
      } else {
         ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
         std::memcpy(to, from, elemSize * nBatch);
         auto tail = static_cast<unsigned char *>(to) + nBatch * elemSize;
         ReadV(globalIndex + nBatch, count - nBatch, tail);
      }
   }

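   /// Copy `count` consecutive elements starting at the given cluster-local index into `to`. If the requested
   /// range spans more than one page, the remainder is read recursively from the following page(s).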
   void ReadV(RClusterIndex clusterIndex, const ClusterSize_t::ValueType count, void *to)
   {
      if (!fReadPage.Contains(clusterIndex)) {
         MapPage(clusterIndex);
      }
      NTupleSize_t idxInPage = clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst();

      const auto elemSize = fElement->GetSize();
      const void *from = static_cast<unsigned char *>(fReadPage.GetBuffer()) + idxInPage * elemSize;
      if (clusterIndex.GetIndex() + count <= fReadPage.GetClusterRangeLast() + 1) {
         std::memcpy(to, from, elemSize * count);
      } else {
         ClusterSize_t::ValueType nBatch = fReadPage.GetNElements() - idxInPage;
         std::memcpy(to, from, elemSize * nBatch);
         auto tail = static_cast<unsigned char *>(to) + nBatch * elemSize;
         ReadV(RClusterIndex(clusterIndex.GetClusterId(), clusterIndex.GetIndex() + nBatch), count - nBatch, tail);
      }
   }

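   /// Return a pointer directly into the mapped page, i.e. zero-copy access to the element at the given index.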
   template <typename CppT>
   CppT *Map(const NTupleSize_t globalIndex) {
      NTupleSize_t nItems;
      return MapV<CppT>(globalIndex, nItems);
   }

   template <typename CppT>
   CppT *Map(RClusterIndex clusterIndex)
   {
      NTupleSize_t nItems;
      return MapV<CppT>(clusterIndex, nItems);
   }

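   /// Like Map(), but additionally return in `nItems` the number of consecutive elements that are accessible
   /// through the returned pointer, i.e. up to the end of the currently mapped page.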
   template <typename CppT>
   CppT *MapV(const NTupleSize_t globalIndex, NTupleSize_t &nItems) {
      if (R__unlikely(!fReadPage.Contains(globalIndex))) {
         MapPage(globalIndex);
      }
      // +1 to go from 0-based indexing to 1-based number of items
      nItems = fReadPage.GetGlobalRangeLast() - globalIndex + 1;
      return reinterpret_cast<CppT*>(
         static_cast<unsigned char *>(fReadPage.GetBuffer()) +
         (globalIndex - fReadPage.GetGlobalRangeFirst()) * RColumnElement<CppT>::kSize);
   }

   template <typename CppT>
   CppT *MapV(RClusterIndex clusterIndex, NTupleSize_t &nItems)
   {
      if (!fReadPage.Contains(clusterIndex)) {
         MapPage(clusterIndex);
      }
      // +1 to go from 0-based indexing to 1-based number of items
      nItems = fReadPage.GetClusterRangeLast() - clusterIndex.GetIndex() + 1;
      return reinterpret_cast<CppT*>(
         static_cast<unsigned char *>(fReadPage.GetBuffer()) +
         (clusterIndex.GetIndex() - fReadPage.GetClusterRangeFirst()) * RColumnElement<CppT>::kSize);
   }

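   /// Translate a cluster-local index into a global element index.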
   NTupleSize_t GetGlobalIndex(RClusterIndex clusterIndex)
   {
      if (!fReadPage.Contains(clusterIndex)) {
         MapPage(clusterIndex);
      }
      return fReadPage.GetClusterInfo().GetIndexOffset() + clusterIndex.GetIndex();
   }

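   /// Translate a global element index into a cluster id / cluster-local index pair.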
   RClusterIndex GetClusterIndex(NTupleSize_t globalIndex) {
      if (!fReadPage.Contains(globalIndex)) {
         MapPage(globalIndex);
      }
      return RClusterIndex(fReadPage.GetClusterInfo().GetId(),
                           globalIndex - fReadPage.GetClusterInfo().GetIndexOffset());
   }

   /// For offset columns only, look at the two adjacent values that define a collection's coordinates
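   /// For example, if the column holds the offset values [3, 7, 7, 10] (all within one cluster starting at
   /// global index 0), the collection at global index 1 starts at cluster-local index 3 and has 4 elements,
   /// while the collection at global index 2 is empty.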
   void GetCollectionInfo(const NTupleSize_t globalIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
   {
      NTupleSize_t idxStart = 0;
      NTupleSize_t idxEnd;
      // Try to avoid jumping back to the previous page or the previous cluster
      if (R__likely(globalIndex > 0)) {
         if (R__likely(fReadPage.Contains(globalIndex - 1))) {
            idxStart = *Map<ClusterSize_t>(globalIndex - 1);
            idxEnd = *Map<ClusterSize_t>(globalIndex);
            if (R__unlikely(fReadPage.GetClusterInfo().GetIndexOffset() == globalIndex))
               idxStart = 0;
         } else {
            idxEnd = *Map<ClusterSize_t>(globalIndex);
            auto selfOffset = fReadPage.GetClusterInfo().GetIndexOffset();
            idxStart = (globalIndex == selfOffset) ? 0 : *Map<ClusterSize_t>(globalIndex - 1);
         }
      } else {
         idxEnd = *Map<ClusterSize_t>(globalIndex);
      }
      *collectionSize = idxEnd - idxStart;
      *collectionStart = RClusterIndex(fReadPage.GetClusterInfo().GetId(), idxStart);
   }

   void GetCollectionInfo(RClusterIndex clusterIndex, RClusterIndex *collectionStart, ClusterSize_t *collectionSize)
   {
      auto index = clusterIndex.GetIndex();
      auto idxStart = (index == 0) ? 0 : *Map<ClusterSize_t>(clusterIndex - 1);
      auto idxEnd = *Map<ClusterSize_t>(clusterIndex);
      *collectionSize = idxEnd - idxStart;
      *collectionStart = RClusterIndex(clusterIndex.GetClusterId(), idxStart);
   }

   /// For switch columns only, translate the switch value at the given global index into a cluster-local
   /// index and a tag
   void GetSwitchInfo(NTupleSize_t globalIndex, RClusterIndex *varIndex, std::uint32_t *tag) {
      auto varSwitch = Map<RColumnSwitch>(globalIndex);
      *varIndex = RClusterIndex(fReadPage.GetClusterInfo().GetId(), varSwitch->GetIndex());
      *tag = varSwitch->GetTag();
   }

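   /// Commit the remaining contents of the write pages to the page sink; as described for fWritePage, this may
   /// produce a single, slightly oversized page.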
   void Flush();
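   /// Map the page containing the given (global or cluster-local) element index into fReadPage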
   void MapPage(const NTupleSize_t index);
   void MapPage(RClusterIndex clusterIndex);
   NTupleSize_t GetNElements() const { return fNElements; }
   RColumnElementBase *GetElement() const { return fElement.get(); }
   const RColumnModel &GetModel() const { return fModel; }
   std::uint32_t GetIndex() const { return fIndex; }
   ColumnId_t GetColumnIdSource() const { return fColumnIdSource; }
   NTupleSize_t GetFirstElementIndex() const { return fFirstElementIndex; }
   RPageSource *GetPageSource() const { return fPageSource; }
   RPageSink *GetPageSink() const { return fPageSink; }
   RPageStorage::ColumnHandle_t GetHandleSource() const { return fHandleSource; }
   RPageStorage::ColumnHandle_t GetHandleSink() const { return fHandleSink; }
}; // class RColumn

} // namespace Internal
} // namespace Experimental
} // namespace ROOT

#endif