Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-16 09:08:35

0001 /// \file ROOT/RPagePool.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2018-10-09
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RPagePool
0015 #define ROOT_RPagePool
0016 
0017 #include <ROOT/RPage.hxx>
0018 #include <ROOT/RPageAllocator.hxx>
0019 #include <ROOT/RNTupleUtil.hxx>
0020 
0021 #include <cstddef>
0022 #include <map>
0023 #include <mutex>
0024 #include <typeindex>
0025 #include <typeinfo>
0026 #include <unordered_map>
0027 #include <unordered_set>
0028 #include <vector>
0029 
0030 namespace ROOT {
0031 namespace Internal {
0032 
0033 // clang-format off
0034 /**
0035 \class ROOT::Internal::RPagePool
0036 \ingroup NTuple
0037 \brief A thread-safe cache of pages loaded from the page source.
0038 
0039 The page pool is used as a cache for pages loaded from a page source.
0040 In this way, identical page needed at the same time, only need to be loaded once.
0041 Page sources also use the page pool to stage (preload) pages unsealed by IMT tasks.
0042 */
0043 // clang-format on
0044 class RPagePool {
0045    friend class RPageRef;
0046 
0047 public:
0048    // Search key for a set of pages covering the same column and in-memory target type.
0049    // Within the set of pages, one needs to find the page of a given index.
0050    struct RKey {
0051       ROOT::DescriptorId_t fColumnId = ROOT::kInvalidDescriptorId;
0052       std::type_index fInMemoryType = std::type_index(typeid(void));
0053 
0054       bool operator==(const RKey &other) const
0055       {
0056          return this->fColumnId == other.fColumnId && this->fInMemoryType == other.fInMemoryType;
0057       }
0058 
0059       bool operator!=(const RKey &other) const { return !(*this == other); }
0060    };
0061 
0062 private:
0063    /// Hash function to be used in the unordered map fLookupByKey
0064    struct RKeyHasher {
0065       /// Like boost::hash_combine
0066       std::size_t operator()(const RKey &k) const
0067       {
0068          auto seed = std::hash<ROOT::DescriptorId_t>()(k.fColumnId);
0069          return seed ^ (std::hash<std::type_index>()(k.fInMemoryType) + 0x9e3779b9 + (seed << 6) + (seed >> 2));
0070       }
0071    };
0072 
0073    /// Every page in the page pool is annotated with a search key and a reference counter.
0074    struct REntry {
0075       RPage fPage;
0076       RKey fKey;
0077       std::int64_t fRefCounter = 0;
0078    };
0079 
0080    /// Used in fLookupByKey to store both the absolute and the cluster-local page index of the referenced page.
0081    /// This allows to do binary search for one or the other. Note that elements in fLookupByKey must have
0082    /// _both_ values to be valid. If RPagePosition is used as a search key, only one of the two needs to be set.
0083    struct RPagePosition {
0084       ROOT::NTupleSize_t fGlobalFirstElement = ROOT::kInvalidNTupleIndex;
0085       RNTupleLocalIndex fClusterFirstElement;
0086 
0087       bool operator<(const RPagePosition &other) const
0088       {
0089          if ((fGlobalFirstElement != ROOT::kInvalidNTupleIndex) &&
0090              (other.fGlobalFirstElement != ROOT::kInvalidNTupleIndex)) {
0091             return fGlobalFirstElement < other.fGlobalFirstElement;
0092          }
0093 
0094          assert(fClusterFirstElement.GetClusterId() != ROOT::kInvalidDescriptorId &&
0095                 fClusterFirstElement.GetIndexInCluster() != ROOT::kInvalidNTupleIndex);
0096          assert(other.fClusterFirstElement.GetClusterId() != ROOT::kInvalidDescriptorId &&
0097                 other.fClusterFirstElement.GetIndexInCluster() != ROOT::kInvalidNTupleIndex);
0098          if (fClusterFirstElement.GetClusterId() == other.fClusterFirstElement.GetClusterId())
0099             return fClusterFirstElement.GetIndexInCluster() < other.fClusterFirstElement.GetIndexInCluster();
0100          return fClusterFirstElement.GetClusterId() < other.fClusterFirstElement.GetClusterId();
0101       }
0102 
0103       // Constructor used to store a page in fLookupByKey
0104       explicit RPagePosition(const RPage &page)
0105          : fGlobalFirstElement(page.GetGlobalRangeFirst()),
0106            fClusterFirstElement({page.GetClusterInfo().GetId(), page.GetLocalRangeFirst()})
0107       {
0108       }
0109 
0110       // Search key constructors
0111       explicit RPagePosition(ROOT::NTupleSize_t globalIndex) : fGlobalFirstElement(globalIndex) {}
0112       explicit RPagePosition(RNTupleLocalIndex localIndex) : fClusterFirstElement(localIndex) {}
0113    };
0114 
0115    std::vector<REntry> fEntries; ///< All cached pages in the page pool
0116    /// Used in ReleasePage() to find the page index in fPages
0117    std::unordered_map<void *, std::size_t> fLookupByBuffer;
0118    /// Used in GetPage() to find the right page in fEntries. Lookup for the key (pair of on-disk and in-memory type)
0119    /// takes place in O(1). The selected pages are identified by index into the fEntries vector (map's value)
0120    /// and sorted by the position of the page in the column (map's key). Thus, access to pages of the page set
0121    /// has logarithmic complexity.
0122    std::unordered_map<RKey, std::map<RPagePosition, std::size_t>, RKeyHasher> fLookupByKey;
0123    /// Remembers pages with reference counter 0, organized by the page's cluster id. The pages are identified
0124    /// by their page buffer address. The fLookupByBuffer map can be used to resolve the address to a page.
0125    /// Once a page gets used, it is removed from the unused pages list. Evict will remove all unused pages
0126    /// from a given cluster id.
0127    std::unordered_map<ROOT::DescriptorId_t, std::unordered_set<void *>> fUnusedPages;
0128    std::mutex fLock; ///< The page pool is accessed concurrently due to parallel decompression
0129 
0130    /// Add a new page to the fLookupByBuffer and fLookupByKey data structures.
0131    REntry &AddPage(RPage page, const RKey &key, std::int64_t initialRefCounter);
0132 
0133    /// Give back a page to the pool and decrease the reference counter. There must not be any pointers anymore into
0134    /// this page. If the reference counter drops to zero, the page pool might decide to call the deleter given in
0135    /// during registration. Called by the RPageRef destructor.
0136    void ReleasePage(const RPage &page);
0137 
0138    /// Called by GetPage(), when the reference counter increases from zero to one
0139    void RemoveFromUnusedPages(const RPage &page);
0140 
0141    /// Called both by ReleasePage() and by Evict() to remove an unused page from the pool
0142    void ErasePage(std::size_t entryIdx, decltype(fLookupByBuffer)::iterator lookupByBufferItr);
0143 
0144 public:
0145    RPagePool() = default;
0146    RPagePool(const RPagePool&) = delete;
0147    RPagePool& operator =(const RPagePool&) = delete;
0148    ~RPagePool() = default;
0149 
0150    /// Adds a new page to the pool. Upon registration, the page pool takes ownership of the page's memory.
0151    /// The new page has its reference counter set to 1.
0152    RPageRef RegisterPage(RPage page, RKey key);
0153    /// Like RegisterPage() but the reference counter is initialized to 0. In addition, the page is added
0154    /// to the set of unused pages of the page's cluster (see Evict()).
0155    void PreloadPage(RPage page, RKey key);
0156    /// Removes unused pages (pages with reference counter 0) from the page pool. Users of PreloadPage() should
0157    /// use Evict() appropriately to avoid accumulation of unused pages.
0158    void Evict(ROOT::DescriptorId_t clusterId);
0159    /// Tries to find the page corresponding to column and index in the cache. If the page is found, its reference
0160    /// counter is increased
0161    RPageRef GetPage(RKey key, ROOT::NTupleSize_t globalIndex);
0162    RPageRef GetPage(RKey key, RNTupleLocalIndex localIndex);
0163 };
0164 
0165 // clang-format off
0166 /**
0167 \class ROOT::Internal::RPageRef
0168 \ingroup NTuple
0169 \brief Reference to a page stored in the page pool
0170 
0171 The referenced page knows about its page pool and decreases the reference counter on destruction.
0172 */
0173 // clang-format on
0174 class RPageRef {
0175    friend class RPagePool;
0176 
0177    RPage fPage;
0178    RPagePool *fPagePool = nullptr;
0179 
0180    // Called as delegated constructor and directly by the page pool
0181    RPageRef(const RPage &page, RPagePool *pagePool) : fPagePool(pagePool)
0182    {
0183       // We leave the fPage::fPageAllocator member unset (nullptr), since fPage is a non-owning view on the page
0184       fPage.fBuffer = page.fBuffer;
0185       fPage.fElementSize = page.fElementSize;
0186       fPage.fNElements = page.fNElements;
0187       fPage.fMaxElements = page.fMaxElements;
0188       fPage.fRangeFirst = page.fRangeFirst;
0189       fPage.fClusterInfo = page.fClusterInfo;
0190    }
0191 
0192 public:
0193    RPageRef() = default;
0194    RPageRef(const RPageRef &other) = delete;
0195    RPageRef &operator=(const RPageRef &other) = delete;
0196 
0197    RPageRef(RPageRef &&other) : RPageRef(other.fPage, other.fPagePool) { other.fPagePool = nullptr; }
0198 
0199    RPageRef &operator=(RPageRef &&other)
0200    {
0201       if (this != &other) {
0202          std::swap(fPage, other.fPage);
0203          std::swap(fPagePool, other.fPagePool);
0204       }
0205       return *this;
0206    }
0207 
0208    ~RPageRef()
0209    {
0210       if (fPagePool)
0211          fPagePool->ReleasePage(fPage);
0212    }
0213 
0214    const RPage &Get() const { return fPage; }
0215 };
0216 
0217 } // namespace Internal
0218 } // namespace ROOT
0219 
0220 #endif