Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:10:47

0001 /// \file ROOT/RPageStorageFile.hxx
0002 /// \ingroup NTuple ROOT7
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2019-11-21
0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0006 /// is welcome!
0007 
0008 /*************************************************************************
0009  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0010  * All rights reserved.                                                  *
0011  *                                                                       *
0012  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0013  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0014  *************************************************************************/
0015 
0016 #ifndef ROOT7_RPageStorageFile
0017 #define ROOT7_RPageStorageFile
0018 
0019 #include <ROOT/RMiniFile.hxx>
0020 #include <ROOT/RNTupleSerialize.hxx>
0021 #include <ROOT/RNTupleZip.hxx>
0022 #include <ROOT/RPageStorage.hxx>
0023 #include <ROOT/RRawFile.hxx>
0024 #include <string_view>
0025 
0026 #include <array>
0027 #include <cstdio>
0028 #include <memory>
0029 #include <string>
0030 #include <utility>
0031 
0032 class TFile; // forward declaration: in this header TFile is only taken by reference (RPageSinkFile constructor)
0033 
0034 namespace ROOT {
0035 
0036 namespace Internal {
0037 class RRawFile; // NOTE(review): presumably redundant, <ROOT/RRawFile.hxx> is included above; RRawFile::RIOVec below needs the complete type anyway
0038 }
0039 
0040 namespace Experimental {
0041 class RNTuple; // forward declaration: RNTuple is named as a friend of RPageSourceFile and passed by const reference below
0042 
0043 namespace Internal {
0044 class RClusterPool;       // held through std::unique_ptr in RPageSourceFile
0045 class RPageAllocatorHeap; // held through std::unique_ptr in RPageSinkFile
0046 class RPagePool;          // held through std::shared_ptr in RPageSourceFile
0047 
0048 // clang-format off
0049 /**
0050 \class ROOT::Experimental::Internal::RPageSinkFile
0051 \ingroup NTuple
0052 \brief Storage provider that writes ntuple pages into a file
0053 
0054 The written file can be either in ROOT format or in RNTuple bare format.
0055 */
0056 // clang-format on
0057 class RPageSinkFile : public RPagePersistentSink {
0058 private:
        /// Heap page allocator owned by this sink.
        /// NOTE(review): presumably backs ReservePage() / ReleasePage() -- confirm in the implementation file.
0059    std::unique_ptr<RPageAllocatorHeap> fPageAllocator;
0060 
        /// File writer owned by this sink.
        /// NOTE(review): presumably created by the public constructors from the path or the TFile -- confirm.
0061    std::unique_ptr<RNTupleFileWriter> fWriter;
0062    /// Number of bytes committed to storage in the current cluster
0063    std::uint64_t fNBytesCurrentCluster = 0;
        /// Private constructor that takes no storage target.
        /// NOTE(review): presumably the common part the public constructors delegate to -- confirm.
0064    RPageSinkFile(std::string_view ntupleName, const RNTupleWriteOptions &options);
0065 
        /// Private helper that returns the locator for the given sealed page.
        /// NOTE(review): presumably shared by the Commit*PageImpl() functions; the exact meaning of bytesPacked
        /// is not visible in this header.
0066    RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage,
0067                                                 std::size_t bytesPacked);
0068 
0069 protected:
        // Bring the base-class InitImpl declarations into scope so that the override declared below does not hide them.
0070    using RPagePersistentSink::InitImpl;
        // The remaining protected members are the storage hooks of the base class; every one of them is marked final.
0071    void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
0072    RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) final;
0073    RNTupleLocator
0074    CommitSealedPageImpl(DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
0075    std::vector<RNTupleLocator> CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges) final;
0076    std::uint64_t CommitClusterImpl() final;
0077    RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
0078    void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
0079 
0080 public:
        /// Constructs a sink for the ntuple `ntupleName`; the storage target is given as a path.
0081    RPageSinkFile(std::string_view ntupleName, std::string_view path, const RNTupleWriteOptions &options);
        /// Constructs a sink for the ntuple `ntupleName`; the storage target is an existing TFile passed by reference.
0082    RPageSinkFile(std::string_view ntupleName, TFile &file, const RNTupleWriteOptions &options);
        // Copying is disabled; the move operations are explicitly defaulted.
0083    RPageSinkFile(const RPageSinkFile&) = delete;
0084    RPageSinkFile& operator=(const RPageSinkFile&) = delete;
0085    RPageSinkFile(RPageSinkFile&&) = default;
0086    RPageSinkFile& operator=(RPageSinkFile&&) = default;
0087    ~RPageSinkFile() override;
0088 
        /// Returns a page for `nElements` elements of the given column (final override).
0089    RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements) final;
        /// Counterpart of ReservePage(): takes the page back (final override).
0090    void ReleasePage(RPage &page) final;
0091 }; // class RPageSinkFile
0092 
0093 // clang-format off
0094 /**
0095 \class ROOT::Experimental::Internal::RPageSourceFile
0096 \ingroup NTuple
0097 \brief Storage provider that reads ntuple pages from a file
0098 */
0099 // clang-format on
0100 class RPageSourceFile : public RPageSource {
        // RNTuple is granted access to the private members of this class.
        // NOTE(review): presumably needed so that an RNTuple anchor can build its own page source -- confirm.
0101    friend class ROOT::Experimental::RNTuple;
0102 
0103 private:
0104    /// Summarizes cluster-level information that is necessary to populate a certain page.
0105    /// Used by PopulatePageFromCluster().
0106    struct RClusterInfo {
0107       DescriptorId_t fClusterId = 0;
0108       /// Location of the page on disk
0109       RClusterDescriptor::RPageRange::RPageInfoExtended fPageInfo;
0110       /// The first element number of the page's column in the given cluster
0111       std::uint64_t fColumnOffset = 0;
0112    };
0113 
0114    /// Populated pages might be shared; the page pool might, at some point, be used by multiple page sources
0115    std::shared_ptr<RPagePool> fPagePool;
0116    /// The last cluster from which a page got populated.  Points into fClusterPool->fPool
0117    RCluster *fCurrentCluster = nullptr;
0118    /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
0119    std::unique_ptr<ROOT::Internal::RRawFile> fFile;
0120    /// Takes the fFile to read ntuple blobs from it
        /// NOTE(review): fFile is declared (and therefore initialized) before fReader; if fReader is set up
        /// from fFile, this declaration order must be kept.
0121    RMiniFileReader fReader;
0122    /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
0123    RNTupleDescriptorBuilder fDescriptorBuilder;
0124    /// The cluster pool asynchronously preloads the next few clusters
0125    std::unique_ptr<RClusterPool> fClusterPool;
0126 
0127    /// Deserializes the header and footer into a minimal descriptor held by fDescriptorBuilder
0128    void InitDescriptor(const RNTuple &anchor);
0129 
        /// Private constructor that takes neither a path nor a raw file.
        /// NOTE(review): presumably the common part used by the public constructors and CreateFromAnchor() -- confirm.
0130    RPageSourceFile(std::string_view ntupleName, const RNTupleReadOptions &options);
0131 
        /// Private helper that returns a page of the given column, described by the cluster-level information
        /// in `clusterInfo` and an index relative to the cluster.
        /// NOTE(review): presumably the common back end of both PopulatePage() overloads -- confirm.
0132    RPage PopulatePageFromCluster(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo,
0133                                  ClusterSize_t::ValueType idxInCluster);
0134 
0135    /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
0136    /// read requests for a given cluster and columns.  The read requests are appended to
0137    /// the provided vector.  This way, requests can be collected for multiple clusters before
0138    /// sending them to RRawFile::ReadV().
0139    std::unique_ptr<RCluster> PrepareSingleCluster(
0140       const RCluster::RKey &clusterKey,
0141       std::vector<ROOT::Internal::RRawFile::RIOVec> &readRequests);
0142 
0143 protected:
        // Hooks overridden from the base class; both are marked final.
0144    RNTupleDescriptor AttachImpl() final;
0145    void UnzipClusterImpl(RCluster *cluster) final;
0146 
0147 public:
        /// Constructs a source for the ntuple `ntupleName`; the data location is given as a path.
0148    RPageSourceFile(std::string_view ntupleName, std::string_view path, const RNTupleReadOptions &options);
        /// Constructs a source for the ntuple `ntupleName` that takes ownership of an already created raw file.
0149    RPageSourceFile(std::string_view ntupleName, std::unique_ptr<ROOT::Internal::RRawFile> file,
0150                    const RNTupleReadOptions &options);
0151    /// Used from the RNTuple class to build a datasource if the anchor is already available.
0152    /// Requires the RNTuple object to be streamed from a file.
0153    static std::unique_ptr<RPageSourceFile>
0154    CreateFromAnchor(const RNTuple &anchor, const RNTupleReadOptions &options = RNTupleReadOptions());
0155    /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
0156    /// The meta-data (header and footer) is reread and parsed by the clone.
0157    std::unique_ptr<RPageSource> Clone() const final;
0158 
        // Neither copyable nor movable; use Clone() to obtain an independent source.
0159    RPageSourceFile(const RPageSourceFile&) = delete;
0160    RPageSourceFile& operator=(const RPageSourceFile&) = delete;
0161    RPageSourceFile(RPageSourceFile &&) = delete;
0162    RPageSourceFile &operator=(RPageSourceFile &&) = delete;
0163    ~RPageSourceFile() override;
0164 
        /// Returns the page of the given column addressed by a global index.
0165    RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) final;
        /// Returns the page of the given column addressed by an RClusterIndex.
0166    RPage PopulatePage(ColumnHandle_t columnHandle, RClusterIndex clusterIndex) final;
        /// Takes back a page handed out by this source (final override).
0167    void ReleasePage(RPage &page) final;
0168 
        /// NOTE(review): presumably fills `sealedPage` (passed by non-const reference) with the still-sealed
        /// page of `physicalColumnId` at `clusterIndex` -- confirm in the implementation file.
0169    void LoadSealedPage(DescriptorId_t physicalColumnId, RClusterIndex clusterIndex, RSealedPage &sealedPage) final;
0170 
        /// Returns owned clusters for the requested cluster keys; see PrepareSingleCluster() for the per-cluster part.
0171    std::vector<std::unique_ptr<RCluster>> LoadClusters(std::span<RCluster::RKey> clusterKeys) final;
0172 }; // class RPageSourceFile
0173 
0174 } // namespace Internal
0175 
0176 } // namespace Experimental
0177 } // namespace ROOT
0178 
0179 #endif