Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 09:14:32

0001 /// \file ROOT/RPageStorageFile.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2019-11-21
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RPageStorageFile
0015 #define ROOT_RPageStorageFile
0016 
0017 #include <ROOT/RMiniFile.hxx>
0018 #include <ROOT/RNTuple.hxx>
0019 #include <ROOT/RNTupleSerialize.hxx>
0020 #include <ROOT/RNTupleZip.hxx>
0021 #include <ROOT/RPageStorage.hxx>
0022 #include <ROOT/RRawFile.hxx>
0023 #include <string_view>
0024 
0025 #include <array>
0026 #include <cstdio>
0027 #include <memory>
0028 #include <optional>
0029 #include <string>
0030 #include <utility>
0031 
0032 class TDirectory;
0033 
0034 namespace ROOT {
0035 class RNTuple; // for making RPageSourceFile a friend of RNTuple
0036 class RNTupleLocator;
0037 
0038 namespace Internal {
0039 class RClusterPool;
0040 class RRawFile;
0041 class RPageAllocatorHeap;
0042 
0043 // clang-format off
0044 /**
0045 \class ROOT::Internal::RPageSinkFile
0046 \ingroup NTuple
0047 \brief Storage provider that writes ntuple pages into a file
0048 
0049 The written file can be either in ROOT format or in RNTuple bare format.
0050 */
0051 // clang-format on
0052 class RPageSinkFile : public RPagePersistentSink {
0053 private:
0054    // A set of pages to be committed together in a vector write.
0055    // Currently we assume they're all sequential (although they may span multiple ranges).
        // NOTE(review): unlike fNBytesCurrentCluster below, fSize and fBytesPacked carry no default member
        // initializer; confirm that every CommitBatch is value-initialized before these fields are read.
0056    struct CommitBatch {
0057       /// The list of pages to commit (non-owning pointers to sealed pages)
0058       std::vector<const RSealedPage *> fSealedPages;
0059       /// Total size in bytes of the batch
0060       size_t fSize;
0061       /// Total uncompressed size of the elements in the page batch
0062       size_t fBytesPacked;
0063    };
0064 
        /// Owning handle to the file writer used by this sink
0065    std::unique_ptr<ROOT::Internal::RNTupleFileWriter> fWriter;
0066    /// Number of bytes committed to storage in the current cluster
0067    std::uint64_t fNBytesCurrentCluster = 0;
        /// Private constructor that takes only the ntuple name and the write options; the public constructors
        /// additionally take a path or a TDirectory (presumably they share this one for common setup -- TODO confirm).
0068    RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options);
0069 
0070    /// We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the corresponding
0071    /// key. It is not strictly necessary to write and read the sealed page.
        /// Returns the locator of the written page.
0072    RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked);
0073 
0074    /// Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages
0075    /// contained in `batch`. The locators for the written pages are appended to `locators`.
0076    /// This procedure also updates some internal metrics of the page sink, hence it's not const.
0077    /// `batch` gets reset to size 0 after the writing is done (but its begin and end are not updated).
        /// NOTE(review): CommitBatch as declared above has no begin/end members, so the last remark may be stale.
0078    void CommitBatchOfPages(CommitBatch &batch, std::vector<RNTupleLocator> &locators);
0079 
0080 protected:
        // The using-declaration keeps the other InitImpl overloads of the base class visible; without it, the
        // overload declared right below would hide them.
0081    using RPagePersistentSink::InitImpl;
0082    void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
0083    RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override;
0084    RNTupleLocator
0085    CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
0086    std::vector<RNTupleLocator>
0087    CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges, const std::vector<bool> &mask) final;
0088    std::uint64_t StageClusterImpl() final;
0089    RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
        // Same reason as for InitImpl above: keep the base class overloads of CommitDatasetImpl visible.
0090    using RPagePersistentSink::CommitDatasetImpl;
0091    void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
0092 
0093 public:
        /// Creates a sink for the ntuple `ntupleName` from a path (presumably the location of the output file -- TODO confirm).
0094    RPageSinkFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleWriteOptions &options);
        /// Creates a sink for the ntuple `ntupleName` from a TDirectory (per the parameter name, a file or a directory).
0095    RPageSinkFile(std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options);
        // The sink cannot be copied; move operations are defaulted.
0096    RPageSinkFile(const RPageSinkFile &) = delete;
0097    RPageSinkFile &operator=(const RPageSinkFile &) = delete;
0098    RPageSinkFile(RPageSinkFile &&) = default;
0099    RPageSinkFile &operator=(RPageSinkFile &&) = default;
0100    ~RPageSinkFile() override;
0101 }; // class RPageSinkFile
0102 
0103 // clang-format off
0104 /**
0105 \class ROOT::Internal::RPageSourceFile
0106 \ingroup NTuple
0107 \brief Storage provider that reads ntuple pages from a file
0108 */
0109 // clang-format on
0110 class RPageSourceFile : public RPageSource {
        // Grants ROOT::RNTuple access to the non-public members of this class.
0111    friend class ROOT::RNTuple;
0112 
0113 private:
0114    /// Holds the uncompressed header and footer
0115    struct RStructureBuffer {
0116       std::unique_ptr<unsigned char[]> fBuffer; ///< single buffer for both header and footer
0117       void *fPtrHeader = nullptr;               ///< either nullptr or points into fBuffer
0118       void *fPtrFooter = nullptr;               ///< either nullptr or points into fBuffer
0119 
0120       /// Called at the end of Attach(), i.e. when the header and footer are processed.
           /// Swaps this object with a default-constructed one: afterwards fBuffer is empty and both pointers are
           /// nullptr, and the previously held buffer is freed when the local `empty` goes out of scope.
0121       void Reset()
0122       {
0123          RStructureBuffer empty;
0124          std::swap(empty, *this);
0125       }
0126    };
0127 
0128    /// Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name
0129    std::optional<RNTuple> fAnchor;
0130    /// The last cluster from which a page got loaded.  Points into fClusterPool->fPool
0131    ROOT::Internal::RCluster *fCurrentCluster = nullptr;
0132    /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
0133    std::unique_ptr<RRawFile> fFile;
0134    /// Takes the fFile to read ntuple blobs from it
0135    ROOT::Internal::RMiniFileReader fReader;
0136    /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
0137    RNTupleDescriptorBuilder fDescriptorBuilder;
0138    /// The cluster pool asynchronously preloads the next few clusters
0139    std::unique_ptr<ROOT::Internal::RClusterPool> fClusterPool;
0140    /// Populated by LoadStructureImpl(), reset at the end of Attach()
0141    RStructureBuffer fStructureBuffer;
0142 
        /// Private constructor that takes only the ntuple name and the read options; the public constructors
        /// additionally take a path or an already opened RRawFile.
0143    RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options);
0144 
0145    /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
0146    /// read requests for a given cluster and columns.  The read requests are appended to
0147    /// the provided vector.  This way, requests can be collected for multiple clusters before
0148    /// sending them to RRawFile::ReadV().
0149    std::unique_ptr<ROOT::Internal::RCluster>
0150    PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector<RRawFile::RIOVec> &readRequests);
0151 
0152 protected:
0153    void LoadStructureImpl() final;
0154    ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
0155    /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
0156    std::unique_ptr<RPageSource> CloneImpl() const final;
0157 
0158    RPageRef
0159    LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final;
0160 
0161 public:
        /// Creates a source for the ntuple `ntupleName` from a path (presumably the location of the input file -- TODO confirm).
0162    RPageSourceFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleReadOptions &options);
        /// Creates a source for the ntuple `ntupleName`, taking ownership of the passed RRawFile.
0163    RPageSourceFile(std::string_view ntupleName, std::unique_ptr<RRawFile> file,
0164                    const ROOT::RNTupleReadOptions &options);
0165    /// Used from the RNTuple class to build a datasource if the anchor is already available.
0166    /// Requires the RNTuple object to be streamed from a file.
0167    static std::unique_ptr<RPageSourceFile>
0168    CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0169 
        // The page source can be neither copied nor moved.
0170    RPageSourceFile(const RPageSourceFile &) = delete;
0171    RPageSourceFile &operator=(const RPageSourceFile &) = delete;
0172    RPageSourceFile(RPageSourceFile &&) = delete;
0173    RPageSourceFile &operator=(RPageSourceFile &&) = delete;
0174    ~RPageSourceFile() override;
0175 
        /// Fills `sealedPage` for the page of column `physicalColumnId` addressed by `localIndex`
        /// (presumably without unsealing/decompressing it -- TODO confirm in the implementation).
0176    void
0177    LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;
0178 
        /// Loads the clusters identified by `clusterKeys` and returns them as owning pointers.
        /// According to the comment on PrepareSingleCluster, the read requests of several clusters can be
        /// collected before being sent to RRawFile::ReadV().
0179    std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
0180    LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;
0181 }; // class RPageSourceFile
0182 
0183 } // namespace Internal
0184 } // namespace ROOT
0185 
0186 #endif