Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-08 10:36:30

0001 /// \file ROOT/RPageStorageFile.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2019-11-21
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RPageStorageFile
0015 #define ROOT_RPageStorageFile
0016 
0017 #include <ROOT/RMiniFile.hxx>
0018 #include <ROOT/RNTuple.hxx>
0019 #include <ROOT/RNTupleSerialize.hxx>
0020 #include <ROOT/RNTupleZip.hxx>
0021 #include <ROOT/RPageStorage.hxx>
0022 #include <ROOT/RRawFile.hxx>
0023 #include <string_view>
0024 
0025 #include <array>
0026 #include <cstdio>
0027 #include <memory>
0028 #include <optional>
0029 #include <string>
0030 #include <utility>
0031 
0032 class TDirectory;
0033 
0034 namespace ROOT {
0035 class RNTuple; // for making RPageSourceFile a friend of RNTuple
0036 class RNTupleLocator;
0037 
0038 namespace Internal {
0039 class RClusterPool;
0040 class RRawFile;
0041 class RPageAllocatorHeap;
0042 
0043 // clang-format off
0044 /**
0045 \class ROOT::Internal::RPageSinkFile
0046 \ingroup NTuple
0047 \brief Storage provider that writes ntuple pages into a file
0048 
0049 The written file can be either in ROOT format or in RNTuple bare format.
0050 */
0051 // clang-format on
0052 class RPageSinkFile : public RPagePersistentSink {
0053 private:
0054    // A set of pages to be committed together in a vector write.
0055    // Currently we assume they're all sequential (although they may span multiple ranges).
0056    struct CommitBatch {
0057       /// The list of pages to commit
0058       std::vector<const RSealedPage *> fSealedPages;
0059       /// Total size in bytes of the batch; zero-initialized so that a default-constructed batch is well-defined
0060       size_t fSize = 0;
0061       /// Total uncompressed size of the elements in the page batch; zero-initialized for the same reason as fSize
0062       size_t fBytesPacked = 0;
0063    };
0064 
0065    std::unique_ptr<ROOT::Internal::RNTupleFileWriter> fWriter;
0066    /// Number of bytes committed to storage in the current cluster
0067    std::uint64_t fNBytesCurrentCluster = 0;
0068    /// On UpdateSchema(), the new class fields register the corresponding streamer info here so that the
0069    /// streamer info records in the file can be properly updated on dataset commit
0070    ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fInfosOfClassFields;
0071 
0072    RPageSinkFile(std::string_view ntupleName, const ROOT::RNTupleWriteOptions &options);
0073 
0074    /// We pass bytesPacked so that TFile::ls() reports a reasonable value for the compression ratio of the corresponding
0075    /// key. It is not strictly necessary to write and read the sealed page.
0076    RNTupleLocator WriteSealedPage(const RPageStorage::RSealedPage &sealedPage, std::size_t bytesPacked);
0077 
0078    /// Subroutine of CommitSealedPageVImpl, used to perform a vector write of the (multi-)range of pages
0079    /// contained in `batch`. The locators for the written pages are appended to `locators`.
0080    /// This procedure also updates some internal metrics of the page sink, hence it's not const.
0081    /// `batch` gets reset to size 0 after the writing is done (but its begin and end are not updated).
0082    void CommitBatchOfPages(CommitBatch &batch, std::vector<RNTupleLocator> &locators);
0083 
0084 protected:
0085    using RPagePersistentSink::InitImpl;
0086    void InitImpl(unsigned char *serializedHeader, std::uint32_t length) final;
0087    RNTupleLocator CommitPageImpl(ColumnHandle_t columnHandle, const RPage &page) override;
0088    RNTupleLocator
0089    CommitSealedPageImpl(ROOT::DescriptorId_t physicalColumnId, const RPageStorage::RSealedPage &sealedPage) final;
0090    std::vector<RNTupleLocator>
0091    CommitSealedPageVImpl(std::span<RPageStorage::RSealedPageGroup> ranges, const std::vector<bool> &mask) final;
0092    std::uint64_t StageClusterImpl() final;
0093    RNTupleLocator CommitClusterGroupImpl(unsigned char *serializedPageList, std::uint32_t length) final;
0094    using RPagePersistentSink::CommitDatasetImpl;
0095    void CommitDatasetImpl(unsigned char *serializedFooter, std::uint32_t length) final;
0096 
0097 public:
0098    RPageSinkFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleWriteOptions &options);
0099    RPageSinkFile(std::string_view ntupleName, TDirectory &fileOrDirectory, const ROOT::RNTupleWriteOptions &options);
0100    RPageSinkFile(const RPageSinkFile &) = delete; // the sink is not copyable ...
0101    RPageSinkFile &operator=(const RPageSinkFile &) = delete;
0102    RPageSinkFile(RPageSinkFile &&) = default; // ... but it can be moved
0103    RPageSinkFile &operator=(RPageSinkFile &&) = default;
0104    ~RPageSinkFile() override;
0105 
0106    void UpdateSchema(const ROOT::Internal::RNTupleModelChangeset &changeset, ROOT::NTupleSize_t firstEntry) final;
0107 }; // class RPageSinkFile
0108 
0109 // clang-format off
0110 /**
0111 \class ROOT::Internal::RPageSourceFile
0112 \ingroup NTuple
0113 \brief Storage provider that reads ntuple pages from a file
0114 */
0115 // clang-format on
0116 class RPageSourceFile : public RPageSource {
0117    friend class ROOT::RNTuple;
0118 
0119 private:
0120    /// Holds the uncompressed header and footer
0121    struct RStructureBuffer {
0122       std::unique_ptr<unsigned char[]> fBuffer; ///< single buffer for both header and footer
0123       void *fPtrHeader = nullptr;               ///< either nullptr or points into fBuffer
0124       void *fPtrFooter = nullptr;               ///< either nullptr or points into fBuffer
0125 
0126       /// Called at the end of Attach(), i.e. when the header and footer are processed
0127       void Reset()
0128       {
0129          RStructureBuffer empty; // default state: no buffer, both pointers null
0130          std::swap(empty, *this); // the previous contents move into `empty` and are released when it goes out of scope
0131       }
0132    };
0133 
0134    /// Either provided by CreateFromAnchor, or read from the ROOT file given the ntuple name
0135    std::optional<RNTuple> fAnchor;
0136    /// The last cluster from which a page got loaded.  Points into fClusterPool->fPool
0137    ROOT::Internal::RCluster *fCurrentCluster = nullptr;
0138    /// An RRawFile is used to request the necessary byte ranges from a local or a remote file
0139    std::unique_ptr<RRawFile> fFile;
0140    /// Takes the fFile to read ntuple blobs from it
0141    ROOT::Internal::RMiniFileReader fReader;
0142    /// The descriptor is created from the header and footer either in AttachImpl or in CreateFromAnchor
0143    RNTupleDescriptorBuilder fDescriptorBuilder;
0144    /// The cluster pool asynchronously preloads the next few clusters
0145    std::unique_ptr<ROOT::Internal::RClusterPool> fClusterPool;
0146    /// Populated by LoadStructureImpl(), reset at the end of Attach()
0147    RStructureBuffer fStructureBuffer;
0148 
0149    RPageSourceFile(std::string_view ntupleName, const ROOT::RNTupleReadOptions &options);
0150 
0151    /// Helper function for LoadClusters: it prepares the memory buffer (page map) and the
0152    /// read requests for a given cluster and columns.  The read requests are appended to
0153    /// the provided vector.  This way, requests can be collected for multiple clusters before
0154    /// sending them to RRawFile::ReadV().
0155    std::unique_ptr<ROOT::Internal::RCluster>
0156    PrepareSingleCluster(const ROOT::Internal::RCluster::RKey &clusterKey, std::vector<RRawFile::RIOVec> &readRequests);
0157 
0158 protected:
0159    void LoadStructureImpl() final;
0160    ROOT::RNTupleDescriptor AttachImpl(RNTupleSerializer::EDescriptorDeserializeMode mode) final;
0161    /// The cloned page source creates a new raw file and reader and opens its own file descriptor to the data.
0162    std::unique_ptr<RPageSource> CloneImpl() const final;
0163 
0164    RPageRef
0165    LoadPageImpl(ColumnHandle_t columnHandle, const RClusterInfo &clusterInfo, ROOT::NTupleSize_t idxInCluster) final;
0166 
0167 public:
0168    RPageSourceFile(std::string_view ntupleName, std::string_view path, const ROOT::RNTupleReadOptions &options);
0169    RPageSourceFile(std::string_view ntupleName, std::unique_ptr<RRawFile> file,
0170                    const ROOT::RNTupleReadOptions &options);
0171    /// Used from the RNTuple class to build a datasource if the anchor is already available.
0172    /// Requires the RNTuple object to be streamed from a file.
0173    static std::unique_ptr<RPageSourceFile>
0174    CreateFromAnchor(const RNTuple &anchor, const ROOT::RNTupleReadOptions &options = ROOT::RNTupleReadOptions());
0175 
0176    RPageSourceFile(const RPageSourceFile &) = delete; // copying and moving are both disabled for page sources
0177    RPageSourceFile &operator=(const RPageSourceFile &) = delete;
0178    RPageSourceFile(RPageSourceFile &&) = delete;
0179    RPageSourceFile &operator=(RPageSourceFile &&) = delete;
0180    ~RPageSourceFile() override;
0181 
0182    void
0183    LoadSealedPage(ROOT::DescriptorId_t physicalColumnId, RNTupleLocalIndex localIndex, RSealedPage &sealedPage) final;
0184 
0185    std::vector<std::unique_ptr<ROOT::Internal::RCluster>>
0186    LoadClusters(std::span<ROOT::Internal::RCluster::RKey> clusterKeys) final;
0187 }; // class RPageSourceFile
0188 
0189 } // namespace Internal
0190 } // namespace ROOT
0191 
0192 #endif