Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-18 09:32:09

0001 /// \file ROOT/RMiniFile.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2019-12-22
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RMiniFile
0015 #define ROOT_RMiniFile
0016 
#include <ROOT/RError.hxx>
#include <ROOT/RNTuple.hxx>
#include <ROOT/RNTupleSerialize.hxx>
#include <ROOT/RSpan.hxx>
#include <Compression.h>

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <memory>
#include <string>
#include <string_view>
#include <variant>
0028 
0029 class TDirectory;
0030 class TFileMergeInfo;
0031 class TVirtualStreamerInfo;
0032 
0033 namespace ROOT {
0034 
0035 namespace Internal {
0036 class RRawFile;
0037 }
0038 
0039 class RNTupleWriteOptions;
0040 
0041 namespace Internal {
0042 /// Holds status information of an open ROOT file during writing
0043 struct RTFileControlBlock;
0044 
0045 // clang-format off
0046 /**
0047 \class ROOT::Internal::RMiniFileReader
0048 \ingroup NTuple
0049 \brief Read RNTuple data blocks from a TFile container, provided by a RRawFile
0050 
0051 A RRawFile is used for the byte access.  The class implements a minimal subset of TFile, enough to extract
0052 RNTuple data keys.
0053 */
0054 // clang-format on
0055 class RMiniFileReader {
0056 private:
0057    /// The raw file used to read byte ranges
0058    ROOT::Internal::RRawFile *fRawFile = nullptr;
0059    /// Indicates whether the file is a TFile container or an RNTuple bare file
0060    bool fIsBare = false;
0061    /// If `fMaxKeySize > 0` and ReadBuffer attempts to read `nbytes > maxKeySize`, it will assume the
0062    /// blob being read is chunked and read all the chunks into the buffer. This is symmetrical to
0063    /// what happens in `RNTupleFileWriter::WriteBlob()`.
0064    std::uint64_t fMaxKeySize = 0;
0065 
0066    /// Used when the file container turns out to be a bare file
0067    RResult<RNTuple> GetNTupleBare(std::string_view ntupleName);
0068    /// Used when the file turns out to be a TFile container. The ntuplePath variable is either the ntuple name
0069    /// or an ntuple name preceded by a directory (`myNtuple` or `foo/bar/myNtuple` or `/foo/bar/myNtuple`)
0070    RResult<RNTuple> GetNTupleProper(std::string_view ntuplePath);
0071 
0072    /// Searches for a key with the given name and type in the key index of the directory starting at offsetDir.
0073    /// The offset points to the start of the TDirectory DATA section, without the key and without the name and title
0074    /// of the TFile record (the root directory).
0075    /// Return 0 if the key was not found. Otherwise returns the offset of found key.
0076    std::uint64_t SearchInDirectory(std::uint64_t &offsetDir, std::string_view keyName, std::string_view typeName);
0077 
0078 public:
0079    RMiniFileReader() = default;
0080    /// Uses the given raw file to read byte ranges
0081    explicit RMiniFileReader(ROOT::Internal::RRawFile *rawFile);
0082    /// Extracts header and footer location for the RNTuple identified by ntupleName
0083    RResult<RNTuple> GetNTuple(std::string_view ntupleName);
0084    /// Reads a given byte range from the file into the provided memory buffer.
0085    /// If `nbytes > fMaxKeySize` it will perform chunked read from multiple blobs,
0086    /// whose addresses are listed at the end of the first chunk.
0087    void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset);
0088 
0089    std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
0090    /// If the reader is not used to retrieve the anchor, we need to set the max key size manually
0091    void SetMaxKeySize(std::uint64_t maxKeySize) { fMaxKeySize = maxKeySize; }
0092 };
0093 
0094 // clang-format off
0095 /**
0096 \class ROOT::Internal::RNTupleFileWriter
0097 \ingroup NTuple
0098 \brief Write RNTuple data blocks in a TFile or a bare file container
0099 
0100 The writer can create a new TFile container for an RNTuple or add an RNTuple to an existing TFile.
0101 Creating a single RNTuple in a new TFile container can be done with a C file stream without a TFile class.
0102 Updating an existing TFile requires a proper TFile object.  Also, writing a remote file requires a proper TFile object.
0103 A stand-alone version of RNTuple can remove the TFile based writer.
0104 */
0105 // clang-format on
0106 class RNTupleFileWriter {
0107 public:
0108    /// The key length of a blob. It is always a big key (version > 1000) with class name RBlob.
0109    static constexpr std::size_t kBlobKeyLen = 42;
0110 
0111 private:
0112    struct RFileProper {
0113       /// A sub directory in fFile or nullptr if the data is stored in the root directory of the file
0114       TDirectory *fDirectory = nullptr;
0115       /// Low-level writing using a TFile
0116       void Write(const void *buffer, size_t nbytes, std::int64_t offset);
0117       /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified,
0118       /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the
0119       /// argument is actually just a pointer.)
0120       std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
0121       operator bool() const { return fDirectory; }
0122    };
0123 
0124    struct RFileSimple {
0125       /// Direct I/O requires that all buffers and write lengths are aligned. It seems 512 byte alignment is the minimum
0126       /// for Direct I/O to work, but further testing showed that it results in worse performance than 4kB.
0127       static constexpr int kBlockAlign = 4096;
0128       /// During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at
0129       /// fSeekFileRecord. Given that the TFile key starts at offset 100 and the file name, which is written twice,
0130       /// is shorter than 255 characters, we should need at most ~600 bytes. However, the header also needs to be
0131       /// aligned to kBlockAlign...
0132       static constexpr std::size_t kHeaderBlockSize = 4096;
0133 
0134       // fHeaderBlock and fBlock are raw pointers because we have to manually call operator new and delete.
0135       unsigned char *fHeaderBlock = nullptr;
0136       std::size_t fBlockSize = 0;
0137       std::uint64_t fBlockOffset = 0;
0138       unsigned char *fBlock = nullptr;
0139 
0140       /// For the simplest cases, a C file stream can be used for writing
0141       FILE *fFile = nullptr;
0142       /// Whether the C file stream has been opened with Direct I/O, introducing alignment requirements.
0143       bool fDirectIO = false;
0144       /// Keeps track of the seek offset
0145       std::uint64_t fFilePos = 0;
0146       /// Keeps track of the next key offset
0147       std::uint64_t fKeyOffset = 0;
0148       /// Keeps track of TFile control structures, which need to be updated on committing the data set
0149       std::unique_ptr<ROOT::Internal::RTFileControlBlock> fControlBlock;
0150 
0151       RFileSimple();
0152       RFileSimple(const RFileSimple &other) = delete;
0153       RFileSimple(RFileSimple &&other) = delete;
0154       RFileSimple &operator=(const RFileSimple &other) = delete;
0155       RFileSimple &operator=(RFileSimple &&other) = delete;
0156       ~RFileSimple();
0157 
0158       void AllocateBuffers(std::size_t bufferSize);
0159       void Flush();
0160 
0161       /// Writes bytes in the open stream, either at fFilePos or at the given offset
0162       void Write(const void *buffer, size_t nbytes, std::int64_t offset = -1);
0163       /// Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the payload.
0164       /// The payload is already compressed
0165       std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset = -1,
0166                              std::uint64_t directoryOffset = 100, const std::string &className = "",
0167                              const std::string &objectName = "", const std::string &title = "");
0168       /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified,
0169       /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the
0170       /// argument is actually just a pointer.)
0171       std::uint64_t ReserveBlobKey(std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
0172       operator bool() const { return fFile; }
0173    };
0174 
0175    /// RFileSimple: for simple use cases, survives without libRIO dependency
0176    /// RFileProper: for updating existing files and for storing more than just an RNTuple in the file
0177    std::variant<RFileSimple, RFileProper> fFile;
0178    /// A simple file can either be written as TFile container or as NTuple bare file
0179    bool fIsBare = false;
0180    /// The identifier of the RNTuple; A single writer object can only write a single RNTuple but multiple
0181    /// writers can operate on the same file if (and only if) they use a proper TFile object for writing.
0182    std::string fNTupleName;
0183    /// The file name without parent directory; only required when writing with a C file stream
0184    std::string fFileName;
0185    /// Header and footer location of the ntuple, written on Commit()
0186    RNTuple fNTupleAnchor;
0187    /// Set of streamer info records that should be written to the file.
0188    /// The RNTuple class description is always present.
0189    ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fStreamerInfoMap;
0190 
0191    explicit RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize);
0192 
0193    /// For a TFile container written by a C file stream, write the header and TFile object
0194    void WriteTFileSkeleton(int defaultCompression);
0195    /// The only key that will be visible in file->ls()
0196    /// Returns the size on disk of the anchor object
0197    std::uint64_t WriteTFileNTupleKey(int compression);
0198    /// Write the TList with the RNTuple key
0199    void WriteTFileKeysList(std::uint64_t anchorSize);
0200    /// Write the compressed streamer info record with the description of the RNTuple class
0201    void WriteTFileStreamerInfo(int compression);
0202    /// Last record in the file
0203    void WriteTFileFreeList();
0204    /// For a bare file, which is necessarily written by a C file stream, write file header
0205    void WriteBareFileSkeleton(int defaultCompression);
0206 
0207 public:
0208    /// For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file
0209    enum class EContainerFormat {
0210       kTFile, // ROOT TFile
0211       kBare,  // A thin envelope supporting a single RNTuple only
0212    };
0213 
0214    /// Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
0215    /// Uses a C stream for writing
0216    static std::unique_ptr<RNTupleFileWriter> Recreate(std::string_view ntupleName, std::string_view path,
0217                                                       EContainerFormat containerFormat,
0218                                                       const ROOT::RNTupleWriteOptions &options);
0219    /// The directory parameter can also be a TFile object (TFile inherits from TDirectory).
0220    static std::unique_ptr<RNTupleFileWriter>
0221    Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize);
0222 
0223    RNTupleFileWriter(const RNTupleFileWriter &other) = delete;
0224    RNTupleFileWriter(RNTupleFileWriter &&other) = delete;
0225    RNTupleFileWriter &operator=(const RNTupleFileWriter &other) = delete;
0226    RNTupleFileWriter &operator=(RNTupleFileWriter &&other) = delete;
0227    ~RNTupleFileWriter();
0228 
0229    /// Seek a simple writer to offset. Note that previous data is not flushed immediately, but only by the next write
0230    /// (if necessary).
0231    void Seek(std::uint64_t offset);
0232 
0233    /// Writes the compressed header and registeres its location; lenHeader is the size of the uncompressed header.
0234    std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader);
0235    /// Writes the compressed footer and registeres its location; lenFooter is the size of the uncompressed footer.
0236    std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter);
0237    /// Writes a new record as an RBlob key into the file
0238    std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len);
0239 
0240    /// Prepares buffer for a new record as an RBlob key at offset.
0241    /// (Note that the array type is purely documentation, the argument is actually just a pointer.)
0242    static void PrepareBlobKey(std::int64_t offset, size_t nbytes, size_t len, unsigned char buffer[kBlobKeyLen]);
0243 
0244    /// Reserves a new record as an RBlob key in the file. If keyBuffer is specified, it must be written *before* the
0245    /// returned offset. (Note that the array type is purely documentation, the argument is actually just a pointer.)
0246    std::uint64_t ReserveBlob(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
0247    /// Write into a reserved record; the caller is responsible for making sure that the written byte range is in the
0248    /// previously reserved key.
0249    void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset);
0250    /// Ensures that the streamer info records passed as argument are written to the file
0251    void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos);
0252    /// Writes the RNTuple key to the file so that the header and footer keys can be found
0253    void Commit(int compression = RCompressionSetting::EDefaults::kUseGeneralPurpose);
0254 };
0255 
0256 } // namespace Internal
0257 } // namespace ROOT
0258 
0259 #endif