Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:29:59

0001 /// \file ROOT/RMiniFile.hxx
0002 /// \ingroup NTuple
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2019-12-22
0005 
0006 /*************************************************************************
0007  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers.               *
0008  * All rights reserved.                                                  *
0009  *                                                                       *
0010  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0011  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0012  *************************************************************************/
0013 
0014 #ifndef ROOT_RMiniFile
0015 #define ROOT_RMiniFile
0016 
0017 #include <ROOT/RError.hxx>
0018 #include <ROOT/RNTuple.hxx>
0019 #include <ROOT/RNTupleSerialize.hxx>
0020 #include <ROOT/RSpan.hxx>
0021 #include <Compression.h>
0022 #include <string_view>
0023 
0024 #include <cstdint>
0025 #include <cstdio>
0026 #include <memory>
0027 #include <string>
0028 
0029 class TDirectory;
0030 class TFileMergeInfo;
0031 class TVirtualStreamerInfo;
0032 
0033 namespace ROOT {
0034 
0035 namespace Internal {
0036 class RRawFile;
0037 }
0038 
0039 class RNTupleWriteOptions;
0040 
0041 namespace Internal {
0042 /// Holds status information of an open ROOT file during writing
0043 struct RTFileControlBlock;
0044 
0045 // clang-format off
0046 /**
0047 \class ROOT::Internal::RMiniFileReader
0048 \ingroup NTuple
0049 \brief Read RNTuple data blocks from a TFile container, provided by an RRawFile
0050 
0051 An RRawFile is used for the byte access.  The class implements a minimal subset of TFile, enough to extract
0052 RNTuple data keys.
0053 */
0054 // clang-format on
0055 class RMiniFileReader {
0056 private:
0057    /// The raw file used to read byte ranges. NOTE(review): raw pointer, presumably non-owning -- confirm.
0058    ROOT::Internal::RRawFile *fRawFile = nullptr;
0059    /// Indicates whether the file is a TFile container or an RNTuple bare file
0060    bool fIsBare = false;
0061    /// If `fMaxKeySize > 0` and ReadBuffer attempts to read `nbytes > fMaxKeySize`, it will assume the
0062    /// blob being read is chunked and read all the chunks into the buffer. This is symmetrical to
0063    /// what happens in `RNTupleFileWriter::WriteBlob()`.
0064    std::uint64_t fMaxKeySize = 0;
0065 
0066    /// Used when the file container turns out to be a bare file
0067    RResult<RNTuple> GetNTupleBare(std::string_view ntupleName);
0068    /// Used when the file turns out to be a TFile container. The ntuplePath variable is either the ntuple name
0069    /// or an ntuple name preceded by a directory (`myNtuple` or `foo/bar/myNtuple` or `/foo/bar/myNtuple`)
0070    RResult<RNTuple> GetNTupleProper(std::string_view ntuplePath);
0071    /// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary).
0072    RResult<RNTuple>
0073    GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen);
0074 
0075    /// Searches for a key with the given name and type in the key index of the directory starting at offsetDir.
0076    /// The offset points to the start of the TDirectory DATA section, without the key and without the name and title
0077    /// of the TFile record (the root directory). NOTE(review): offsetDir is a non-const reference -- confirm if updated.
0078    /// Returns 0 if the key was not found; otherwise returns the offset of the found key.
0079    std::uint64_t SearchInDirectory(std::uint64_t &offsetDir, std::string_view keyName, std::string_view typeName);
0080 
0081 public:
0082    RMiniFileReader() = default; ///< Creates a reader with no raw file attached (fRawFile stays nullptr)
0083    /// Uses the given raw file to read byte ranges
0084    explicit RMiniFileReader(ROOT::Internal::RRawFile *rawFile);
0085    /// Extracts header and footer location for the RNTuple identified by ntupleName
0086    RResult<RNTuple> GetNTuple(std::string_view ntupleName);
0087    /// Reads a given byte range from the file into the provided memory buffer.
0088    /// If `nbytes > fMaxKeySize` it will perform a chunked read from multiple blobs,
0089    /// whose addresses are listed at the end of the first chunk.
0090    void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset);
0091    /// Attempts to load the streamer info from the file.
0092    void LoadStreamerInfo();
0093 
0094    std::uint64_t GetMaxKeySize() const { return fMaxKeySize; } ///< Returns the current value of fMaxKeySize
0095    /// If the reader is not used to retrieve the anchor, we need to set the max key size manually
0096    void SetMaxKeySize(std::uint64_t maxKeySize) { fMaxKeySize = maxKeySize; }
0097 };
0098 
0099 // clang-format off
0100 /**
0101 \class ROOT::Internal::RNTupleFileWriter
0102 \ingroup NTuple
0103 \brief Write RNTuple data blocks in a TFile or a bare file container
0104 
0105 The writer can create a new TFile container for an RNTuple or add an RNTuple to an existing TFile.
0106 Creating a single RNTuple in a new TFile container can be done with a C file stream without a TFile class.
0107 Updating an existing TFile requires a proper TFile object.  Also, writing a remote file requires a proper TFile object.
0108 A stand-alone version of RNTuple can remove the TFile based writer.
0109 */
0110 // clang-format on
0111 class RNTupleFileWriter {
0112 public:
0113    /// The key length of a blob. It is always a big key (version > 1000) with class name RBlob.
0114    static constexpr std::size_t kBlobKeyLen = 42;
0115 
0116 private:
0117    struct RFileProper {
0118       /// A sub directory in fFile or nullptr if the data is stored in the root directory of the file
0119       TDirectory *fDirectory = nullptr;
0120       /// Low-level writing using a TFile
0121       void Write(const void *buffer, size_t nbytes, std::int64_t offset);
0122       /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified,
0123       /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the
0124       /// argument is actually just a pointer.)
0125       std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
0126       operator bool() const { return fDirectory; } ///< True if fDirectory is non-null
0127    };
0128 
0129    struct RFileSimple {
0130       /// Direct I/O requires that all buffers and write lengths are aligned. It seems 512 byte alignment is the minimum
0131       /// for Direct I/O to work, but further testing showed that it results in worse performance than 4kB.
0132       static constexpr int kBlockAlign = 4096;
0133       /// During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at
0134       /// fSeekFileRecord. Given that the TFile key starts at offset 100 and the file name, which is written twice,
0135       /// is shorter than 255 characters, we should need at most ~600 bytes. However, the header also needs to be
0136       /// aligned to kBlockAlign...
0137       static constexpr std::size_t kHeaderBlockSize = 4096;
0138 
0139       // fHeaderBlock and fBlock are raw pointers because we have to manually call operator new and delete.
0140       unsigned char *fHeaderBlock = nullptr;
0141       std::size_t fBlockSize = 0;     // NOTE(review): presumably the size of fBlock in bytes -- confirm
0142       std::uint64_t fBlockOffset = 0; // NOTE(review): presumably the file offset that fBlock maps to -- confirm
0143       unsigned char *fBlock = nullptr;
0144 
0145       /// For the simplest cases, a C file stream can be used for writing
0146       FILE *fFile = nullptr;
0147       /// Whether the C file stream has been opened with Direct I/O, introducing alignment requirements.
0148       bool fDirectIO = false;
0149       /// Keeps track of the seek offset
0150       std::uint64_t fFilePos = 0;
0151       /// Keeps track of the next key offset
0152       std::uint64_t fKeyOffset = 0;
0153       /// Keeps track of TFile control structures, which need to be updated on committing the data set
0154       std::unique_ptr<ROOT::Internal::RTFileControlBlock> fControlBlock;
0155 
0156       RFileSimple(); // The type is neither copyable nor movable: all four copy/move operations below are deleted.
0157       RFileSimple(const RFileSimple &other) = delete;
0158       RFileSimple(RFileSimple &&other) = delete;
0159       RFileSimple &operator=(const RFileSimple &other) = delete;
0160       RFileSimple &operator=(RFileSimple &&other) = delete;
0161       ~RFileSimple();
0162 
0163       void AllocateBuffers(std::size_t bufferSize); // NOTE(review): presumably sets up fHeaderBlock/fBlock -- confirm
0164       void Flush();                                 // NOTE(review): presumably writes out buffered data -- confirm
0165 
0166       /// Writes bytes to the open stream, either at fFilePos or at the given offset
0167       /// (NOTE(review): the default offset of -1 presumably selects fFilePos -- confirm).
0167       void Write(const void *buffer, size_t nbytes, std::int64_t offset = -1);
0168       /// Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the payload.
0169       /// The payload is already compressed
0170       std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset = -1,
0171                              std::uint64_t directoryOffset = 100, const std::string &className = "",
0172                              const std::string &objectName = "", const std::string &title = "");
0173       /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified,
0174       /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the
0175       /// argument is actually just a pointer.)
0176       std::uint64_t ReserveBlobKey(std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
0177       operator bool() const { return fFile; } ///< True if fFile is non-null
0178    };
0179 
0180    /// RFileSimple: for simple use cases, survives without libRIO dependency
0181    /// RFileProper: for updating existing files and for storing more than just an RNTuple in the file
0182    std::variant<RFileSimple, RFileProper> fFile; // NOTE(review): <variant> is not among this header's direct includes
0183    /// A simple file can either be written as TFile container or as NTuple bare file
0184    bool fIsBare = false;
0185    /// The identifier of the RNTuple; a single writer object can only write a single RNTuple but multiple
0186    /// writers can operate on the same file if (and only if) they use a proper TFile object for writing.
0187    std::string fNTupleName;
0188    /// The file name without parent directory; only required when writing with a C file stream
0189    std::string fFileName;
0190    /// Header and footer location of the ntuple, written on Commit()
0191    RNTuple fNTupleAnchor;
0192    /// Set of streamer info records that should be written to the file.
0193    /// The RNTuple class description is always present.
0194    ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fStreamerInfoMap;
0195 
0196    explicit RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize); // private; see Recreate() and Append()
0197 
0198    /// For a TFile container written by a C file stream, write the header and TFile object
0199    void WriteTFileSkeleton(int defaultCompression);
0200    /// The only key that will be visible in file->ls()
0201    /// Returns the size on disk of the anchor object
0202    std::uint64_t WriteTFileNTupleKey(int compression);
0203    /// Write the TList with the RNTuple key
0204    void WriteTFileKeysList(std::uint64_t anchorSize);
0205    /// Write the compressed streamer info record with the description of the RNTuple class
0206    void WriteTFileStreamerInfo(int compression);
0207    /// Last record in the file
0208    void WriteTFileFreeList();
0209    /// For a bare file, which is necessarily written by a C file stream, write file header
0210    void WriteBareFileSkeleton(int defaultCompression);
0211 
0212 public:
0213    /// For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file
0214    enum class EContainerFormat {
0215       kTFile, // ROOT TFile
0216       kBare,  // A thin envelope supporting a single RNTuple only
0217    };
0218 
0219    /// Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName.
0220    /// Uses a C stream for writing
0221    static std::unique_ptr<RNTupleFileWriter> Recreate(std::string_view ntupleName, std::string_view path,
0222                                                       EContainerFormat containerFormat,
0223                                                       const ROOT::RNTupleWriteOptions &options);
0224    /// The directory parameter can also be a TFile object (TFile inherits from TDirectory).
0225    static std::unique_ptr<RNTupleFileWriter>
0226    Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize);
0227 
0228    RNTupleFileWriter(const RNTupleFileWriter &other) = delete;
0229    RNTupleFileWriter(RNTupleFileWriter &&other) = delete;
0230    RNTupleFileWriter &operator=(const RNTupleFileWriter &other) = delete;
0231    RNTupleFileWriter &operator=(RNTupleFileWriter &&other) = delete;
0232    ~RNTupleFileWriter();
0233 
0234    /// Seek a simple writer to offset. Note that previous data is not flushed immediately, but only by the next write
0235    /// (if necessary).
0236    void Seek(std::uint64_t offset);
0237 
0238    /// Writes the compressed header and registers its location; lenHeader is the size of the uncompressed header.
0239    std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader);
0240    /// Writes the compressed footer and registers its location; lenFooter is the size of the uncompressed footer.
0241    std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter);
0242    /// Writes a new record as an RBlob key into the file
0243    std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len);
0244 
0245    /// Prepares buffer for a new record as an RBlob key at offset.
0246    /// (Note that the array type is purely documentation, the argument is actually just a pointer.)
0247    static void PrepareBlobKey(std::int64_t offset, size_t nbytes, size_t len, unsigned char buffer[kBlobKeyLen]);
0248 
0249    /// Reserves a new record as an RBlob key in the file. If keyBuffer is specified, it must be written *before* the
0250    /// returned offset. (Note that the array type is purely documentation, the argument is actually just a pointer.)
0251    std::uint64_t ReserveBlob(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr);
0252    /// Write into a reserved record; the caller is responsible for making sure that the written byte range is in the
0253    /// previously reserved key.
0254    void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset);
0255    /// Ensures that the streamer info records passed as argument are written to the file
0256    void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos);
0257    /// Writes the RNTuple key to the file so that the header and footer keys can be found
0258    void Commit(int compression = RCompressionSetting::EDefaults::kUseGeneralPurpose);
0259 };
0260 
0261 } // namespace Internal
0262 } // namespace ROOT
0263 
0264 #endif