![]() |
|
|||
File indexing completed on 2025-09-18 09:32:09
0001 /// \file ROOT/RMiniFile.hxx 0002 /// \ingroup NTuple 0003 /// \author Jakob Blomer <jblomer@cern.ch> 0004 /// \date 2019-12-22 0005 0006 /************************************************************************* 0007 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. * 0008 * All rights reserved. * 0009 * * 0010 * For the licensing terms see $ROOTSYS/LICENSE. * 0011 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0012 *************************************************************************/ 0013 0014 #ifndef ROOT_RMiniFile 0015 #define ROOT_RMiniFile 0016 0017 #include <ROOT/RError.hxx> 0018 #include <ROOT/RNTuple.hxx> 0019 #include <ROOT/RNTupleSerialize.hxx> 0020 #include <ROOT/RSpan.hxx> 0021 #include <Compression.h> 0022 #include <string_view> 0023 0024 #include <cstdint> 0025 #include <cstdio> 0026 #include <memory> 0027 #include <string> 0028 0029 class TDirectory; 0030 class TFileMergeInfo; 0031 class TVirtualStreamerInfo; 0032 0033 namespace ROOT { 0034 0035 namespace Internal { 0036 class RRawFile; 0037 } 0038 0039 class RNTupleWriteOptions; 0040 0041 namespace Internal { 0042 /// Holds status information of an open ROOT file during writing 0043 struct RTFileControlBlock; 0044 0045 // clang-format off 0046 /** 0047 \class ROOT::Internal::RMiniFileReader 0048 \ingroup NTuple 0049 \brief Read RNTuple data blocks from a TFile container, provided by a RRawFile 0050 0051 A RRawFile is used for the byte access. The class implements a minimal subset of TFile, enough to extract 0052 RNTuple data keys. 0053 */ 0054 // clang-format on 0055 class RMiniFileReader { 0056 private: 0057 /// The raw file used to read byte ranges 0058 ROOT::Internal::RRawFile *fRawFile = nullptr; 0059 /// Indicates whether the file is a TFile container or an RNTuple bare file 0060 bool fIsBare = false; 0061 /// If `fMaxKeySize > 0` and ReadBuffer attempts to read `nbytes > maxKeySize`, it will assume the 0062 /// blob being read is chunked and read all the chunks into the buffer. This is symmetrical to 0063 /// what happens in `RNTupleFileWriter::WriteBlob()`. 0064 std::uint64_t fMaxKeySize = 0; 0065 0066 /// Used when the file container turns out to be a bare file 0067 RResult<RNTuple> GetNTupleBare(std::string_view ntupleName); 0068 /// Used when the file turns out to be a TFile container. The ntuplePath variable is either the ntuple name 0069 /// or an ntuple name preceded by a directory (`myNtuple` or `foo/bar/myNtuple` or `/foo/bar/myNtuple`) 0070 RResult<RNTuple> GetNTupleProper(std::string_view ntuplePath); 0071 0072 /// Searches for a key with the given name and type in the key index of the directory starting at offsetDir. 0073 /// The offset points to the start of the TDirectory DATA section, without the key and without the name and title 0074 /// of the TFile record (the root directory). 0075 /// Return 0 if the key was not found. Otherwise returns the offset of found key. 0076 std::uint64_t SearchInDirectory(std::uint64_t &offsetDir, std::string_view keyName, std::string_view typeName); 0077 0078 public: 0079 RMiniFileReader() = default; 0080 /// Uses the given raw file to read byte ranges 0081 explicit RMiniFileReader(ROOT::Internal::RRawFile *rawFile); 0082 /// Extracts header and footer location for the RNTuple identified by ntupleName 0083 RResult<RNTuple> GetNTuple(std::string_view ntupleName); 0084 /// Reads a given byte range from the file into the provided memory buffer. 0085 /// If `nbytes > fMaxKeySize` it will perform chunked read from multiple blobs, 0086 /// whose addresses are listed at the end of the first chunk. 0087 void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset); 0088 0089 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; } 0090 /// If the reader is not used to retrieve the anchor, we need to set the max key size manually 0091 void SetMaxKeySize(std::uint64_t maxKeySize) { fMaxKeySize = maxKeySize; } 0092 }; 0093 0094 // clang-format off 0095 /** 0096 \class ROOT::Internal::RNTupleFileWriter 0097 \ingroup NTuple 0098 \brief Write RNTuple data blocks in a TFile or a bare file container 0099 0100 The writer can create a new TFile container for an RNTuple or add an RNTuple to an existing TFile. 0101 Creating a single RNTuple in a new TFile container can be done with a C file stream without a TFile class. 0102 Updating an existing TFile requires a proper TFile object. Also, writing a remote file requires a proper TFile object. 0103 A stand-alone version of RNTuple can remove the TFile based writer. 0104 */ 0105 // clang-format on 0106 class RNTupleFileWriter { 0107 public: 0108 /// The key length of a blob. It is always a big key (version > 1000) with class name RBlob. 0109 static constexpr std::size_t kBlobKeyLen = 42; 0110 0111 private: 0112 struct RFileProper { 0113 /// A sub directory in fFile or nullptr if the data is stored in the root directory of the file 0114 TDirectory *fDirectory = nullptr; 0115 /// Low-level writing using a TFile 0116 void Write(const void *buffer, size_t nbytes, std::int64_t offset); 0117 /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified, 0118 /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the 0119 /// argument is actually just a pointer.) 0120 std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr); 0121 operator bool() const { return fDirectory; } 0122 }; 0123 0124 struct RFileSimple { 0125 /// Direct I/O requires that all buffers and write lengths are aligned. It seems 512 byte alignment is the minimum 0126 /// for Direct I/O to work, but further testing showed that it results in worse performance than 4kB. 0127 static constexpr int kBlockAlign = 4096; 0128 /// During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at 0129 /// fSeekFileRecord. Given that the TFile key starts at offset 100 and the file name, which is written twice, 0130 /// is shorter than 255 characters, we should need at most ~600 bytes. However, the header also needs to be 0131 /// aligned to kBlockAlign... 0132 static constexpr std::size_t kHeaderBlockSize = 4096; 0133 0134 // fHeaderBlock and fBlock are raw pointers because we have to manually call operator new and delete. 0135 unsigned char *fHeaderBlock = nullptr; 0136 std::size_t fBlockSize = 0; 0137 std::uint64_t fBlockOffset = 0; 0138 unsigned char *fBlock = nullptr; 0139 0140 /// For the simplest cases, a C file stream can be used for writing 0141 FILE *fFile = nullptr; 0142 /// Whether the C file stream has been opened with Direct I/O, introducing alignment requirements. 0143 bool fDirectIO = false; 0144 /// Keeps track of the seek offset 0145 std::uint64_t fFilePos = 0; 0146 /// Keeps track of the next key offset 0147 std::uint64_t fKeyOffset = 0; 0148 /// Keeps track of TFile control structures, which need to be updated on committing the data set 0149 std::unique_ptr<ROOT::Internal::RTFileControlBlock> fControlBlock; 0150 0151 RFileSimple(); 0152 RFileSimple(const RFileSimple &other) = delete; 0153 RFileSimple(RFileSimple &&other) = delete; 0154 RFileSimple &operator=(const RFileSimple &other) = delete; 0155 RFileSimple &operator=(RFileSimple &&other) = delete; 0156 ~RFileSimple(); 0157 0158 void AllocateBuffers(std::size_t bufferSize); 0159 void Flush(); 0160 0161 /// Writes bytes in the open stream, either at fFilePos or at the given offset 0162 void Write(const void *buffer, size_t nbytes, std::int64_t offset = -1); 0163 /// Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the payload. 0164 /// The payload is already compressed 0165 std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset = -1, 0166 std::uint64_t directoryOffset = 100, const std::string &className = "", 0167 const std::string &objectName = "", const std::string &title = ""); 0168 /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified, 0169 /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the 0170 /// argument is actually just a pointer.) 0171 std::uint64_t ReserveBlobKey(std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr); 0172 operator bool() const { return fFile; } 0173 }; 0174 0175 /// RFileSimple: for simple use cases, survives without libRIO dependency 0176 /// RFileProper: for updating existing files and for storing more than just an RNTuple in the file 0177 std::variant<RFileSimple, RFileProper> fFile; 0178 /// A simple file can either be written as TFile container or as NTuple bare file 0179 bool fIsBare = false; 0180 /// The identifier of the RNTuple; A single writer object can only write a single RNTuple but multiple 0181 /// writers can operate on the same file if (and only if) they use a proper TFile object for writing. 0182 std::string fNTupleName; 0183 /// The file name without parent directory; only required when writing with a C file stream 0184 std::string fFileName; 0185 /// Header and footer location of the ntuple, written on Commit() 0186 RNTuple fNTupleAnchor; 0187 /// Set of streamer info records that should be written to the file. 0188 /// The RNTuple class description is always present. 0189 ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fStreamerInfoMap; 0190 0191 explicit RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize); 0192 0193 /// For a TFile container written by a C file stream, write the header and TFile object 0194 void WriteTFileSkeleton(int defaultCompression); 0195 /// The only key that will be visible in file->ls() 0196 /// Returns the size on disk of the anchor object 0197 std::uint64_t WriteTFileNTupleKey(int compression); 0198 /// Write the TList with the RNTuple key 0199 void WriteTFileKeysList(std::uint64_t anchorSize); 0200 /// Write the compressed streamer info record with the description of the RNTuple class 0201 void WriteTFileStreamerInfo(int compression); 0202 /// Last record in the file 0203 void WriteTFileFreeList(); 0204 /// For a bare file, which is necessarily written by a C file stream, write file header 0205 void WriteBareFileSkeleton(int defaultCompression); 0206 0207 public: 0208 /// For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file 0209 enum class EContainerFormat { 0210 kTFile, // ROOT TFile 0211 kBare, // A thin envelope supporting a single RNTuple only 0212 }; 0213 0214 /// Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName. 0215 /// Uses a C stream for writing 0216 static std::unique_ptr<RNTupleFileWriter> Recreate(std::string_view ntupleName, std::string_view path, 0217 EContainerFormat containerFormat, 0218 const ROOT::RNTupleWriteOptions &options); 0219 /// The directory parameter can also be a TFile object (TFile inherits from TDirectory). 0220 static std::unique_ptr<RNTupleFileWriter> 0221 Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize); 0222 0223 RNTupleFileWriter(const RNTupleFileWriter &other) = delete; 0224 RNTupleFileWriter(RNTupleFileWriter &&other) = delete; 0225 RNTupleFileWriter &operator=(const RNTupleFileWriter &other) = delete; 0226 RNTupleFileWriter &operator=(RNTupleFileWriter &&other) = delete; 0227 ~RNTupleFileWriter(); 0228 0229 /// Seek a simple writer to offset. Note that previous data is not flushed immediately, but only by the next write 0230 /// (if necessary). 0231 void Seek(std::uint64_t offset); 0232 0233 /// Writes the compressed header and registeres its location; lenHeader is the size of the uncompressed header. 0234 std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader); 0235 /// Writes the compressed footer and registeres its location; lenFooter is the size of the uncompressed footer. 0236 std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter); 0237 /// Writes a new record as an RBlob key into the file 0238 std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len); 0239 0240 /// Prepares buffer for a new record as an RBlob key at offset. 0241 /// (Note that the array type is purely documentation, the argument is actually just a pointer.) 0242 static void PrepareBlobKey(std::int64_t offset, size_t nbytes, size_t len, unsigned char buffer[kBlobKeyLen]); 0243 0244 /// Reserves a new record as an RBlob key in the file. If keyBuffer is specified, it must be written *before* the 0245 /// returned offset. (Note that the array type is purely documentation, the argument is actually just a pointer.) 0246 std::uint64_t ReserveBlob(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr); 0247 /// Write into a reserved record; the caller is responsible for making sure that the written byte range is in the 0248 /// previously reserved key. 0249 void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset); 0250 /// Ensures that the streamer info records passed as argument are written to the file 0251 void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos); 0252 /// Writes the RNTuple key to the file so that the header and footer keys can be found 0253 void Commit(int compression = RCompressionSetting::EDefaults::kUseGeneralPurpose); 0254 }; 0255 0256 } // namespace Internal 0257 } // namespace ROOT 0258 0259 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |