|
|
|||
File indexing completed on 2025-12-16 10:29:59
0001 /// \file ROOT/RMiniFile.hxx 0002 /// \ingroup NTuple 0003 /// \author Jakob Blomer <jblomer@cern.ch> 0004 /// \date 2019-12-22 0005 0006 /************************************************************************* 0007 * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. * 0008 * All rights reserved. * 0009 * * 0010 * For the licensing terms see $ROOTSYS/LICENSE. * 0011 * For the list of contributors see $ROOTSYS/README/CREDITS. * 0012 *************************************************************************/ 0013 0014 #ifndef ROOT_RMiniFile 0015 #define ROOT_RMiniFile 0016 0017 #include <ROOT/RError.hxx> 0018 #include <ROOT/RNTuple.hxx> 0019 #include <ROOT/RNTupleSerialize.hxx> 0020 #include <ROOT/RSpan.hxx> 0021 #include <Compression.h> 0022 #include <string_view> 0023 0024 #include <cstdint> 0025 #include <cstdio> 0026 #include <memory> 0027 #include <string> 0028 0029 class TDirectory; 0030 class TFileMergeInfo; 0031 class TVirtualStreamerInfo; 0032 0033 namespace ROOT { 0034 0035 namespace Internal { 0036 class RRawFile; 0037 } 0038 0039 class RNTupleWriteOptions; 0040 0041 namespace Internal { 0042 /// Holds status information of an open ROOT file during writing 0043 struct RTFileControlBlock; 0044 0045 // clang-format off 0046 /** 0047 \class ROOT::Internal::RMiniFileReader 0048 \ingroup NTuple 0049 \brief Read RNTuple data blocks from a TFile container, provided by a RRawFile 0050 0051 A RRawFile is used for the byte access. The class implements a minimal subset of TFile, enough to extract 0052 RNTuple data keys. 0053 */ 0054 // clang-format on 0055 class RMiniFileReader { 0056 private: 0057 /// The raw file used to read byte ranges 0058 ROOT::Internal::RRawFile *fRawFile = nullptr; 0059 /// Indicates whether the file is a TFile container or an RNTuple bare file 0060 bool fIsBare = false; 0061 /// If `fMaxKeySize > 0` and ReadBuffer attempts to read `nbytes > maxKeySize`, it will assume the 0062 /// blob being read is chunked and read all the chunks into the buffer. This is symmetrical to 0063 /// what happens in `RNTupleFileWriter::WriteBlob()`. 0064 std::uint64_t fMaxKeySize = 0; 0065 0066 /// Used when the file container turns out to be a bare file 0067 RResult<RNTuple> GetNTupleBare(std::string_view ntupleName); 0068 /// Used when the file turns out to be a TFile container. The ntuplePath variable is either the ntuple name 0069 /// or an ntuple name preceded by a directory (`myNtuple` or `foo/bar/myNtuple` or `/foo/bar/myNtuple`) 0070 RResult<RNTuple> GetNTupleProper(std::string_view ntuplePath); 0071 /// Loads an RNTuple anchor from a TFile at the given file offset (unzipping it if necessary). 0072 RResult<RNTuple> 0073 GetNTupleProperAtOffset(std::uint64_t payloadOffset, std::uint64_t compSize, std::uint64_t uncompLen); 0074 0075 /// Searches for a key with the given name and type in the key index of the directory starting at offsetDir. 0076 /// The offset points to the start of the TDirectory DATA section, without the key and without the name and title 0077 /// of the TFile record (the root directory). 0078 /// Return 0 if the key was not found. Otherwise returns the offset of found key. 0079 std::uint64_t SearchInDirectory(std::uint64_t &offsetDir, std::string_view keyName, std::string_view typeName); 0080 0081 public: 0082 RMiniFileReader() = default; 0083 /// Uses the given raw file to read byte ranges 0084 explicit RMiniFileReader(ROOT::Internal::RRawFile *rawFile); 0085 /// Extracts header and footer location for the RNTuple identified by ntupleName 0086 RResult<RNTuple> GetNTuple(std::string_view ntupleName); 0087 /// Reads a given byte range from the file into the provided memory buffer. 0088 /// If `nbytes > fMaxKeySize` it will perform chunked read from multiple blobs, 0089 /// whose addresses are listed at the end of the first chunk. 0090 void ReadBuffer(void *buffer, size_t nbytes, std::uint64_t offset); 0091 /// Attempts to load the streamer info from the file. 0092 void LoadStreamerInfo(); 0093 0094 std::uint64_t GetMaxKeySize() const { return fMaxKeySize; } 0095 /// If the reader is not used to retrieve the anchor, we need to set the max key size manually 0096 void SetMaxKeySize(std::uint64_t maxKeySize) { fMaxKeySize = maxKeySize; } 0097 }; 0098 0099 // clang-format off 0100 /** 0101 \class ROOT::Internal::RNTupleFileWriter 0102 \ingroup NTuple 0103 \brief Write RNTuple data blocks in a TFile or a bare file container 0104 0105 The writer can create a new TFile container for an RNTuple or add an RNTuple to an existing TFile. 0106 Creating a single RNTuple in a new TFile container can be done with a C file stream without a TFile class. 0107 Updating an existing TFile requires a proper TFile object. Also, writing a remote file requires a proper TFile object. 0108 A stand-alone version of RNTuple can remove the TFile based writer. 0109 */ 0110 // clang-format on 0111 class RNTupleFileWriter { 0112 public: 0113 /// The key length of a blob. It is always a big key (version > 1000) with class name RBlob. 0114 static constexpr std::size_t kBlobKeyLen = 42; 0115 0116 private: 0117 struct RFileProper { 0118 /// A sub directory in fFile or nullptr if the data is stored in the root directory of the file 0119 TDirectory *fDirectory = nullptr; 0120 /// Low-level writing using a TFile 0121 void Write(const void *buffer, size_t nbytes, std::int64_t offset); 0122 /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified, 0123 /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the 0124 /// argument is actually just a pointer.) 0125 std::uint64_t ReserveBlobKey(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr); 0126 operator bool() const { return fDirectory; } 0127 }; 0128 0129 struct RFileSimple { 0130 /// Direct I/O requires that all buffers and write lengths are aligned. It seems 512 byte alignment is the minimum 0131 /// for Direct I/O to work, but further testing showed that it results in worse performance than 4kB. 0132 static constexpr int kBlockAlign = 4096; 0133 /// During commit, WriteTFileKeysList() updates fNBytesKeys and fSeekKeys of the RTFFile located at 0134 /// fSeekFileRecord. Given that the TFile key starts at offset 100 and the file name, which is written twice, 0135 /// is shorter than 255 characters, we should need at most ~600 bytes. However, the header also needs to be 0136 /// aligned to kBlockAlign... 0137 static constexpr std::size_t kHeaderBlockSize = 4096; 0138 0139 // fHeaderBlock and fBlock are raw pointers because we have to manually call operator new and delete. 0140 unsigned char *fHeaderBlock = nullptr; 0141 std::size_t fBlockSize = 0; 0142 std::uint64_t fBlockOffset = 0; 0143 unsigned char *fBlock = nullptr; 0144 0145 /// For the simplest cases, a C file stream can be used for writing 0146 FILE *fFile = nullptr; 0147 /// Whether the C file stream has been opened with Direct I/O, introducing alignment requirements. 0148 bool fDirectIO = false; 0149 /// Keeps track of the seek offset 0150 std::uint64_t fFilePos = 0; 0151 /// Keeps track of the next key offset 0152 std::uint64_t fKeyOffset = 0; 0153 /// Keeps track of TFile control structures, which need to be updated on committing the data set 0154 std::unique_ptr<ROOT::Internal::RTFileControlBlock> fControlBlock; 0155 0156 RFileSimple(); 0157 RFileSimple(const RFileSimple &other) = delete; 0158 RFileSimple(RFileSimple &&other) = delete; 0159 RFileSimple &operator=(const RFileSimple &other) = delete; 0160 RFileSimple &operator=(RFileSimple &&other) = delete; 0161 ~RFileSimple(); 0162 0163 void AllocateBuffers(std::size_t bufferSize); 0164 void Flush(); 0165 0166 /// Writes bytes in the open stream, either at fFilePos or at the given offset 0167 void Write(const void *buffer, size_t nbytes, std::int64_t offset = -1); 0168 /// Writes a TKey including the data record, given by buffer, into fFile; returns the file offset to the payload. 0169 /// The payload is already compressed 0170 std::uint64_t WriteKey(const void *buffer, std::size_t nbytes, std::size_t len, std::int64_t offset = -1, 0171 std::uint64_t directoryOffset = 100, const std::string &className = "", 0172 const std::string &objectName = "", const std::string &title = ""); 0173 /// Reserves an RBlob opaque key as data record and returns the offset of the record. If keyBuffer is specified, 0174 /// it must be written *before* the returned offset. (Note that the array type is purely documentation, the 0175 /// argument is actually just a pointer.) 0176 std::uint64_t ReserveBlobKey(std::size_t nbytes, std::size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr); 0177 operator bool() const { return fFile; } 0178 }; 0179 0180 /// RFileSimple: for simple use cases, survives without libRIO dependency 0181 /// RFileProper: for updating existing files and for storing more than just an RNTuple in the file 0182 std::variant<RFileSimple, RFileProper> fFile; 0183 /// A simple file can either be written as TFile container or as NTuple bare file 0184 bool fIsBare = false; 0185 /// The identifier of the RNTuple; A single writer object can only write a single RNTuple but multiple 0186 /// writers can operate on the same file if (and only if) they use a proper TFile object for writing. 0187 std::string fNTupleName; 0188 /// The file name without parent directory; only required when writing with a C file stream 0189 std::string fFileName; 0190 /// Header and footer location of the ntuple, written on Commit() 0191 RNTuple fNTupleAnchor; 0192 /// Set of streamer info records that should be written to the file. 0193 /// The RNTuple class description is always present. 0194 ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t fStreamerInfoMap; 0195 0196 explicit RNTupleFileWriter(std::string_view name, std::uint64_t maxKeySize); 0197 0198 /// For a TFile container written by a C file stream, write the header and TFile object 0199 void WriteTFileSkeleton(int defaultCompression); 0200 /// The only key that will be visible in file->ls() 0201 /// Returns the size on disk of the anchor object 0202 std::uint64_t WriteTFileNTupleKey(int compression); 0203 /// Write the TList with the RNTuple key 0204 void WriteTFileKeysList(std::uint64_t anchorSize); 0205 /// Write the compressed streamer info record with the description of the RNTuple class 0206 void WriteTFileStreamerInfo(int compression); 0207 /// Last record in the file 0208 void WriteTFileFreeList(); 0209 /// For a bare file, which is necessarily written by a C file stream, write file header 0210 void WriteBareFileSkeleton(int defaultCompression); 0211 0212 public: 0213 /// For testing purposes, RNTuple data can be written into a bare file container instead of a ROOT file 0214 enum class EContainerFormat { 0215 kTFile, // ROOT TFile 0216 kBare, // A thin envelope supporting a single RNTuple only 0217 }; 0218 0219 /// Create or truncate the local file given by path with the new empty RNTuple identified by ntupleName. 0220 /// Uses a C stream for writing 0221 static std::unique_ptr<RNTupleFileWriter> Recreate(std::string_view ntupleName, std::string_view path, 0222 EContainerFormat containerFormat, 0223 const ROOT::RNTupleWriteOptions &options); 0224 /// The directory parameter can also be a TFile object (TFile inherits from TDirectory). 0225 static std::unique_ptr<RNTupleFileWriter> 0226 Append(std::string_view ntupleName, TDirectory &fileOrDirectory, std::uint64_t maxKeySize); 0227 0228 RNTupleFileWriter(const RNTupleFileWriter &other) = delete; 0229 RNTupleFileWriter(RNTupleFileWriter &&other) = delete; 0230 RNTupleFileWriter &operator=(const RNTupleFileWriter &other) = delete; 0231 RNTupleFileWriter &operator=(RNTupleFileWriter &&other) = delete; 0232 ~RNTupleFileWriter(); 0233 0234 /// Seek a simple writer to offset. Note that previous data is not flushed immediately, but only by the next write 0235 /// (if necessary). 0236 void Seek(std::uint64_t offset); 0237 0238 /// Writes the compressed header and registeres its location; lenHeader is the size of the uncompressed header. 0239 std::uint64_t WriteNTupleHeader(const void *data, size_t nbytes, size_t lenHeader); 0240 /// Writes the compressed footer and registeres its location; lenFooter is the size of the uncompressed footer. 0241 std::uint64_t WriteNTupleFooter(const void *data, size_t nbytes, size_t lenFooter); 0242 /// Writes a new record as an RBlob key into the file 0243 std::uint64_t WriteBlob(const void *data, size_t nbytes, size_t len); 0244 0245 /// Prepares buffer for a new record as an RBlob key at offset. 0246 /// (Note that the array type is purely documentation, the argument is actually just a pointer.) 0247 static void PrepareBlobKey(std::int64_t offset, size_t nbytes, size_t len, unsigned char buffer[kBlobKeyLen]); 0248 0249 /// Reserves a new record as an RBlob key in the file. If keyBuffer is specified, it must be written *before* the 0250 /// returned offset. (Note that the array type is purely documentation, the argument is actually just a pointer.) 0251 std::uint64_t ReserveBlob(size_t nbytes, size_t len, unsigned char keyBuffer[kBlobKeyLen] = nullptr); 0252 /// Write into a reserved record; the caller is responsible for making sure that the written byte range is in the 0253 /// previously reserved key. 0254 void WriteIntoReservedBlob(const void *buffer, size_t nbytes, std::int64_t offset); 0255 /// Ensures that the streamer info records passed as argument are written to the file 0256 void UpdateStreamerInfos(const ROOT::Internal::RNTupleSerializer::StreamerInfoMap_t &streamerInfos); 0257 /// Writes the RNTuple key to the file so that the header and footer keys can be found 0258 void Commit(int compression = RCompressionSetting::EDefaults::kUseGeneralPurpose); 0259 }; 0260 0261 } // namespace Internal 0262 } // namespace ROOT 0263 0264 #endif
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|