![]() |
|
|||
File indexing completed on 2025-09-13 09:10:20
// @(#)root/io:$Id$
// Author: Jakob Blomer

/*************************************************************************
 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef ROOT_RRawFile
#define ROOT_RRawFile

#include <string_view>

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>

namespace ROOT {
namespace Internal {

/**
 * \class RRawFile RRawFile.hxx
 * \ingroup IO
 *
 * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
 * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
 * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
 * RDataSource implementations and for RNTuple.
 *
 * Files are addressed by URL consisting of a transport protocol part and a location, like file:///path/to/data
 * If the transport protocol part and the :// separator are missing, the default protocol is local file. Files are
 * opened when required (on reading, getting file size) and closed on object destruction.
 *
 * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
 *
 * RRawFile objects are conditionally thread safe. See the user manual for further details:
 * https://root.cern/manual/thread_safety/
 */
class RRawFile {
public:
   /// kAuto detects the line break from the first line, kSystem picks the system's default
   enum class ELineBreaks { kAuto, kSystem, kUnix, kWindows };

   /// On construction, an ROptions parameter can customize the RRawFile behavior
   struct ROptions {
      /// Sentinel: use the protocol-dependent default block size
      static constexpr std::size_t kUseDefaultBlockSize = std::size_t(-1);

      ELineBreaks fLineBreak = ELineBreaks::kAuto;
      /// Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering.
      std::size_t fBlockSize = kUseDefaultBlockSize;
      // Define an empty constructor to work around a bug in Clang: https://github.com/llvm/llvm-project/issues/36032
      ROptions() {}
   };

   /// Used for vector reads from multiple offsets into multiple buffers. This is unlike readv(), which scatters a
   /// single byte range from disk into multiple buffers.
   struct RIOVec {
      /// The destination for reading
      void *fBuffer = nullptr;
      /// The file offset
      std::uint64_t fOffset = 0;
      /// The number of desired bytes
      std::size_t fSize = 0;
      /// The number of actually read bytes, set by ReadV()
      std::size_t fOutBytes = 0;
   };

   /// Implementations may enforce limits on the use of vector reads. These limits can depend on the server or
   /// the specific file opened and can be queried per RRawFile object through GetReadVLimits().
   /// Note that due to such limits, a vector read with a single request can behave differently from a Read() call.
   /// A field left at its all-ones default means "no limit" for that dimension.
   struct RIOVecLimits {
      /// Maximum number of elements in a ReadV request vector
      std::size_t fMaxReqs = static_cast<std::size_t>(-1);
      /// Maximum size in bytes of any single request in the request vector
      std::size_t fMaxSingleSize = static_cast<std::size_t>(-1);
      /// Maximum size in bytes of the sum of requests in the vector
      std::uint64_t fMaxTotalSize = static_cast<std::uint64_t>(-1);

      /// True if the number of requests per ReadV call is bounded
      bool HasReqsLimit() const { return fMaxReqs != static_cast<std::size_t>(-1); }
      /// True if either the per-request size or the total request size is bounded
      bool HasSizeLimit() const
      {
         return fMaxSingleSize != static_cast<std::size_t>(-1) || fMaxTotalSize != static_cast<std::uint64_t>(-1);
      }
   };

private:
   /// Don't change without adapting ReadAt()
   static constexpr unsigned int kNumBlockBuffers = 2;
   struct RBlockBuffer {
      /// Where in the open file does fBuffer start
      std::uint64_t fBufferOffset = 0;
      /// The number of currently buffered bytes in fBuffer
      std::size_t fBufferSize = 0;
      /// Points into the I/O buffer with data from the file, not owned.
      unsigned char *fBuffer = nullptr;

      RBlockBuffer() = default;
      RBlockBuffer(const RBlockBuffer &) = delete;
      RBlockBuffer &operator=(const RBlockBuffer &) = delete;
      ~RBlockBuffer() = default;

      /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
      std::size_t CopyTo(void *buffer, std::size_t nbytes, std::uint64_t offset);
   };
   /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
   unsigned int fBlockBufferIdx = 0;
   /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
   RBlockBuffer fBlockBuffers[kNumBlockBuffers];
   /// Memory block containing the block buffers consecutively
   std::unique_ptr<unsigned char[]> fBufferSpace;
   /// Used as a marker that the file size was not yet queried
   static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
   /// The cached file size
   std::uint64_t fFileSize = kUnknownFileSize;
   /// Files are opened lazily and only when required; the open state is kept by this flag
   bool fIsOpen = false;
   /// Runtime switch to decide if reads are buffered or directly sent to ReadAtImpl()
   bool fIsBuffering = true;

protected:
   std::string fUrl;
   ROptions fOptions;
   /// The current position in the file, which can be changed by Seek, Read, and Readln
   std::uint64_t fFilePos = 0;

   /// OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl().
   /// If fOptions.fBlockSize is still ROptions::kUseDefaultBlockSize, derived classes are responsible for setting
   /// a sensible value. After a call to OpenImpl(), fOptions.fBlockSize must hold a concrete block size
   /// (zero turns off I/O buffering) and no longer the kUseDefaultBlockSize sentinel.
   virtual void OpenImpl() = 0;
   /// Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
   /// therefore derived classes should return nbytes bytes if available.
   virtual std::size_t ReadAtImpl(void *buffer, std::size_t nbytes, std::uint64_t offset) = 0;
   /// Derived classes should return the file size
   virtual std::uint64_t GetSizeImpl() = 0;

   /// By default implemented as a loop of ReadAt calls but can be overridden, e.g. by the XRootD or DAVIX
   /// implementations
   virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq);

   /// Open the file if not already open. Otherwise noop.
   void EnsureOpen();

public:
   RRawFile(std::string_view url, ROptions options);
   RRawFile(const RRawFile &) = delete;
   RRawFile &operator=(const RRawFile &) = delete;
   virtual ~RRawFile() = default;

   /// Create a new RawFile that accesses the same resource. The file pointer is reset to zero.
   virtual std::unique_ptr<RRawFile> Clone() const = 0;

   /// Factory method that returns a suitable concrete implementation according to the transport in the url
   static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
   /// Returns only the file location, e.g. "server/file" for http://server/file
   static std::string GetLocation(std::string_view url);
   /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
   static std::string GetTransport(std::string_view url);

   /// Buffered read from a random position. Returns the actual number of bytes read.
   /// Short reads indicate the end of the file
   std::size_t ReadAt(void *buffer, std::size_t nbytes, std::uint64_t offset);
   /// Read from fFilePos offset. Returns the actual number of bytes read.
   std::size_t Read(void *buffer, std::size_t nbytes);
   /// Change the cursor fFilePos
   void Seek(std::uint64_t offset);
   /// Returns the offset for the next Read/Readln call
   std::uint64_t GetFilePos() const { return fFilePos; }
   /// Returns the size of the file
   std::uint64_t GetSize();
   /// Returns the url of the file
   std::string GetUrl() const;

   /// Opens the file if necessary and calls ReadVImpl
   void ReadV(RIOVec *ioVec, unsigned int nReq);
   /// Returns the limits regarding the ioVec input to ReadV for this specific file; may open the file as a side-effect.
   /// The base implementation returns default-constructed limits, i.e. no limits.
   virtual RIOVecLimits GetReadVLimits() { return RIOVecLimits(); }

   /// Turn off buffered reads; all scalar read requests go directly to the implementation. Buffering can be turned
   /// back on.
   void SetBuffering(bool value);
   bool IsBuffering() const { return fIsBuffering; }

   /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
   bool Readln(std::string &line);

   /// Once opened, the file stays open until destruction of the RRawFile object
   bool IsOpen() const { return fIsOpen; }
}; // class RRawFile

} // namespace Internal
} // namespace ROOT

#endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |