|
||||
File indexing completed on 2025-01-18 10:10:48
// @(#)root/io:$Id$
// Author: Jakob Blomer

/*************************************************************************
 * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef ROOT_RRawFile
#define ROOT_RRawFile

#include <string_view>

#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>

namespace ROOT {
namespace Internal {

/**
 * \class RRawFile RRawFile.hxx
 * \ingroup IO
 *
 * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
 * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
 * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
 * RDataSource implementations and for RNTuple.
 *
 * Files are addressed by URL consisting of a transport protocol part and a location, like file:///path/to/data
 * If the transport protocol part and the :// separator are missing, the default protocol is local file. Files are
 * opened when required (on reading, getting file size) and closed on object destruction.
 *
 * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
 *
 * RRawFile objects are conditionally thread safe. See the user manual for further details:
 * https://root.cern/manual/thread_safety/
 */
class RRawFile {
public:
   /// Derived classes do not necessarily need to provide file size information but they can return "not known" instead
   static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
   /// kAuto detects the line break from the first line, kSystem picks the system's default
   enum class ELineBreaks { kAuto, kSystem, kUnix, kWindows };

   // Combination of flags provided by derived classes about the nature of the file
   /// GetSize() does not return kUnknownFileSize
   static constexpr int kFeatureHasSize = 0x01;
   /// Map() and Unmap() are implemented
   static constexpr int kFeatureHasMmap = 0x02;
   /// File supports async IO
   static constexpr int kFeatureHasAsyncIo = 0x04;

   /// On construction, an ROptions parameter can customize the RRawFile behavior
   struct ROptions {
      /// Line break convention used for line-wise reading; defaults to kAuto
      ELineBreaks fLineBreak;
      /**
       * Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering. A negative value indicates
       * that the protocol-dependent default block size should be used.
       */
      int fBlockSize;
      /// Defaults: automatic line break detection and the protocol-dependent block size (-1)
      ROptions() : fLineBreak(ELineBreaks::kAuto), fBlockSize(-1) {}
   };

   /// Used for vector reads from multiple offsets into multiple buffers. This is unlike readv(), which scatters a
   /// single byte range from disk into multiple buffers.
   struct RIOVec {
      /// The destination for reading
      void *fBuffer = nullptr;
      /// The file offset
      std::uint64_t fOffset = 0;
      /// The number of desired bytes
      std::size_t fSize = 0;
      /// The number of actually read bytes, set by ReadV()
      std::size_t fOutBytes = 0;
   };

   /// Implementations may enforce limits on the use of vector reads. These limits can depend on the server or
   /// the specific file opened and can be queried per RRawFile object through GetReadVLimits().
   /// Note that due to such limits, a vector read with a single request can behave differently from a Read() call.
   /// A limit equal to the maximum value of its type (i.e. -1 converted to unsigned) means "no limit".
   struct RIOVecLimits {
      /// Maximum number of elements in a ReadV request vector
      std::size_t fMaxReqs = static_cast<std::size_t>(-1);
      /// Maximum size in bytes of any single request in the request vector
      std::size_t fMaxSingleSize = static_cast<std::size_t>(-1);
      /// Maximum size in bytes of the sum of requests in the vector
      std::uint64_t fMaxTotalSize = static_cast<std::uint64_t>(-1);

      /// True if the number of requests per ReadV call is bounded
      bool HasReqsLimit() const { return fMaxReqs != static_cast<std::size_t>(-1); }
      /// True if either the size of a single request or the total size of all requests is bounded
      bool HasSizeLimit() const
      {
         return fMaxSingleSize != static_cast<std::size_t>(-1) || fMaxTotalSize != static_cast<std::uint64_t>(-1);
      }
   };

private:
   /// Don't change without adapting ReadAt()
   static constexpr unsigned int kNumBlockBuffers = 2;
   struct RBlockBuffer {
      /// Where in the open file does fBuffer start
      std::uint64_t fBufferOffset;
      /// The number of currently buffered bytes in fBuffer
      std::size_t fBufferSize;
      /// Points into the I/O buffer with data from the file, not owned.
      unsigned char *fBuffer;

      RBlockBuffer() : fBufferOffset(0), fBufferSize(0), fBuffer(nullptr) {}
      RBlockBuffer(const RBlockBuffer &) = delete;
      RBlockBuffer &operator=(const RBlockBuffer &) = delete;
      ~RBlockBuffer() = default;

      /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
      std::size_t CopyTo(void *buffer, std::size_t nbytes, std::uint64_t offset);
   };
   /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
   unsigned int fBlockBufferIdx;
   /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
   RBlockBuffer fBlockBuffers[kNumBlockBuffers];
   /// Memory block containing the block buffers consecutively
   unsigned char *fBufferSpace;
   /// The cached file size
   std::uint64_t fFileSize;
   /// Files are opened lazily and only when required; the open state is kept by this flag
   bool fIsOpen;

protected:
   /// The URL addressing the file (transport protocol and location, see class description)
   std::string fUrl;
   /// The options in effect for this file; fBlockSize may be adjusted by OpenImpl()
   ROptions fOptions;
   /// The current position in the file, which can be changed by Seek, Read, and Readln
   std::uint64_t fFilePos;

   /**
    * OpenImpl() is called at most once and before any call to either ReadAtImpl or GetSizeImpl. If
    * fOptions.fBlockSize is negative, derived classes are responsible to set a sensible value. After a call to
    * OpenImpl(), fOptions.fBlockSize must be larger or equal to zero.
    */
   virtual void OpenImpl() = 0;
   /**
    * Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
    * therefore derived classes should return nbytes bytes if available.
    */
   virtual std::size_t ReadAtImpl(void *buffer, std::size_t nbytes, std::uint64_t offset) = 0;
   /// Derived classes should return the file size or kUnknownFileSize
   virtual std::uint64_t GetSizeImpl() = 0;

   /// If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented, too
   /// The default implementation throws an error
   virtual void *MapImpl(std::size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
   /// Derived classes with mmap support must be able to unmap the memory area handed out by Map()
   virtual void UnmapImpl(void *region, std::size_t nbytes);

   /// By default implemented as a loop of ReadAt calls but can be overridden, e.g. XRootD or DAVIX implementations
   virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq);

   /// Open the file if not already open. Otherwise noop.
   void EnsureOpen();

public:
   RRawFile(std::string_view url, ROptions options);
   /// RRawFiles manage system resources and are non-copyable; use Clone() to obtain a second accessor
   RRawFile(const RRawFile &) = delete;
   RRawFile &operator=(const RRawFile &) = delete;
   virtual ~RRawFile();

   /// Create a new RRawFile that accesses the same resource. The file pointer is reset to zero.
   virtual std::unique_ptr<RRawFile> Clone() const = 0;

   /// Factory method that returns a suitable concrete implementation according to the transport in the url
   static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
   /// Returns only the file location, e.g. "server/file" for http://server/file
   static std::string GetLocation(std::string_view url);
   /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
   static std::string GetTransport(std::string_view url);

   /**
    * Buffered read from a random position. Returns the actual number of bytes read.
    * Short reads indicate the end of the file
    */
   std::size_t ReadAt(void *buffer, std::size_t nbytes, std::uint64_t offset);
   /// Read from fFilePos offset. Returns the actual number of bytes read.
   std::size_t Read(void *buffer, std::size_t nbytes);
   /// Change the cursor fFilePos
   void Seek(std::uint64_t offset);
   /// Returns the offset for the next Read/Readln call
   std::uint64_t GetFilePos() const { return fFilePos; }
   /// Returns the size of the file
   std::uint64_t GetSize();
   /// Returns the url of the file
   std::string GetUrl() const;

   /// Opens the file if necessary and calls ReadVImpl
   void ReadV(RIOVec *ioVec, unsigned int nReq);
   /// Returns the limits regarding the ioVec input to ReadV for this specific file; may open the file as a side-effect.
   /// The default implementation reports no limits.
   virtual RIOVecLimits GetReadVLimits() { return RIOVecLimits(); }

   /// Memory mapping according to POSIX standard; in particular, new mappings of the same range replace older ones.
   /// Mappings need to be aligned at page boundaries, therefore the real offset can be smaller than the desired value.
   /// Users become owner of the address returned by Map() and are responsible for calling Unmap() with the full length.
   void *Map(std::size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
   /// Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping
   void Unmap(void *region, std::size_t nbytes);

   /// Derived classes shall inform the user about the supported functionality, which can possibly depend
   /// on the file at hand
   virtual int GetFeatures() const = 0;

   /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
   bool Readln(std::string &line);

   /// Once opened, the file stays open until destruction of the RRawFile object
   bool IsOpen() const { return fIsOpen; }
}; // class RRawFile

} // namespace Internal
} // namespace ROOT

#endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |