Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:10:48

0001 // @(#)root/io:$Id$
0002 // Author: Jakob Blomer
0003 
0004 /*************************************************************************
0005  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers.               *
0006  * All rights reserved.                                                  *
0007  *                                                                       *
0008  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0009  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0010  *************************************************************************/
0011 
0012 #ifndef ROOT_RRawFile
0013 #define ROOT_RRawFile
0014 
0015 #include <string_view>
0016 
0017 #include <cstddef>
0018 #include <cstdint>
0019 #include <memory>
0020 #include <string>
0021 
0022 namespace ROOT {
0023 namespace Internal {
0024 
0025 /**
0026  * \class RRawFile RRawFile.hxx
0027  * \ingroup IO
0028  *
0029  * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
0030  * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
0031  * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
0032  * RDataSource implementations and for RNTuple.
0033  *
0034  * Files are addressed by URL consisting of a transport protocol part and a location, like file:///path/to/data.
0035  * If the transport protocol part and the :// separator are missing, the default protocol is local file. Files are
0036  * opened when required (on reading, getting file size) and closed on object destruction.
0037  *
0038  * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
0039  *
0040  * RRawFile objects are conditionally thread safe. See the user manual for further details:
0041  * https://root.cern/manual/thread_safety/
0042  */
0043 class RRawFile {
0044 public:
0045    /// Derived classes do not necessarily need to provide file size information but they can return "not known" instead
0046    static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
0047    /// kAuto detects the line break from the first line, kSystem picks the system's default
0048    enum class ELineBreaks { kAuto, kSystem, kUnix, kWindows };
0049 
0050    // Bit flags (distinct powers of two) that derived classes combine to describe the nature of the file; see GetFeatures()
0051    /// GetSize() does not return kUnknownFileSize
0052    static constexpr int kFeatureHasSize = 0x01;
0053    /// Map() and Unmap() are implemented
0054    static constexpr int kFeatureHasMmap = 0x02;
0055    /// File supports async IO
0056    static constexpr int kFeatureHasAsyncIo = 0x04;
0057 
0058    /// On construction, an ROptions parameter can customize the RRawFile behavior
        /// NOTE(review): the defaults are presumably set by a user-provided constructor instead of default member
        /// initializers because ROptions() serves as a default argument of Create() inside the enclosing class, which
        /// compilers reject for nested classes that rely on default member initializers -- confirm before changing.
0059    struct ROptions {
           /// Line break convention for line-wise reading; the default constructor selects kAuto
0060       ELineBreaks fLineBreak;
0061       /**
0062        * Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering. A negative value indicates
0063        * that the protocol-dependent default block size should be used.
0064        */
0065       int fBlockSize;
           /// Defaults: automatic line break detection and the protocol-dependent block size (negative fBlockSize)
0066       ROptions() : fLineBreak(ELineBreaks::kAuto), fBlockSize(-1) {}
0067    };
0068 
0069    /// Used for vector reads from multiple offsets into multiple buffers. This is unlike readv(), which scatters a
0070    /// single byte range from disk into multiple buffers.
0071    struct RIOVec {
0072       /// The destination for reading
0073       void *fBuffer = nullptr;
0074       /// The file offset
0075       std::uint64_t fOffset = 0;
0076       /// The number of desired bytes
0077       std::size_t fSize = 0;
0078       /// The number of actually read bytes, set by ReadV()
0079       std::size_t fOutBytes = 0;
0080    };
0081 
0082    /// Implementations may enforce limits on the use of vector reads. These limits can depend on the server or
0083    /// the specific file opened and can be queried per RRawFile object through GetReadVLimits().
0084    /// Note that due to such limits, a vector read with a single request can behave differently from a Read() call.
0085    struct RIOVecLimits {
0086       /// Maximum number of elements in a ReadV request vector
0087       std::size_t fMaxReqs = static_cast<std::size_t>(-1);
0088       /// Maximum size in bytes of any single request in the request vector
0089       std::size_t fMaxSingleSize = static_cast<std::size_t>(-1);
0090       /// Maximum size in bytes of the sum of requests in the vector
0091       std::uint64_t fMaxTotalSize = static_cast<std::uint64_t>(-1);
0092 
           /// True if fMaxReqs differs from its default, the largest std::size_t value
0093       bool HasReqsLimit() const { return fMaxReqs != static_cast<std::size_t>(-1); }
           /// True if the per-request maximum or the total maximum differs from its default (the largest value of its type)
0094       bool HasSizeLimit() const
0095       {
0096          return fMaxSingleSize != static_cast<std::size_t>(-1) || fMaxTotalSize != static_cast<std::uint64_t>(-1);
0097       }
0098    };
0099 
0100 private:
0101    /// Don't change without adapting ReadAt()
0102    static constexpr unsigned int kNumBlockBuffers = 2;
        /// Describes one window of buffered file content; the bytes themselves live in memory it does not own
0103    struct RBlockBuffer {
0104       /// Where in the open file does fBuffer start
0105       std::uint64_t fBufferOffset;
0106       /// The number of currently buffered bytes in fBuffer
0107       size_t fBufferSize;
0108       /// Points into the I/O buffer with data from the file, not owned.
0109       unsigned char *fBuffer;
0110 
           /// Starts out empty: offset zero, no buffered bytes, no buffer attached
0111       RBlockBuffer() : fBufferOffset(0), fBufferSize(0), fBuffer(nullptr) {}
           // Copying is disabled
0112       RBlockBuffer(const RBlockBuffer &) = delete;
0113       RBlockBuffer &operator=(const RBlockBuffer &) = delete;
0114       ~RBlockBuffer() = default;
0115 
0116       /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
0117       size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset);
0118    };
0119    /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
0120    unsigned int fBlockBufferIdx;
0121    /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
0122    RBlockBuffer fBlockBuffers[kNumBlockBuffers];
0123    /// Memory block containing the block buffers consecutively
0124    unsigned char *fBufferSpace;
0125    /// The cached file size
0126    std::uint64_t fFileSize;
0127    /// Files are opened lazily and only when required; the open state is kept by this flag
0128    bool fIsOpen;
0129 
0130 protected:
        /// The URL of the file; presumably the value passed to the constructor (definition not visible here)
0131    std::string fUrl;
        /// The options of this file; see OpenImpl() for the contract on fOptions.fBlockSize
0132    ROptions fOptions;
0133    /// The current position in the file, which can be changed by Seek, Read, and Readln
0134    std::uint64_t fFilePos;
0135 
0136    /**
0137     * OpenImpl() is called at most once and before any call to either ReadAtImpl() or GetSizeImpl(). If
0138     * fOptions.fBlockSize is negative, derived classes are responsible to set a sensible value. After a call to
0139     * OpenImpl(), fOptions.fBlockSize must be larger or equal to zero.
0140     */
0141    virtual void OpenImpl() = 0;
0142    /**
0143     * Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
0144     * therefore derived classes should return nbytes bytes if available.
0145     */
0146    virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset) = 0;
0147    /// Derived classes should return the file size or kUnknownFileSize
0148    virtual std::uint64_t GetSizeImpl() = 0;
0149 
0150    /// If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented, too
0151    /// The default implementation throws an error
0152    virtual void *MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
0153    /// Derived classes with mmap support must be able to unmap the memory area handed out by Map()
0154    virtual void UnmapImpl(void *region, size_t nbytes);
0155 
0156    /// By default implemented as a loop of ReadAt calls but can be overridden, e.g. XRootD or DAVIX implementations
0157    virtual void ReadVImpl(RIOVec *ioVec, unsigned int nReq);
0158 
0159    /// Open the file if not already open. Otherwise noop.
0160    void EnsureOpen();
0161 
0162 public:
        /// Takes the URL of the file and the options. Per the class description, files are opened when required
        /// (on reading, getting file size) rather than on construction.
0163    RRawFile(std::string_view url, ROptions options);
        // Non-copyable because RRawFiles manage system resources; use Clone() to get a second accessor
0164    RRawFile(const RRawFile &) = delete;
0165    RRawFile &operator=(const RRawFile &) = delete;
0166    virtual ~RRawFile();
0167 
0168    /// Create a new RawFile that accesses the same resource.  The file pointer is reset to zero.
0169    virtual std::unique_ptr<RRawFile> Clone() const = 0;
0170 
0171    /// Factory method that returns a suitable concrete implementation according to the transport in the url
0172    static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
0173    /// Returns only the file location, e.g. "server/file" for http://server/file
0174    static std::string GetLocation(std::string_view url);
0175    /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
0176    static std::string GetTransport(std::string_view url);
0177 
0178    /**
0179     * Buffered read from a random position. Returns the actual number of bytes read.
0180     * Short reads indicate the end of the file
0181     */
0182    size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset);
0183    /// Read from fFilePos offset. Returns the actual number of bytes read.
0184    size_t Read(void *buffer, size_t nbytes);
0185    /// Change the cursor fFilePos
0186    void Seek(std::uint64_t offset);
0187    /// Returns the offset for the next Read/Readln call
0188    std::uint64_t GetFilePos() const { return fFilePos; }
0189    /// Returns the size of the file
0190    std::uint64_t GetSize();
0191    /// Returns the url of the file
0192    std::string GetUrl() const;
0193 
0194    /// Opens the file if necessary and calls ReadVImpl
0195    void ReadV(RIOVec *ioVec, unsigned int nReq);
0196    /// Returns the limits regarding the ioVec input to ReadV for this specific file; may open the file as a side-effect.
        /// The base implementation returns default-constructed limits, for which HasReqsLimit() and HasSizeLimit() are false.
0197    virtual RIOVecLimits GetReadVLimits() { return RIOVecLimits(); }
0198 
0199    /// Memory mapping according to POSIX standard; in particular, new mappings of the same range replace older ones.
0200    /// Mappings need to be aligned at page boundaries, therefore the real offset can be smaller than the desired value.
0201    /// Users become owner of the address returned by Map() and are responsible for calling Unmap() with the full length.
0202    void *Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
0203    /// Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping
0204    void Unmap(void *region, size_t nbytes);
0205 
0206    /// Derived classes shall inform the user about the supported functionality, which can possibly depend
0207    /// on the file at hand
0208    virtual int GetFeatures() const = 0;
0209 
0210    /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
0211    bool Readln(std::string &line);
0212 
0213    /// Once opened, the file stays open until destruction of the RRawFile object
0214    bool IsOpen() const { return fIsOpen; }
0215 }; // class RRawFile
0216 
0217 } // namespace Internal
0218 } // namespace ROOT
0219 
0220 #endif