Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-16 10:29:58

0001 /// \file ROOT/RFile.hxx
0002 /// \ingroup Base ROOT7
0003 /// \author Giacomo Parolini <giacomo.parolini@cern.ch>
0004 /// \date 2025-03-19
0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0006 /// is welcome!
0007 
0008 #ifndef ROOT7_RFile
0009 #define ROOT7_RFile
0010 
0011 #include <ROOT/RError.hxx>
0012 
     #include <cstddef>
     #include <cstdint>
0013 #include <deque>
0014 #include <functional>
0015 #include <iostream>
     #include <iterator>
0016 #include <memory>
     #include <optional>
     #include <string>
0017 #include <string_view>
0018 #include <typeinfo>
     #include <utility>
0019 #include <variant>
0020 
0021 class TFile;
0022 class TIterator;
0023 class TKey;
0024 
0025 namespace ROOT {
0026 namespace Experimental {
0027 
0028 class RKeyInfo;
0029 class RFile;
0030 
0031 namespace Internal {
0032 
     /// NOTE(review): presumably the log channel used for RFile diagnostics; the definition is not in this header.
0033 ROOT::RLogChannel &RFileLog();
0034 
0035 /// Returns an **owning** pointer to the object referenced by `key`. The caller must delete this pointer.
0036 /// This function is meant to be used only by the pythonization.
0037 [[nodiscard]] void *RFile_GetObjectFromKey(RFile &file, const RKeyInfo &key);
0038 
0039 } // namespace Internal
0040 
0041 namespace Detail {
0042 
0043 /// Given a "path-like" string (like foo/bar/baz), returns a pair `{ dirName, baseName }`.
0044 /// `baseName` will be empty if the string ends with '/'.
0045 /// `dirName` will be empty if the string contains no '/'.
0046 /// `dirName`, if not empty, always ends with a '/'.
0047 /// NOTE: this function does no semantic checking or path expansion, nor does it interact with the
0048 /// filesystem in any way (so it won't follow symlinks or anything like that).
0049 /// Moreover it doesn't trim the path in any way, so any leading or trailing whitespace will be preserved.
0050 /// This function does not perform any copy: the returned string_views refer to the characters of `path` and
     /// are therefore only valid for as long as the data viewed by `path` is.
0051 std::pair<std::string_view, std::string_view> DecomposePath(std::string_view path);
0052 
0053 } // namespace Detail
0054 
0055 class RFileKeyIterable;
0056 
0057 /**
0058 \class ROOT::Experimental::RKeyInfo
0059 \ingroup RFile
0060 \brief Information about an RFile object's Key.
0061 
0062 Every object inside a ROOT file has an associated "Key" which contains metadata on the object, such as its name, type
0063 etc.
0064 Querying this information can be done via RFile::ListKeys(). Reading an object's Key
0065 doesn't deserialize the full object, so it's a relatively lightweight operation.
0066 */
0067 class RKeyInfo final {
        // This class only exposes read-only accessors; the friends below are the classes allowed to fill in its fields.
0068    friend class ROOT::Experimental::RFile;
0069    friend class ROOT::Experimental::RFileKeyIterable;
0070 
0071 public:
        /// Category of a key. `kInvalid` is the category of a default-constructed RKeyInfo.
0072    enum class ECategory : std::uint16_t {
0073       kInvalid,
0074       kObject,
0075       kDirectory
0076    };
0077 
0078 private:
0079    std::string fPath;      // see GetPath()
0080    std::string fTitle;     // see GetTitle()
0081    std::string fClassName; // see GetClassName()
0082    std::uint16_t fCycle = 0; // see GetCycle()
0083    ECategory fCategory = ECategory::kInvalid; // see GetCategory()
0084 
0085 public:
0086    /// Returns the absolute path of this key, i.e. the directory part plus the object name.
0087    const std::string &GetPath() const { return fPath; }
0088    /// Returns the base name of this key, i.e. the name of the object without the directory part.
        /// The result is a fresh std::string copied from the second element of Detail::DecomposePath(GetPath()).
0089    std::string GetBaseName() const { return std::string(Detail::DecomposePath(fPath).second); }
        /// Returns the title stored for this key.
0090    const std::string &GetTitle() const { return fTitle; }
        /// Returns the class name stored for this key.
0091    const std::string &GetClassName() const { return fClassName; }
        /// Returns the cycle stored for this key (0 for a default-constructed RKeyInfo).
0092    std::uint16_t GetCycle() const { return fCycle; }
        /// Returns the category of this key (ECategory::kInvalid for a default-constructed RKeyInfo).
0093    ECategory GetCategory() const { return fCategory; }
0094 };
0095 
0096 /// The iterable returned by RFile::ListKeys()
     /// It provides begin()/end(), so it can be used directly in a range-based for loop.
0097 class RFileKeyIterable final {
0098    using Pattern_t = std::string;
0099 
        // Raw pointer exactly as received by the constructor. RFile::ListKeys() passes the pointer held by the
        // RFile's own unique_ptr, and this class declares no destructor, so the TFile is not owned here.
0100    TFile *fFile = nullptr;
        // Copy of the `rootDir` argument given to the constructor.
0101    Pattern_t fPattern;
        // Bitmask given to the constructor (RFile::ListKeys() forwards its `flags` argument here).
0102    std::uint32_t fFlags = 0;
0103 
0104 public:
        /// Input iterator over the listed keys. Dereferencing it yields an RKeyInfo by value.
        /// Instances can only be created by RFileKeyIterable (the constructor is private).
0105    class RIterator {
0106       friend class RFileKeyIterable;
0107 
0108       struct RIterStackElem {
0109          // This is ugly, but TList returns an (owning) pointer to a polymorphic TIterator...and we need this class
0110          // to be copy-constructible.
0111          std::shared_ptr<TIterator> fIter;
0112          std::string fDirPath;
0113 
0114          // Outlined to avoid including TIterator.h
0115          RIterStackElem(TIterator *it, const std::string &path = "");
0116          // Outlined to avoid including TIterator.h
0117          ~RIterStackElem();
0118 
0119          // fDirPath doesn't need to be compared because it's implied by fIter.
             // Note that this compares the shared_ptrs themselves, i.e. whether both refer to the same TIterator.
0120          bool operator==(const RIterStackElem &other) const { return fIter == other.fIter; }
0121       };
0122 
0123       // Using a deque to have pointer stability
0124       std::deque<RIterStackElem> fIterStack;
0125       Pattern_t fPattern;
          // NOTE(review): presumably the key the iterator currently refers to; it is maintained by the out-of-line
          // members, so confirm in the implementation file.
0126       const TKey *fCurKey = nullptr;
0127       std::uint16_t fRootDirNesting = 0;
0128       std::uint32_t fFlags = 0;
0129 
          // Moves the iterator forward; called by operator++. Defined out of line.
0130       void Advance();
0131 
0132       // NOTE: `iter` here is an owning pointer (or null)
0133       RIterator(TIterator *iter, Pattern_t pattern, std::uint32_t flags);
0134 
0135    public:
0136       using iterator = RIterator;
0137       using iterator_category = std::input_iterator_tag;
0138       using difference_type = std::ptrdiff_t;
0139       using value_type = RKeyInfo;
0140       using pointer = const value_type *;
0141       using reference = const value_type &;
0142 
          /// Pre-increment: advances the iterator and returns a reference to it.
0143       iterator &operator++()
0144       {
0145          Advance();
0146          return *this;
0147       }
          /// Returns the RKeyInfo for the current position, by value. Defined out of line.
0148       value_type operator*();
0149       bool operator!=(const iterator &rh) const { return !(*this == rh); }
          /// Two iterators are equal when their iterator stacks compare equal; fPattern, fCurKey, fRootDirNesting
          /// and fFlags do not take part in the comparison.
0150       bool operator==(const iterator &rh) const { return fIterStack == rh.fIterStack; }
0151    };
0152 
        /// Stores `file` (not owned), a copy of `rootDir` and `flags`; nothing else is done at construction.
0153    RFileKeyIterable(TFile *file, std::string_view rootDir, std::uint32_t flags)
0154       : fFile(file), fPattern(std::string(rootDir)), fFlags(flags)
0155    {
0156    }
0157 
        // begin() and end() are defined out of line.
0158    RIterator begin() const;
0159    RIterator end() const;
0160 };
0161 
0162 /**
0163 \class ROOT::Experimental::RFile
0164 \ingroup RFile
0165 \brief An interface to read from, or write to, a ROOT file, as well as performing other common operations.
0166 
0167 ## When and why should you use RFile
0168 
0169 RFile is a modern and minimalistic interface to ROOT files, both local and remote, that can be used instead of TFile
0170 when you only need basic Put/Get operations and don't need the more advanced TFile/TDirectory functionalities.
0171 It provides:
0172 - a simple interface that makes it easy to do things right and hard to do things wrong;
0173 - more robustness and better error reporting for those operations;
0174 - clearer ownership semantics expressed through the type system.
0175 
0176 RFile doesn't cover the entirety of use cases covered by TFile/TDirectory/TDirectoryFile and is not
0177 a 1:1 replacement for them.  It is meant to simplify the most common use cases by following newer standard C++
0178 practices.
0179 
0180 ## Ownership model
0181 
0182 RFile handles ownership via smart pointers, typically std::unique_ptr.
0183 
0184 When getting an object from the file (via RFile::Get) you get back a unique copy of the object. Calling `Get` on the
0185 same object twice produces two independent clones of the object. The ownership over that object is solely on the caller
0186 and not shared with the RFile. Therefore, the object will remain valid after closing or destroying the RFile that
0187 generated it. This also means that any modifications made to the object are **not** reflected to the file automatically:
0188 to update the object in the file you need to write it again (via RFile::Overwrite).
0189 
0190 RFile::Put and RFile::Overwrite are the way to write objects to the file. Both methods take a const reference to the
0191 object to write and don't change the ownership of the object in any way. Calling Put or Overwrite doesn't guarantee that
0192 the object is immediately written to the underlying storage: to ensure that, you need to call RFile::Flush (or close the
0193 file).
0194 
0195 ## Directories
0196 
0197 Even though there is no equivalent of TDirectory in the RFile API, directories are still an existing concept in RFile
0198 (since they are a concept in the ROOT binary format). However they are for now only interacted with indirectly, via the
0199 use of filesystem-like string-based paths. If you Put an object in an RFile under the path "path/to/object", "object"
0200 will be stored under directory "to" which is in turn stored under directory "path". This hierarchy is encoded in the
0201 ROOT file itself and it can provide some optimization and/or conveniences when querying objects.
0202 
0203 For the most part, it is convenient to think about RFile in terms of a key-value storage where string-based paths are
0204 used to refer to arbitrary objects. However, given the hierarchical nature of ROOT files, certain filesystem-like
0205 properties are applied to paths, for ease of use: the '/' character is treated specially as the directory separator;
0206 multiple '/' in a row are collapsed into one (since RFile doesn't allow directories with empty names).
0207 
0208 At the moment, RFile doesn't allow getting directories via Get, nor writing ones via Put (this may change in the
0209 future).
0210 
0211 ## Sample usage
0212 Opening an RFile (for writing) and writing an object to it:
0213 ~~~{.cpp}
0214 auto rfile = ROOT::Experimental::RFile::Recreate("my_file.root");
0215 auto myObj = TH1D("h", "h", 10, 0, 1);
0216 rfile->Put(myObj.GetName(), myObj);
0217 ~~~
0218 
0219 Opening an RFile (for reading) and reading an object from it:
0220 ~~~{.cpp}
0221 auto rfile = ROOT::Experimental::RFile::Open("my_file.root");
0222 auto myObj = rfile->Get<TH1D>("h");
0223 ~~~
0224 */
0225 class RFile final {
0226    friend void *Internal::RFile_GetObjectFromKey(RFile &file, const RKeyInfo &key);
0227 
0228    /// Flags used in PutInternal()
0229    enum PutFlags {
0230       /// When encountering an object at the specified path, overwrite it with the new one instead of erroring out.
0231       kPutAllowOverwrite = 0x1,
0232       /// When overwriting an object, preserve the existing one and create a new cycle, rather than removing it.
0233       kPutOverwriteKeepCycle = 0x2,
0234    };
0235 
        // The underlying TFile, owned by this RFile.
0236    std::unique_ptr<TFile> fFile;
0237 
0238    // Outlined to avoid including TFile.h
0239    explicit RFile(std::unique_ptr<TFile> file);
0240 
0241    /// Gets object `path` from the file and returns an **owning** pointer to it.
0242    /// The caller should immediately wrap it into a unique_ptr of the type described by `type`.
0243    [[nodiscard]] void *GetUntyped(std::string_view path,
0244                                   std::variant<const char *, std::reference_wrapper<const std::type_info>> type) const;
0245 
0246    /// Writes `obj` to file, without taking its ownership.
0247    void PutUntyped(std::string_view path, const std::type_info &type, const void *obj, std::uint32_t flags);
0248 
0249    /// \see Put
        /// Type-erasing helper: forwards to PutUntyped() with `typeid(T)` and the address of `obj`.
        /// `flags` is a bitmask of PutFlags.
0250    template <typename T>
0251    void PutInternal(std::string_view path, const T &obj, std::uint32_t flags)
0252    {
0253       PutUntyped(path, typeid(T), &obj, flags);
0254    }
0255 
0256    /// Given `path`, returns the TKey corresponding to the object at that path (assuming the path is fully split, i.e.
0257    /// "a/b/c" always means "object 'c' inside directory 'b' inside directory 'a'").
0258    /// IMPORTANT: `path` must have been validated/normalized via ValidateAndNormalizePath() (see RFile.cxx).
0259    TKey *GetTKey(std::string_view path) const;
0260 
0261 public:
        /// Bit flags accepted by ListKeys(); they can be combined with bitwise OR.
0262    enum EListKeyFlags {
0263       kListObjects = 1 << 0,
0264       kListDirs = 1 << 1,
0265       kListRecursive = 1 << 2,
0266    };
0267 
0268    // This is arbitrary, but it's useful to avoid pathological cases
0269    static constexpr int kMaxPathNesting = 1000;
0270 
0271    ///// Factory methods /////
0272 
0273    /// Opens the file for reading. `path` may be a regular file path or a remote URL.
0274    /// \throw ROOT::RException if the file at `path` could not be opened.
0275    static std::unique_ptr<RFile> Open(std::string_view path);
0276 
0277    /// Opens the file for reading/writing, overwriting it if it already exists.
0278    /// \throw ROOT::RException if a file could not be created at `path` (e.g. if the specified
0279    /// directory tree does not exist).
0280    static std::unique_ptr<RFile> Recreate(std::string_view path);
0281 
0282    /// Opens the file for updating, creating a new one if it doesn't exist.
0283    /// \throw ROOT::RException if the file at `path` could neither be read nor created
0284    /// (e.g. if the specified directory tree does not exist).
0285    static std::unique_ptr<RFile> Update(std::string_view path);
0286 
0287    ///// Instance methods /////
0288 
0289    // Outlined to avoid including TFile.h
0290    ~RFile();
0291 
0292    /// Retrieves an object from the file.
0293    /// `path` should be a string such that `IsValidPath(path) == true`, otherwise an exception will be thrown.
0294    /// See \ref ValidateAndNormalizePath() for info about valid path names.
0295    /// If the object is not there returns a null pointer.
0296    template <typename T>
0297    std::unique_ptr<T> Get(std::string_view path) const
0298    {
           // GetUntyped() returns an owning void pointer: cast it to T and hand ownership to the unique_ptr right away.
0299       void *obj = GetUntyped(path, typeid(T));
0300       return std::unique_ptr<T>(static_cast<T *>(obj));
0301    }
0302 
0303    /// Puts an object into the file.
0304    /// The application retains ownership of the object.
0305    /// `path` should be a string such that `IsValidPath(path) == true`, otherwise an exception will be thrown.
0306    /// See \ref ValidateAndNormalizePath() for info about valid path names.
0307    ///
0308    /// Throws an RException if `path` already identifies a valid object or directory.
0309    /// Throws an RException if the file was opened in read-only mode.
0310    template <typename T>
0311    void Put(std::string_view path, const T &obj)
0312    {
0313       PutInternal(path, obj, /* flags = */ 0);
0314    }
0315 
0316    /// Puts an object into the file, overwriting any previously-existing object at that path.
0317    /// The application retains ownership of the object.
0318    ///
0319    /// If an object already exists at that path, it is kept as a backup cycle unless `backupPrevious` is false.
0320    /// Note that even if `backupPrevious` is false, any existing cycle except the latest will be preserved.
0321    ///
0322    /// Throws an RException if `path` is already the path of a directory.
0323    /// Throws an RException if the file was opened in read-only mode.
0324    template <typename T>
0325    void Overwrite(std::string_view path, const T &obj, bool backupPrevious = true)
0326    {
0327       std::uint32_t flags = kPutAllowOverwrite;
           // `backupPrevious` converts to 0 or 1, so the product is kPutOverwriteKeepCycle when true and 0 when false.
0328       flags |= backupPrevious * kPutOverwriteKeepCycle;
0329       PutInternal(path, obj, flags);
0330    }
0331 
0332    /// Writes all objects and the file structure to disk.
0333    /// Returns the number of bytes written.
0334    size_t Flush();
0335 
0336    /// Flushes the RFile if needed and closes it, disallowing any further reading or writing.
0337    void Close();
0338 
0339    /// Returns an iterable over all keys of objects and/or directories written into this RFile starting at path
0340    /// `basePath` (defaulting to include the content of all subdirectories).
0341    /// By default, keys referring to directories are not returned: only those referring to leaf objects are.
0342    /// If `basePath` is the path of a leaf object, only `basePath` itself will be returned.
0343    /// `flags` is a bitmask specifying the listing mode.
0344    /// If `(flags & kListObjects) != 0`, the listing will include keys of non-directory objects (default);
0345    /// If `(flags & kListDirs) != 0`, the listing will include keys of directory objects;
0346    /// If `(flags & kListRecursive) != 0`, the listing will recurse on all subdirectories of `basePath` (default),
0347    /// otherwise it will only list immediate children of `basePath`.
0348    ///
0349    /// Example usage:
0350    /// ~~~{.cpp}
0351    /// for (RKeyInfo key : file->ListKeys()) {
0352    ///     /* iterate over all objects in the RFile */
0353    ///     cout << key.GetPath() << ";" << key.GetCycle() << " of type " << key.GetClassName() << "\n";
0354    /// }
0355    /// for (RKeyInfo key : file->ListKeys("", kListDirs|kListObjects|kListRecursive)) {
0356    ///     /* iterate over all objects and directories in the RFile */
0357    /// }
0358    /// for (RKeyInfo key : file->ListKeys("a/b", kListObjects)) {
0359    ///     /* iterate over all objects that are immediate children of directory "a/b" */
0360    /// }
0361    /// for (RKeyInfo key : file->ListKeys("foo", kListDirs|kListRecursive)) {
0362    ///     /* iterate over all directories under directory "foo", recursively */
0363    /// }
0364    /// ~~~
0365    RFileKeyIterable ListKeys(std::string_view basePath = "", std::uint32_t flags = kListObjects | kListRecursive) const
0366    {
           // The iterable receives the raw TFile pointer, which stays owned by this RFile.
0367       return RFileKeyIterable(fFile.get(), basePath, flags);
0368    }
0369 
0370    /// Retrieves information about the key of object at `path`, if one exists.
        /// NOTE(review): presumably returns std::nullopt when there is no such key; confirm in the implementation.
0371    std::optional<RKeyInfo> GetKeyInfo(std::string_view path) const;
0372 
0373    /// Prints the internal structure of this RFile to the given stream.
        /// `out` defaults to std::cout.
0374    void Print(std::ostream &out = std::cout) const;
0375 };
0376 
0377 } // namespace Experimental
0378 } // namespace ROOT
0379 
0380 #endif