Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-16 09:08:34

0001 /// \file ROOT/RNTupleParallelWriter.hxx
0002 /// \ingroup NTuple
0003 /// \author Jonas Hahnfeld <jonas.hahnfeld@cern.ch>
0004 /// \date 2024-02-01
0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0006 /// is welcome!
0007 
0008 /*************************************************************************
0009  * Copyright (C) 1995-2024, Rene Brun and Fons Rademakers.               *
0010  * All rights reserved.                                                  *
0011  *                                                                       *
0012  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0013  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0014  *************************************************************************/
0015 
0016 #ifndef ROOT_RNTupleParallelWriter
0017 #define ROOT_RNTupleParallelWriter
0018 
0019 #include <ROOT/RNTupleMetrics.hxx>
0020 #include <ROOT/RNTupleWriteOptions.hxx>
0021 
0022 #include <memory>
0023 #include <mutex>
0024 #include <string_view>
0025 #include <vector>
0026 
0027 class TDirectory;
0028 
0029 namespace ROOT {
0030 
0031 class RNTupleModel;
0032 
0033 namespace Internal {
0034 class RPageSink;
0035 } // namespace Internal
0036 
0037 namespace Experimental {
0038 
0039 class RNTupleFillContext;
0040 
0041 /**
0042 \class ROOT::Experimental::RNTupleParallelWriter
0043 \ingroup NTuple
0044 \brief A writer to fill an RNTuple from multiple contexts
0045 
0046 Compared to the sequential RNTupleWriter, a parallel writer enables the creation of multiple RNTupleFillContext (see
0047 RNTupleParallelWriter::CreateFillContext).  Each fill context prepares independent clusters that are appended to the
0048 common ntuple with internal synchronization.  Before destruction, all fill contexts must have flushed their data and
0049 been destroyed (or data could be lost!).
0050 
0051 For user convenience, RNTupleParallelWriter::CreateFillContext is thread-safe and may be called from multiple threads
0052 in parallel at any time, also after some data has already been written.  Internally, the original model is cloned and
0053 ownership is passed to a newly created RNTupleFillContext.  For that reason, it is recommended to use
0054 RNTupleModel::CreateBare when creating the model for parallel writing and avoid the allocation of a useless default
0055 REntry per context.
0056 
0057 Note that the sequence of independently prepared clusters is indeterminate and therefore entries are only partially
0058 ordered:  Entries from one context are totally ordered as they were filled.  However, there is no ordering with other
0059 contexts and the entries may be appended to the ntuple either before or after other entries written in parallel into
0060 other contexts.  In addition, two consecutive entries in one fill context can end up separated in the final ntuple, if
0061 they happen to fall onto a cluster boundary and other contexts append more entries before the next cluster is full.
0062 
0063 At the moment, the parallel writer does not (yet) support incremental updates of the underlying model. Please refer to
0064 RNTupleWriter::CreateModelUpdater if required for your use case.
0065 */
0066 class RNTupleParallelWriter {
0067 private:
0068    /// A global mutex to protect the internal data structures of this object.
0069    std::mutex fMutex;
0070    /// A mutex to synchronize access to the final page sink (fSink).
0071    std::mutex fSinkMutex;
0072    /// The final RPageSink that represents the synchronization point.
0073    std::unique_ptr<ROOT::Internal::RPageSink> fSink;
0074    /// The original RNTupleModel connected to fSink; declared after fSink so that it is destructed before it (do not reorder).
0075    std::unique_ptr<ROOT::RNTupleModel> fModel;
0076    Detail::RNTupleMetrics fMetrics; ///< Metrics of this writer; enabled by EnableMetrics(), exposed by GetMetrics().
0077    /// List of all created fill contexts (weak references). They must be destroyed before this RNTupleParallelWriter is destructed.
0078    std::vector<std::weak_ptr<RNTupleFillContext>> fFillContexts;
0079 
0080    RNTupleParallelWriter(std::unique_ptr<ROOT::RNTupleModel> model, std::unique_ptr<ROOT::Internal::RPageSink> sink);
0081    RNTupleParallelWriter(const RNTupleParallelWriter &) = delete;
0082    RNTupleParallelWriter &operator=(const RNTupleParallelWriter &) = delete;
0083 
0084 public:
0085    /// Recreate a new file and return a writer to write an ntuple.
0086    static std::unique_ptr<RNTupleParallelWriter>
0087    Recreate(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntupleName, std::string_view storage,
0088             const ROOT::RNTupleWriteOptions &options = ROOT::RNTupleWriteOptions());
0089    /// Append an ntuple to the existing file, which must not be accessed while data is filled into any created context.
0090    static std::unique_ptr<RNTupleParallelWriter>
0091    Append(std::unique_ptr<ROOT::RNTupleModel> model, std::string_view ntupleName, TDirectory &fileOrDirectory,
0092           const ROOT::RNTupleWriteOptions &options = ROOT::RNTupleWriteOptions());
0093 
0094    ~RNTupleParallelWriter();
0095 
0096    /// Create a new RNTupleFillContext that can be used to fill entries and prepare clusters in parallel. This method is
0097    /// thread-safe and may be called from multiple threads in parallel at any time, also after some data has already
0098    /// been written.
0099    ///
0100    /// Note that all fill contexts must be destroyed before RNTupleParallelWriter::CommitDataset() is called.
0101    std::shared_ptr<RNTupleFillContext> CreateFillContext();
0102 
0103    /// Commit the dataset. Automatically called by the destructor; all fill contexts must be destroyed before the call.
0104    void CommitDataset();
0105 
0106    void EnableMetrics() { fMetrics.Enable(); } ///< Enable the metrics of this writer (fMetrics).
0107    const Detail::RNTupleMetrics &GetMetrics() const { return fMetrics; } ///< Read-only access to the writer's metrics.
0108 };
0109 
0110 } // namespace Experimental
0111 } // namespace ROOT
0112 
0113 #endif