Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-07 08:51:29

0001 /// \file ROOT/RNTupleImporter.hxx
0002 /// \ingroup NTuple ROOT7
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2022-11-22
0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0006 /// is welcome!
0007 
0008 /*************************************************************************
0009  * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers.               *
0010  * All rights reserved.                                                  *
0011  *                                                                       *
0012  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0013  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0014  *************************************************************************/
0015 
0016 #ifndef ROOT7_RNTuplerImporter
0017 #define ROOT7_RNTuplerImporter
0018 
#include <ROOT/REntry.hxx>
#include <ROOT/RError.hxx>
#include <ROOT/RField.hxx>
#include <ROOT/RNTupleModel.hxx>
#include <ROOT/RNTupleWriteOptions.hxx>
#include <ROOT/RNTupleWriter.hxx>

#include <TFile.h>
#include <TTree.h>

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
0035 
0036 class TLeaf;
0037 
0038 namespace ROOT {
0039 namespace Experimental {
0040 
0041 // clang-format off
0042 /**
0043 \class ROOT::Experimental::RNTupleImporter
0044 \ingroup NTuple
0045 \brief Converts a TTree into an RNTuple
0046 
0047 Example usage (see the ntpl008_import.C tutorial for a full example):
0048 
0049 ~~~ {.cpp}
0050 #include <ROOT/RNTupleImporter.hxx>
0051 using ROOT::Experimental::RNTupleImporter;
0052 
0053 auto importer = RNTupleImporter::Create("data.root", "TreeName", "output.root");
0054 // As required: importer->SetNTupleName(), importer->SetWriteOptions(), ...
0055 importer->Import();
0056 ~~~
0057 
0058 The output file is created if it does not exist, otherwise the ntuple is added to the existing file.
0059 Directories in the output file are created as necessary, allowing ntuples to be stored in a nested structure (e.g. DirName/TreeName).
0060 Note that input file and output file can be identical if the ntuple is stored under a different name than the tree
0061 (use `SetNTupleName()`).
0062 
0063 By default, the RNTuple is compressed with zstd, independent of the input compression. The compression settings
0064 (and other output parameters) can be changed by `SetWriteOptions()`. For example, to compress the imported RNTuple
0065 using lz4 (with compression level 4) instead:
0066 
0067 ~~~ {.cpp}
0068 auto writeOptions = importer->GetWriteOptions();
0069 writeOptions.SetCompression(404);
0070 importer->SetWriteOptions(writeOptions);
0071 ~~~
0072 
0073 Most RNTuple fields have a type identical to the corresponding TTree input branch. Exceptions are
0074   - C string branches are translated to `std::string` fields
0075   - C style arrays are translated to `std::array<...>` fields
0076   - Leaf lists are translated to untyped records
  - Leaf count arrays are translated to anonymous collections with generic names (`_collection0`, `_collection1`, etc.).
    In order to keep field names and branch names aligned, RNTuple projects the members of these collections and
    their collection counter to the input branch names. For instance, the following input leafs:
0080 ~~~
0081 Int_t njets
0082 float jet_pt[njets]
0083 float jet_eta[njets]
0084 ~~~
0085     will be converted to the following RNTuple schema:
0086 ~~~
0087       _collection0 (untyped collection)
0088       |- float jet_pt
0089       |- float jet_eta
0090       std::size_t (RNTupleCardinality) njets   (projected from _collection0 without subfields)
0091       ROOT::RVec<float>                jet_pt  (projected from _collection0.jet_pt)
0092       ROOT::RVec<float>                jet_eta (projected from _collection0.jet_eta)
0093 ~~~
0094     These projections are meta-data only operations and don't involve duplicating the data.
0095 
0096 Current limitations of the importer:
0097   - No support for trees containing TClonesArray collections
0098   - Due to RNTuple currently storing data fully split, "don't split" markers are ignored
0099   - Some types are not available in RNTuple. Please refer to the
0100     [RNTuple specification](https://github.com/root-project/root/blob/master/tree/ntuple/v7/doc/specifications.md) for
0101     an overview of all types currently supported.
0102 */
0103 // clang-format on
0104 class RNTupleImporter {
0105 public:
0106    /// Used to make adjustments to the fields of the output model.
0107    using FieldModifier_t = std::function<void(ROOT::RFieldBase &)>;
0108 
0109    /// Used to report every ~100 MB (compressed), and at the end about the status of the import.
0110    class RProgressCallback {
0111    public:
0112       virtual ~RProgressCallback() = default;
0113       void operator()(std::uint64_t nbytesWritten, std::uint64_t neventsWritten)
0114       {
0115          Call(nbytesWritten, neventsWritten);
0116       }
0117       virtual void Call(std::uint64_t nbytesWritten, std::uint64_t neventsWritten) = 0;
0118       virtual void Finish(std::uint64_t nbytesWritten, std::uint64_t neventsWritten) = 0;
0119    };
0120 
0121 private:
0122    struct RImportBranch {
0123       RImportBranch() = default;
0124       RImportBranch(const RImportBranch &other) = delete;
0125       RImportBranch(RImportBranch &&other) = default;
0126       RImportBranch &operator=(const RImportBranch &other) = delete;
0127       RImportBranch &operator=(RImportBranch &&other) = default;
0128       std::string fBranchName;                        ///< Top-level branch name from the input TTree
0129       std::unique_ptr<unsigned char[]> fBranchBuffer; ///< The destination of SetBranchAddress() for `fBranchName`
0130    };
0131 
0132    struct RImportField {
0133       RImportField() = default;
0134       ~RImportField() = default;
0135       RImportField(const RImportField &other) = delete;
0136       RImportField(RImportField &&other) = default;
0137       RImportField &operator=(const RImportField &other) = delete;
0138       RImportField &operator=(RImportField &&other) = default;
0139 
0140       /// The field is kept during schema preparation and transferred to the fModel before the writing starts
0141       ROOT::RFieldBase *fField = nullptr;
0142       std::unique_ptr<ROOT::RFieldBase::RValue> fValue; ///< Set if a value is generated, only for transformed fields
0143       void *fFieldBuffer = nullptr; ///< Usually points to the corresponding RImportBranch::fBranchBuffer but not always
0144    };
0145 
0146    /// Base class to perform data transformations from TTree branches to RNTuple fields if necessary
0147    struct RImportTransformation {
0148       std::size_t fImportBranchIdx = 0;
0149       std::size_t fImportFieldIdx = 0;
0150 
0151       RImportTransformation(std::size_t branchIdx, std::size_t fieldIdx)
0152          : fImportBranchIdx(branchIdx), fImportFieldIdx(fieldIdx)
0153       {
0154       }
0155       virtual ~RImportTransformation() = default;
0156       virtual RResult<void> Transform(const RImportBranch &branch, RImportField &field) = 0;
0157    };
0158 
0159    /// When the schema is set up and the import started, it needs to be reset before the next Import() call
0160    /// can start. This RAII guard ensures that ResetSchema is called.
0161    struct RImportGuard {
0162       RNTupleImporter &fImporter;
0163 
0164       explicit RImportGuard(RNTupleImporter &importer) : fImporter(importer) {}
0165       RImportGuard(const RImportGuard &) = delete;
0166       RImportGuard &operator=(const RImportGuard &) = delete;
0167       RImportGuard(RImportGuard &&) = delete;
0168       RImportGuard &operator=(RImportGuard &&) = delete;
0169       ~RImportGuard() { fImporter.ResetSchema(); }
0170    };
0171 
0172    /// Leaf count arrays require special treatment. They are translated into untyped collections of untyped records.
0173    /// This class does the bookkeeping of the sub-schema for these collections.
0174    struct RImportLeafCountCollection {
0175       RImportLeafCountCollection() = default;
0176       RImportLeafCountCollection(const RImportLeafCountCollection &other) = delete;
0177       RImportLeafCountCollection(RImportLeafCountCollection &&other) = default;
0178       RImportLeafCountCollection &operator=(const RImportLeafCountCollection &other) = delete;
0179       RImportLeafCountCollection &operator=(RImportLeafCountCollection &&other) = default;
0180       std::string fFieldName; ///< name of the untyped collection, e.g. `_collection0`, `_collection1`, etc.
0181       /// Stores count leaf GetMaximum() to create large enough buffers for the array leafs.
0182       /// Uses Int_t because that is the return type if TLeaf::GetMaximum().
0183       Int_t fMaxLength = 0;
0184       /// The number of elements for the collection for a particular event. Used as a destination for SetBranchAddress()
0185       /// of the count leaf
0186       std::unique_ptr<Int_t> fCountVal;
0187       /// The leafs of the array as we encounter them traversing the TTree schema.
0188       /// Eventually, the fields are moved as leaves to an untyped collection of untyped records that in turn
0189       /// is attached to the RNTuple model.
0190       std::vector<std::unique_ptr<ROOT::RFieldBase>> fLeafFields;
0191       std::vector<size_t> fLeafBranchIndexes; ///< Points to the correspondings leaf branches in fImportBranches
0192       ROOT::RRecordField *fRecordField =
0193          nullptr; ///< Points to the item field of the untyped collection field in the model.
0194       std::vector<unsigned char> fFieldBuffer; ///< The collection field memory representation. Bound to the entry.
0195    };
0196 
0197    /// Transform a NULL terminated C string branch into an `std::string` field
0198    struct RCStringTransformation : public RImportTransformation {
0199       RCStringTransformation(std::size_t b, std::size_t f) : RImportTransformation(b, f) {}
0200       ~RCStringTransformation() override = default;
0201       RResult<void> Transform(const RImportBranch &branch, RImportField &field) final;
0202    };
0203 
0204    RNTupleImporter() = default;
0205 
0206    std::unique_ptr<TFile> fSourceFile;
0207    TTree *fSourceTree;
0208 
0209    std::string fDestFileName;
0210    std::string fNTupleName;
0211    std::unique_ptr<TFile> fDestFile;
0212    ROOT::RNTupleWriteOptions fWriteOptions;
0213 
0214    /// Whether or not dot characters in branch names should be converted to underscores. If this option is not set and a
0215    /// branch with a '.' is encountered, the importer will throw an exception.
0216    bool fConvertDotsInBranchNames = false;
0217 
0218    /// The maximum number of entries to import. When this value is -1 (default), import all entries.
0219    std::int64_t fMaxEntries = -1;
0220 
0221    /// No standard output, conversely if set to false, schema information and progress is printed.
0222    bool fIsQuiet = false;
0223    std::unique_ptr<RProgressCallback> fProgressCallback;
0224    FieldModifier_t fFieldModifier;
0225 
0226    std::unique_ptr<ROOT::RNTupleModel> fModel;
0227    std::unique_ptr<ROOT::REntry> fEntry;
0228    std::vector<RImportBranch> fImportBranches;
0229    std::vector<RImportField> fImportFields;
0230    /// Maps the count leaf to the information about the corresponding untyped collection
0231    std::map<std::string, RImportLeafCountCollection> fLeafCountCollections;
0232    /// The list of transformations to be performed for every entry
0233    std::vector<std::unique_ptr<RImportTransformation>> fImportTransformations;
0234 
0235    ROOT::RResult<void> InitDestination(std::string_view destFileName);
0236 
0237    void ResetSchema();
0238    /// Sets up the connection from TTree branches to RNTuple fields, including initialization of the memory
0239    /// buffers used for reading and writing.
0240    ROOT::RResult<void> PrepareSchema();
0241    void ReportSchema();
0242 
0243 public:
0244    RNTupleImporter(const RNTupleImporter &other) = delete;
0245    RNTupleImporter &operator=(const RNTupleImporter &other) = delete;
0246    RNTupleImporter(RNTupleImporter &&other) = delete;
0247    RNTupleImporter &operator=(RNTupleImporter &&other) = delete;
0248    ~RNTupleImporter() = default;
0249 
0250    /// Opens the input file for reading and the output file for writing (update).
0251    static std::unique_ptr<RNTupleImporter>
0252    Create(std::string_view sourceFileName, std::string_view treeName, std::string_view destFileName);
0253 
0254    /// Directly uses the provided tree and opens the output file for writing (update).
0255    static std::unique_ptr<RNTupleImporter> Create(TTree *sourceTree, std::string_view destFileName);
0256 
0257    ROOT::RNTupleWriteOptions GetWriteOptions() const { return fWriteOptions; }
0258    void SetWriteOptions(ROOT::RNTupleWriteOptions options) { fWriteOptions = options; }
0259    void SetNTupleName(const std::string &name) { fNTupleName = name; }
0260    void SetMaxEntries(std::uint64_t maxEntries) { fMaxEntries = maxEntries; };
0261 
0262    /// Whereas branch names may contain dots, RNTuple field names may not. By setting this option, dot characters are
0263    /// automatically converted into underscores to prevent the importer from throwing an exception.
0264    void SetConvertDotsInBranchNames(bool value) { fConvertDotsInBranchNames = value; }
0265 
0266    /// Whether or not information and progress is printed to stdout.
0267    void SetIsQuiet(bool value) { fIsQuiet = value; }
0268 
0269    /// Add custom method to adjust column representations.  Will be called for every field of the frozen model
0270    /// before it is attached to the page sink
0271    void SetFieldModifier(FieldModifier_t modifier) { fFieldModifier = modifier; }
0272 
0273    /// Import works in two steps:
0274    /// 1. PrepareSchema() calls SetBranchAddress() on all the TTree branches and creates the corresponding RNTuple
0275    ///    fields and the model
0276    /// 2. An event loop reads every entry from the TTree, applies transformations where necessary, and writes the
0277    ///    output entry to the RNTuple.
0278    void Import();
0279 }; // class RNTupleImporter
0280 
0281 } // namespace Experimental
0282 } // namespace ROOT
0283 
0284 #endif