Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-16 09:08:33

0001 /// \file ROOT/RNTupleImporter.hxx
0002 /// \ingroup NTuple ROOT7
0003 /// \author Jakob Blomer <jblomer@cern.ch>
0004 /// \date 2022-11-22
0005 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
0006 /// is welcome!
0007 
0008 /*************************************************************************
0009  * Copyright (C) 1995-2022, Rene Brun and Fons Rademakers.               *
0010  * All rights reserved.                                                  *
0011  *                                                                       *
0012  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0013  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0014  *************************************************************************/
0015 
0016 #ifndef ROOT7_RNTuplerImporter
0017 #define ROOT7_RNTuplerImporter
0018 
0019 #include <ROOT/REntry.hxx>
0020 #include <ROOT/RError.hxx>
0021 #include <ROOT/RField.hxx>
0022 #include <ROOT/RNTupleModel.hxx>
0023 #include <ROOT/RNTupleWriteOptions.hxx>
0024 #include <ROOT/RNTupleWriter.hxx>
0025 #include <string_view>
0026 
0027 #include <TFile.h>
0028 #include <TTree.h>
0029 
0030 #include <cstdlib>
0031 #include <functional>
0032 #include <map>
0033 #include <memory>
0034 #include <vector>
0035 
0036 class TLeaf;
0037 
0038 namespace ROOT {
0039 namespace Experimental {
0040 
0041 // clang-format off
0042 /**
0043 \class ROOT::Experimental::RNTupleImporter
0044 \ingroup NTuple
0045 \brief Converts a TTree into an RNTuple
0046 
0047 Example usage (see the ntpl008_import.C tutorial for a full example):
0048 
0049 ~~~ {.cpp}
0050 #include <ROOT/RNTupleImporter.hxx>
0051 using ROOT::Experimental::RNTupleImporter;
0052 
0053 auto importer = RNTupleImporter::Create("data.root", "TreeName", "output.root");
0054 // As required: importer->SetNTupleName(), importer->SetWriteOptions(), ...
0055 importer->Import();
0056 ~~~
0057 
0058 The output file is created if it does not exist, otherwise the ntuple is added to the existing file.
0059 Note that input file and output file can be identical if the ntuple is stored under a different name than the tree
0060 (use `SetNTupleName()`).
0061 
0062 By default, the RNTuple is compressed with zstd, independent of the input compression. The compression settings
0063 (and other output parameters) can be changed by `SetWriteOptions()`. For example, to compress the imported RNTuple
0064 using lz4 (with compression level 4) instead:
0065 
0066 ~~~ {.cpp}
0067 auto writeOptions = importer->GetWriteOptions();
0068 writeOptions.SetCompression(404);
0069 importer->SetWriteOptions(writeOptions);
0070 ~~~
0071 
0072 Most RNTuple fields have a type identical to the corresponding TTree input branch. Exceptions are
0073   - C string branches are translated to `std::string` fields
0074   - C style arrays are translated to `std::array<...>` fields
0075   - Leaf lists are translated to untyped records
0076   - Leaf count arrays are translated to anonymous collections with generic names (`_collection0`, `_collection1`, etc.).
0077     In order to keep field names and branch names aligned, RNTuple projects the members of these collections and
0078     their collection counter to the input branch names. For instance, the following input leaves:
0079 ~~~
0080 Int_t njets
0081 float jet_pt[njets]
0082 float jet_eta[njets]
0083 ~~~
0084     will be converted to the following RNTuple schema:
0085 ~~~
0086       _collection0 (untyped collection)
0087       |- float jet_pt
0088       |- float jet_eta
0089       std::size_t (RNTupleCardinality) njets   (projected from _collection0 without subfields)
0090       ROOT::RVec<float>                jet_pt  (projected from _collection0.jet_pt)
0091       ROOT::RVec<float>                jet_eta (projected from _collection0.jet_eta)
0092 ~~~
0093     These projections are meta-data only operations and don't involve duplicating the data.
0094 
0095 Current limitations of the importer:
0096   - No support for trees containing TClonesArray collections
0097   - Due to RNTuple currently storing data fully split, "don't split" markers are ignored
0098   - Some types are not available in RNTuple. Please refer to the
0099     [RNTuple specification](https://github.com/root-project/root/blob/master/tree/ntuple/v7/doc/specifications.md) for
0100     an overview of all types currently supported.
0101 */
0102 // clang-format on
0103 class RNTupleImporter {
0104 public:
0105    /// Used to make adjustments to the fields of the output model.
0106    using FieldModifier_t = std::function<void(ROOT::RFieldBase &)>;
0107 
0108    /// Used to report every ~100 MB (compressed), and at the end about the status of the import.
0109    class RProgressCallback {
0110    public:
0111       virtual ~RProgressCallback() = default;
0112       void operator()(std::uint64_t nbytesWritten, std::uint64_t neventsWritten)
0113       {
0114          Call(nbytesWritten, neventsWritten);
0115       }
0116       virtual void Call(std::uint64_t nbytesWritten, std::uint64_t neventsWritten) = 0;
0117       virtual void Finish(std::uint64_t nbytesWritten, std::uint64_t neventsWritten) = 0;
0118    };
0119 
0120 private:
0121    struct RImportBranch {
0122       RImportBranch() = default;
0123       RImportBranch(const RImportBranch &other) = delete;
0124       RImportBranch(RImportBranch &&other) = default;
0125       RImportBranch &operator=(const RImportBranch &other) = delete;
0126       RImportBranch &operator=(RImportBranch &&other) = default;
0127       std::string fBranchName;                        ///< Top-level branch name from the input TTree
0128       std::unique_ptr<unsigned char[]> fBranchBuffer; ///< The destination of SetBranchAddress() for `fBranchName`
0129    };
0130 
0131    struct RImportField {
0132       RImportField() = default;
0133       ~RImportField() = default;
0134       RImportField(const RImportField &other) = delete;
0135       RImportField(RImportField &&other) = default;
0136       RImportField &operator=(const RImportField &other) = delete;
0137       RImportField &operator=(RImportField &&other) = default;
0138 
0139       /// The field is kept during schema preparation and transferred to the fModel before the writing starts
0140       ROOT::RFieldBase *fField = nullptr;
0141       std::unique_ptr<ROOT::RFieldBase::RValue> fValue; ///< Set if a value is generated, only for transformed fields
0142       void *fFieldBuffer = nullptr; ///< Usually points to the corresponding RImportBranch::fBranchBuffer but not always
0143    };
0144 
0145    /// Base class to perform data transformations from TTree branches to RNTuple fields if necessary
0146    struct RImportTransformation {
0147       std::size_t fImportBranchIdx = 0;
0148       std::size_t fImportFieldIdx = 0;
0149 
0150       RImportTransformation(std::size_t branchIdx, std::size_t fieldIdx)
0151          : fImportBranchIdx(branchIdx), fImportFieldIdx(fieldIdx)
0152       {
0153       }
0154       virtual ~RImportTransformation() = default;
0155       virtual RResult<void> Transform(const RImportBranch &branch, RImportField &field) = 0;
0156    };
0157 
0158    /// When the schema is set up and the import started, it needs to be reset before the next Import() call
0159    /// can start. This RAII guard ensures that ResetSchema is called.
0160    struct RImportGuard {
0161       RNTupleImporter &fImporter;
0162 
0163       explicit RImportGuard(RNTupleImporter &importer) : fImporter(importer) {}
0164       RImportGuard(const RImportGuard &) = delete;
0165       RImportGuard &operator=(const RImportGuard &) = delete;
0166       RImportGuard(RImportGuard &&) = delete;
0167       RImportGuard &operator=(RImportGuard &&) = delete;
0168       ~RImportGuard() { fImporter.ResetSchema(); }
0169    };
0170 
0171    /// Leaf count arrays require special treatment. They are translated into untyped collections of untyped records.
0172    /// This class does the bookkeeping of the sub-schema for these collections.
0173    struct RImportLeafCountCollection {
0174       RImportLeafCountCollection() = default;
0175       RImportLeafCountCollection(const RImportLeafCountCollection &other) = delete;
0176       RImportLeafCountCollection(RImportLeafCountCollection &&other) = default;
0177       RImportLeafCountCollection &operator=(const RImportLeafCountCollection &other) = delete;
0178       RImportLeafCountCollection &operator=(RImportLeafCountCollection &&other) = default;
0179       std::string fFieldName; ///< name of the untyped collection, e.g. `_collection0`, `_collection1`, etc.
0180       /// Stores count leaf GetMaximum() to create large enough buffers for the array leafs.
0181       /// Uses Int_t because that is the return type if TLeaf::GetMaximum().
0182       Int_t fMaxLength = 0;
0183       /// The number of elements for the collection for a particular event. Used as a destination for SetBranchAddress()
0184       /// of the count leaf
0185       std::unique_ptr<Int_t> fCountVal;
0186       /// The leafs of the array as we encounter them traversing the TTree schema.
0187       /// Eventually, the fields are moved as leaves to an untyped collection of untyped records that in turn
0188       /// is attached to the RNTuple model.
0189       std::vector<std::unique_ptr<ROOT::RFieldBase>> fLeafFields;
0190       std::vector<size_t> fLeafBranchIndexes; ///< Points to the correspondings leaf branches in fImportBranches
0191       ROOT::RRecordField *fRecordField =
0192          nullptr; ///< Points to the item field of the untyped collection field in the model.
0193       std::vector<unsigned char> fFieldBuffer; ///< The collection field memory representation. Bound to the entry.
0194    };
0195 
0196    /// Transform a NULL terminated C string branch into an `std::string` field
0197    struct RCStringTransformation : public RImportTransformation {
0198       RCStringTransformation(std::size_t b, std::size_t f) : RImportTransformation(b, f) {}
0199       ~RCStringTransformation() override = default;
0200       RResult<void> Transform(const RImportBranch &branch, RImportField &field) final;
0201    };
0202 
0203    RNTupleImporter() = default;
0204 
0205    std::unique_ptr<TFile> fSourceFile;
0206    TTree *fSourceTree;
0207 
0208    std::string fDestFileName;
0209    std::string fNTupleName;
0210    std::unique_ptr<TFile> fDestFile;
0211    ROOT::RNTupleWriteOptions fWriteOptions;
0212 
0213    /// Whether or not dot characters in branch names should be converted to underscores. If this option is not set and a
0214    /// branch with a '.' is encountered, the importer will throw an exception.
0215    bool fConvertDotsInBranchNames = false;
0216 
0217    /// The maximum number of entries to import. When this value is -1 (default), import all entries.
0218    std::int64_t fMaxEntries = -1;
0219 
0220    /// No standard output, conversely if set to false, schema information and progress is printed.
0221    bool fIsQuiet = false;
0222    std::unique_ptr<RProgressCallback> fProgressCallback;
0223    FieldModifier_t fFieldModifier;
0224 
0225    std::unique_ptr<ROOT::RNTupleModel> fModel;
0226    std::unique_ptr<ROOT::REntry> fEntry;
0227    std::vector<RImportBranch> fImportBranches;
0228    std::vector<RImportField> fImportFields;
0229    /// Maps the count leaf to the information about the corresponding untyped collection
0230    std::map<std::string, RImportLeafCountCollection> fLeafCountCollections;
0231    /// The list of transformations to be performed for every entry
0232    std::vector<std::unique_ptr<RImportTransformation>> fImportTransformations;
0233 
0234    ROOT::RResult<void> InitDestination(std::string_view destFileName);
0235 
0236    void ResetSchema();
0237    /// Sets up the connection from TTree branches to RNTuple fields, including initialization of the memory
0238    /// buffers used for reading and writing.
0239    ROOT::RResult<void> PrepareSchema();
0240    void ReportSchema();
0241 
0242 public:
0243    RNTupleImporter(const RNTupleImporter &other) = delete;
0244    RNTupleImporter &operator=(const RNTupleImporter &other) = delete;
0245    RNTupleImporter(RNTupleImporter &&other) = delete;
0246    RNTupleImporter &operator=(RNTupleImporter &&other) = delete;
0247    ~RNTupleImporter() = default;
0248 
0249    /// Opens the input file for reading and the output file for writing (update).
0250    static std::unique_ptr<RNTupleImporter>
0251    Create(std::string_view sourceFileName, std::string_view treeName, std::string_view destFileName);
0252 
0253    /// Directly uses the provided tree and opens the output file for writing (update).
0254    static std::unique_ptr<RNTupleImporter> Create(TTree *sourceTree, std::string_view destFileName);
0255 
0256    ROOT::RNTupleWriteOptions GetWriteOptions() const { return fWriteOptions; }
0257    void SetWriteOptions(ROOT::RNTupleWriteOptions options) { fWriteOptions = options; }
0258    void SetNTupleName(const std::string &name) { fNTupleName = name; }
0259    void SetMaxEntries(std::uint64_t maxEntries) { fMaxEntries = maxEntries; };
0260 
0261    /// Whereas branch names may contain dots, RNTuple field names may not. By setting this option, dot characters are
0262    /// automatically converted into underscores to prevent the importer from throwing an exception.
0263    void SetConvertDotsInBranchNames(bool value) { fConvertDotsInBranchNames = value; }
0264 
0265    /// Whether or not information and progress is printed to stdout.
0266    void SetIsQuiet(bool value) { fIsQuiet = value; }
0267 
0268    /// Add custom method to adjust column representations.  Will be called for every field of the frozen model
0269    /// before it is attached to the page sink
0270    void SetFieldModifier(FieldModifier_t modifier) { fFieldModifier = modifier; }
0271 
0272    /// Import works in two steps:
0273    /// 1. PrepareSchema() calls SetBranchAddress() on all the TTree branches and creates the corresponding RNTuple
0274    ///    fields and the model
0275    /// 2. An event loop reads every entry from the TTree, applies transformations where necessary, and writes the
0276    ///    output entry to the RNTuple.
0277    void Import();
0278 }; // class RNTupleImporter
0279 
0280 } // namespace Experimental
0281 } // namespace ROOT
0282 
0283 #endif