Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-12-10 10:30:15

0001 // Author: Stefan Wunsch CERN  04/2019
0002 
0003 /*************************************************************************
0004  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010 
#include <ROOT/RDataFrame.hxx>
#include <ROOT/RDataSource.hxx>
#include <ROOT/RVec.hxx>
#include <ROOT/TSeq.hxx>

#include <algorithm>
#include <cstddef>
#include <functional>
#include <iterator>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <tuple>
#include <typeinfo>
#include <unordered_map>
#include <utility>
#include <vector>
0025 
0026 #ifndef ROOT_RVECDS
0027 #define ROOT_RVECDS
0028 
0029 namespace ROOT {
0030 
0031 namespace Internal {
0032 
0033 namespace RDF {
0034 
0035 class R__CLING_PTRCHECK(off) RVecDSColumnReader final : public ROOT::Detail::RDF::RColumnReaderBase {
0036    TPointerHolder *fPtrHolder;
0037    void *GetImpl(Long64_t) final { return fPtrHolder->GetPointer(); }
0038 
0039 public:
0040    RVecDSColumnReader(TPointerHolder *ptrHolder) : fPtrHolder(ptrHolder) {}
0041 };
0042 
0043 ////////////////////////////////////////////////////////////////////////////////////////////////
0044 /// \brief A RDataSource implementation which takes a collection of RVecs, which
0045 /// are able to adopt data from Numpy arrays
0046 ///
0047 /// This component allows to create a data source on a set of columns with data
0048 /// coming from RVecs. The adoption of externally provided data, e.g., via Numpy
0049 /// arrays, with RVecs allows to read arbitrary data from memory.
0050 /// In addition, the data source has to keep a reference on the Python owned data
0051 /// so that the lifetime of the data is tied to the datasource.
0052 template <typename... ColumnTypes>
0053 class RVecDS final : public ROOT::RDF::RDataSource {
0054    using PointerHolderPtrs_t = std::vector<ROOT::Internal::RDF::TPointerHolder *>;
0055 
0056    std::tuple<ROOT::RVec<ColumnTypes>...> fColumns;
0057    std::vector<std::string> fColNames;
0058    std::unordered_map<std::string, std::string> fColTypesMap;
0059    // The role of the fPointerHoldersModels is to be initialised with the pack
0060    // of arguments in the constrcutor signature at construction time
0061    // Once the number of slots is known, the fPointerHolders are initialised
0062    // according to the models.
0063    PointerHolderPtrs_t fPointerHoldersModels;
0064    std::vector<PointerHolderPtrs_t> fPointerHolders;
0065    std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
0066    std::function<void()> fDeleteRVecs;
0067 
0068    Record_t GetColumnReadersImpl(std::string_view, const std::type_info &) { return {}; }
0069 
0070    size_t GetEntriesNumber() { return std::get<0>(fColumns).size(); }
0071    template <std::size_t... S>
0072    void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
0073    {
0074       std::initializer_list<int> expander{
0075          (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = std::get<S>(fColumns)[entry], 0)...};
0076       (void)expander; // avoid unused variable warnings
0077    }
0078 
0079    template <std::size_t... S>
0080    void ColLengthChecker(std::index_sequence<S...>)
0081    {
0082       if (sizeof...(S) < 2)
0083          return;
0084 
0085       const std::vector<size_t> colLengths{std::get<S>(fColumns).size()...};
0086       const auto expectedLen = colLengths[0];
0087       std::string err;
0088       for (auto i : TSeqI(1, colLengths.size())) {
0089          if (expectedLen != colLengths[i]) {
0090             err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
0091                    "\" have different lengths: " + std::to_string(expectedLen) + " and " +
0092                    std::to_string(colLengths[i]);
0093          }
0094       }
0095       if (!err.empty()) {
0096          throw std::runtime_error(err);
0097       }
0098    }
0099 
0100 protected:
0101    std::string AsString() { return "Numpy data source"; };
0102 
0103 public:
0104    RVecDS(std::function<void()> deleteRVecs, std::pair<std::string, ROOT::RVec<ColumnTypes>> const &...colsNameVals)
0105       : fColumns(colsNameVals.second...),
0106         fColNames{colsNameVals.first...},
0107         fColTypesMap({{colsNameVals.first, ROOT::Internal::RDF::TypeID2TypeName(typeid(ColumnTypes))}...}),
0108         fPointerHoldersModels({new ROOT::Internal::RDF::TTypedPointerHolder<ColumnTypes>(new ColumnTypes())...}),
0109         fDeleteRVecs(deleteRVecs)
0110    {
0111    }
0112 
0113    // Rule of five
0114    RVecDS(const RVecDS &) = delete;
0115    RVecDS &operator=(const RVecDS &) = delete;
0116    RVecDS(RVecDS &&) = delete;
0117    RVecDS &operator=(RVecDS &&) = delete;
0118    ~RVecDS() final
0119    {
0120       for (auto &&ptrHolderv : fPointerHolders) {
0121          for (auto &&ptrHolder : ptrHolderv) {
0122             delete ptrHolder;
0123          }
0124       }
0125       // Release the data associated to this data source
0126       fDeleteRVecs();
0127    }
0128 
0129    std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
0130    GetColumnReaders(unsigned int slot, std::string_view colName, const std::type_info &id) final
0131    {
0132       auto colNameStr = std::string(colName);
0133 
0134       auto it = fColTypesMap.find(colNameStr);
0135       if (fColTypesMap.end() == it) {
0136          std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
0137          throw std::runtime_error(err);
0138       }
0139 
0140       const auto &colIdName = it->second;
0141       const auto idName = ROOT::Internal::RDF::TypeID2TypeName(id);
0142       if (colIdName != idName) {
0143          std::string err = "Column " + colNameStr + " has type " + colIdName +
0144                            " while the id specified is associated to type " + idName;
0145          throw std::runtime_error(err);
0146       }
0147 
0148       if (auto colNameIt = std::find(fColNames.begin(), fColNames.end(), colNameStr); colNameIt != fColNames.end()) {
0149          const auto index = std::distance(fColNames.begin(), colNameIt);
0150          return std::make_unique<ROOT::Internal::RDF::RVecDSColumnReader>(fPointerHolders[index][slot]);
0151       }
0152 
0153       throw std::runtime_error("Could not find column name \"" + colNameStr + "\" in available column names.");
0154    }
0155 
0156    const std::vector<std::string> &GetColumnNames() const { return fColNames; }
0157 
0158    std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges()
0159    {
0160       auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
0161       return entryRanges;
0162    }
0163 
0164    std::string GetTypeName(std::string_view colName) const
0165    {
0166       const auto key = std::string(colName);
0167       return fColTypesMap.at(key);
0168    }
0169 
0170    bool HasColumn(std::string_view colName) const
0171    {
0172       const auto key = std::string(colName);
0173       const auto endIt = fColTypesMap.end();
0174       return endIt != fColTypesMap.find(key);
0175    }
0176 
0177    bool SetEntry(unsigned int slot, ULong64_t entry)
0178    {
0179       SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
0180       return true;
0181    }
0182 
0183    void SetNSlots(unsigned int nSlots) final
0184    {
0185       fNSlots = nSlots;
0186       const auto nCols = fColNames.size();
0187       fPointerHolders.resize(nCols); // now we need to fill it with the slots, all of the same type
0188       auto colIndex = 0U;
0189       for (auto &&ptrHolderv : fPointerHolders) {
0190          for (auto slot : ROOT::TSeqI(fNSlots)) {
0191             auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
0192             ptrHolderv.emplace_back(ptrHolder);
0193             (void)slot;
0194          }
0195          colIndex++;
0196       }
0197       for (auto &&ptrHolder : fPointerHoldersModels)
0198          delete ptrHolder;
0199    }
0200 
0201    void Initialize()
0202    {
0203       ColLengthChecker(std::index_sequence_for<ColumnTypes...>());
0204       const auto nEntries = GetEntriesNumber();
0205       const auto nEntriesInRange = nEntries / fNSlots; // between integers. Should make smaller?
0206       auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
0207       fEntryRanges.resize(fNSlots);
0208       auto init = 0ULL;
0209       auto end = 0ULL;
0210       for (auto &&range : fEntryRanges) {
0211          end = init + nEntriesInRange;
0212          if (0 != reminder) { // Distribute the reminder among the first chunks
0213             reminder--;
0214             end += 1;
0215          }
0216          range.first = init;
0217          range.second = end;
0218          init = end;
0219       }
0220    }
0221 
0222    std::string GetLabel() { return "RVecDS"; }
0223 };
0224 
0225 // Factory to create datasource able to read Numpy arrays through RVecs.
0226 // \param pyRVecs Pointer to PyObject holding RVecs.
0227 //                The RVecs itself hold a reference to the associated Numpy arrays so that
0228 //                the data cannot go out of scope as long as the datasource survives.
0229 template <typename... ColumnTypes>
0230 std::unique_ptr<RDataFrame>
0231 MakeRVecDataFrame(std::function<void()> deleteRVecs,
0232                   std::pair<std::string, ROOT::RVec<ColumnTypes>> const &...colNameProxyPairs)
0233 {
0234    return std::make_unique<RDataFrame>(std::make_unique<RVecDS<ColumnTypes...>>(deleteRVecs, colNameProxyPairs...));
0235 }
0236 
0237 } // namespace RDF
0238 } // namespace Internal
0239 } // namespace ROOT
0240 
#endif // ROOT_RVECDS