Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-17 09:14:35

0001 // Author: Stefan Wunsch CERN  04/2019
0002 
0003 /*************************************************************************
0004  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers.               *
0005  * All rights reserved.                                                  *
0006  *                                                                       *
0007  * For the licensing terms see $ROOTSYS/LICENSE.                         *
0008  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
0009  *************************************************************************/
0010 
0011 #include <ROOT/RDataFrame.hxx>
0012 #include <ROOT/RDataSource.hxx>
0013 #include <ROOT/RVec.hxx>
0014 #include <ROOT/TSeq.hxx>
0015 
0016 #include <algorithm>
0017 #include <functional>
0018 #include <map>
0019 #include <memory>
0020 #include <string>
0021 #include <tuple>
0022 #include <typeinfo>
0023 #include <utility>
0024 #include <vector>
0025 
0026 #ifndef ROOT_RVECDS
0027 #define ROOT_RVECDS
0028 
0029 namespace ROOT {
0030 
0031 namespace Internal {
0032 
0033 namespace RDF {
0034 
0035 ////////////////////////////////////////////////////////////////////////////////////////////////
0036 /// \brief A RDataSource implementation which takes a collection of RVecs, which
0037 /// are able to adopt data from Numpy arrays
0038 ///
0039 /// This component allows to create a data source on a set of columns with data
0040 /// coming from RVecs. The adoption of externally provided data, e.g., via Numpy
0041 /// arrays, with RVecs allows to read arbitrary data from memory.
0042 /// In addition, the data source has to keep a reference on the Python owned data
0043 /// so that the lifetime of the data is tied to the datasource.
0044 template <typename... ColumnTypes>
0045 class RVecDS final : public ROOT::RDF::RDataSource {
0046    using PointerHolderPtrs_t = std::vector<ROOT::Internal::TDS::TPointerHolder *>;
0047 
0048    std::tuple<ROOT::RVec<ColumnTypes>...> fColumns;
0049    const std::vector<std::string> fColNames;
0050    const std::map<std::string, std::string> fColTypesMap;
0051    // The role of the fPointerHoldersModels is to be initialised with the pack
0052    // of arguments in the constrcutor signature at construction time
0053    // Once the number of slots is known, the fPointerHolders are initialised
0054    // according to the models.
0055    const PointerHolderPtrs_t fPointerHoldersModels;
0056    std::vector<PointerHolderPtrs_t> fPointerHolders;
0057    std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
0058    std::function<void()> fDeleteRVecs;
0059 
0060    Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id)
0061    {
0062       auto colNameStr = std::string(colName);
0063       // This could be optimised and done statically
0064       const auto idName = ROOT::Internal::RDF::TypeID2TypeName(id);
0065       auto it = fColTypesMap.find(colNameStr);
0066       if (fColTypesMap.end() == it) {
0067          std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
0068          throw std::runtime_error(err);
0069       }
0070 
0071       const auto colIdName = it->second;
0072       if (colIdName != idName) {
0073          std::string err = "Column " + colNameStr + " has type " + colIdName +
0074                            " while the id specified is associated to type " + idName;
0075          throw std::runtime_error(err);
0076       }
0077 
0078       const auto colBegin = fColNames.begin();
0079       const auto colEnd = fColNames.end();
0080       const auto namesIt = std::find(colBegin, colEnd, colName);
0081       const auto index = std::distance(colBegin, namesIt);
0082 
0083       Record_t ret(fNSlots);
0084       for (auto slot : ROOT::TSeqU(fNSlots)) {
0085          ret[slot] = fPointerHolders[index][slot]->GetPointerAddr();
0086       }
0087       return ret;
0088    }
0089 
0090    size_t GetEntriesNumber() { return std::get<0>(fColumns).size(); }
0091    template <std::size_t... S>
0092    void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
0093    {
0094       std::initializer_list<int> expander{
0095          (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = std::get<S>(fColumns)[entry], 0)...};
0096       (void)expander; // avoid unused variable warnings
0097    }
0098 
0099    template <std::size_t... S>
0100    void ColLengthChecker(std::index_sequence<S...>)
0101    {
0102       if (sizeof...(S) < 2)
0103          return;
0104 
0105       const std::vector<size_t> colLengths{std::get<S>(fColumns).size()...};
0106       const auto expectedLen = colLengths[0];
0107       std::string err;
0108       for (auto i : TSeqI(1, colLengths.size())) {
0109          if (expectedLen != colLengths[i]) {
0110             err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
0111                    "\" have different lengths: " + std::to_string(expectedLen) + " and " +
0112                    std::to_string(colLengths[i]);
0113          }
0114       }
0115       if (!err.empty()) {
0116          throw std::runtime_error(err);
0117       }
0118    }
0119 
0120 protected:
0121    std::string AsString() { return "Numpy data source"; };
0122 
0123 public:
0124    RVecDS(std::function<void()> deleteRVecs, std::pair<std::string, ROOT::RVec<ColumnTypes>> const &...colsNameVals)
0125       : fColumns(colsNameVals.second...),
0126         fColNames{colsNameVals.first...},
0127         fColTypesMap({{colsNameVals.first, ROOT::Internal::RDF::TypeID2TypeName(typeid(ColumnTypes))}...}),
0128         fPointerHoldersModels({new ROOT::Internal::TDS::TTypedPointerHolder<ColumnTypes>(new ColumnTypes())...}),
0129         fDeleteRVecs(deleteRVecs)
0130    {
0131    }
0132 
0133    // Rule of five
0134    RVecDS(const RVecDS &) = delete;
0135    RVecDS &operator=(const RVecDS &) = delete;
0136    RVecDS(RVecDS &&) = delete;
0137    RVecDS &operator=(RVecDS &&) = delete;
0138    ~RVecDS() final
0139    {
0140       for (auto &&ptrHolderv : fPointerHolders) {
0141          for (auto &&ptrHolder : ptrHolderv) {
0142             delete ptrHolder;
0143          }
0144       }
0145       // Release the data associated to this data source
0146       fDeleteRVecs();
0147    }
0148 
0149    const std::vector<std::string> &GetColumnNames() const { return fColNames; }
0150 
0151    std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges()
0152    {
0153       auto entryRanges(std::move(fEntryRanges)); // empty fEntryRanges
0154       return entryRanges;
0155    }
0156 
0157    std::string GetTypeName(std::string_view colName) const
0158    {
0159       const auto key = std::string(colName);
0160       return fColTypesMap.at(key);
0161    }
0162 
0163    bool HasColumn(std::string_view colName) const
0164    {
0165       const auto key = std::string(colName);
0166       const auto endIt = fColTypesMap.end();
0167       return endIt != fColTypesMap.find(key);
0168    }
0169 
0170    bool SetEntry(unsigned int slot, ULong64_t entry)
0171    {
0172       SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
0173       return true;
0174    }
0175 
0176    void SetNSlots(unsigned int nSlots) final
0177    {
0178       fNSlots = nSlots;
0179       const auto nCols = fColNames.size();
0180       fPointerHolders.resize(nCols); // now we need to fill it with the slots, all of the same type
0181       auto colIndex = 0U;
0182       for (auto &&ptrHolderv : fPointerHolders) {
0183          for (auto slot : ROOT::TSeqI(fNSlots)) {
0184             auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
0185             ptrHolderv.emplace_back(ptrHolder);
0186             (void)slot;
0187          }
0188          colIndex++;
0189       }
0190       for (auto &&ptrHolder : fPointerHoldersModels)
0191          delete ptrHolder;
0192    }
0193 
0194    void Initialize()
0195    {
0196       ColLengthChecker(std::index_sequence_for<ColumnTypes...>());
0197       const auto nEntries = GetEntriesNumber();
0198       const auto nEntriesInRange = nEntries / fNSlots; // between integers. Should make smaller?
0199       auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
0200       fEntryRanges.resize(fNSlots);
0201       auto init = 0ULL;
0202       auto end = 0ULL;
0203       for (auto &&range : fEntryRanges) {
0204          end = init + nEntriesInRange;
0205          if (0 != reminder) { // Distribute the reminder among the first chunks
0206             reminder--;
0207             end += 1;
0208          }
0209          range.first = init;
0210          range.second = end;
0211          init = end;
0212       }
0213    }
0214 
0215    std::string GetLabel() { return "RVecDS"; }
0216 };
0217 
0218 // Factory to create datasource able to read Numpy arrays through RVecs.
0219 // \param pyRVecs Pointer to PyObject holding RVecs.
0220 //                The RVecs itself hold a reference to the associated Numpy arrays so that
0221 //                the data cannot go out of scope as long as the datasource survives.
0222 template <typename... ColumnTypes>
0223 std::unique_ptr<RDataFrame>
0224 MakeRVecDataFrame(std::function<void()> deleteRVecs,
0225                   std::pair<std::string, ROOT::RVec<ColumnTypes>> const &...colNameProxyPairs)
0226 {
0227    return std::make_unique<RDataFrame>(std::make_unique<RVecDS<ColumnTypes...>>(deleteRVecs, colNameProxyPairs...));
0228 }
0229 
0230 } // namespace RDF
0231 } // namespace Internal
0232 } // namespace ROOT
0233 
0234 #endif // ROOT_RVECDS