File indexing completed on 2025-12-10 10:30:15
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #include <ROOT/RDataFrame.hxx>
0012 #include <ROOT/RDataSource.hxx>
0013 #include <ROOT/RVec.hxx>
0014 #include <ROOT/TSeq.hxx>
0015
0016 #include <algorithm>
0017 #include <functional>
0018 #include <map>
0019 #include <memory>
0020 #include <string>
0021 #include <tuple>
0022 #include <typeinfo>
0023 #include <utility>
0024 #include <vector>
0025
0026 #ifndef ROOT_RVECDS
0027 #define ROOT_RVECDS
0028
0029 namespace ROOT {
0030
0031 namespace Internal {
0032
0033 namespace RDF {
0034
0035 class R__CLING_PTRCHECK(off) RVecDSColumnReader final : public ROOT::Detail::RDF::RColumnReaderBase {
0036 TPointerHolder *fPtrHolder;
0037 void *GetImpl(Long64_t) final { return fPtrHolder->GetPointer(); }
0038
0039 public:
0040 RVecDSColumnReader(TPointerHolder *ptrHolder) : fPtrHolder(ptrHolder) {}
0041 };
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052 template <typename... ColumnTypes>
0053 class RVecDS final : public ROOT::RDF::RDataSource {
0054 using PointerHolderPtrs_t = std::vector<ROOT::Internal::RDF::TPointerHolder *>;
0055
0056 std::tuple<ROOT::RVec<ColumnTypes>...> fColumns;
0057 std::vector<std::string> fColNames;
0058 std::unordered_map<std::string, std::string> fColTypesMap;
0059
0060
0061
0062
0063 PointerHolderPtrs_t fPointerHoldersModels;
0064 std::vector<PointerHolderPtrs_t> fPointerHolders;
0065 std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
0066 std::function<void()> fDeleteRVecs;
0067
0068 Record_t GetColumnReadersImpl(std::string_view, const std::type_info &) { return {}; }
0069
0070 size_t GetEntriesNumber() { return std::get<0>(fColumns).size(); }
0071 template <std::size_t... S>
0072 void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
0073 {
0074 std::initializer_list<int> expander{
0075 (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = std::get<S>(fColumns)[entry], 0)...};
0076 (void)expander;
0077 }
0078
0079 template <std::size_t... S>
0080 void ColLengthChecker(std::index_sequence<S...>)
0081 {
0082 if (sizeof...(S) < 2)
0083 return;
0084
0085 const std::vector<size_t> colLengths{std::get<S>(fColumns).size()...};
0086 const auto expectedLen = colLengths[0];
0087 std::string err;
0088 for (auto i : TSeqI(1, colLengths.size())) {
0089 if (expectedLen != colLengths[i]) {
0090 err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
0091 "\" have different lengths: " + std::to_string(expectedLen) + " and " +
0092 std::to_string(colLengths[i]);
0093 }
0094 }
0095 if (!err.empty()) {
0096 throw std::runtime_error(err);
0097 }
0098 }
0099
0100 protected:
0101 std::string AsString() { return "Numpy data source"; };
0102
0103 public:
0104 RVecDS(std::function<void()> deleteRVecs, std::pair<std::string, ROOT::RVec<ColumnTypes>> const &...colsNameVals)
0105 : fColumns(colsNameVals.second...),
0106 fColNames{colsNameVals.first...},
0107 fColTypesMap({{colsNameVals.first, ROOT::Internal::RDF::TypeID2TypeName(typeid(ColumnTypes))}...}),
0108 fPointerHoldersModels({new ROOT::Internal::RDF::TTypedPointerHolder<ColumnTypes>(new ColumnTypes())...}),
0109 fDeleteRVecs(deleteRVecs)
0110 {
0111 }
0112
0113
0114 RVecDS(const RVecDS &) = delete;
0115 RVecDS &operator=(const RVecDS &) = delete;
0116 RVecDS(RVecDS &&) = delete;
0117 RVecDS &operator=(RVecDS &&) = delete;
0118 ~RVecDS() final
0119 {
0120 for (auto &&ptrHolderv : fPointerHolders) {
0121 for (auto &&ptrHolder : ptrHolderv) {
0122 delete ptrHolder;
0123 }
0124 }
0125
0126 fDeleteRVecs();
0127 }
0128
0129 std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
0130 GetColumnReaders(unsigned int slot, std::string_view colName, const std::type_info &id) final
0131 {
0132 auto colNameStr = std::string(colName);
0133
0134 auto it = fColTypesMap.find(colNameStr);
0135 if (fColTypesMap.end() == it) {
0136 std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
0137 throw std::runtime_error(err);
0138 }
0139
0140 const auto &colIdName = it->second;
0141 const auto idName = ROOT::Internal::RDF::TypeID2TypeName(id);
0142 if (colIdName != idName) {
0143 std::string err = "Column " + colNameStr + " has type " + colIdName +
0144 " while the id specified is associated to type " + idName;
0145 throw std::runtime_error(err);
0146 }
0147
0148 if (auto colNameIt = std::find(fColNames.begin(), fColNames.end(), colNameStr); colNameIt != fColNames.end()) {
0149 const auto index = std::distance(fColNames.begin(), colNameIt);
0150 return std::make_unique<ROOT::Internal::RDF::RVecDSColumnReader>(fPointerHolders[index][slot]);
0151 }
0152
0153 throw std::runtime_error("Could not find column name \"" + colNameStr + "\" in available column names.");
0154 }
0155
0156 const std::vector<std::string> &GetColumnNames() const { return fColNames; }
0157
0158 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges()
0159 {
0160 auto entryRanges(std::move(fEntryRanges));
0161 return entryRanges;
0162 }
0163
0164 std::string GetTypeName(std::string_view colName) const
0165 {
0166 const auto key = std::string(colName);
0167 return fColTypesMap.at(key);
0168 }
0169
0170 bool HasColumn(std::string_view colName) const
0171 {
0172 const auto key = std::string(colName);
0173 const auto endIt = fColTypesMap.end();
0174 return endIt != fColTypesMap.find(key);
0175 }
0176
0177 bool SetEntry(unsigned int slot, ULong64_t entry)
0178 {
0179 SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
0180 return true;
0181 }
0182
0183 void SetNSlots(unsigned int nSlots) final
0184 {
0185 fNSlots = nSlots;
0186 const auto nCols = fColNames.size();
0187 fPointerHolders.resize(nCols);
0188 auto colIndex = 0U;
0189 for (auto &&ptrHolderv : fPointerHolders) {
0190 for (auto slot : ROOT::TSeqI(fNSlots)) {
0191 auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
0192 ptrHolderv.emplace_back(ptrHolder);
0193 (void)slot;
0194 }
0195 colIndex++;
0196 }
0197 for (auto &&ptrHolder : fPointerHoldersModels)
0198 delete ptrHolder;
0199 }
0200
0201 void Initialize()
0202 {
0203 ColLengthChecker(std::index_sequence_for<ColumnTypes...>());
0204 const auto nEntries = GetEntriesNumber();
0205 const auto nEntriesInRange = nEntries / fNSlots;
0206 auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
0207 fEntryRanges.resize(fNSlots);
0208 auto init = 0ULL;
0209 auto end = 0ULL;
0210 for (auto &&range : fEntryRanges) {
0211 end = init + nEntriesInRange;
0212 if (0 != reminder) {
0213 reminder--;
0214 end += 1;
0215 }
0216 range.first = init;
0217 range.second = end;
0218 init = end;
0219 }
0220 }
0221
0222 std::string GetLabel() { return "RVecDS"; }
0223 };
0224
0225
0226
0227
0228
0229 template <typename... ColumnTypes>
0230 std::unique_ptr<RDataFrame>
0231 MakeRVecDataFrame(std::function<void()> deleteRVecs,
0232 std::pair<std::string, ROOT::RVec<ColumnTypes>> const &...colNameProxyPairs)
0233 {
0234 return std::make_unique<RDataFrame>(std::make_unique<RVecDS<ColumnTypes...>>(deleteRVecs, colNameProxyPairs...));
0235 }
0236
0237 }
0238 }
0239 }
0240
0241 #endif