File indexing completed on 2025-01-18 10:10:34
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef ROOT_RLAZYDSIMPL
0012 #define ROOT_RLAZYDSIMPL
0013
0014 #include "ROOT/RDataSource.hxx"
0015 #include "ROOT/RResultPtr.hxx"
0016 #include "ROOT/TSeq.hxx"
0017
0018 #include <algorithm>
0019 #include <map>
0020 #include <memory>
0021 #include <tuple>
0022 #include <string>
0023 #include <typeinfo>
0024 #include <utility> // std::index_sequence
0025 #include <vector>
0026
0027 namespace ROOT {
0028
0029 namespace RDF {
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040 template <typename... ColumnTypes>
0041 class RLazyDS final : public ROOT::RDF::RDataSource {
0042 using PointerHolderPtrs_t = std::vector<ROOT::Internal::TDS::TPointerHolder *>;
0043
0044 std::tuple<RResultPtr<std::vector<ColumnTypes>>...> fColumns;
0045 const std::vector<std::string> fColNames;
0046 const std::map<std::string, std::string> fColTypesMap;
0047
0048
0049
0050
0051 const PointerHolderPtrs_t fPointerHoldersModels;
0052 std::vector<PointerHolderPtrs_t> fPointerHolders;
0053 std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
0054 unsigned int fNSlots{0};
0055
0056 Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id) final
0057 {
0058 auto colNameStr = std::string(colName);
0059
0060 const auto idName = ROOT::Internal::RDF::TypeID2TypeName(id);
0061 auto it = fColTypesMap.find(colNameStr);
0062 if (fColTypesMap.end() == it) {
0063 std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
0064 throw std::runtime_error(err);
0065 }
0066
0067 const auto colIdName = it->second;
0068 if (colIdName != idName) {
0069 std::string err = "Column " + colNameStr + " has type " + colIdName +
0070 " while the id specified is associated to type " + idName;
0071 throw std::runtime_error(err);
0072 }
0073
0074 const auto colBegin = fColNames.begin();
0075 const auto colEnd = fColNames.end();
0076 const auto namesIt = std::find(colBegin, colEnd, colName);
0077 const auto index = std::distance(colBegin, namesIt);
0078
0079 Record_t ret(fNSlots);
0080 for (auto slot : ROOT::TSeqU(fNSlots)) {
0081 ret[slot] = fPointerHolders[index][slot]->GetPointerAddr();
0082 }
0083 return ret;
0084 }
0085
0086 size_t GetEntriesNumber() { return std::get<0>(fColumns)->size(); }
0087 template <std::size_t... S>
0088 void SetEntryHelper(unsigned int slot, ULong64_t entry, std::index_sequence<S...>)
0089 {
0090 std::initializer_list<int> expander{
0091 (*static_cast<ColumnTypes *>(fPointerHolders[S][slot]->GetPointer()) = (*std::get<S>(fColumns))[entry], 0)...};
0092 (void)expander;
0093 }
0094
0095 template <std::size_t... S>
0096 void ColLengthChecker(std::index_sequence<S...>)
0097 {
0098 if (sizeof...(S) < 2)
0099 return;
0100
0101 const std::vector<size_t> colLengths{std::get<S>(fColumns)->size()...};
0102 const auto expectedLen = colLengths[0];
0103 std::string err;
0104 for (auto i : TSeqI(1, colLengths.size())) {
0105 if (expectedLen != colLengths[i]) {
0106 err += "Column \"" + fColNames[i] + "\" and column \"" + fColNames[0] +
0107 "\" have different lengths: " + std::to_string(expectedLen) + " and " +
0108 std::to_string(colLengths[i]);
0109 }
0110 }
0111 if (!err.empty()) {
0112 throw std::runtime_error(err);
0113 }
0114 }
0115
0116 protected:
0117 std::string AsString() final { return "lazy data source"; };
0118
0119 public:
0120 RLazyDS(std::pair<std::string, RResultPtr<std::vector<ColumnTypes>>>... colsNameVals)
0121 : fColumns(std::tuple<RResultPtr<std::vector<ColumnTypes>>...>(colsNameVals.second...)),
0122 fColNames({colsNameVals.first...}),
0123 fColTypesMap({{colsNameVals.first, ROOT::Internal::RDF::TypeID2TypeName(typeid(ColumnTypes))}...}),
0124 fPointerHoldersModels({new ROOT::Internal::TDS::TTypedPointerHolder<ColumnTypes>(new ColumnTypes())...})
0125 {
0126 }
0127
0128 ~RLazyDS()
0129 {
0130 for (auto &&ptrHolderv : fPointerHolders) {
0131 for (auto &&ptrHolder : ptrHolderv) {
0132 delete ptrHolder;
0133 }
0134 }
0135 }
0136
0137 const std::vector<std::string> &GetColumnNames() const final { return fColNames; }
0138
0139 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final
0140 {
0141 auto entryRanges(std::move(fEntryRanges));
0142 return entryRanges;
0143 }
0144
0145 std::string GetTypeName(std::string_view colName) const final
0146 {
0147 const auto key = std::string(colName);
0148 return fColTypesMap.at(key);
0149 }
0150
0151 bool HasColumn(std::string_view colName) const final
0152 {
0153 const auto key = std::string(colName);
0154 const auto endIt = fColTypesMap.end();
0155 return endIt != fColTypesMap.find(key);
0156 }
0157
0158 bool SetEntry(unsigned int slot, ULong64_t entry) final
0159 {
0160 SetEntryHelper(slot, entry, std::index_sequence_for<ColumnTypes...>());
0161 return true;
0162 }
0163
0164 void SetNSlots(unsigned int nSlots) final
0165 {
0166 fNSlots = nSlots;
0167 const auto nCols = fColNames.size();
0168 fPointerHolders.resize(nCols);
0169 auto colIndex = 0U;
0170 for (auto &&ptrHolderv : fPointerHolders) {
0171 for (auto slot : ROOT::TSeqI(fNSlots)) {
0172 auto ptrHolder = fPointerHoldersModels[colIndex]->GetDeepCopy();
0173 ptrHolderv.emplace_back(ptrHolder);
0174 (void)slot;
0175 }
0176 colIndex++;
0177 }
0178 for (auto &&ptrHolder : fPointerHoldersModels)
0179 delete ptrHolder;
0180 }
0181
0182 void Initialize() final
0183 {
0184 ColLengthChecker(std::index_sequence_for<ColumnTypes...>());
0185 const auto nEntries = GetEntriesNumber();
0186 const auto nEntriesInRange = nEntries / fNSlots;
0187 auto reminder = 1U == fNSlots ? 0 : nEntries % fNSlots;
0188 fEntryRanges.resize(fNSlots);
0189 auto init = 0ULL;
0190 auto end = 0ULL;
0191 for (auto &&range : fEntryRanges) {
0192 end = init + nEntriesInRange;
0193 if (0 != reminder) {
0194 reminder--;
0195 end += 1;
0196 }
0197 range.first = init;
0198 range.second = end;
0199 init = end;
0200 }
0201 }
0202
0203 std::string GetLabel() final { return "LazyDS"; }
0204 };
0205
0206 }
0207
0208 }
0209
0210 #endif