Warning, file /include/root/ROOT/TThreadExecutor.hxx was not indexed
or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012 #ifndef ROOT_TThreadExecutor
0013 #define ROOT_TThreadExecutor
0014
0015 #include "RConfigure.h"
0016
0017
0018 #ifndef R__USE_IMT
0019
0020 # if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
0021 # error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
0022 # endif
0023 #else
0024
0025 #include "ROOT/TExecutorCRTP.hxx"
0026 #include "ROOT/TSeq.hxx"
0027 #include "ROOT/TypeTraits.hxx" // InvokeResult
0028 #include "RTaskArena.hxx"
0029 #include "TError.h"
0030
0031 #include <functional> //std::function
0032 #include <initializer_list>
0033 #include <memory>
0034 #include <numeric> //std::accumulate
0035 #include <type_traits> //std::enable_if
0036 #include <utility> //std::move
0037 #include <vector>
0038
0039 namespace ROOT {
0040
0041 class TThreadExecutor: public TExecutorCRTP<TThreadExecutor> {
0042 friend TExecutorCRTP;
0043
0044 public:
0045
0046 explicit TThreadExecutor(UInt_t nThreads = 0u);
0047
0048 TThreadExecutor(const TThreadExecutor &) = delete;
0049 TThreadExecutor &operator=(const TThreadExecutor &) = delete;
0050
0051
0052
0053 template<class F>
0054 void Foreach(F func, unsigned nTimes, unsigned nChunks = 0);
0055 template<class F, class INTEGER>
0056 void Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks = 0);
0057 template<class F, class T>
0058 void Foreach(F func, std::initializer_list<T> args, unsigned nChunks = 0);
0059 template<class F, class T>
0060 void Foreach(F func, std::vector<T> &args, unsigned nChunks = 0);
0061 template<class F, class T>
0062 void Foreach(F func, const std::vector<T> &args, unsigned nChunks = 0);
0063
0064
0065
0066 using TExecutorCRTP<TThreadExecutor>::Map;
0067
0068
0069 template <class F, class R, class Cond = validMapReturnCond<F>>
0070 auto Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>;
0071 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
0072 auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
0073 -> std::vector<InvokeResult_t<F, INTEGER>>;
0074 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
0075 auto Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
0076 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
0077 auto Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
0078 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
0079 auto Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F, T>>;
0080
0081
0082
0083
0084
0085
0086
0087
0088
0089 using TExecutorCRTP<TThreadExecutor>::MapReduce;
0090 template <class F, class R, class Cond = validMapReturnCond<F>>
0091 auto MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>;
0092 template <class F, class R, class Cond = validMapReturnCond<F>>
0093 auto MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>;
0094 template <class F, class INTEGER, class R, class Cond = validMapReturnCond<F, INTEGER>>
0095 auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, INTEGER>;
0096 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
0097 auto MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
0098 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
0099 auto MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
0100 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
0101 auto MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>;
0102 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
0103 auto MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
0104 template <class F, class T, class R, class Cond = validMapReturnCond<F, T>>
0105 auto MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>;
0106
0107 using TExecutorCRTP<TThreadExecutor>::Reduce;
0108 template<class T, class R> auto Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
0109 template<class T, class BINARYOP> auto Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
0110
0111 unsigned GetPoolSize() const;
0112
0113 private:
0114
0115
0116 template <class F, class Cond = validMapReturnCond<F>>
0117 auto MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>;
0118 template <class F, class INTEGER, class Cond = validMapReturnCond<F, INTEGER>>
0119 auto MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>;
0120 template <class F, class T, class Cond = validMapReturnCond<F, T>>
0121 auto MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
0122 template <class F, class T, class Cond = validMapReturnCond<F, T>>
0123 auto MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>;
0124
0125
0126 void ParallelFor(unsigned start, unsigned end, unsigned step, const std::function<void(unsigned int i)> &f);
0127 double ParallelReduce(const std::vector<double> &objs, const std::function<double(double a, double b)> &redfunc);
0128 float ParallelReduce(const std::vector<float> &objs, const std::function<float(float a, float b)> &redfunc);
0129 template<class T, class R>
0130 auto SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
0131
0132
0133 std::shared_ptr<ROOT::Internal::RTaskArenaWrapper> fTaskArenaW = nullptr;
0134 };
0135
0136
0137
0138
0139
0140
0141
0142
0143
0144 template<class F>
0145 void TThreadExecutor::Foreach(F func, unsigned nTimes, unsigned nChunks) {
0146 if (nChunks == 0) {
0147 ParallelFor(0U, nTimes, 1, [&](unsigned int){func();});
0148 return;
0149 }
0150
0151 unsigned step = (nTimes + nChunks - 1) / nChunks;
0152 auto lambda = [&](unsigned int i)
0153 {
0154 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
0155 func();
0156 }
0157 };
0158 ParallelFor(0U, nTimes, step, lambda);
0159 }
0160
0161
0162
0163
0164
0165
0166
0167 template<class F, class INTEGER>
0168 void TThreadExecutor::Foreach(F func, ROOT::TSeq<INTEGER> args, unsigned nChunks) {
0169 if (nChunks == 0) {
0170 ParallelFor(*args.begin(), *args.end(), args.step(), [&](unsigned int i){func(i);});
0171 return;
0172 }
0173 unsigned start = *args.begin();
0174 unsigned end = *args.end();
0175 unsigned seqStep = args.step();
0176 unsigned step = (end - start + nChunks - 1) / nChunks;
0177
0178 auto lambda = [&](unsigned int i)
0179 {
0180 for (unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
0181 func(i + j);
0182 }
0183 };
0184 ParallelFor(start, end, step, lambda);
0185 }
0186
0187
0188
0189
0190
0191
0192
0193 template<class F, class T>
0194 void TThreadExecutor::Foreach(F func, std::initializer_list<T> args, unsigned nChunks) {
0195 std::vector<T> vargs(std::move(args));
0196 Foreach(func, vargs, nChunks);
0197 }
0198
0199
0200
0201
0202
0203
0204
0205 template<class F, class T>
0206 void TThreadExecutor::Foreach(F func, std::vector<T> &args, unsigned nChunks) {
0207 unsigned int nToProcess = args.size();
0208 if (nChunks == 0) {
0209 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
0210 return;
0211 }
0212
0213 unsigned step = (nToProcess + nChunks - 1) / nChunks;
0214 auto lambda = [&](unsigned int i)
0215 {
0216 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
0217 func(args[i + j]);
0218 }
0219 };
0220 ParallelFor(0U, nToProcess, step, lambda);
0221 }
0222
0223
0224
0225
0226
0227
0228
0229 template<class F, class T>
0230 void TThreadExecutor::Foreach(F func, const std::vector<T> &args, unsigned nChunks) {
0231 unsigned int nToProcess = args.size();
0232 if (nChunks == 0) {
0233 ParallelFor(0U, nToProcess, 1, [&](unsigned int i){func(args[i]);});
0234 return;
0235 }
0236
0237 unsigned step = (nToProcess + nChunks - 1) / nChunks;
0238 auto lambda = [&](unsigned int i)
0239 {
0240 for (unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
0241 func(args[i + j]);
0242 }
0243 };
0244 ParallelFor(0U, nToProcess, step, lambda);
0245 }
0246
0247
0248
0249
0250
0251
0252 template <class F, class Cond>
0253 auto TThreadExecutor::MapImpl(F func, unsigned nTimes) -> std::vector<InvokeResult_t<F>>
0254 {
0255 using retType = decltype(func());
0256 std::vector<retType> reslist(nTimes);
0257 auto lambda = [&](unsigned int i)
0258 {
0259 reslist[i] = func();
0260 };
0261 ParallelFor(0U, nTimes, 1, lambda);
0262
0263 return reslist;
0264 }
0265
0266
0267
0268
0269
0270
0271 template <class F, class INTEGER, class Cond>
0272 auto TThreadExecutor::MapImpl(F func, ROOT::TSeq<INTEGER> args) -> std::vector<InvokeResult_t<F, INTEGER>>
0273 {
0274 using retType = decltype(func(*args.begin()));
0275 std::vector<retType> reslist(args.size());
0276 auto lambda = [&](unsigned int i) { reslist[i] = func(args[i]); };
0277 ParallelFor(0U, args.size(), 1, lambda);
0278
0279 return reslist;
0280 }
0281
0282
0283
0284
0285
0286
0287 template <class F, class R, class Cond>
0288 auto TThreadExecutor::Map(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> std::vector<InvokeResult_t<F>>
0289 {
0290 if (nChunks == 0)
0291 {
0292 return Map(func, nTimes);
0293 }
0294
0295 unsigned step = (nTimes + nChunks - 1) / nChunks;
0296
0297 unsigned actualChunks = (nTimes + step - 1) / step;
0298 using retType = decltype(func());
0299 std::vector<retType> reslist(actualChunks);
0300 auto lambda = [&](unsigned int i)
0301 {
0302 std::vector<retType> partialResults(std::min(nTimes-i, step));
0303 for (unsigned j = 0; j < step && (i + j) < nTimes; j++) {
0304 partialResults[j] = func();
0305 }
0306 reslist[i / step] = Reduce(partialResults, redfunc);
0307 };
0308 ParallelFor(0U, nTimes, step, lambda);
0309
0310 return reslist;
0311 }
0312
0313
0314
0315
0316
0317
0318 template <class F, class T, class Cond>
0319 auto TThreadExecutor::MapImpl(F func, std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
0320 {
0321
0322 using retType = decltype(func(args.front()));
0323
0324 unsigned int nToProcess = args.size();
0325 std::vector<retType> reslist(nToProcess);
0326
0327 auto lambda = [&](unsigned int i)
0328 {
0329 reslist[i] = func(args[i]);
0330 };
0331
0332 ParallelFor(0U, nToProcess, 1, lambda);
0333
0334 return reslist;
0335 }
0336
0337
0338
0339
0340
0341
0342 template <class F, class T, class Cond>
0343 auto TThreadExecutor::MapImpl(F func, const std::vector<T> &args) -> std::vector<InvokeResult_t<F, T>>
0344 {
0345
0346 using retType = decltype(func(args.front()));
0347
0348 unsigned int nToProcess = args.size();
0349 std::vector<retType> reslist(nToProcess);
0350
0351 auto lambda = [&](unsigned int i)
0352 {
0353 reslist[i] = func(args[i]);
0354 };
0355
0356 ParallelFor(0U, nToProcess, 1, lambda);
0357
0358 return reslist;
0359 }
0360
0361
0362
0363
0364
0365
0366 template <class F, class INTEGER, class R, class Cond>
0367 auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
0368 -> std::vector<InvokeResult_t<F, INTEGER>>
0369 {
0370 if (nChunks == 0)
0371 {
0372 return Map(func, args);
0373 }
0374
0375 unsigned nToProcess = args.size();
0376 unsigned step = (nToProcess + nChunks - 1) / nChunks;
0377
0378 unsigned actualChunks = (nToProcess + step - 1) / step;
0379
0380 using retType = decltype(func(*args.begin()));
0381 std::vector<retType> reslist(actualChunks);
0382 auto lambda = [&](unsigned int i) {
0383 std::vector<retType> partialResults(std::min(step, nToProcess - i));
0384 for (unsigned j = 0; j < partialResults.size(); j++) {
0385 partialResults[j] = func(args[i + j]);
0386 }
0387 reslist[i / step] = Reduce(partialResults, redfunc);
0388 };
0389
0390 ParallelFor(0U, nToProcess, step, lambda);
0391
0392 return reslist;
0393 }
0394
0395
0396
0397
0398
0399
0400 template <class F, class T, class R, class Cond>
0401 auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc, unsigned nChunks)
0402 -> std::vector<InvokeResult_t<F, T>>
0403 {
0404 if (nChunks == 0)
0405 {
0406 return Map(func, args);
0407 }
0408
0409 unsigned int nToProcess = args.size();
0410 unsigned step = (nToProcess + nChunks - 1) / nChunks;
0411
0412 unsigned actualChunks = (nToProcess + step - 1) / step;
0413
0414 using retType = decltype(func(args.front()));
0415 std::vector<retType> reslist(actualChunks);
0416 auto lambda = [&](unsigned int i) {
0417 std::vector<retType> partialResults(std::min(step, nToProcess - i));
0418 for (unsigned j = 0; j < partialResults.size(); j++) {
0419 partialResults[j] = func(args[i + j]);
0420 }
0421 reslist[i / step] = Reduce(partialResults, redfunc);
0422 };
0423
0424 ParallelFor(0U, nToProcess, step, lambda);
0425
0426 return reslist;
0427 }
0428
0429
0430
0431
0432
0433
0434 template <class F, class T, class R, class Cond>
0435 auto TThreadExecutor::Map(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
0436 -> std::vector<InvokeResult_t<F, T>>
0437 {
0438 if (nChunks == 0)
0439 {
0440 return Map(func, args);
0441 }
0442
0443 unsigned int nToProcess = args.size();
0444 unsigned step = (nToProcess + nChunks - 1) / nChunks;
0445
0446 unsigned actualChunks = (nToProcess + step - 1) / step;
0447
0448 using retType = decltype(func(args.front()));
0449 std::vector<retType> reslist(actualChunks);
0450 auto lambda = [&](unsigned int i) {
0451 std::vector<retType> partialResults(std::min(step, nToProcess - i));
0452 for (unsigned j = 0; j < partialResults.size(); j++) {
0453 partialResults[j] = func(args[i + j]);
0454 }
0455 reslist[i / step] = Reduce(partialResults, redfunc);
0456 };
0457
0458 ParallelFor(0U, nToProcess, step, lambda);
0459
0460 return reslist;
0461 }
0462
0463
0464
0465
0466
0467
0468 template <class F, class T, class R, class Cond>
0469 auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
0470 -> std::vector<InvokeResult_t<F, T>>
0471 {
0472 std::vector<T> vargs(std::move(args));
0473 const auto &reslist = Map(func, vargs, redfunc, nChunks);
0474 return reslist;
0475 }
0476
0477
0478
0479
0480 template <class F, class R, class Cond>
0481 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc) -> InvokeResult_t<F>
0482 {
0483 return Reduce(Map(func, nTimes), redfunc);
0484 }
0485
0486
0487
0488
0489
0490
0491 template <class F, class R, class Cond>
0492 auto TThreadExecutor::MapReduce(F func, unsigned nTimes, R redfunc, unsigned nChunks) -> InvokeResult_t<F>
0493 {
0494 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
0495 }
0496
0497
0498
0499
0500
0501
0502 template <class F, class INTEGER, class R, class Cond>
0503 auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc, unsigned nChunks)
0504 -> InvokeResult_t<F, INTEGER>
0505 {
0506 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
0507 }
0508
0509
0510
0511
0512
0513
0514 template <class F, class T, class R, class Cond>
0515 auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc, unsigned nChunks)
0516 -> InvokeResult_t<F, T>
0517 {
0518 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
0519 }
0520
0521
0522
0523
0524 template <class F, class T, class R, class Cond>
0525 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
0526 {
0527 return Reduce(Map(func, args), redfunc);
0528 }
0529
0530
0531
0532
0533 template <class F, class T, class R, class Cond>
0534 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc) -> InvokeResult_t<F, T>
0535 {
0536 return Reduce(Map(func, args), redfunc);
0537 }
0538
0539
0540
0541
0542
0543
0544 template <class F, class T, class R, class Cond>
0545 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc, unsigned nChunks) -> InvokeResult_t<F, T>
0546 {
0547 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
0548 }
0549
0550
0551
0552
0553
0554
0555 template <class F, class T, class R, class Cond>
0556 auto TThreadExecutor::MapReduce(F func, const std::vector<T> &args, R redfunc, unsigned nChunks)
0557 -> InvokeResult_t<F, T>
0558 {
0559 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
0560 }
0561
0562
0563
0564 template<class T, class R>
0565 auto TThreadExecutor::Reduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
0566 {
0567
0568 static_assert(std::is_same<decltype(redfunc(objs)), T>::value, "redfunc does not have the correct signature");
0569 return SeqReduce(objs, redfunc);
0570 }
0571
0572
0573
0574
0575
0576
0577
0578
0579 template<class T, class BINARYOP>
0580 auto TThreadExecutor::Reduce(const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
0581 {
0582
0583 static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value, "redfunc does not have the correct signature");
0584 return ParallelReduce(objs, redfunc);
0585 }
0586
0587
0588
0589
0590
0591
0592
0593 template<class T, class R>
0594 auto TThreadExecutor::SeqReduce(const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
0595 {
0596 return redfunc(objs);
0597 }
0598
0599 }
0600
0601 #endif
0602 #endif