File indexing completed on 2025-09-17 09:13:46
0001 #ifndef BVH_V2_EXECUTOR_H
0002 #define BVH_V2_EXECUTOR_H
0003
0004 #include "bvh/v2/thread_pool.h"
0005
0006 #include <cstddef>
0007 #include <algorithm>
0008 #include <vector>
0009
0010 namespace bvh::v2 {
0011
0012
/// Static-polymorphism (CRTP) front end for executors.
///
/// Code that only knows about `Executor<Derived>` can call `for_each` and `reduce`;
/// both simply forward their arguments, unchanged, to the member of the same name
/// on `Derived`. `Derived` must therefore inherit from `Executor<Derived>` and
/// provide compatible `for_each` and `reduce` members.
template <typename Derived>
struct Executor {
    /// Forwards to `Derived::for_each(begin, end, loop)`.
    ///
    /// @param begin First index of the range handed to the derived executor.
    /// @param end   End index of the range handed to the derived executor.
    /// @param loop  Callable passed through untouched to the derived executor.
    template <typename Loop>
    inline void for_each(size_t begin, size_t end, const Loop& loop) {
        // Downcast once; the derived member hides this one, so this is not recursion.
        Derived& self = static_cast<Derived&>(*this);
        return self.for_each(begin, end, loop);
    }

    /// Forwards to `Derived::reduce(begin, end, init, reduce, join)` and returns its result.
    ///
    /// @param begin  First index of the range handed to the derived executor.
    /// @param end    End index of the range handed to the derived executor.
    /// @param init   Initial value passed through to the derived executor.
    /// @param reduce Range-reduction callable passed through untouched.
    /// @param join   Partial-result merging callable passed through untouched.
    /// @return Whatever `Derived::reduce` returns, converted to `T`.
    template <typename T, typename Reduce, typename Join>
    inline T reduce(size_t begin, size_t end, const T& init, const Reduce& reduce, const Join& join) {
        Derived& self = static_cast<Derived&>(*this);
        return self.reduce(begin, end, init, reduce, join);
    }
};
0025
0026
0027 struct SequentialExecutor : Executor<SequentialExecutor> {
0028 template <typename Loop>
0029 void for_each(size_t begin, size_t end, const Loop& loop) {
0030 loop(begin, end);
0031 }
0032
0033 template <typename T, typename Reduce, typename Join>
0034 T reduce(size_t begin, size_t end, const T& init, const Reduce& reduce, const Join&) {
0035 T result(init);
0036 reduce(result, begin, end);
0037 return result;
0038 }
0039 };
0040
0041
0042 struct ParallelExecutor : Executor<ParallelExecutor> {
0043 ThreadPool& thread_pool;
0044 size_t parallel_threshold;
0045
0046 ParallelExecutor(ThreadPool& thread_pool, size_t parallel_threshold = 1024)
0047 : thread_pool(thread_pool), parallel_threshold(parallel_threshold)
0048 {}
0049
0050 template <typename Loop>
0051 void for_each(size_t begin, size_t end, const Loop& loop) {
0052 if (end - begin < parallel_threshold)
0053 return loop(begin, end);
0054
0055 auto chunk_size = std::max(size_t{1}, (end - begin) / thread_pool.get_thread_count());
0056 for (size_t i = begin; i < end; i += chunk_size) {
0057 size_t next = std::min(end, i + chunk_size);
0058 thread_pool.push([=] (size_t) { loop(i, next); });
0059 }
0060 thread_pool.wait();
0061 }
0062
0063 template <typename T, typename Reduce, typename Join>
0064 T reduce(size_t begin, size_t end, const T& init, const Reduce& reduce, const Join& join) {
0065 if (end - begin < parallel_threshold) {
0066 T result(init);
0067 reduce(result, begin, end);
0068 return result;
0069 }
0070
0071 auto chunk_size = std::max(size_t{1}, (end - begin) / thread_pool.get_thread_count());
0072 std::vector<T> per_thread_result(thread_pool.get_thread_count(), init);
0073 for (size_t i = begin; i < end; i += chunk_size) {
0074 size_t next = std::min(end, i + chunk_size);
0075 thread_pool.push([&, i, next] (size_t thread_id) {
0076 auto& result = per_thread_result[thread_id];
0077 reduce(result, i, next);
0078 });
0079 }
0080 thread_pool.wait();
0081 for (size_t i = 1; i < thread_pool.get_thread_count(); ++i)
0082 join(per_thread_result[0], std::move(per_thread_result[i]));
0083 return per_thread_result[0];
0084 }
0085 };
0086
0087 }
0088
0089 #endif