bvh/v2/executor.h

0001 #ifndef BVH_V2_EXECUTOR_H
0002 #define BVH_V2_EXECUTOR_H
0003
0004 #include "bvh/v2/thread_pool.h"
0005
0006 #include <cstddef>
0007 #include <algorithm>
0008 #include <vector>
0009
0010 namespace bvh::v2 {
0011
/// CRTP front-end for executors: offers iteration and reduction over a one-dimensional
/// range described by `begin` and `end`, and statically dispatches every call to the
/// member function of the same name in `Derived`.
template <typename Derived>
struct Executor {
    /// Forwards `begin`, `end` and `loop` unchanged to `Derived::for_each`.
    template <typename Loop>
    void for_each(size_t begin, size_t end, const Loop& loop) {
        auto& self = static_cast<Derived&>(*this);
        self.for_each(begin, end, loop);
    }

    /// Forwards all arguments unchanged to `Derived::reduce` and returns its result as a `T`.
    template <typename T, typename Reduce, typename Join>
    T reduce(size_t begin, size_t end, const T& init, const Reduce& reduce, const Join& join) {
        auto& self = static_cast<Derived&>(*this);
        return self.reduce(begin, end, init, reduce, join);
    }
};
0025
0026 /// Executor that executes serially.
0027 struct SequentialExecutor : Executor<SequentialExecutor> {
0028     template <typename Loop>
0029     void for_each(size_t begin, size_t end, const Loop& loop) {
0030         loop(begin, end);
0031     }
0032
0033     template <typename T, typename Reduce, typename Join>
0034     T reduce(size_t begin, size_t end, const T& init, const Reduce& reduce, const Join&) {
0035         T result(init);
0036         reduce(result, begin, end);
0037         return result;
0038     }
0039 };
0040
0041 /// Executor that executes in parallel using the given thread pool.
0042 struct ParallelExecutor : Executor<ParallelExecutor> {
0043     ThreadPool& thread_pool;
0044     size_t parallel_threshold;
0045
0046     ParallelExecutor(ThreadPool& thread_pool, size_t parallel_threshold = 1024)
0047         : thread_pool(thread_pool), parallel_threshold(parallel_threshold)
0048     {}
0049
0050     template <typename Loop>
0051     void for_each(size_t begin, size_t end, const Loop& loop) {
0052         if (end - begin < parallel_threshold)
0053             return loop(begin, end);
0054
0055         auto chunk_size = std::max(size_t{1}, (end - begin) / thread_pool.get_thread_count());
0056         for (size_t i = begin; i < end; i += chunk_size) {
0057             size_t next = std::min(end, i + chunk_size);
0058             thread_pool.push([=] (size_t) { loop(i, next); });
0059         }
0060         thread_pool.wait();
0061     }
0062
0063     template <typename T, typename Reduce, typename Join>
0064     T reduce(size_t begin, size_t end, const T& init, const Reduce& reduce, const Join& join) {
0065         if (end - begin < parallel_threshold) {
0066             T result(init);
0067             reduce(result, begin, end);
0068             return result;
0069         }
0070
0071         auto chunk_size = std::max(size_t{1}, (end - begin) / thread_pool.get_thread_count());
0072         std::vector<T> per_thread_result(thread_pool.get_thread_count(), init);
0073         for (size_t i = begin; i < end; i += chunk_size) {
0074             size_t next = std::min(end, i + chunk_size);
0075             thread_pool.push([&, i, next] (size_t thread_id) {
0076                 auto& result = per_thread_result[thread_id];
0077                 reduce(result, i, next);
0078             });
0079         }
0080         thread_pool.wait();
0081         for (size_t i = 1; i < thread_pool.get_thread_count(); ++i)
0082             join(per_thread_result[0], std::move(per_thread_result[i]));
0083         return per_thread_result[0];
0084     }
0085 };
0086
0087 } // namespace bvh::v2
0088
0089 #endif