File indexing completed on 2025-01-18 09:29:54
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
0012 #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_THREADS_HPP
0013
0014 #include <numeric>
0015
0016 #include <boost/compute/detail/meta_kernel.hpp>
0017 #include <boost/compute/container/vector.hpp>
0018
0019 namespace boost {
0020 namespace compute {
0021 namespace detail {
0022
0023 template<class InputIterator, class Predicate>
0024 class count_if_with_threads_kernel : meta_kernel
0025 {
0026 public:
0027 typedef typename
0028 std::iterator_traits<InputIterator>::value_type
0029 value_type;
0030
0031 count_if_with_threads_kernel()
0032 : meta_kernel("count_if_with_threads")
0033 {
0034 }
0035
0036 void set_args(InputIterator first,
0037 InputIterator last,
0038 Predicate predicate)
0039
0040 {
0041 typedef typename std::iterator_traits<InputIterator>::value_type T;
0042
0043 m_size = detail::iterator_range_size(first, last);
0044
0045 m_size_arg = add_arg<const ulong_>("size");
0046 m_counts_arg = add_arg<ulong_ *>(memory_object::global_memory, "counts");
0047
0048 *this <<
0049
0050 "const uint gid = get_global_id(0);\n" <<
0051 "const uint block_size = size / get_global_size(0);\n" <<
0052 "const uint start = block_size * gid;\n" <<
0053 "uint end = 0;\n" <<
0054 "if(gid == get_global_size(0) - 1)\n" <<
0055 " end = size;\n" <<
0056 "else\n" <<
0057 " end = block_size * gid + block_size;\n" <<
0058
0059
0060 "uint count = 0;\n" <<
0061 "for(uint i = start; i < end; i++){\n" <<
0062 decl<const T>("value") << "="
0063 << first[expr<uint_>("i")] << ";\n" <<
0064 if_(predicate(var<const T>("value"))) << "{\n" <<
0065 "count++;\n" <<
0066 "}\n" <<
0067 "}\n" <<
0068
0069
0070 "counts[gid] = count;\n";
0071 }
0072
0073 size_t exec(command_queue &queue)
0074 {
0075 const device &device = queue.get_device();
0076 const context &context = queue.get_context();
0077
0078 size_t threads = device.compute_units();
0079
0080 const size_t minimum_block_size = 2048;
0081 if(m_size / threads < minimum_block_size){
0082 threads = static_cast<size_t>(
0083 (std::max)(
0084 std::ceil(float(m_size) / minimum_block_size),
0085 1.0f
0086 )
0087 );
0088 }
0089
0090
0091 ::boost::compute::vector<ulong_> counts(threads, context);
0092
0093
0094 set_arg(m_size_arg, static_cast<ulong_>(m_size));
0095 set_arg(m_counts_arg, counts.get_buffer());
0096 exec_1d(queue, 0, threads, 1);
0097
0098
0099 std::vector<ulong_> host_counts(threads);
0100 ::boost::compute::copy(counts.begin(), counts.end(), host_counts.begin(), queue);
0101
0102
0103 return std::accumulate(host_counts.begin(), host_counts.end(), size_t(0));
0104 }
0105
0106 private:
0107 size_t m_size;
0108 size_t m_size_arg;
0109 size_t m_counts_arg;
0110 };
0111
0112
0113
0114 template<class InputIterator, class Predicate>
0115 inline size_t count_if_with_threads(InputIterator first,
0116 InputIterator last,
0117 Predicate predicate,
0118 command_queue &queue)
0119 {
0120 count_if_with_threads_kernel<InputIterator, Predicate> kernel;
0121 kernel.set_args(first, last, predicate);
0122 return kernel.exec(queue);
0123 }
0124
0125 }
0126 }
0127 }
0128
0129 #endif