File indexing completed on 2025-01-18 09:29:53
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
0012 #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
0013
0014 #include <boost/compute/context.hpp>
0015 #include <boost/compute/command_queue.hpp>
0016 #include <boost/compute/container/vector.hpp>
0017 #include <boost/compute/algorithm/reduce.hpp>
0018 #include <boost/compute/functional/detail/nvidia_ballot.hpp>
0019 #include <boost/compute/functional/detail/nvidia_popcount.hpp>
0020 #include <boost/compute/detail/meta_kernel.hpp>
0021
0022 namespace boost {
0023 namespace compute {
0024 namespace detail {
0025
0026 template<class InputIterator, class Predicate>
0027 inline size_t count_if_with_ballot(InputIterator first,
0028 InputIterator last,
0029 Predicate predicate,
0030 command_queue &queue)
0031 {
0032 size_t count = iterator_range_size(first, last);
0033 size_t block_size = 32;
0034 size_t block_count = count / block_size;
0035 if(block_count * block_size != count){
0036 block_count++;
0037 }
0038
0039 const ::boost::compute::context &context = queue.get_context();
0040
0041 ::boost::compute::vector<uint_> counts(block_count, context);
0042
0043 ::boost::compute::detail::nvidia_popcount<uint_> popc;
0044 ::boost::compute::detail::nvidia_ballot<uint_> ballot;
0045
0046 meta_kernel k("count_if_with_ballot");
0047 k <<
0048 "const uint gid = get_global_id(0);\n" <<
0049
0050 "bool value = false;\n" <<
0051 "if(gid < count)\n" <<
0052 " value = " << predicate(first[k.var<const uint_>("gid")]) << ";\n" <<
0053
0054 "uint bits = " << ballot(k.var<const uint_>("value")) << ";\n" <<
0055
0056 "if(get_local_id(0) == 0)\n" <<
0057 counts.begin()[k.var<uint_>("get_group_id(0)") ]
0058 << " = " << popc(k.var<uint_>("bits")) << ";\n";
0059
0060 k.add_set_arg<const uint_>("count", count);
0061
0062 k.exec_1d(queue, 0, block_size * block_count, block_size);
0063
0064 uint_ result;
0065 ::boost::compute::reduce(
0066 counts.begin(),
0067 counts.end(),
0068 &result,
0069 queue
0070 );
0071 return result;
0072 }
0073
0074 }
0075 }
0076 }
0077
0078 #endif