Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:29:53

0001 //---------------------------------------------------------------------------//
0002 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
0003 //
0004 // Distributed under the Boost Software License, Version 1.0
0005 // See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt
0007 //
0008 // See http://boostorg.github.com/compute for more information.
0009 //---------------------------------------------------------------------------//
0010 
0011 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
0012 #define BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP
0013 
0014 #include <boost/compute/context.hpp>
0015 #include <boost/compute/command_queue.hpp>
0016 #include <boost/compute/container/vector.hpp>
0017 #include <boost/compute/algorithm/reduce.hpp>
0018 #include <boost/compute/functional/detail/nvidia_ballot.hpp>
0019 #include <boost/compute/functional/detail/nvidia_popcount.hpp>
0020 #include <boost/compute/detail/meta_kernel.hpp>
0021 
0022 namespace boost {
0023 namespace compute {
0024 namespace detail {
0025 
0026 template<class InputIterator, class Predicate>
0027 inline size_t count_if_with_ballot(InputIterator first,
0028                                    InputIterator last,
0029                                    Predicate predicate,
0030                                    command_queue &queue)
0031 {
0032     size_t count = iterator_range_size(first, last);
0033     size_t block_size = 32;
0034     size_t block_count = count / block_size;
0035     if(block_count * block_size != count){
0036         block_count++;
0037     }
0038 
0039     const ::boost::compute::context &context = queue.get_context();
0040 
0041     ::boost::compute::vector<uint_> counts(block_count, context);
0042 
0043     ::boost::compute::detail::nvidia_popcount<uint_> popc;
0044     ::boost::compute::detail::nvidia_ballot<uint_> ballot;
0045 
0046     meta_kernel k("count_if_with_ballot");
0047     k <<
0048         "const uint gid = get_global_id(0);\n" <<
0049 
0050         "bool value = false;\n" <<
0051         "if(gid < count)\n" <<
0052         "    value = " << predicate(first[k.var<const uint_>("gid")]) << ";\n" <<
0053 
0054         "uint bits = " << ballot(k.var<const uint_>("value")) << ";\n" <<
0055 
0056         "if(get_local_id(0) == 0)\n" <<
0057             counts.begin()[k.var<uint_>("get_group_id(0)") ]
0058                 << " = " << popc(k.var<uint_>("bits")) << ";\n";
0059 
0060     k.add_set_arg<const uint_>("count", count);
0061 
0062     k.exec_1d(queue, 0, block_size * block_count, block_size);
0063 
0064     uint_ result;
0065     ::boost::compute::reduce(
0066         counts.begin(),
0067         counts.end(),
0068         &result,
0069         queue
0070     );
0071     return result;
0072 }
0073 
0074 } // end detail namespace
0075 } // end compute namespace
0076 } // end boost namespace
0077 
0078 #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COUNT_IF_WITH_BALLOT_HPP