Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:29:55

0001 //---------------------------------------------------------------------------//
0002 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
0003 //
0004 // Distributed under the Boost Software License, Version 1.0
0005 // See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt
0007 //
0008 // See http://boostorg.github.com/compute for more information.
0009 //---------------------------------------------------------------------------//
0010 
0011 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_SCAN_HPP
0012 #define BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_SCAN_HPP
0013 
0014 #include <iterator>
0015 
0016 #include <boost/compute/device.hpp>
0017 #include <boost/compute/kernel.hpp>
0018 #include <boost/compute/command_queue.hpp>
0019 #include <boost/compute/detail/meta_kernel.hpp>
0020 #include <boost/compute/detail/iterator_range_size.hpp>
0021 
0022 namespace boost {
0023 namespace compute {
0024 namespace detail {
0025 
0026 template<class InputIterator, class OutputIterator, class T, class BinaryOperator>
0027 inline OutputIterator serial_scan(InputIterator first,
0028                                   InputIterator last,
0029                                   OutputIterator result,
0030                                   bool exclusive,
0031                                   T init,
0032                                   BinaryOperator op,
0033                                   command_queue &queue)
0034 {
0035     if(first == last){
0036         return result;
0037     }
0038 
0039     typedef typename
0040         std::iterator_traits<InputIterator>::value_type input_type;
0041     typedef typename
0042         std::iterator_traits<OutputIterator>::value_type output_type;
0043 
0044     const context &context = queue.get_context();
0045 
0046     // create scan kernel
0047     meta_kernel k("serial_scan");
0048 
0049     // Arguments
0050     size_t n_arg = k.add_arg<ulong_>("n");
0051     size_t init_arg = k.add_arg<output_type>("initial_value");
0052 
0053     if(!exclusive){
0054         k <<
0055             k.decl<const ulong_>("start_idx") << " = 1;\n" <<
0056             k.decl<output_type>("sum") << " = " << first[0] << ";\n" <<
0057             result[0] << " = sum;\n";
0058     }
0059     else {
0060         k <<
0061             k.decl<const ulong_>("start_idx") << " = 0;\n" <<
0062             k.decl<output_type>("sum") << " = initial_value;\n";
0063     }
0064 
0065     k <<
0066         "for(ulong i = start_idx; i < n; i++){\n" <<
0067         k.decl<const input_type>("x") << " = "
0068             << first[k.var<ulong_>("i")] << ";\n";
0069 
0070     if(exclusive){
0071         k << result[k.var<ulong_>("i")] << " = sum;\n";
0072     }
0073 
0074     k << "    sum = "
0075         << op(k.var<output_type>("sum"), k.var<output_type>("x"))
0076         << ";\n";
0077 
0078     if(!exclusive){
0079         k << result[k.var<ulong_>("i")] << " = sum;\n";
0080     }
0081 
0082     k << "}\n";
0083 
0084     // compile scan kernel
0085     kernel scan_kernel = k.compile(context);
0086 
0087     // setup kernel arguments
0088     size_t n = detail::iterator_range_size(first, last);
0089     scan_kernel.set_arg<ulong_>(n_arg, n);
0090     scan_kernel.set_arg<output_type>(init_arg, static_cast<output_type>(init));
0091 
0092     // execute the kernel
0093     queue.enqueue_1d_range_kernel(scan_kernel, 0, 1, 1);
0094 
0095     // return iterator pointing to the end of the result range
0096     return result + n;
0097 }
0098 
0099 } // end detail namespace
0100 } // end compute namespace
0101 } // end boost namespace
0102 
0103 #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_SERIAL_SCAN_HPP