Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:29:53

0001 //---------------------------------------------------------------------------//
0002 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
0003 //
0004 // Distributed under the Boost Software License, Version 1.0
0005 // See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt
0007 //
0008 // See http://boostorg.github.com/compute for more information.
0009 //---------------------------------------------------------------------------//
0010 
0011 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP
0012 #define BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP
0013 
0014 #include <iterator>
0015 
0016 #include <boost/compute/command_queue.hpp>
0017 #include <boost/compute/async/future.hpp>
0018 #include <boost/compute/iterator/buffer_iterator.hpp>
0019 #include <boost/compute/iterator/discard_iterator.hpp>
0020 #include <boost/compute/memory/svm_ptr.hpp>
0021 #include <boost/compute/detail/iterator_range_size.hpp>
0022 #include <boost/compute/detail/meta_kernel.hpp>
0023 #include <boost/compute/detail/parameter_cache.hpp>
0024 #include <boost/compute/detail/work_size.hpp>
0025 #include <boost/compute/detail/vendor.hpp>
0026 
0027 namespace boost {
0028 namespace compute {
0029 namespace detail {
0030 
0031 template<class InputIterator, class OutputIterator>
0032 inline event copy_on_device_cpu(InputIterator first,
0033                                 OutputIterator result,
0034                                 size_t count,
0035                                 command_queue &queue,
0036                                 const wait_list &events)
0037 {
0038     meta_kernel k("copy");
0039     const device& device = queue.get_device();
0040 
0041     k <<
0042         "uint block = " <<
0043             "(uint)ceil(((float)count)/get_global_size(0));\n" <<
0044         "uint index = get_global_id(0) * block;\n" <<
0045         "uint end = min(count, index + block);\n" <<
0046         "while(index < end){\n" <<
0047             result[k.var<uint_>("index")] << '=' <<
0048                 first[k.var<uint_>("index")] << ";\n" <<
0049             "index++;\n" <<
0050         "}\n";
0051 
0052     k.add_set_arg<const uint_>("count", static_cast<uint_>(count));
0053 
0054     size_t global_work_size = device.compute_units();
0055     if(count <= 1024) global_work_size = 1;
0056     return k.exec_1d(queue, 0, global_work_size, events);
0057 }
0058 
0059 template<class InputIterator, class OutputIterator>
0060 inline event copy_on_device_gpu(InputIterator first,
0061                                 OutputIterator result,
0062                                 size_t count,
0063                                 command_queue &queue,
0064                                 const wait_list &events)
0065 {
0066     typedef typename std::iterator_traits<InputIterator>::value_type input_type;
0067 
0068     const device& device = queue.get_device();
0069     boost::shared_ptr<parameter_cache> parameters =
0070         detail::parameter_cache::get_global_cache(device);
0071     std::string cache_key =
0072         "__boost_copy_kernel_" + boost::lexical_cast<std::string>(sizeof(input_type));
0073 
0074     uint_ vpt = parameters->get(cache_key, "vpt", 4);
0075     uint_ tpb = parameters->get(cache_key, "tpb", 128);
0076 
0077     meta_kernel k("copy");
0078     k <<
0079         "uint index = get_local_id(0) + " <<
0080             "(" << vpt * tpb << " * get_group_id(0));\n" <<
0081         "for(uint i = 0; i < " << vpt << "; i++){\n" <<
0082         "    if(index < count){\n" <<
0083                 result[k.var<uint_>("index")] << '=' <<
0084                     first[k.var<uint_>("index")] << ";\n" <<
0085         "       index += " << tpb << ";\n"
0086         "    }\n"
0087         "}\n";
0088 
0089     k.add_set_arg<const uint_>("count", static_cast<uint_>(count));
0090     size_t global_work_size = calculate_work_size(count, vpt, tpb);
0091     return k.exec_1d(queue, 0, global_work_size, tpb, events);
0092 }
0093 
0094 template<class InputIterator, class OutputIterator>
0095 inline event dispatch_copy_on_device(InputIterator first,
0096                                      InputIterator last,
0097                                      OutputIterator result,
0098                                      command_queue &queue,
0099                                      const wait_list &events)
0100 {
0101     const size_t count = detail::iterator_range_size(first, last);
0102 
0103     if(count == 0){
0104         // nothing to do
0105         return event();
0106     }
0107 
0108     const device& device = queue.get_device();
0109     // copy_on_device_cpu() does not work for CPU on Apple platform
0110     // due to bug in its compiler.
0111     // See https://github.com/boostorg/compute/pull/626
0112     if((device.type() & device::cpu) && !is_apple_platform_device(device))
0113     {
0114         return copy_on_device_cpu(first, result, count, queue, events);
0115     }
0116     return copy_on_device_gpu(first, result, count, queue, events);
0117 }
0118 
0119 template<class InputIterator, class OutputIterator>
0120 inline OutputIterator copy_on_device(InputIterator first,
0121                                      InputIterator last,
0122                                      OutputIterator result,
0123                                      command_queue &queue,
0124                                      const wait_list &events)
0125 {
0126     dispatch_copy_on_device(first, last, result, queue, events);
0127     return result + std::distance(first, last);
0128 }
0129 
0130 template<class InputIterator>
0131 inline discard_iterator copy_on_device(InputIterator first,
0132                                        InputIterator last,
0133                                        discard_iterator result,
0134                                        command_queue &queue,
0135                                        const wait_list &events)
0136 {
0137     (void) queue;
0138     (void) events;
0139 
0140     return result + std::distance(first, last);
0141 }
0142 
0143 template<class InputIterator, class OutputIterator>
0144 inline future<OutputIterator> copy_on_device_async(InputIterator first,
0145                                                    InputIterator last,
0146                                                    OutputIterator result,
0147                                                    command_queue &queue,
0148                                                    const wait_list &events)
0149 {
0150     event event_ = dispatch_copy_on_device(first, last, result, queue, events);
0151     return make_future(result + std::distance(first, last), event_);
0152 }
0153 
0154 #ifdef BOOST_COMPUTE_CL_VERSION_2_0
0155 // copy_on_device() specialization for svm_ptr
0156 template<class T>
0157 inline svm_ptr<T> copy_on_device(svm_ptr<T> first,
0158                                  svm_ptr<T> last,
0159                                  svm_ptr<T> result,
0160                                  command_queue &queue,
0161                                  const wait_list &events)
0162 {
0163     size_t count = iterator_range_size(first, last);
0164     if(count == 0){
0165         return result;
0166     }
0167 
0168     queue.enqueue_svm_memcpy(
0169         result.get(), first.get(), count * sizeof(T), events
0170     );
0171 
0172     return result + count;
0173 }
0174 
0175 template<class T>
0176 inline future<svm_ptr<T> > copy_on_device_async(svm_ptr<T> first,
0177                                                 svm_ptr<T> last,
0178                                                 svm_ptr<T> result,
0179                                                 command_queue &queue,
0180                                                 const wait_list &events)
0181 {
0182     size_t count = iterator_range_size(first, last);
0183     if(count == 0){
0184         return future<svm_ptr<T> >();
0185     }
0186 
0187     event event_ = queue.enqueue_svm_memcpy_async(
0188         result.get(), first.get(), count * sizeof(T), events
0189     );
0190 
0191     return make_future(result + count, event_);
0192 }
0193 #endif // BOOST_COMPUTE_CL_VERSION_2_0
0194 
0195 } // end detail namespace
0196 } // end compute namespace
0197 } // end boost namespace
0198 
0199 #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_COPY_ON_DEVICE_HPP