Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:29:56

0001 //---------------------------------------------------------------------------//
0002 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
0003 //
0004 // Distributed under the Boost Software License, Version 1.0
0005 // See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt
0007 //
0008 // See http://boostorg.github.com/compute for more information.
0009 //---------------------------------------------------------------------------//
0010 
0011 #ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
0012 #define BOOST_COMPUTE_ALGORITHM_FILL_HPP
0013 
0014 #include <iterator>
0015 
0016 #include <boost/static_assert.hpp>
0017 #include <boost/mpl/int.hpp>
0018 #include <boost/mpl/vector.hpp>
0019 #include <boost/mpl/contains.hpp>
0020 #include <boost/utility/enable_if.hpp>
0021 
0022 #include <boost/compute/cl.hpp>
0023 #include <boost/compute/system.hpp>
0024 #include <boost/compute/command_queue.hpp>
0025 #include <boost/compute/algorithm/copy.hpp>
0026 #include <boost/compute/async/future.hpp>
0027 #include <boost/compute/iterator/constant_iterator.hpp>
0028 #include <boost/compute/iterator/discard_iterator.hpp>
0029 #include <boost/compute/detail/is_buffer_iterator.hpp>
0030 #include <boost/compute/detail/iterator_range_size.hpp>
0031 #include <boost/compute/type_traits/is_device_iterator.hpp>
0032 
0033 
0034 namespace boost {
0035 namespace compute {
0036 namespace detail {
0037 
0038 namespace mpl = boost::mpl;
0039 
0040 // fills the range [first, first + count) with value using copy()
0041 template<class BufferIterator, class T>
0042 inline void fill_with_copy(BufferIterator first,
0043                            size_t count,
0044                            const T &value,
0045                            command_queue &queue)
0046 {
0047     ::boost::compute::copy(
0048         ::boost::compute::make_constant_iterator(value, 0),
0049         ::boost::compute::make_constant_iterator(value, count),
0050         first,
0051         queue
0052     );
0053 }
0054 
0055 // fills the range [first, first + count) with value using copy_async()
0056 template<class BufferIterator, class T>
0057 inline future<void> fill_async_with_copy(BufferIterator first,
0058                                          size_t count,
0059                                          const T &value,
0060                                          command_queue &queue)
0061 {
0062     return ::boost::compute::copy_async(
0063                ::boost::compute::make_constant_iterator(value, 0),
0064                ::boost::compute::make_constant_iterator(value, count),
0065                first,
0066                queue
0067            );
0068 }
0069 
0070 #if defined(BOOST_COMPUTE_CL_VERSION_1_2)
0071 
0072 // meta-function returing true if Iterator points to a range of values
0073 // that can be filled using clEnqueueFillBuffer(). to meet this criteria
0074 // it must have a buffer accessible through iter.get_buffer() and the
0075 // size of its value_type must by in {1, 2, 4, 8, 16, 32, 64, 128}.
0076 template<class Iterator>
0077 struct is_valid_fill_buffer_iterator :
0078     public mpl::and_<
0079         is_buffer_iterator<Iterator>,
0080         mpl::contains<
0081             mpl::vector<
0082                 mpl::int_<1>,
0083                 mpl::int_<2>,
0084                 mpl::int_<4>,
0085                 mpl::int_<8>,
0086                 mpl::int_<16>,
0087                 mpl::int_<32>,
0088                 mpl::int_<64>,
0089                 mpl::int_<128>
0090             >,
0091             mpl::int_<
0092                 sizeof(typename std::iterator_traits<Iterator>::value_type)
0093             >
0094         >
0095     >::type { };
0096 
0097 template<>
0098 struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {};
0099 
0100 // specialization which uses clEnqueueFillBuffer for buffer iterators
0101 template<class BufferIterator, class T>
0102 inline void
0103 dispatch_fill(BufferIterator first,
0104               size_t count,
0105               const T &value,
0106               command_queue &queue,
0107               typename boost::enable_if<
0108                  is_valid_fill_buffer_iterator<BufferIterator>
0109               >::type* = 0)
0110 {
0111     typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
0112 
0113     if(count == 0){
0114         // nothing to do
0115         return;
0116     }
0117 
0118     // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
0119     if(!queue.check_device_version(1, 2)){
0120         return fill_with_copy(first, count, value, queue);
0121     }
0122 
0123     value_type pattern = static_cast<value_type>(value);
0124     size_t offset = static_cast<size_t>(first.get_index());
0125 
0126     if(count == 1){
0127         // use clEnqueueWriteBuffer() directly when writing a single value
0128         // to the device buffer. this is potentially more efficient and also
0129         // works around a bug in the intel opencl driver.
0130         queue.enqueue_write_buffer(
0131             first.get_buffer(),
0132             offset * sizeof(value_type),
0133             sizeof(value_type),
0134             &pattern
0135         );
0136     }
0137     else {
0138         queue.enqueue_fill_buffer(
0139             first.get_buffer(),
0140             &pattern,
0141             sizeof(value_type),
0142             offset * sizeof(value_type),
0143             count * sizeof(value_type)
0144         );
0145     }
0146 }
0147 
0148 template<class BufferIterator, class T>
0149 inline future<void>
0150 dispatch_fill_async(BufferIterator first,
0151                     size_t count,
0152                     const T &value,
0153                     command_queue &queue,
0154                     typename boost::enable_if<
0155                        is_valid_fill_buffer_iterator<BufferIterator>
0156                     >::type* = 0)
0157 {
0158     typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
0159 
0160     // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
0161     if(!queue.check_device_version(1, 2)){
0162         return fill_async_with_copy(first, count, value, queue);
0163     }
0164 
0165     value_type pattern = static_cast<value_type>(value);
0166     size_t offset = static_cast<size_t>(first.get_index());
0167 
0168     event event_ =
0169         queue.enqueue_fill_buffer(first.get_buffer(),
0170                                   &pattern,
0171                                   sizeof(value_type),
0172                                   offset * sizeof(value_type),
0173                                   count * sizeof(value_type));
0174 
0175     return future<void>(event_);
0176 }
0177 
0178 #ifdef BOOST_COMPUTE_CL_VERSION_2_0
0179 // specializations for svm_ptr<T>
0180 template<class T>
0181 inline void dispatch_fill(svm_ptr<T> first,
0182                           size_t count,
0183                           const T &value,
0184                           command_queue &queue)
0185 {
0186     if(count == 0){
0187         return;
0188     }
0189 
0190     queue.enqueue_svm_fill(
0191         first.get(), &value, sizeof(T), count * sizeof(T)
0192     );
0193 }
0194 
0195 template<class T>
0196 inline future<void> dispatch_fill_async(svm_ptr<T> first,
0197                                         size_t count,
0198                                         const T &value,
0199                                         command_queue &queue)
0200 {
0201     if(count == 0){
0202         return future<void>();
0203     }
0204 
0205     event event_ = queue.enqueue_svm_fill(
0206         first.get(), &value, sizeof(T), count * sizeof(T)
0207     );
0208 
0209     return future<void>(event_);
0210 }
0211 #endif // BOOST_COMPUTE_CL_VERSION_2_0
0212 
0213 // default implementations
0214 template<class BufferIterator, class T>
0215 inline void
0216 dispatch_fill(BufferIterator first,
0217               size_t count,
0218               const T &value,
0219               command_queue &queue,
0220               typename boost::disable_if<
0221                   is_valid_fill_buffer_iterator<BufferIterator>
0222               >::type* = 0)
0223 {
0224     fill_with_copy(first, count, value, queue);
0225 }
0226 
0227 template<class BufferIterator, class T>
0228 inline future<void>
0229 dispatch_fill_async(BufferIterator first,
0230                     size_t count,
0231                     const T &value,
0232                     command_queue &queue,
0233                     typename boost::disable_if<
0234                         is_valid_fill_buffer_iterator<BufferIterator>
0235                     >::type* = 0)
0236 {
0237     return fill_async_with_copy(first, count, value, queue);
0238 }
0239 #else
0240 template<class BufferIterator, class T>
0241 inline void dispatch_fill(BufferIterator first,
0242                           size_t count,
0243                           const T &value,
0244                           command_queue &queue)
0245 {
0246     fill_with_copy(first, count, value, queue);
0247 }
0248 
0249 template<class BufferIterator, class T>
0250 inline future<void> dispatch_fill_async(BufferIterator first,
0251                                         size_t count,
0252                                         const T &value,
0253                                         command_queue &queue)
0254 {
0255     return fill_async_with_copy(first, count, value, queue);
0256 }
0257 #endif // !defined(BOOST_COMPUTE_CL_VERSION_1_2)
0258 
0259 } // end detail namespace
0260 
0261 /// Fills the range [\p first, \p last) with \p value.
0262 ///
0263 /// \param first first element in the range to fill
0264 /// \param last last element in the range to fill
0265 /// \param value value to copy to each element
0266 /// \param queue command queue to perform the operation
0267 ///
0268 /// For example, to fill a vector on the device with sevens:
0269 /// \code
0270 /// // vector on the device
0271 /// boost::compute::vector<int> vec(10, context);
0272 ///
0273 /// // fill vector with sevens
0274 /// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
0275 /// \endcode
0276 ///
0277 /// Space complexity: \Omega(1)
0278 ///
0279 /// \see boost::compute::fill_n()
0280 template<class BufferIterator, class T>
0281 inline void fill(BufferIterator first,
0282                  BufferIterator last,
0283                  const T &value,
0284                  command_queue &queue = system::default_queue())
0285 {
0286     BOOST_STATIC_ASSERT(is_device_iterator<BufferIterator>::value);
0287     size_t count = detail::iterator_range_size(first, last);
0288     if(count == 0){
0289         return;
0290     }
0291 
0292     detail::dispatch_fill(first, count, value, queue);
0293 }
0294 
0295 template<class BufferIterator, class T>
0296 inline future<void> fill_async(BufferIterator first,
0297                                BufferIterator last,
0298                                const T &value,
0299                                command_queue &queue = system::default_queue())
0300 {
0301     BOOST_STATIC_ASSERT(detail::is_buffer_iterator<BufferIterator>::value);
0302     size_t count = detail::iterator_range_size(first, last);
0303     if(count == 0){
0304         return future<void>();
0305     }
0306 
0307     return detail::dispatch_fill_async(first, count, value, queue);
0308 }
0309 
0310 } // end compute namespace
0311 } // end boost namespace
0312 
0313 #endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP