Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:29:55

0001 //---------------------------------------------------------------------------//
0002 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
0003 //
0004 // Distributed under the Boost Software License, Version 1.0
0005 // See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt
0007 //
0008 // See http://boostorg.github.com/compute for more information.
0009 //---------------------------------------------------------------------------//
0010 
0011 #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
0012 #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
0013 
0014 #include <boost/static_assert.hpp>
0015 #include <boost/preprocessor/seq/for_each.hpp>
0016 
0017 #include <boost/compute/system.hpp>
0018 #include <boost/compute/functional.hpp>
0019 #include <boost/compute/command_queue.hpp>
0020 #include <boost/compute/algorithm/reduce.hpp>
0021 #include <boost/compute/algorithm/detail/serial_accumulate.hpp>
0022 #include <boost/compute/container/array.hpp>
0023 #include <boost/compute/container/vector.hpp>
0024 #include <boost/compute/type_traits/is_device_iterator.hpp>
0025 #include <boost/compute/detail/iterator_range_size.hpp>
0026 
0027 namespace boost {
0028 namespace compute {
0029 namespace detail {
0030 
0031 // Space complexity O(1)
0032 template<class InputIterator, class T, class BinaryFunction>
0033 inline T generic_accumulate(InputIterator first,
0034                             InputIterator last,
0035                             T init,
0036                             BinaryFunction function,
0037                             command_queue &queue)
0038 {
0039     const context &context = queue.get_context();
0040 
0041     size_t size = iterator_range_size(first, last);
0042     if(size == 0){
0043         return init;
0044     }
0045 
0046     // accumulate on device
0047     array<T, 1> device_result(context);
0048     detail::serial_accumulate(
0049         first, last, device_result.begin(), init, function, queue
0050     );
0051 
0052     // copy result to host
0053     T result;
0054     ::boost::compute::copy_n(device_result.begin(), 1, &result, queue);
0055     return result;
0056 }
0057 
// Tells the dispatcher whether accumulate() may be replaced by reduce().
// Overloads of this function return true when the function is commutative
// (such as addition of integers) and the initial value is the identity
// value for the operation (zero for addition, one for multiplication).
//
// This generic fallback is selected when no such overload matches, and it
// always answers false.
template<class T, class F>
inline bool can_accumulate_with_reduce(T init, F function)
{
    // neither argument is inspected; the casts silence unused warnings
    static_cast<void>(init);
    static_cast<void>(function);

    const bool reducible = false;
    return reducible;
}
0070 
0071 /// \internal_
0072 #define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \
0073     inline bool can_accumulate_with_reduce(type init, plus<type>) \
0074     { \
0075         return init == type(0); \
0076     } \
0077     inline bool can_accumulate_with_reduce(type init, multiplies<type>) \
0078     { \
0079         return init == type(1); \
0080     }
0081 
0082 BOOST_PP_SEQ_FOR_EACH(
0083     BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE,
0084     _,
0085     (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_)
0086 )
0087 
0088 template<class T>
0089 inline bool can_accumulate_with_reduce(T init, min<T>)
0090 {
0091     return init == (std::numeric_limits<T>::max)();
0092 }
0093 
0094 template<class T>
0095 inline bool can_accumulate_with_reduce(T init, max<T>)
0096 {
0097     return init == (std::numeric_limits<T>::min)();
0098 }
0099 
0100 #undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE
0101 
0102 template<class InputIterator, class T, class BinaryFunction>
0103 inline T dispatch_accumulate(InputIterator first,
0104                              InputIterator last,
0105                              T init,
0106                              BinaryFunction function,
0107                              command_queue &queue)
0108 {
0109     size_t size = iterator_range_size(first, last);
0110     if(size == 0){
0111         return init;
0112     }
0113 
0114     if(can_accumulate_with_reduce(init, function)){
0115         T result;
0116         reduce(first, last, &result, function, queue);
0117         return result;
0118     }
0119     else {
0120         return generic_accumulate(first, last, init, function, queue);
0121     }
0122 }
0123 
0124 } // end detail namespace
0125 
0126 /// Returns the result of applying \p function to the elements in the
0127 /// range [\p first, \p last) and \p init.
0128 ///
0129 /// If no function is specified, \c plus will be used.
0130 ///
0131 /// \param first first element in the input range
0132 /// \param last last element in the input range
0133 /// \param init initial value
0134 /// \param function binary reduction function
0135 /// \param queue command queue to perform the operation
0136 ///
0137 /// \return the accumulated result value
0138 ///
0139 /// In specific situations the call to \c accumulate() can be automatically
0140 /// optimized to a call to the more efficient \c reduce() algorithm. This
0141 /// occurs when the binary reduction function is recognized as associative
0142 /// (such as the \c plus<int> function).
0143 ///
0144 /// Note that because floating-point addition is not associative, calling
0145 /// \c accumulate() with \c plus<float> results in a less efficient serial
0146 /// reduction algorithm being executed. If a slight loss in precision is
0147 /// acceptable, the more efficient parallel \c reduce() algorithm should be
0148 /// used instead.
0149 ///
0150 /// For example:
0151 /// \code
0152 /// // with vec = boost::compute::vector<int>
0153 /// accumulate(vec.begin(), vec.end(), 0, plus<int>());   // fast
0154 /// reduce(vec.begin(), vec.end(), &result, plus<int>()); // fast
0155 ///
0156 /// // with vec = boost::compute::vector<float>
0157 /// accumulate(vec.begin(), vec.end(), 0, plus<float>());   // slow
0158 /// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast
0159 /// \endcode
0160 ///
0161 /// Space complexity: \Omega(1)<br>
0162 /// Space complexity when optimized to \c reduce(): \Omega(n)
0163 ///
0164 /// \see reduce()
0165 template<class InputIterator, class T, class BinaryFunction>
0166 inline T accumulate(InputIterator first,
0167                     InputIterator last,
0168                     T init,
0169                     BinaryFunction function,
0170                     command_queue &queue = system::default_queue())
0171 {
0172     BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value);
0173 
0174     return detail::dispatch_accumulate(first, last, init, function, queue);
0175 }
0176 
0177 /// \overload
0178 template<class InputIterator, class T>
0179 inline T accumulate(InputIterator first,
0180                     InputIterator last,
0181                     T init,
0182                     command_queue &queue = system::default_queue())
0183 {
0184     BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value);
0185     typedef typename std::iterator_traits<InputIterator>::value_type IT;
0186 
0187     return detail::dispatch_accumulate(first, last, init, plus<IT>(), queue);
0188 }
0189 
0190 } // end compute namespace
0191 } // end boost namespace
0192 
0193 #endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP