Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:29:54

0001 //---------------------------------------------------------------------------//
0002 // Copyright (c) 2014 Roshan <thisisroshansmail@gmail.com>
0003 //
0004 // Distributed under the Boost Software License, Version 1.0
0005 // See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt
0007 //
0008 // See http://boostorg.github.com/compute for more information.
0009 //---------------------------------------------------------------------------//
0010 
0011 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP
0012 #define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP
0013 
0014 #include <iterator>
0015 
0016 #include <boost/compute/algorithm/detail/merge_path.hpp>
0017 #include <boost/compute/algorithm/fill_n.hpp>
0018 #include <boost/compute/container/vector.hpp>
0019 #include <boost/compute/detail/iterator_range_size.hpp>
0020 #include <boost/compute/detail/meta_kernel.hpp>
0021 #include <boost/compute/system.hpp>
0022 
0023 namespace boost {
0024 namespace compute {
0025 namespace detail {
0026 
0027 ///
0028 /// \brief Serial merge kernel class
0029 ///
0030 /// Subclass of meta_kernel to perform serial merge after tiling
0031 ///
0032 class serial_merge_kernel : meta_kernel
0033 {
0034 public:
0035     unsigned int tile_size;
0036 
0037     serial_merge_kernel() : meta_kernel("merge")
0038     {
0039         tile_size = 4;
0040     }
0041 
0042     template<class InputIterator1, class InputIterator2,
0043              class InputIterator3, class InputIterator4,
0044              class OutputIterator, class Compare>
0045     void set_range(InputIterator1 first1,
0046                    InputIterator2 first2,
0047                    InputIterator3 tile_first1,
0048                    InputIterator3 tile_last1,
0049                    InputIterator4 tile_first2,
0050                    OutputIterator result,
0051                    Compare comp)
0052     {
0053         m_count = iterator_range_size(tile_first1, tile_last1) - 1;
0054 
0055         *this <<
0056         "uint i = get_global_id(0);\n" <<
0057         "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" <<
0058         "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
0059         "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" <<
0060         "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
0061         "uint index = i*" << tile_size << ";\n" <<
0062         "while(start1<end1 && start2<end2)\n" <<
0063         "{\n" <<
0064         "   if(!(" << comp(first2[expr<uint_>("start2")],
0065                             first1[expr<uint_>("start1")]) << "))\n" <<
0066         "   {\n" <<
0067                 result[expr<uint_>("index")] <<
0068                     " = " << first1[expr<uint_>("start1")] << ";\n" <<
0069         "       index++;\n" <<
0070         "       start1++;\n" <<
0071         "   }\n" <<
0072         "   else\n" <<
0073         "   {\n" <<
0074                 result[expr<uint_>("index")] <<
0075                     " = " << first2[expr<uint_>("start2")] << ";\n" <<
0076         "       index++;\n" <<
0077         "       start2++;\n" <<
0078         "   }\n" <<
0079         "}\n" <<
0080         "while(start1<end1)\n" <<
0081         "{\n" <<
0082             result[expr<uint_>("index")] <<
0083                 " = " << first1[expr<uint_>("start1")] << ";\n" <<
0084         "   index++;\n" <<
0085         "   start1++;\n" <<
0086         "}\n" <<
0087         "while(start2<end2)\n" <<
0088         "{\n" <<
0089             result[expr<uint_>("index")] <<
0090                 " = " << first2[expr<uint_>("start2")] << ";\n" <<
0091         "   index++;\n" <<
0092         "   start2++;\n" <<
0093         "}\n";
0094     }
0095 
0096     template<class InputIterator1, class InputIterator2,
0097              class InputIterator3, class InputIterator4,
0098              class OutputIterator>
0099     void set_range(InputIterator1 first1,
0100                    InputIterator2 first2,
0101                    InputIterator3 tile_first1,
0102                    InputIterator3 tile_last1,
0103                    InputIterator4 tile_first2,
0104                    OutputIterator result)
0105     {
0106         typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
0107         ::boost::compute::less<value_type> less_than;
0108         set_range(first1, first2, tile_first1, tile_last1, tile_first2, result, less_than);
0109     }
0110 
0111     event exec(command_queue &queue)
0112     {
0113         if(m_count == 0) {
0114             return event();
0115         }
0116 
0117         return exec_1d(queue, 0, m_count);
0118     }
0119 
0120 private:
0121     size_t m_count;
0122 };
0123 
0124 ///
0125 /// \brief Merge algorithm with merge path
0126 ///
0127 /// Merges the sorted values in the range [\p first1, \p last1) with
0128 /// the sorted values in the range [\p first2, last2) and stores the
0129 /// result in the range beginning at \p result
0130 ///
0131 /// \param first1 Iterator pointing to start of first set
0132 /// \param last1 Iterator pointing to end of first set
0133 /// \param first2 Iterator pointing to start of second set
0134 /// \param last2 Iterator pointing to end of second set
0135 /// \param result Iterator pointing to start of range in which the result
0136 /// will be stored
0137 /// \param comp Comparator which performs less than function
0138 /// \param queue Queue on which to execute
0139 ///
0140 template<class InputIterator1, class InputIterator2, class OutputIterator, class Compare>
0141 inline OutputIterator
0142 merge_with_merge_path(InputIterator1 first1,
0143                       InputIterator1 last1,
0144                       InputIterator2 first2,
0145                       InputIterator2 last2,
0146                       OutputIterator result,
0147                       Compare comp,
0148                       command_queue &queue = system::default_queue())
0149 {
0150     typedef typename
0151         std::iterator_traits<OutputIterator>::difference_type result_difference_type;
0152 
0153     size_t tile_size = 1024;
0154 
0155     size_t count1 = iterator_range_size(first1, last1);
0156     size_t count2 = iterator_range_size(first2, last2);
0157 
0158     vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
0159     vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
0160 
0161     // Tile the sets
0162     merge_path_kernel tiling_kernel;
0163     tiling_kernel.tile_size = static_cast<unsigned int>(tile_size);
0164     tiling_kernel.set_range(first1, last1, first2, last2,
0165                             tile_a.begin()+1, tile_b.begin()+1, comp);
0166     fill_n(tile_a.begin(), 1, uint_(0), queue);
0167     fill_n(tile_b.begin(), 1, uint_(0), queue);
0168     tiling_kernel.exec(queue);
0169 
0170     fill_n(tile_a.end()-1, 1, static_cast<uint_>(count1), queue);
0171     fill_n(tile_b.end()-1, 1, static_cast<uint_>(count2), queue);
0172 
0173     // Merge
0174     serial_merge_kernel merge_kernel;
0175     merge_kernel.tile_size = static_cast<unsigned int>(tile_size);
0176     merge_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
0177                            tile_b.begin(), result, comp);
0178 
0179     merge_kernel.exec(queue);
0180 
0181     return result + static_cast<result_difference_type>(count1 + count2);
0182 }
0183 
0184 /// \overload
0185 template<class InputIterator1, class InputIterator2, class OutputIterator>
0186 inline OutputIterator
0187 merge_with_merge_path(InputIterator1 first1,
0188                       InputIterator1 last1,
0189                       InputIterator2 first2,
0190                       InputIterator2 last2,
0191                       OutputIterator result,
0192                       command_queue &queue = system::default_queue())
0193 {
0194     typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
0195     ::boost::compute::less<value_type> less_than;
0196     return merge_with_merge_path(first1, last1, first2, last2, result, less_than, queue);
0197 }
0198 
0199 } //end detail namespace
0200 } //end compute namespace
0201 } //end boost namespace
0202 
0203 #endif // BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP