File indexing completed on 2025-01-18 09:29:54
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP
0012 #define BOOST_COMPUTE_ALGORITHM_DETAIL_MERGE_WIH_MERGE_PATH_HPP
0013
0014 #include <iterator>
0015
0016 #include <boost/compute/algorithm/detail/merge_path.hpp>
0017 #include <boost/compute/algorithm/fill_n.hpp>
0018 #include <boost/compute/container/vector.hpp>
0019 #include <boost/compute/detail/iterator_range_size.hpp>
0020 #include <boost/compute/detail/meta_kernel.hpp>
0021 #include <boost/compute/system.hpp>
0022
0023 namespace boost {
0024 namespace compute {
0025 namespace detail {
0026
0027
0028
0029
0030
0031
0032 class serial_merge_kernel : meta_kernel
0033 {
0034 public:
0035 unsigned int tile_size;
0036
0037 serial_merge_kernel() : meta_kernel("merge")
0038 {
0039 tile_size = 4;
0040 }
0041
0042 template<class InputIterator1, class InputIterator2,
0043 class InputIterator3, class InputIterator4,
0044 class OutputIterator, class Compare>
0045 void set_range(InputIterator1 first1,
0046 InputIterator2 first2,
0047 InputIterator3 tile_first1,
0048 InputIterator3 tile_last1,
0049 InputIterator4 tile_first2,
0050 OutputIterator result,
0051 Compare comp)
0052 {
0053 m_count = iterator_range_size(tile_first1, tile_last1) - 1;
0054
0055 *this <<
0056 "uint i = get_global_id(0);\n" <<
0057 "uint start1 = " << tile_first1[expr<uint_>("i")] << ";\n" <<
0058 "uint end1 = " << tile_first1[expr<uint_>("i+1")] << ";\n" <<
0059 "uint start2 = " << tile_first2[expr<uint_>("i")] << ";\n" <<
0060 "uint end2 = " << tile_first2[expr<uint_>("i+1")] << ";\n" <<
0061 "uint index = i*" << tile_size << ";\n" <<
0062 "while(start1<end1 && start2<end2)\n" <<
0063 "{\n" <<
0064 " if(!(" << comp(first2[expr<uint_>("start2")],
0065 first1[expr<uint_>("start1")]) << "))\n" <<
0066 " {\n" <<
0067 result[expr<uint_>("index")] <<
0068 " = " << first1[expr<uint_>("start1")] << ";\n" <<
0069 " index++;\n" <<
0070 " start1++;\n" <<
0071 " }\n" <<
0072 " else\n" <<
0073 " {\n" <<
0074 result[expr<uint_>("index")] <<
0075 " = " << first2[expr<uint_>("start2")] << ";\n" <<
0076 " index++;\n" <<
0077 " start2++;\n" <<
0078 " }\n" <<
0079 "}\n" <<
0080 "while(start1<end1)\n" <<
0081 "{\n" <<
0082 result[expr<uint_>("index")] <<
0083 " = " << first1[expr<uint_>("start1")] << ";\n" <<
0084 " index++;\n" <<
0085 " start1++;\n" <<
0086 "}\n" <<
0087 "while(start2<end2)\n" <<
0088 "{\n" <<
0089 result[expr<uint_>("index")] <<
0090 " = " << first2[expr<uint_>("start2")] << ";\n" <<
0091 " index++;\n" <<
0092 " start2++;\n" <<
0093 "}\n";
0094 }
0095
0096 template<class InputIterator1, class InputIterator2,
0097 class InputIterator3, class InputIterator4,
0098 class OutputIterator>
0099 void set_range(InputIterator1 first1,
0100 InputIterator2 first2,
0101 InputIterator3 tile_first1,
0102 InputIterator3 tile_last1,
0103 InputIterator4 tile_first2,
0104 OutputIterator result)
0105 {
0106 typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
0107 ::boost::compute::less<value_type> less_than;
0108 set_range(first1, first2, tile_first1, tile_last1, tile_first2, result, less_than);
0109 }
0110
0111 event exec(command_queue &queue)
0112 {
0113 if(m_count == 0) {
0114 return event();
0115 }
0116
0117 return exec_1d(queue, 0, m_count);
0118 }
0119
0120 private:
0121 size_t m_count;
0122 };
0123
0124
0125
0126
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139
0140 template<class InputIterator1, class InputIterator2, class OutputIterator, class Compare>
0141 inline OutputIterator
0142 merge_with_merge_path(InputIterator1 first1,
0143 InputIterator1 last1,
0144 InputIterator2 first2,
0145 InputIterator2 last2,
0146 OutputIterator result,
0147 Compare comp,
0148 command_queue &queue = system::default_queue())
0149 {
0150 typedef typename
0151 std::iterator_traits<OutputIterator>::difference_type result_difference_type;
0152
0153 size_t tile_size = 1024;
0154
0155 size_t count1 = iterator_range_size(first1, last1);
0156 size_t count2 = iterator_range_size(first2, last2);
0157
0158 vector<uint_> tile_a((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
0159 vector<uint_> tile_b((count1+count2+tile_size-1)/tile_size+1, queue.get_context());
0160
0161
0162 merge_path_kernel tiling_kernel;
0163 tiling_kernel.tile_size = static_cast<unsigned int>(tile_size);
0164 tiling_kernel.set_range(first1, last1, first2, last2,
0165 tile_a.begin()+1, tile_b.begin()+1, comp);
0166 fill_n(tile_a.begin(), 1, uint_(0), queue);
0167 fill_n(tile_b.begin(), 1, uint_(0), queue);
0168 tiling_kernel.exec(queue);
0169
0170 fill_n(tile_a.end()-1, 1, static_cast<uint_>(count1), queue);
0171 fill_n(tile_b.end()-1, 1, static_cast<uint_>(count2), queue);
0172
0173
0174 serial_merge_kernel merge_kernel;
0175 merge_kernel.tile_size = static_cast<unsigned int>(tile_size);
0176 merge_kernel.set_range(first1, first2, tile_a.begin(), tile_a.end(),
0177 tile_b.begin(), result, comp);
0178
0179 merge_kernel.exec(queue);
0180
0181 return result + static_cast<result_difference_type>(count1 + count2);
0182 }
0183
0184
0185 template<class InputIterator1, class InputIterator2, class OutputIterator>
0186 inline OutputIterator
0187 merge_with_merge_path(InputIterator1 first1,
0188 InputIterator1 last1,
0189 InputIterator2 first2,
0190 InputIterator2 last2,
0191 OutputIterator result,
0192 command_queue &queue = system::default_queue())
0193 {
0194 typedef typename std::iterator_traits<InputIterator1>::value_type value_type;
0195 ::boost::compute::less<value_type> less_than;
0196 return merge_with_merge_path(first1, last1, first2, last2, result, less_than, queue);
0197 }
0198
0199 }
0200 }
0201 }
0202
0203 #endif