File indexing completed on 2025-01-18 09:43:01
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #ifndef boost_numeric_ublas_opencl_prod_hpp_
0011 #define boost_numeric_ublas_opencl_prod_hpp_
0012
0013 #include <boost/numeric/ublas/opencl/library.hpp>
0014 #include <boost/numeric/ublas/opencl/vector.hpp>
0015 #include <boost/numeric/ublas/opencl/matrix.hpp>
0016 #include <boost/numeric/ublas/opencl/transpose.hpp>
0017 #include <boost/compute/buffer.hpp>
0018
0019 namespace boost { namespace numeric { namespace ublas { namespace opencl {
0020
0021 #define ONE_DOUBLE_COMPLEX {{1.0, 00.0}}
0022 #define ONE_FLOAT_COMPLEX {{1.0f, 00.0f}}
0023
0024 template <typename T, typename L1, typename L2>
0025 typename std::enable_if<is_numeric<T>::value>::type
0026 prod(ublas::matrix<T, L1, opencl::storage> const &a,
0027 ublas::matrix<T, L2, opencl::storage> const &b,
0028 ublas::matrix<T, L1, opencl::storage> &result,
0029 compute::command_queue &queue)
0030 {
0031 assert(a.device() == b.device() &&
0032 a.device() == result.device() &&
0033 a.device() == queue.get_device());
0034 assert(a.size2() == b.size1());
0035
0036 result.fill(0, queue);
0037
0038
0039 std::unique_ptr<ublas::matrix<T, L1, opencl::storage>> bl1;
0040
0041 cl_event event = NULL;
0042
0043 cl_mem buffer_a = a.begin().get_buffer().get();
0044 cl_mem buffer_b = b.begin().get_buffer().get();
0045 cl_mem buffer_result = result.begin().get_buffer().get();
0046
0047 if (!(std::is_same<L1, L2>::value))
0048 {
0049 bl1.reset(new ublas::matrix<T, L1, opencl::storage>(b.size1(), b.size2(), queue.get_context()));
0050 change_layout(b, *bl1, queue);
0051 buffer_b = bl1->begin().get_buffer().get();
0052 }
0053
0054 clblasOrder Order = std::is_same<L1, ublas::basic_row_major<> >::value ? clblasRowMajor : clblasColumnMajor;
0055 size_t lda = Order == clblasRowMajor ? a.size2() : a.size1();
0056 size_t ldb = Order == clblasRowMajor ? b.size2() : a.size2();
0057 size_t ldc = Order == clblasRowMajor ? b.size2() : a.size1();
0058
0059 if (std::is_same<T, float>::value)
0060 clblasSgemm(Order, clblasNoTrans, clblasNoTrans,
0061 a.size1(), b.size2(), a.size2(),
0062 1, buffer_a, 0, lda,
0063 buffer_b, 0, ldb, 1,
0064 buffer_result, 0, ldc,
0065 1, &(queue.get()), 0, NULL, &event);
0066 else if (std::is_same<T, double>::value)
0067 clblasDgemm(Order, clblasNoTrans, clblasNoTrans,
0068 a.size1(), b.size2(), a.size2(),
0069 1, buffer_a, 0, lda,
0070 buffer_b, 0, ldb, 1,
0071 buffer_result, 0, ldc,
0072 1, &(queue.get()), 0, NULL, &event);
0073 else if (std::is_same<T, std::complex<float>>::value)
0074 clblasCgemm(Order, clblasNoTrans, clblasNoTrans,
0075 a.size1(), b.size2(), a.size2(),
0076 ONE_FLOAT_COMPLEX, buffer_a, 0, lda,
0077 buffer_b, 0, ldb, ONE_FLOAT_COMPLEX,
0078 buffer_result, 0, ldc,
0079 1, &(queue.get()), 0, NULL, &event);
0080 else if (std::is_same<T, std::complex<double>>::value)
0081 clblasZgemm(Order, clblasNoTrans, clblasNoTrans,
0082 a.size1(), b.size2(), a.size2(),
0083 ONE_DOUBLE_COMPLEX, buffer_a, 0, lda,
0084 buffer_b, 0, ldb, ONE_DOUBLE_COMPLEX,
0085 buffer_result, 0, ldc,
0086 1, &(queue.get()), 0, NULL, &event);
0087 clWaitForEvents(1, &event);
0088 }
0089
0090 template <typename T, typename L1, typename L2, typename A>
0091 typename std::enable_if<is_numeric<T>::value>::type
0092 prod(ublas::matrix<T, L1, A> const &a,
0093 ublas::matrix<T, L2, A> const &b,
0094 ublas::matrix<T, L1, A> &result,
0095 compute::command_queue &queue)
0096 {
0097 ublas::matrix<T, L1, opencl::storage> adev(a, queue);
0098 ublas::matrix<T, L2, opencl::storage> bdev(b, queue);
0099 ublas::matrix<T, L1, opencl::storage> rdev(a.size1(), b.size2(), queue.get_context());
0100 prod(adev, bdev, rdev, queue);
0101 rdev.to_host(result,queue);
0102 }
0103
0104 template <typename T, typename L1, typename L2, typename A>
0105 typename std::enable_if<is_numeric<T>::value, ublas::matrix<T, L1, A>>::type
0106 prod(ublas::matrix<T, L1, A> const &a,
0107 ublas::matrix<T, L2, A> const &b,
0108 compute::command_queue &queue)
0109 {
0110 ublas::matrix<T, L1, A> result(a.size1(), b.size2());
0111 prod(a, b, result, queue);
0112 return result;
0113 }
0114
0115 template <typename T, typename L>
0116 typename std::enable_if<is_numeric<T>::value>::type
0117 prod(ublas::matrix<T, L, opencl::storage> const &a,
0118 ublas::vector<T, opencl::storage> const &b,
0119 ublas::vector<T, opencl::storage> &result,
0120 compute::command_queue &queue)
0121 {
0122 assert(a.device() == b.device() &&
0123 a.device() == result.device() &&
0124 a.device() == queue.get_device());
0125 assert(a.size2() == b.size());
0126 result.fill(0, queue);
0127
0128 cl_event event = NULL;
0129 clblasOrder Order = std::is_same<L, ublas::basic_row_major<> >::value ? clblasRowMajor : clblasColumnMajor;
0130 int lda = Order == clblasRowMajor ? a.size2() : a.size1();
0131 int ldb = Order == clblasRowMajor ? 1 : a.size2();
0132 int ldc = Order == clblasRowMajor ? 1 : a.size1();
0133
0134 if (std::is_same<T, float>::value)
0135 clblasSgemm(Order, clblasNoTrans, clblasNoTrans,
0136 a.size1(), 1, a.size2(),
0137 1, a.begin().get_buffer().get(), 0, lda,
0138 b.begin().get_buffer().get(), 0, ldb, 1,
0139 result.begin().get_buffer().get(), 0, ldc,
0140 1, &(queue.get()), 0, NULL, &event);
0141 else if (std::is_same<T, double>::value)
0142 clblasDgemm(Order, clblasNoTrans, clblasNoTrans,
0143 a.size1(), 1, a.size2(),
0144 1, a.begin().get_buffer().get(), 0, lda,
0145 b.begin().get_buffer().get(), 0, ldb, 1,
0146 result.begin().get_buffer().get(), 0, ldc,
0147 1, &(queue.get()), 0, NULL, &event);
0148 else if (std::is_same<T, std::complex<float>>::value)
0149 clblasCgemm(Order, clblasNoTrans, clblasNoTrans,
0150 a.size1(), 1, a.size2(),
0151 ONE_FLOAT_COMPLEX, a.begin().get_buffer().get(), 0, lda,
0152 b.begin().get_buffer().get(), 0, ldb, ONE_FLOAT_COMPLEX,
0153 result.begin().get_buffer().get(), 0, ldc,
0154 1, &(queue.get()), 0, NULL, &event);
0155 else if (std::is_same<T, std::complex<double>>::value)
0156 clblasZgemm(Order, clblasNoTrans, clblasNoTrans,
0157 a.size1(), 1, a.size2(),
0158 ONE_DOUBLE_COMPLEX, a.begin().get_buffer().get(), 0, lda,
0159 b.begin().get_buffer().get(), 0, ldb, ONE_DOUBLE_COMPLEX,
0160 result.begin().get_buffer().get(), 0, ldc,
0161 1, &(queue.get()), 0, NULL, &event);
0162 clWaitForEvents(1, &event);
0163 }
0164
0165 template <typename T, typename L, typename A>
0166 typename std::enable_if<is_numeric<T>::value>::type
0167 prod(ublas::matrix<T, L, A> const &a,
0168 ublas::vector<T, A> const &b,
0169 ublas::vector<T, A> &result,
0170 compute::command_queue &queue)
0171 {
0172 ublas::matrix<T, L, opencl::storage> adev(a, queue);
0173 ublas::vector<T, opencl::storage> bdev(b, queue);
0174 ublas::vector<T, opencl::storage> rdev(a.size1(), queue.get_context());
0175 prod(adev, bdev, rdev, queue);
0176 rdev.to_host(result, queue);
0177 }
0178
0179 template <typename T, typename L, typename A>
0180 typename std::enable_if<is_numeric<T>::value, ublas::vector<T, A>>::type
0181 prod(ublas::matrix<T, L, A> const &a,
0182 ublas::vector<T, A> const &b,
0183 compute::command_queue &queue)
0184 {
0185 ublas::vector<T, A> result(a.size1());
0186 prod(a, b, result, queue);
0187 return result;
0188 }
0189
0190 template <typename T, typename L>
0191 typename std::enable_if<is_numeric<T>::value>::type
0192 prod(ublas::vector<T, opencl::storage> const &a,
0193 ublas::matrix<T, L, opencl::storage> const &b,
0194 ublas::vector<T, opencl::storage> &result,
0195 compute::command_queue &queue)
0196 {
0197 assert(a.device() == b.device() &&
0198 a.device() == result.device() &&
0199 a.device() == queue.get_device());
0200 assert(a.size() == b.size1());
0201 result.fill(0, queue);
0202 cl_event event = NULL;
0203 clblasOrder Order = std::is_same<L, ublas::basic_row_major<> >::value ? clblasRowMajor : clblasColumnMajor;
0204 size_t lda = Order == clblasRowMajor ? a.size() : 1;
0205 size_t ldb = Order == clblasRowMajor ? b.size2() : a.size();
0206 size_t ldc = Order == clblasRowMajor ? b.size2() : 1;
0207
0208 if (std::is_same<T, float>::value)
0209 clblasSgemm(Order, clblasNoTrans, clblasNoTrans,
0210 1, b.size2(), a.size(),
0211 1, a.begin().get_buffer().get(), 0, lda,
0212 b.begin().get_buffer().get(), 0, ldb, 1,
0213 result.begin().get_buffer().get(), 0, ldc,
0214 1, &(queue.get()), 0, NULL, &event);
0215 else if (std::is_same<T, double>::value)
0216 clblasDgemm(Order, clblasNoTrans, clblasNoTrans,
0217 1, b.size2(), a.size(),
0218 1, a.begin().get_buffer().get(), 0, lda,
0219 b.begin().get_buffer().get(), 0, ldb, 1,
0220 result.begin().get_buffer().get(), 0, ldc,
0221 1, &(queue.get()), 0, NULL, &event);
0222 else if (std::is_same<T, std::complex<float>>::value)
0223 clblasCgemm(Order, clblasNoTrans, clblasNoTrans,
0224 1, b.size2(), a.size(),
0225 ONE_FLOAT_COMPLEX, a.begin().get_buffer().get(), 0, lda,
0226 b.begin().get_buffer().get(), 0, ldb, ONE_FLOAT_COMPLEX,
0227 result.begin().get_buffer().get(), 0, ldc,
0228 1, &(queue.get()), 0, NULL, &event);
0229 else if (std::is_same<T, std::complex<double>>::value)
0230 clblasZgemm(Order, clblasNoTrans, clblasNoTrans,
0231 1, b.size2(), a.size(),
0232 ONE_DOUBLE_COMPLEX, a.begin().get_buffer().get(), 0, lda,
0233 b.begin().get_buffer().get(), 0, ldb, ONE_DOUBLE_COMPLEX,
0234 result.begin().get_buffer().get(), 0, ldc,
0235 1, &(queue.get()), 0, NULL, &event);
0236 clWaitForEvents(1, &event);
0237 }
0238
0239 template <class T, class L, class A>
0240 typename std::enable_if<is_numeric<T>::value>::type
0241 prod(ublas::vector<T, A> const &a,
0242 ublas::matrix<T, L, A> const &b,
0243 ublas::vector<T, A> &result,
0244 compute::command_queue &queue)
0245 {
0246 ublas::vector<T, opencl::storage> adev(a, queue);
0247 ublas::matrix<T, L, opencl::storage> bdev(b, queue);
0248 ublas::vector<T, opencl::storage> rdev(b.size2(), queue.get_context());
0249 prod(adev, bdev, rdev, queue);
0250 rdev.to_host(result, queue);
0251 }
0252
0253 template <class T, class L, class A>
0254 typename std::enable_if<is_numeric<T>::value, ublas::vector<T, A>>::type
0255 prod(ublas::vector<T, A> const &a,
0256 ublas::matrix<T, L, A> const &b,
0257 compute::command_queue &queue)
0258 {
0259 ublas::vector<T, A> result(b.size2());
0260 prod(a, b, result, queue);
0261 return result;
0262 }
0263
0264 template<class T>
0265 typename std::enable_if<std::is_fundamental<T>::value, T>::type
0266 inner_prod(ublas::vector<T, opencl::storage> const &a,
0267 ublas::vector<T, opencl::storage> const &b,
0268 compute::command_queue &queue)
0269 {
0270 assert(a.device() == b.device() && a.device() == queue.get_device());
0271 assert(a.size() == b.size());
0272 return compute::inner_product(a.begin(), a.end(), b.begin(), T(0), queue);
0273 }
0274
0275 template<class T, class A>
0276 typename std::enable_if<std::is_fundamental<T>::value, T>::type
0277 inner_prod(ublas::vector<T, A> const &a,
0278 ublas::vector<T, A> const &b,
0279 compute::command_queue& queue)
0280 {
0281 ublas::vector<T, opencl::storage> adev(a, queue);
0282 ublas::vector<T, opencl::storage> bdev(b, queue);
0283 return inner_prod(adev, bdev, queue);
0284 }
0285
0286 template <class T, class L>
0287 typename std::enable_if<is_numeric<T>::value>::type
0288 outer_prod(ublas::vector<T, opencl::storage> const &a,
0289 ublas::vector<T, opencl::storage> const &b,
0290 ublas::matrix<T, L, opencl::storage> &result,
0291 compute::command_queue & queue)
0292 {
0293 assert(a.device() == b.device() &&
0294 a.device() == result.device() &&
0295 a.device() == queue.get_device());
0296 result.fill(0, queue);
0297 cl_event event = NULL;
0298 clblasOrder Order = std::is_same<L, ublas::basic_row_major<> >::value ? clblasRowMajor : clblasColumnMajor;
0299 size_t lda = Order == clblasRowMajor ? 1 : a.size();
0300 size_t ldb = Order == clblasRowMajor ? b.size() : 1;
0301 size_t ldc = Order == clblasRowMajor ? b.size() : a.size();
0302
0303 if (std::is_same<T, float>::value)
0304 clblasSgemm(Order, clblasNoTrans, clblasNoTrans,
0305 a.size(), b.size(), 1,
0306 1, a.begin().get_buffer().get(), 0, lda,
0307 b.begin().get_buffer().get(), 0, ldb, 1,
0308 result.begin().get_buffer().get(), 0, ldc,
0309 1, &(queue.get()), 0, NULL, &event);
0310 else if (std::is_same<T, double>::value)
0311 clblasDgemm(Order, clblasNoTrans, clblasNoTrans,
0312 a.size(), b.size(), 1,
0313 1, a.begin().get_buffer().get(), 0, lda,
0314 b.begin().get_buffer().get(), 0, ldb, 1,
0315 result.begin().get_buffer().get(), 0, ldc,
0316 1, &(queue.get()), 0, NULL, &event);
0317 else if (std::is_same<T, std::complex<float>>::value)
0318 clblasCgemm(Order, clblasNoTrans, clblasNoTrans,
0319 a.size(), b.size(), 1,
0320 ONE_FLOAT_COMPLEX, a.begin().get_buffer().get(), 0, lda,
0321 b.begin().get_buffer().get(), 0, ldb, ONE_FLOAT_COMPLEX,
0322 result.begin().get_buffer().get(), 0, ldc,
0323 1, &(queue.get()), 0, NULL, &event);
0324 else if (std::is_same<T, std::complex<double>>::value)
0325 clblasZgemm(Order, clblasNoTrans, clblasNoTrans,
0326 a.size(), b.size(), 1,
0327 ONE_DOUBLE_COMPLEX, a.begin().get_buffer().get(), 0, lda,
0328 b.begin().get_buffer().get(), 0, ldb, ONE_DOUBLE_COMPLEX,
0329 result.begin().get_buffer().get(), 0, ldc,
0330 1, &(queue.get()), 0, NULL, &event);
0331 clWaitForEvents(1, &event);
0332 }
0333
0334 template <class T, class L, class A>
0335 typename std::enable_if<is_numeric<T>::value>::type
0336 outer_prod(ublas::vector<T, A> const &a,
0337 ublas::vector<T, A> const &b,
0338 ublas::matrix<T, L, A> &result,
0339 compute::command_queue &queue)
0340 {
0341 ublas::vector<T, opencl::storage> adev(a, queue);
0342 ublas::vector<T, opencl::storage> bdev(b, queue);
0343 ublas::matrix<T, L, opencl::storage> rdev(a.size(), b.size(), queue.get_context());
0344 outer_prod(adev, bdev, rdev, queue);
0345 rdev.to_host(result, queue);
0346 }
0347
0348 template <class T,class L = ublas::basic_row_major<>, class A>
0349 typename std::enable_if<is_numeric<T>::value, ublas::matrix<T, L, A>>::type
0350 outer_prod(ublas::vector<T, A> const &a,
0351 ublas::vector<T, A> const &b,
0352 compute::command_queue &queue)
0353 {
0354 ublas::matrix<T, L, A> result(a.size(), b.size());
0355 outer_prod(a, b, result, queue);
0356 return result;
0357 }
0358
0359 #undef ONE_DOUBLE_COMPLEX
0360 #undef ONE_FLOAT_COMPLEX
0361
0362 }}}}
0363
0364 #endif