File indexing completed on 2025-01-18 09:43:01
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #ifndef boost_numeric_ublas_opencl_elementwise_hpp_
0011 #define boost_numeric_ublas_opencl_elementwise_hpp_
0012
0013 #include <boost/numeric/ublas/opencl/library.hpp>
0014 #include <boost/numeric/ublas/opencl/vector.hpp>
0015 #include <boost/numeric/ublas/opencl/matrix.hpp>
0016
0017 namespace boost { namespace numeric { namespace ublas { namespace opencl {
0018
0019 namespace compute = boost::compute;
0020 namespace lambda = boost::compute::lambda;
0021
0022 template <typename T, typename L1, typename L2, typename L3, class O>
0023 void element_wise(ublas::matrix<T, L1, opencl::storage> const &a,
0024 ublas::matrix<T, L2, opencl::storage> const &b,
0025 ublas::matrix<T, L3, opencl::storage> &result,
0026 O op, compute::command_queue& queue)
0027 {
0028 assert(a.device() == b.device() &&
0029 a.device() == result.device() &&
0030 a.device() == queue.get_device());
0031 assert(a.size1() == b.size1() && a.size2() == b.size2());
0032
0033 compute::transform(a.begin(),
0034 a.end(),
0035 b.begin(),
0036 result.begin(),
0037 op,
0038 queue);
0039 queue.finish();
0040 }
0041
0042 template <typename T, typename L1, typename L2, typename L3, typename A, class O>
0043 void element_wise(ublas::matrix<T, L1, A> const &a,
0044 ublas::matrix<T, L2, A> const &b,
0045 ublas::matrix<T, L3, A> &result,
0046 O op,
0047 compute::command_queue &queue)
0048 {
0049 ublas::matrix<T, L1, opencl::storage> adev(a, queue);
0050 ublas::matrix<T, L2, opencl::storage> bdev(b, queue);
0051 ublas::matrix<T, L3, opencl::storage> rdev(a.size1(), b.size2(), queue.get_context());
0052 element_wise(adev, bdev, rdev, op, queue);
0053 rdev.to_host(result, queue);
0054 }
0055
0056 template <typename T, typename L1, typename L2, typename A, typename O>
0057 ublas::matrix<T, L1, A> element_wise(ublas::matrix<T, L1, A> const &a,
0058 ublas::matrix<T, L2, A> const &b,
0059 O op,
0060 compute::command_queue &queue)
0061 {
0062 ublas::matrix<T, L1, A> result(a.size1(), b.size2());
0063 element_wise(a, b, result, op, queue);
0064 return result;
0065 }
0066
0067 template <typename T, typename O>
0068 void element_wise(ublas::vector<T, opencl::storage> const &a,
0069 ublas::vector<T, opencl::storage> const &b,
0070 ublas::vector<T, opencl::storage> &result,
0071 O op,
0072 compute::command_queue& queue)
0073 {
0074 assert(a.device() == b.device() &&
0075 a.device() == result.device() &&
0076 a.device() == queue.get_device());
0077 assert(a.size() == b.size());
0078 compute::transform(a.begin(),
0079 a.end(),
0080 b.begin(),
0081 result.begin(),
0082 op,
0083 queue);
0084 queue.finish();
0085 }
0086
0087 template <typename T, typename A, typename O>
0088 void element_wise(ublas::vector<T, A> const &a,
0089 ublas::vector<T, A> const &b,
0090 ublas::vector<T, A>& result,
0091 O op,
0092 compute::command_queue &queue)
0093 {
0094 ublas::vector<T, opencl::storage> adev(a, queue);
0095 ublas::vector<T, opencl::storage> bdev(b, queue);
0096 ublas::vector<T, opencl::storage> rdev(a.size(), queue.get_context());
0097 element_wise(adev, bdev, rdev, op, queue);
0098 rdev.to_host(result, queue);
0099 }
0100
0101 template <typename T, typename A, typename O>
0102 ublas::vector<T, A> element_wise(ublas::vector<T, A> const &a,
0103 ublas::vector<T, A> const &b,
0104 O op,
0105 compute::command_queue &queue)
0106 {
0107 ublas::vector<T, A> result(a.size());
0108 element_wise(a, b, result, op, queue);
0109 return result;
0110 }
0111
0112 template <typename T, typename L1, typename L2, typename L3>
0113 void element_add(ublas::matrix<T, L1, opencl::storage> const &a,
0114 ublas::matrix<T, L2, opencl::storage> const &b,
0115 ublas::matrix<T, L3, opencl::storage> &result,
0116 compute::command_queue &queue)
0117 {
0118 element_wise(a, b, result, compute::plus<T>(), queue);
0119 }
0120
0121 template <typename T, typename L1, typename L2, typename L3, typename A>
0122 void element_add(ublas::matrix<T, L1, A> const &a,
0123 ublas::matrix<T, L2, A> const &b,
0124 ublas::matrix<T, L3, A> &result,
0125 compute::command_queue &queue)
0126 {
0127 element_wise(a, b, result, compute::plus<T>(), queue);
0128 }
0129
0130 template <typename T, typename L1, typename L2, typename A>
0131 ublas::matrix<T, L1, A> element_add(ublas::matrix<T, L1, A> const &a,
0132 ublas::matrix<T, L2, A> const &b,
0133 compute::command_queue &queue)
0134 {
0135 return element_wise(a, b, compute::plus<T>(), queue);
0136 }
0137
0138 template <typename T>
0139 void element_add(ublas::vector<T, opencl::storage> const &a,
0140 ublas::vector<T, opencl::storage> const &b,
0141 ublas::vector<T, opencl::storage> &result,
0142 compute::command_queue& queue)
0143 {
0144 element_wise(a, b, result, compute::plus<T>(), queue);
0145 }
0146
0147 template <typename T, typename A>
0148 void element_add(ublas::vector<T, A> const &a,
0149 ublas::vector<T, A> const &b,
0150 ublas::vector<T, A> &result,
0151 compute::command_queue &queue)
0152 {
0153 element_wise(a, b, result, compute::plus<T>(), queue);
0154 }
0155
0156 template <typename T, typename A>
0157 ublas::vector<T, A> element_add(ublas::vector<T, A> const &a,
0158 ublas::vector<T, A> const &b,
0159 compute::command_queue &queue)
0160 {
0161 return element_wise(a, b, compute::plus<T>(), queue);
0162 }
0163
0164 template<typename T, typename L>
0165 void element_add(ublas::matrix<T, L, opencl::storage> const &m, T value,
0166 ublas::matrix<T, L, opencl::storage> &result,
0167 compute::command_queue& queue)
0168 {
0169 assert(m.device() == result.device() && m.device() == queue.get_device());
0170 assert(m.size1() == result.size1() && m.size2() == result.size2());
0171 compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 + value, queue);
0172 queue.finish();
0173 }
0174
0175 template<typename T, typename L, typename A>
0176 void element_add(ublas::matrix<T, L, A> const &m, T value,
0177 ublas::matrix<T, L, A> &result,
0178 compute::command_queue& queue)
0179 {
0180 ublas::matrix<T, L, opencl::storage> mdev(m, queue);
0181 ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
0182 element_add(mdev, value, rdev, queue);
0183 rdev.to_host(result, queue);
0184 }
0185
0186 template<typename T, typename L, typename A>
0187 ublas::matrix<T, L, A> element_add(ublas::matrix<T, L, A> const &m, T value,
0188 compute::command_queue& queue)
0189 {
0190 ublas::matrix<T, L, A> result(m.size1(), m.size2());
0191 element_add(m, value, result, queue);
0192 return result;
0193 }
0194
0195 template<typename T>
0196 void element_add(ublas::vector<T, opencl::storage> const &v, T value,
0197 ublas::vector<T, opencl::storage> &result,
0198 compute::command_queue& queue)
0199 {
0200 assert(v.device() == result.device() && v.device() == queue.get_device());
0201 assert(v.size() == result.size());
0202 compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 + value, queue);
0203 queue.finish();
0204 }
0205
0206 template<typename T, typename A>
0207 void element_add(ublas::vector<T, A> const &v, T value,
0208 ublas::vector<T, A> &result,
0209 compute::command_queue& queue)
0210 {
0211 ublas::vector<T, opencl::storage> vdev(v, queue);
0212 ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context());
0213 element_add(vdev, value, rdev, queue);
0214 rdev.to_host(result, queue);
0215 }
0216
0217 template <typename T, typename A>
0218 ublas::vector<T, A> element_add(ublas::vector<T, A> const &v, T value,
0219 compute::command_queue& queue)
0220 {
0221 ublas::vector<T, A> result(v.size());
0222 element_add(v, value, result, queue);
0223 return result;
0224 }
0225
0226 template <typename T, typename L1, typename L2, typename L3>
0227 void element_sub(ublas::matrix<T, L1, opencl::storage> const &a,
0228 ublas::matrix<T, L2, opencl::storage> const &b,
0229 ublas::matrix<T, L3, opencl::storage> &result,
0230 compute::command_queue& queue)
0231 {
0232 element_wise(a, b, compute::minus<T>(), result, queue);
0233 }
0234
0235 template <typename T, typename L1, typename L2, typename L3, typename A>
0236 void element_sub(ublas::matrix<T, L1, A> const &a,
0237 ublas::matrix<T, L2, A> const &b,
0238 ublas::matrix<T, L3, A> &result,
0239 compute::command_queue &queue)
0240 {
0241 element_wise(a, b, result, compute::minus<T>(), queue);
0242 }
0243
0244 template <typename T, typename L1, typename L2, typename A>
0245 ublas::matrix<T, L1, A> element_sub(ublas::matrix<T, L1, A> const &a,
0246 ublas::matrix<T, L2, A> const &b,
0247 compute::command_queue &queue)
0248 {
0249 return element_wise(a, b, compute::minus<T>(), queue);
0250 }
0251
0252 template <typename T>
0253 void element_sub(ublas::vector<T, opencl::storage> const &a,
0254 ublas::vector<T, opencl::storage> const &b,
0255 ublas::vector<T, opencl::storage> &result,
0256 compute::command_queue& queue)
0257 {
0258 element_wise(a, b, result, compute::minus<T>(), queue);
0259 }
0260
0261 template <typename T, typename A>
0262 void element_sub(ublas::vector<T, A> const &a,
0263 ublas::vector<T, A> const &b,
0264 ublas::vector<T, A> &result,
0265 compute::command_queue &queue)
0266 {
0267 element_wise(a, b, result, compute::minus<T>(), queue);
0268 }
0269
0270 template <typename T, typename A>
0271 ublas::vector<T, A> element_sub(ublas::vector<T, A> const &a,
0272 ublas::vector<T, A> const &b,
0273 compute::command_queue &queue)
0274 {
0275 return element_wise(a, b, compute::minus<T>(), queue);
0276 }
0277
0278 template <typename T, typename L>
0279 void element_sub(ublas::matrix<T, L, opencl::storage> const &m, T value,
0280 ublas::matrix<T, L, opencl::storage> &result,
0281 compute::command_queue& queue)
0282 {
0283 assert(m.device() == result.device() && m.device() == queue.get_device());
0284 assert(m.size1() == result.size1() && m.size2() == result.size2());
0285 compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 - value, queue);
0286 queue.finish();
0287 }
0288
0289 template <typename T, typename L, typename A>
0290 void element_sub(ublas::matrix<T, L, A> const &m, T value,
0291 ublas::matrix<T, L, A> &result,
0292 compute::command_queue& queue)
0293 {
0294 ublas::matrix<T, L, opencl::storage> mdev(m, queue);
0295 ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
0296 element_sub(mdev, value, rdev, queue);
0297 rdev.to_host(result, queue);
0298 }
0299
0300 template <typename T, typename L, typename A>
0301 ublas::matrix<T, L, A> element_sub(ublas::matrix<T, L, A> const &m, T value,
0302 compute::command_queue& queue)
0303 {
0304 ublas::matrix<T, L, A> result(m.size1(), m.size2());
0305 element_sub(m, value, result, queue);
0306 return result;
0307 }
0308
0309 template <typename T>
0310 void element_sub(ublas::vector<T, opencl::storage> const &v, T value,
0311 ublas::vector<T, opencl::storage> &result,
0312 compute::command_queue& queue)
0313 {
0314 assert(v.device() == result.device() && v.device() == queue.get_device());
0315 assert(v.size() == result.size());
0316 compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 - value, queue);
0317 queue.finish();
0318 }
0319
0320 template <typename T, typename A>
0321 void element_sub(ublas::vector<T, A> const &v, T value,
0322 ublas::vector<T, A> &result,
0323 compute::command_queue& queue)
0324 {
0325 ublas::vector<T, opencl::storage> vdev(v, queue);
0326 ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context());
0327 element_sub(vdev, value, rdev, queue);
0328 rdev.to_host(result, queue);
0329 }
0330
0331 template <typename T, typename A>
0332 ublas::vector<T, A> element_sub(ublas::vector<T, A> const &v, T value,
0333 compute::command_queue& queue)
0334 {
0335 ublas::vector<T, A> result(v.size());
0336 element_sub(v, value, result, queue);
0337 return result;
0338 }
0339
0340 template <typename T, typename L1, typename L2, typename L3>
0341 void element_prod(ublas::matrix<T, L1, opencl::storage> const &a,
0342 ublas::matrix<T, L2, opencl::storage> const &b,
0343 ublas::matrix<T, L3, opencl::storage> &result,
0344 compute::command_queue& queue)
0345 {
0346 element_wise(a, b, result, compute::multiplies<T>(), queue);
0347 }
0348
0349 template <typename T, typename L1, typename L2, typename L3, typename A>
0350 void element_prod(ublas::matrix<T, L1, A> const &a,
0351 ublas::matrix<T, L2, A> const &b,
0352 ublas::matrix<T, L3, A> &result,
0353 compute::command_queue &queue)
0354 {
0355 element_wise(a, b, result, compute::multiplies<T>(), queue);
0356 }
0357
0358 template <typename T, typename L1, typename L2, typename A>
0359 ublas::matrix<T, L1, A> element_prod(ublas::matrix<T, L1, A> const &a,
0360 ublas::matrix<T, L2, A> const &b,
0361 compute::command_queue &queue)
0362 {
0363 return element_wise(a, b, compute::multiplies<T>(), queue);
0364 }
0365
0366 template <typename T>
0367 void element_prod(ublas::vector<T, opencl::storage> const &a,
0368 ublas::vector<T, opencl::storage> const &b,
0369 ublas::vector<T, opencl::storage> &result,
0370 compute::command_queue& queue)
0371 {
0372 element_wise(a, b, result, compute::multiplies<T>(), queue);
0373 }
0374
0375 template <typename T, typename A>
0376 void element_prod(ublas::vector<T, A> const &a,
0377 ublas::vector<T, A> const &b,
0378 ublas::vector<T, A> &result,
0379 compute::command_queue &queue)
0380 {
0381 element_wise(a, b, result, compute::multiplies<T>(), queue);
0382 }
0383
0384 template <typename T, typename A>
0385 ublas::vector<T, A> element_prod(ublas::vector<T, A> const &a,
0386 ublas::vector<T, A> const &b,
0387 compute::command_queue &queue)
0388 {
0389 return element_wise(a, b, compute::multiplies<T>(), queue);
0390 }
0391
0392 template <typename T, typename L>
0393 void element_scale(ublas::matrix<T, L, opencl::storage> const &m, T value,
0394 ublas::matrix<T, L, opencl::storage> &result,
0395 compute::command_queue& queue)
0396 {
0397 assert(m.device() == result.device() && m.device() == queue.get_device());
0398 assert(m.size1() == result.size1() && m.size2() == result.size2());
0399 compute::transform(m.begin(), m.end(), result.begin(), lambda::_1 * value, queue);
0400 queue.finish();
0401 }
0402
0403 template <typename T, typename L, typename A>
0404 void element_scale(ublas::matrix<T, L, A> const &m, T value,
0405 ublas::matrix<T, L, A> &result,
0406 compute::command_queue& queue)
0407 {
0408 ublas::matrix<T, L, opencl::storage> mdev(m, queue);
0409 ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
0410 element_scale(mdev, value, rdev, queue);
0411 rdev.to_host(result, queue);
0412 }
0413
0414 template <typename T, typename L, typename A>
0415 ublas::matrix<T, L, A> element_scale(ublas::matrix<T, L, A> const &m, T value,
0416 compute::command_queue& queue)
0417 {
0418 ublas::matrix<T, L, A> result(m.size1(), m.size2());
0419 element_scale(m, value, result, queue);
0420 return result;
0421 }
0422
0423 template <typename T>
0424 void element_scale(ublas::vector<T, opencl::storage> const &v, T value,
0425 ublas::vector<T, opencl::storage> &result,
0426 compute::command_queue& queue)
0427 {
0428 assert(v.device() == result.device() && v.device() == queue.get_device());
0429 assert(v.size() == result.size());
0430 compute::transform(v.begin(), v.end(), result.begin(), lambda::_1 * value, queue);
0431 queue.finish();
0432 }
0433
0434 template <typename T, typename A>
0435 void element_scale(ublas::vector<T, A> const &v, T value,
0436 ublas::vector<T, A> & result,
0437 compute::command_queue& queue)
0438 {
0439 ublas::vector<T, opencl::storage> vdev(v, queue);
0440 ublas::vector<T, opencl::storage> rdev(v.size(), queue.get_context());
0441 element_scale(vdev, value, rdev, queue);
0442 rdev.to_host(result, queue);
0443 }
0444
0445 template <typename T, typename A>
0446 ublas::vector<T,A> element_scale(ublas::vector<T, A> const &v, T value,
0447 compute::command_queue& queue)
0448 {
0449 ublas::vector<T, A> result(v.size());
0450 element_scale(v, value, result, queue);
0451 return result;
0452 }
0453
0454 template <typename T, typename L1, typename L2, typename L3>
0455 void element_div(ublas::matrix<T, L1, opencl::storage> const &a,
0456 ublas::matrix<T, L2, opencl::storage> const &b,
0457 ublas::matrix<T, L3, opencl::storage> &result,
0458 compute::command_queue& queue)
0459 {
0460 element_wise(a, b, result, compute::divides<T>(), queue);
0461 }
0462
0463 template <typename T, typename L1, typename L2, typename L3, typename A>
0464 void element_div(ublas::matrix<T, L1, A> const &a,
0465 ublas::matrix<T, L2, A> const &b,
0466 ublas::matrix<T, L3, A> &result,
0467 compute::command_queue &queue)
0468 {
0469 element_wise(a, b, result, compute::divides<T>(), queue);
0470 }
0471
0472 template <typename T, typename L1, typename L2, typename A>
0473 ublas::matrix<T, L1, A> element_div(ublas::matrix<T, L1, A> const &a,
0474 ublas::matrix<T, L2, A> const &b,
0475 compute::command_queue &queue)
0476 {
0477 return element_wise(a, b, compute::divides<T>(), queue);
0478 }
0479
0480 template <typename T>
0481 void element_div(ublas::vector<T, opencl::storage> const &a,
0482 ublas::vector<T, opencl::storage> const &b,
0483 ublas::vector<T, opencl::storage> &result,
0484 compute::command_queue& queue)
0485 {
0486 element_wise(a, b, result, compute::divides<T>(), queue);
0487 }
0488
0489 template <typename T, typename A>
0490 void element_div(ublas::vector<T, A> const &a,
0491 ublas::vector<T, A> const &b,
0492 ublas::vector<T, A> &result,
0493 compute::command_queue &queue)
0494 {
0495 element_wise(a, b, result, compute::divides<T>(), queue);
0496 }
0497
0498 template <typename T, typename A>
0499 ublas::vector<T, A> element_div(ublas::vector<T, A> const &a,
0500 ublas::vector<T, A> const &b,
0501 compute::command_queue &queue)
0502 {
0503 return element_wise(a, b, compute::divides<T>(), queue);
0504 }
0505
0506 }}}}
0507
0508 #endif