File indexing completed on 2025-01-18 09:30:05
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef BOOST_COMPUTE_KERNEL_HPP
0012 #define BOOST_COMPUTE_KERNEL_HPP
0013
0014 #include <string>
0015
0016 #include <boost/assert.hpp>
0017 #include <boost/utility/enable_if.hpp>
0018 #include <boost/optional.hpp>
0019
0020 #include <boost/compute/cl_ext.hpp> // cl_khr_subgroups
0021
0022 #include <boost/compute/config.hpp>
0023 #include <boost/compute/exception.hpp>
0024 #include <boost/compute/program.hpp>
0025 #include <boost/compute/platform.hpp>
0026 #include <boost/compute/type_traits/is_fundamental.hpp>
0027 #include <boost/compute/detail/diagnostic.hpp>
0028 #include <boost/compute/detail/get_object_info.hpp>
0029 #include <boost/compute/detail/assert_cl_success.hpp>
0030
0031 namespace boost {
0032 namespace compute {
namespace detail {

// Forward declaration of the functor used by kernel::set_arg(index, value).
// The primary template and its specializations are defined after the
// kernel class because their call operators invoke kernel::set_arg().
template<typename T>
struct set_kernel_arg;

} // end detail namespace
0038
0039
0040
0041
0042
0043 class kernel
0044 {
0045 public:
0046
0047 kernel()
0048 : m_kernel(0)
0049 {
0050 }
0051
0052
0053
0054 explicit kernel(cl_kernel kernel, bool retain = true)
0055 : m_kernel(kernel)
0056 {
0057 if(m_kernel && retain){
0058 clRetainKernel(m_kernel);
0059 }
0060 }
0061
0062
0063 kernel(const program &program, const std::string &name)
0064 {
0065 cl_int error = 0;
0066 m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
0067
0068 if(!m_kernel){
0069 BOOST_THROW_EXCEPTION(opencl_error(error));
0070 }
0071 }
0072
0073
0074 kernel(const kernel &other)
0075 : m_kernel(other.m_kernel)
0076 {
0077 if(m_kernel){
0078 clRetainKernel(m_kernel);
0079 }
0080 }
0081
0082
0083 kernel& operator=(const kernel &other)
0084 {
0085 if(this != &other){
0086 if(m_kernel){
0087 clReleaseKernel(m_kernel);
0088 }
0089
0090 m_kernel = other.m_kernel;
0091
0092 if(m_kernel){
0093 clRetainKernel(m_kernel);
0094 }
0095 }
0096
0097 return *this;
0098 }
0099
0100 #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
0101
0102 kernel(kernel&& other) BOOST_NOEXCEPT
0103 : m_kernel(other.m_kernel)
0104 {
0105 other.m_kernel = 0;
0106 }
0107
0108
0109 kernel& operator=(kernel&& other) BOOST_NOEXCEPT
0110 {
0111 if(m_kernel){
0112 clReleaseKernel(m_kernel);
0113 }
0114
0115 m_kernel = other.m_kernel;
0116 other.m_kernel = 0;
0117
0118 return *this;
0119 }
0120 #endif
0121
0122
0123 ~kernel()
0124 {
0125 if(m_kernel){
0126 BOOST_COMPUTE_ASSERT_CL_SUCCESS(
0127 clReleaseKernel(m_kernel)
0128 );
0129 }
0130 }
0131
0132 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0133
0134
0135
0136
0137
0138
0139 kernel clone()
0140 {
0141 cl_int ret = 0;
0142 cl_kernel k = clCloneKernel(m_kernel, &ret);
0143 return kernel(k, false);
0144 }
0145 #endif
0146
0147
0148 cl_kernel& get() const
0149 {
0150 return const_cast<cl_kernel &>(m_kernel);
0151 }
0152
0153
0154 std::string name() const
0155 {
0156 return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
0157 }
0158
0159
0160 size_t arity() const
0161 {
0162 return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
0163 }
0164
0165
0166 program get_program() const
0167 {
0168 return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
0169 }
0170
0171
0172 context get_context() const
0173 {
0174 return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
0175 }
0176
0177
0178
0179
0180 template<class T>
0181 T get_info(cl_kernel_info info) const
0182 {
0183 return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
0184 }
0185
0186
0187 template<int Enum>
0188 typename detail::get_object_info_type<kernel, Enum>::type
0189 get_info() const;
0190
0191 #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0192
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208 template<class T>
0209 T get_arg_info(size_t index, cl_kernel_arg_info info) const
0210 {
0211 return detail::get_object_info<T>(
0212 clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index)
0213 );
0214 }
0215
0216
0217 template<int Enum>
0218 typename detail::get_object_info_type<kernel, Enum>::type
0219 get_arg_info(size_t index) const;
0220 #endif
0221
0222
0223
0224
0225 template<class T>
0226 T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
0227 {
0228 return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
0229 }
0230
0231 #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0232
0233
0234
0235
0236
0237
0238
0239 template<class T>
0240 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
0241 const size_t input_size, const void * input) const
0242 {
0243 if(device.check_version(2, 1))
0244 {
0245 return detail::get_object_info<T>(
0246 clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input
0247 );
0248 }
0249 else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
0250 {
0251 return boost::optional<T>();
0252 }
0253
0254
0255 else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)
0256 {
0257 return boost::optional<T>();
0258 }
0259
0260 BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
0261 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
0262 reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
0263 reinterpret_cast<size_t>(
0264 device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
0265 )
0266 );
0267 BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
0268
0269 return detail::get_object_info<T>(
0270 clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
0271 );
0272 }
0273
0274
0275 template<class T>
0276 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const
0277 {
0278 return get_sub_group_info<T>(device, info, 0, 0);
0279 }
0280
0281
0282 template<class T>
0283 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
0284 const size_t input) const
0285 {
0286 return get_sub_group_info<T>(device, info, sizeof(size_t), &input);
0287 }
0288 #endif
0289
0290 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
0291
0292
0293
0294
0295
0296 template<class T>
0297 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
0298 const size_t input_size, const void * input) const
0299 {
0300 if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
0301 {
0302 return boost::optional<T>();
0303 }
0304
0305 BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
0306 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
0307 reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
0308 reinterpret_cast<size_t>(
0309 device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
0310 )
0311 );
0312 BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
0313
0314 return detail::get_object_info<T>(
0315 clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
0316 );
0317 }
0318 #endif
0319
0320 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0321
0322 template<class T>
0323 boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
0324 const std::vector<size_t> input) const
0325 {
0326 BOOST_ASSERT(input.size() > 0);
0327 return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]);
0328 }
0329 #endif
0330
0331
0332
0333
0334 void set_arg(size_t index, size_t size, const void *value)
0335 {
0336 BOOST_ASSERT(index < arity());
0337
0338 cl_int ret = clSetKernelArg(m_kernel,
0339 static_cast<cl_uint>(index),
0340 size,
0341 value);
0342 if(ret != CL_SUCCESS){
0343 BOOST_THROW_EXCEPTION(opencl_error(ret));
0344 }
0345 }
0346
0347
0348
0349
0350
0351
0352
0353
0354
0355
0356
0357
0358
0359
0360
0361
0362
0363
0364
0365
0366 template<class T>
0367 void set_arg(size_t index, const T &value)
0368 {
0369
0370
0371 detail::set_kernel_arg<T>()(*this, index, value);
0372 }
0373
0374
0375 void set_arg(size_t index, const cl_mem mem)
0376 {
0377 set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
0378 }
0379
0380
0381 void set_arg(size_t index, const cl_sampler sampler)
0382 {
0383 set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
0384 }
0385
0386
0387 void set_arg_svm_ptr(size_t index, void* ptr)
0388 {
0389 #ifdef BOOST_COMPUTE_CL_VERSION_2_0
0390 cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr);
0391 if(ret != CL_SUCCESS){
0392 BOOST_THROW_EXCEPTION(opencl_error(ret));
0393 }
0394 #else
0395 (void) index;
0396 (void) ptr;
0397 BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
0398 #endif
0399 }
0400
0401 #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
0402
0403 template<class... T>
0404 void set_args(T&&... args)
0405 {
0406 BOOST_ASSERT(sizeof...(T) <= arity());
0407
0408 _set_args<0>(args...);
0409 }
0410 #endif
0411
0412 #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0413
0414
0415
0416
0417
0418 void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
0419 {
0420 cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
0421 if(ret != CL_SUCCESS){
0422 BOOST_THROW_EXCEPTION(opencl_error(ret));
0423 }
0424 }
0425 #endif
0426
0427
0428 bool operator==(const kernel &other) const
0429 {
0430 return m_kernel == other.m_kernel;
0431 }
0432
0433
0434 bool operator!=(const kernel &other) const
0435 {
0436 return m_kernel != other.m_kernel;
0437 }
0438
0439
0440 operator cl_kernel() const
0441 {
0442 return m_kernel;
0443 }
0444
0445
0446 static kernel create_with_source(const std::string &source,
0447 const std::string &name,
0448 const context &context)
0449 {
0450 return program::build_with_source(source, context).create_kernel(name);
0451 }
0452
0453 private:
0454 #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
0455
/// \internal_
/// Terminates the _set_args() recursion: no values left, nothing to do.
template<size_t N>
void _set_args()
{
}
0460
0461
/// \internal_
/// Sets argument number \c N to the first value, then recurses to set
/// the remaining values at positions N+1, N+2, ...
template<size_t N, class T, class... Args>
void _set_args(T&& head, Args&&... tail)
{
    set_arg(N, head);
    _set_args<N+1>(tail...);
}
0468 #endif
0469
0470 private:
0471 cl_kernel m_kernel;
0472 };
0473
0474 inline kernel program::create_kernel(const std::string &name) const
0475 {
0476 return kernel(*this, name);
0477 }
0478
0479
0480 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
0481 ((std::string, CL_KERNEL_FUNCTION_NAME))
0482 ((cl_uint, CL_KERNEL_NUM_ARGS))
0483 ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
0484 ((cl_context, CL_KERNEL_CONTEXT))
0485 ((cl_program, CL_KERNEL_PROGRAM))
0486 )
0487
0488 #ifdef BOOST_COMPUTE_CL_VERSION_1_2
0489 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
0490 ((std::string, CL_KERNEL_ATTRIBUTES))
0491 )
0492 #endif
0493
0494
// Specializations of kernel::get_arg_info<Enum>() (OpenCL 1.2 only).
//
// For one argument-info enumerator `value` the helper macro
//  - specializes detail::get_object_info_type<kernel, value> so that its
//    nested `type` is `result_type`, and
//  - specializes kernel::get_arg_info<value>(index) to forward to
//    get_arg_info<result_type>(index, value).
#ifdef BOOST_COMPUTE_CL_VERSION_1_2
#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(result_type, value) \
    namespace detail { \
        template<> struct get_object_info_type<kernel, value> { typedef result_type type; }; \
    } \
    template<> inline result_type kernel::get_arg_info<value>(size_t index) const { \
        return get_arg_info<result_type>(index, value); \
    }

BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_TYPE_NAME)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_NAME)
#endif // BOOST_COMPUTE_CL_VERSION_1_2
0510
0511 namespace detail {
0512
0513
0514 template<class T>
0515 struct set_kernel_arg
0516 {
0517 typename boost::enable_if<is_fundamental<T> >::type
0518 operator()(kernel &kernel_, size_t index, const T &value)
0519 {
0520 kernel_.set_arg(index, sizeof(T), &value);
0521 }
0522 };
0523
0524
0525 template<>
0526 struct set_kernel_arg<char>
0527 {
0528 void operator()(kernel &kernel_, size_t index, const char c)
0529 {
0530 kernel_.set_arg(index, sizeof(char), &c);
0531 }
0532 };
0533
0534 }
0535 }
0536 }
0537
0538 #endif