Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-15 08:30:50

0001 //---------------------------------------------------------------------------//
0002 // Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
0003 //
0004 // Distributed under the Boost Software License, Version 1.0
0005 // See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt
0007 //
0008 // See http://boostorg.github.com/compute for more information.
0009 //---------------------------------------------------------------------------//
0010 
0011 #ifndef BOOST_COMPUTE_KERNEL_HPP
0012 #define BOOST_COMPUTE_KERNEL_HPP
0013 
0014 #include <string>
0015 
0016 #include <boost/assert.hpp>
0017 #include <boost/utility/enable_if.hpp>
0018 #include <boost/optional.hpp>
0019 
0020 #include <boost/compute/cl_ext.hpp> // cl_khr_subgroups
0021 
0022 #include <boost/compute/config.hpp>
0023 #include <boost/compute/exception.hpp>
0024 #include <boost/compute/program.hpp>
0025 #include <boost/compute/platform.hpp>
0026 #include <boost/compute/type_traits/is_fundamental.hpp>
0027 #include <boost/compute/detail/diagnostic.hpp>
0028 #include <boost/compute/detail/get_object_info.hpp>
0029 #include <boost/compute/detail/assert_cl_success.hpp>
0030 
0031 namespace boost {
0032 namespace compute {
0033 namespace detail {
0034 
0035 template<class T> struct set_kernel_arg;
0036 
0037 } // end detail namespace
0038 
0039 /// \class kernel
0040 /// \brief A compute kernel.
0041 ///
0042 /// \see command_queue, program
0043 class kernel
0044 {
0045 public:
0046     /// Creates a null kernel object.
0047     kernel()
0048         : m_kernel(0)
0049     {
0050     }
0051 
0052     /// Creates a new kernel object for \p kernel. If \p retain is
0053     /// \c true, the reference count for \p kernel will be incremented.
0054     explicit kernel(cl_kernel kernel, bool retain = true)
0055         : m_kernel(kernel)
0056     {
0057         if(m_kernel && retain){
0058             clRetainKernel(m_kernel);
0059         }
0060     }
0061 
0062     /// Creates a new kernel object with \p name from \p program.
0063     kernel(const program &program, const std::string &name)
0064     {
0065         cl_int error = 0;
0066         m_kernel = clCreateKernel(program.get(), name.c_str(), &error);
0067 
0068         if(!m_kernel){
0069             BOOST_THROW_EXCEPTION(opencl_error(error));
0070         }
0071     }
0072 
0073     /// Creates a new kernel object as a copy of \p other.
0074     kernel(const kernel &other)
0075         : m_kernel(other.m_kernel)
0076     {
0077         if(m_kernel){
0078             clRetainKernel(m_kernel);
0079         }
0080     }
0081 
0082     /// Copies the kernel object from \p other to \c *this.
0083     kernel& operator=(const kernel &other)
0084     {
0085         if(this != &other){
0086             if(m_kernel){
0087                 clReleaseKernel(m_kernel);
0088             }
0089 
0090             m_kernel = other.m_kernel;
0091 
0092             if(m_kernel){
0093                 clRetainKernel(m_kernel);
0094             }
0095         }
0096 
0097         return *this;
0098     }
0099 
0100     #ifndef BOOST_COMPUTE_NO_RVALUE_REFERENCES
0101     /// Move-constructs a new kernel object from \p other.
0102     kernel(kernel&& other) BOOST_NOEXCEPT
0103         : m_kernel(other.m_kernel)
0104     {
0105         other.m_kernel = 0;
0106     }
0107 
0108     /// Move-assigns the kernel from \p other to \c *this.
0109     kernel& operator=(kernel&& other) BOOST_NOEXCEPT
0110     {
0111         if(m_kernel){
0112             clReleaseKernel(m_kernel);
0113         }
0114 
0115         m_kernel = other.m_kernel;
0116         other.m_kernel = 0;
0117 
0118         return *this;
0119     }
0120     #endif // BOOST_COMPUTE_NO_RVALUE_REFERENCES
0121 
0122     /// Destroys the kernel object.
0123     ~kernel()
0124     {
0125         if(m_kernel){
0126             BOOST_COMPUTE_ASSERT_CL_SUCCESS(
0127                 clReleaseKernel(m_kernel)
0128             );
0129         }
0130     }
0131 
0132     #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0133     /// Creates a new kernel object based on a shallow copy of
0134     /// the undelying OpenCL kernel object.
0135     ///
0136     /// \opencl_version_warning{2,1}
0137     ///
0138     /// \see_opencl21_ref{clCloneKernel}
0139     kernel clone()
0140     {
0141         cl_int ret = 0;
0142         cl_kernel k = clCloneKernel(m_kernel, &ret);
0143         return kernel(k, false);
0144     }
0145     #endif // BOOST_COMPUTE_CL_VERSION_2_1
0146 
0147     /// Returns a reference to the underlying OpenCL kernel object.
0148     cl_kernel& get() const
0149     {
0150         return const_cast<cl_kernel &>(m_kernel);
0151     }
0152 
0153     /// Returns the function name for the kernel.
0154     std::string name() const
0155     {
0156         return get_info<std::string>(CL_KERNEL_FUNCTION_NAME);
0157     }
0158 
0159     /// Returns the number of arguments for the kernel.
0160     size_t arity() const
0161     {
0162         return get_info<cl_uint>(CL_KERNEL_NUM_ARGS);
0163     }
0164 
0165     /// Returns the program for the kernel.
0166     program get_program() const
0167     {
0168         return program(get_info<cl_program>(CL_KERNEL_PROGRAM));
0169     }
0170 
0171     /// Returns the context for the kernel.
0172     context get_context() const
0173     {
0174         return context(get_info<cl_context>(CL_KERNEL_CONTEXT));
0175     }
0176 
0177     /// Returns information about the kernel.
0178     ///
0179     /// \see_opencl_ref{clGetKernelInfo}
0180     template<class T>
0181     T get_info(cl_kernel_info info) const
0182     {
0183         return detail::get_object_info<T>(clGetKernelInfo, m_kernel, info);
0184     }
0185 
0186     /// \overload
0187     template<int Enum>
0188     typename detail::get_object_info_type<kernel, Enum>::type
0189     get_info() const;
0190 
0191     #if defined(BOOST_COMPUTE_CL_VERSION_1_2) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0192     /// Returns information about the argument at \p index.
0193     ///
0194     /// For example, to get the name of the first argument:
0195     /// \code
0196     /// std::string arg = kernel.get_arg_info<std::string>(0, CL_KERNEL_ARG_NAME);
0197     /// \endcode
0198     ///
0199     /// Note, this function requires that the program be compiled with the
0200     /// \c "-cl-kernel-arg-info" flag. For example:
0201     /// \code
0202     /// program.build("-cl-kernel-arg-info");
0203     /// \endcode
0204     ///
0205     /// \opencl_version_warning{1,2}
0206     ///
0207     /// \see_opencl_ref{clGetKernelArgInfo}
0208     template<class T>
0209     T get_arg_info(size_t index, cl_kernel_arg_info info) const
0210     {
0211         return detail::get_object_info<T>(
0212             clGetKernelArgInfo, m_kernel, info, static_cast<cl_uint>(index)
0213         );
0214     }
0215 
0216     /// \overload
0217     template<int Enum>
0218     typename detail::get_object_info_type<kernel, Enum>::type
0219     get_arg_info(size_t index) const;
0220     #endif // BOOST_COMPUTE_CL_VERSION_1_2
0221 
0222     /// Returns work-group information for the kernel with \p device.
0223     ///
0224     /// \see_opencl_ref{clGetKernelWorkGroupInfo}
0225     template<class T>
0226     T get_work_group_info(const device &device, cl_kernel_work_group_info info) const
0227     {
0228         return detail::get_object_info<T>(clGetKernelWorkGroupInfo, m_kernel, info, device.id());
0229     }
0230 
0231     #if defined(BOOST_COMPUTE_CL_VERSION_2_1) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0232     /// Returns sub-group information for the kernel with \p device. Returns a null
0233     /// optional if \p device is not 2.1 device, or is not 2.0 device with support
0234     /// for cl_khr_subgroups extension.
0235     ///
0236     /// \opencl_version_warning{2,1}
0237     /// \see_opencl21_ref{clGetKernelSubGroupInfo}
0238     /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
0239     template<class T>
0240     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
0241                                           const size_t input_size, const void * input) const
0242     {
0243         if(device.check_version(2, 1))
0244         {
0245             return detail::get_object_info<T>(
0246                 clGetKernelSubGroupInfo, m_kernel, info, device.id(), input_size, input
0247             );
0248         }
0249         else if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
0250         {
0251             return boost::optional<T>();
0252         }
0253         // Only CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE
0254         // are supported in cl_khr_subgroups extension for 2.0 devices.
0255         else if(info != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE && info != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE)
0256         {
0257             return boost::optional<T>();
0258         }
0259 
0260         BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
0261         clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
0262             reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
0263                 reinterpret_cast<size_t>(
0264                     device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
0265                 )
0266             );
0267         BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
0268 
0269         return detail::get_object_info<T>(
0270             clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
0271         );
0272     }
0273 
0274     /// \overload
0275     template<class T>
0276     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info) const
0277     {
0278         return get_sub_group_info<T>(device, info, 0, 0);
0279     }
0280 
0281     /// \overload
0282     template<class T>
0283     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
0284                                           const size_t input) const
0285     {
0286         return get_sub_group_info<T>(device, info, sizeof(size_t), &input);
0287     }
0288     #endif // BOOST_COMPUTE_CL_VERSION_2_1
0289 
0290     #if defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
0291     /// Returns sub-group information for the kernel with \p device. Returns a null
0292     /// optional if cl_khr_subgroups extension is not supported by \p device.
0293     ///
0294     /// \opencl_version_warning{2,0}
0295     /// \see_opencl2_ref{clGetKernelSubGroupInfoKHR}
0296     template<class T>
0297     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
0298                                           const size_t input_size, const void * input) const
0299     {
0300         if(!device.check_version(2, 0) || !device.supports_extension("cl_khr_subgroups"))
0301         {
0302             return boost::optional<T>();
0303         }
0304 
0305         BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
0306         clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfoKHR_fptr =
0307             reinterpret_cast<clGetKernelSubGroupInfoKHR_fn>(
0308                 reinterpret_cast<size_t>(
0309                     device.platform().get_extension_function_address("clGetKernelSubGroupInfoKHR")
0310                 )
0311             );
0312         BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
0313 
0314         return detail::get_object_info<T>(
0315             clGetKernelSubGroupInfoKHR_fptr, m_kernel, info, device.id(), input_size, input
0316         );
0317     }
0318     #endif // defined(BOOST_COMPUTE_CL_VERSION_2_0) && !defined(BOOST_COMPUTE_CL_VERSION_2_1)
0319 
0320     #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0321     /// \overload
0322     template<class T>
0323     boost::optional<T> get_sub_group_info(const device &device, cl_kernel_sub_group_info info,
0324                                           const std::vector<size_t> input) const
0325     {
0326         BOOST_ASSERT(input.size() > 0);
0327         return get_sub_group_info<T>(device, info, input.size() * sizeof(size_t), &input[0]);
0328     }
0329     #endif // BOOST_COMPUTE_CL_VERSION_2_0
0330 
0331     /// Sets the argument at \p index to \p value with \p size.
0332     ///
0333     /// \see_opencl_ref{clSetKernelArg}
0334     void set_arg(size_t index, size_t size, const void *value)
0335     {
0336         BOOST_ASSERT(index < arity());
0337 
0338         cl_int ret = clSetKernelArg(m_kernel,
0339                                     static_cast<cl_uint>(index),
0340                                     size,
0341                                     value);
0342         if(ret != CL_SUCCESS){
0343             BOOST_THROW_EXCEPTION(opencl_error(ret));
0344         }
0345     }
0346 
0347     /// Sets the argument at \p index to \p value.
0348     ///
0349     /// For built-in types (e.g. \c float, \c int4_), this is equivalent to
0350     /// calling set_arg(index, sizeof(type), &value).
0351     ///
0352     /// Additionally, this method is specialized for device memory objects
0353     /// such as buffer and image2d. This allows for them to be passed directly
0354     /// without having to extract their underlying cl_mem object.
0355     ///
0356     /// This method is also specialized for device container types such as
0357     /// vector<T> and array<T, N>. This allows for them to be passed directly
0358     /// as kernel arguments without having to extract their underlying buffer.
0359     ///
0360     /// For setting local memory arguments (e.g. "__local float *buf"), the
0361     /// local_buffer<T> class may be used:
0362     /// \code
0363     /// // set argument to a local buffer with storage for 32 float's
0364     /// kernel.set_arg(0, local_buffer<float>(32));
0365     /// \endcode
0366     ///
0367     /// For setting NULL to global and constant memory arguments (C++11):
0368     /// \code
0369     /// kernel.set_arg(0, nullptr);
0370     /// \endcode
0371     template<class T>
0372     void set_arg(size_t index, const T &value)
0373     {
0374         // if you get a compilation error pointing here it means you
0375         // attempted to set a kernel argument from an invalid type.
0376         detail::set_kernel_arg<T>()(*this, index, value);
0377     }
0378 
0379     #ifndef BOOST_NO_CXX11_NULLPTR
0380     /// \overload
0381     void set_arg(size_t index, std::nullptr_t nul)
0382     {
0383         set_arg(index, sizeof(cl_mem), NULL);
0384     }
0385     #endif // BOOST_NO_CXX11_NULLPTR
0386 
0387     /// \internal_
0388     void set_arg(size_t index, const cl_mem mem)
0389     {
0390         set_arg(index, sizeof(cl_mem), static_cast<const void *>(&mem));
0391     }
0392 
0393     /// \internal_
0394     void set_arg(size_t index, const cl_sampler sampler)
0395     {
0396         set_arg(index, sizeof(cl_sampler), static_cast<const void *>(&sampler));
0397     }
0398 
0399     /// \internal_
0400     void set_arg_svm_ptr(size_t index, void* ptr)
0401     {
0402         #ifdef BOOST_COMPUTE_CL_VERSION_2_0
0403         cl_int ret = clSetKernelArgSVMPointer(m_kernel, static_cast<cl_uint>(index), ptr);
0404         if(ret != CL_SUCCESS){
0405             BOOST_THROW_EXCEPTION(opencl_error(ret));
0406         }
0407         #else
0408         (void) index;
0409         (void) ptr;
0410         BOOST_THROW_EXCEPTION(opencl_error(CL_INVALID_ARG_VALUE));
0411         #endif
0412     }
0413 
0414     #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
0415     /// Sets the arguments for the kernel to \p args.
0416     template<class... T>
0417     void set_args(T&&... args)
0418     {
0419         BOOST_ASSERT(sizeof...(T) <= arity());
0420 
0421         _set_args<0>(args...);
0422     }
0423     #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
0424 
0425     #if defined(BOOST_COMPUTE_CL_VERSION_2_0) || defined(BOOST_COMPUTE_DOXYGEN_INVOKED)
0426     /// Sets additional execution information for the kernel.
0427     ///
0428     /// \opencl_version_warning{2,0}
0429     ///
0430     /// \see_opencl2_ref{clSetKernelExecInfo}
0431     void set_exec_info(cl_kernel_exec_info info, size_t size, const void *value)
0432     {
0433         cl_int ret = clSetKernelExecInfo(m_kernel, info, size, value);
0434         if(ret != CL_SUCCESS){
0435             BOOST_THROW_EXCEPTION(opencl_error(ret));
0436         }
0437     }
0438     #endif // BOOST_COMPUTE_CL_VERSION_2_0
0439 
0440     /// Returns \c true if the kernel is the same at \p other.
0441     bool operator==(const kernel &other) const
0442     {
0443         return m_kernel == other.m_kernel;
0444     }
0445 
0446     /// Returns \c true if the kernel is different from \p other.
0447     bool operator!=(const kernel &other) const
0448     {
0449         return m_kernel != other.m_kernel;
0450     }
0451 
0452     /// \internal_
0453     operator cl_kernel() const
0454     {
0455         return m_kernel;
0456     }
0457 
0458     /// \internal_
0459     static kernel create_with_source(const std::string &source,
0460                                      const std::string &name,
0461                                      const context &context)
0462     {
0463         return program::build_with_source(source, context).create_kernel(name);
0464     }
0465 
0466 private:
0467     #ifndef BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
0468     /// \internal_
0469     template<size_t N>
0470     void _set_args()
0471     {
0472     }
0473 
0474     /// \internal_
0475     template<size_t N, class T, class... Args>
0476     void _set_args(T&& arg, Args&&... rest)
0477     {
0478         set_arg(N, arg);
0479         _set_args<N+1>(rest...);
0480     }
0481     #endif // BOOST_COMPUTE_NO_VARIADIC_TEMPLATES
0482 
0483 private:
0484     cl_kernel m_kernel;
0485 };
0486 
0487 inline kernel program::create_kernel(const std::string &name) const
0488 {
0489     return kernel(*this, name);
0490 }
0491 
0492 /// \internal_ define get_info() specializations for kernel
0493 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
0494     ((std::string, CL_KERNEL_FUNCTION_NAME))
0495     ((cl_uint, CL_KERNEL_NUM_ARGS))
0496     ((cl_uint, CL_KERNEL_REFERENCE_COUNT))
0497     ((cl_context, CL_KERNEL_CONTEXT))
0498     ((cl_program, CL_KERNEL_PROGRAM))
0499 )
0500 
0501 #ifdef BOOST_COMPUTE_CL_VERSION_1_2
0502 BOOST_COMPUTE_DETAIL_DEFINE_GET_INFO_SPECIALIZATIONS(kernel,
0503     ((std::string, CL_KERNEL_ATTRIBUTES))
0504 )
0505 #endif // BOOST_COMPUTE_CL_VERSION_1_2
0506 
/// \internal_ get_arg_info<Enum>() specializations for kernel
#ifdef BOOST_COMPUTE_CL_VERSION_1_2
// For one argument-info enum, registers its result type with
// get_object_info_type and defines kernel::get_arg_info<Enum>(index) as a
// forward to the runtime-enum overload.
#define BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(info_type, info_enum) \
    namespace detail { \
        template<> struct get_object_info_type<kernel, info_enum> { typedef info_type type; }; \
    } \
    template<> inline info_type kernel::get_arg_info<info_enum>(size_t index) const { \
        return get_arg_info<info_type>(index, info_enum); \
    }

BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_address_qualifier, CL_KERNEL_ARG_ADDRESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_access_qualifier, CL_KERNEL_ARG_ACCESS_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_TYPE_NAME)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(cl_kernel_arg_type_qualifier, CL_KERNEL_ARG_TYPE_QUALIFIER)
BOOST_COMPUTE_DETAIL_DEFINE_KERNEL_GET_ARG_INFO_SPECIALIZATION(std::string, CL_KERNEL_ARG_NAME)
#endif // BOOST_COMPUTE_CL_VERSION_1_2
0523 
0524 namespace detail {
0525 
0526 // set_kernel_arg implementation for built-in types
0527 template<class T>
0528 struct set_kernel_arg
0529 {
0530     typename boost::enable_if<is_fundamental<T> >::type
0531     operator()(kernel &kernel_, size_t index, const T &value)
0532     {
0533         kernel_.set_arg(index, sizeof(T), &value);
0534     }
0535 };
0536 
0537 // set_kernel_arg specialization for char (different from built-in cl_char)
0538 template<>
0539 struct set_kernel_arg<char>
0540 {
0541     void operator()(kernel &kernel_, size_t index, const char c)
0542     {
0543         kernel_.set_arg(index, sizeof(char), &c);
0544     }
0545 };
0546 
0547 } // end detail namespace
0548 } // end namespace compute
0549 } // end namespace boost
0550 
0551 #endif // BOOST_COMPUTE_KERNEL_HPP