Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:43:01

0001 // Boost.uBLAS
0002 //
0003 // Copyright (c) 2018 Fady Essam
0004 // Copyright (c) 2018 Stefan Seefeld
0005 //
0006 // Distributed under the Boost Software License, Version 1.0.
0007 // (See accompanying file LICENSE_1_0.txt or
0008 // copy at http://www.boost.org/LICENSE_1_0.txt)
0009 
0010 #ifndef boost_numeric_ublas_opencl_transpose_hpp_
0011 #define boost_numeric_ublas_opencl_transpose_hpp_
0012 
#include <boost/numeric/ublas/opencl/library.hpp>
#include <boost/numeric/ublas/opencl/vector.hpp>
#include <boost/numeric/ublas/opencl/matrix.hpp>
#include <boost/compute/kernel.hpp>
#include <boost/compute/program.hpp>
#include <cassert>
#include <complex>
#include <stdexcept>
#include <string>
#include <type_traits>
0016 
// Kernel for transposition of various data types.
//
// Expands to the OpenCL C source (a string literal) of a kernel named
// "transpose". DATA_TYPE is stringized and used as the element type of the
// two __global pointers, so it must name an OpenCL C type (e.g. float,
// double2).
//
// The kernel is meant to be launched over a 2-D range: dimension 0 supplies
// column_index and dimension 1 supplies row_index. Work-items outside
// [0, width) x [0, height) do nothing. Every other work-item copies
//   in[column_index + width * row_index]
// to
//   result[row_index + height * column_index],
// i.e. it reads `in` as `height` rows of `width` contiguous elements and
// writes `result` as `width` rows of `height` contiguous elements.
//
// All indices are computed as unsigned int inside the kernel.
#define OPENCL_TRANSPOSITION_KERNEL(DATA_TYPE)  \
"__kernel void transpose(__global "  #DATA_TYPE "* in, __global " #DATA_TYPE "* result, unsigned int width, unsigned int height) \n"                       \
"{ \n"                                      \
"  unsigned int column_index = get_global_id(0); \n"            \
"  unsigned int row_index = get_global_id(1); \n"           \
"  if (column_index < width && row_index < height) \n"          \
"  { \n"                                    \
"    unsigned int index_in = column_index + width * row_index; \n"  \
"    unsigned int index_result = row_index + height * column_index; \n" \
"    result[index_result] = in[index_in]; \n"               \
"  } \n"                                \
"} \n"
0030 
0031 
0032 namespace boost { namespace numeric { namespace ublas { namespace opencl {
0033 
0034 template<class T, class L1, class L2>
0035 typename std::enable_if<is_numeric<T>::value>::type
0036 change_layout(ublas::matrix<T, L1, opencl::storage> const &m,
0037           ublas::matrix<T, L2, opencl::storage> &result,
0038           compute::command_queue& queue)
0039 {
0040   assert(m.size1() == result.size1() && m.size2() == result.size2());
0041   assert(m.device() == result.device() && m.device() == queue.get_device());
0042   assert(!(std::is_same<L1, L2>::value));
0043   char const *kernel;
0044   if (std::is_same<T, float>::value)
0045     kernel = OPENCL_TRANSPOSITION_KERNEL(float);
0046   else if (std::is_same<T, double>::value)
0047     kernel = OPENCL_TRANSPOSITION_KERNEL(double);
0048   else if (std::is_same<T, std::complex<float>>::value)
0049     kernel = OPENCL_TRANSPOSITION_KERNEL(float2);
0050   else if (std::is_same<T, std::complex<double>>::value)
0051     kernel = OPENCL_TRANSPOSITION_KERNEL(double2);
0052   size_t len = strlen(kernel);
0053   cl_int err;
0054   cl_context c_context = queue.get_context().get();
0055   cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err);
0056   clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL);
0057   cl_kernel c_kernel = clCreateKernel(program, "transpose", &err);
0058   size_t width = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size2() : m.size1();
0059   size_t height = std::is_same < L1, ublas::basic_row_major<>>::value ? m.size1() : m.size2();
0060   size_t global_size[2] = { width , height };
0061   clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get());
0062   clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get());
0063   clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width);
0064   clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height);
0065   cl_command_queue c_queue = queue.get();
0066   cl_event event = NULL;
0067   clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event);
0068   clWaitForEvents(1, &event);
0069 }
0070 
0071 template<class T, class L1, class L2, class A>
0072 typename std::enable_if<is_numeric<T>::value>::type
0073 change_layout(ublas::matrix<T, L1, A> const &m,
0074           ublas::matrix<T, L2, A> &result,
0075           compute::command_queue& queue)
0076 {
0077   ublas::matrix<T, L1, opencl::storage> mdev(m, queue);
0078   ublas::matrix<T, L2, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
0079   change_layout(mdev, rdev, queue);
0080   rdev.to_host(result, queue);
0081 }
0082 
0083 template<class T, class L>
0084 typename std::enable_if<is_numeric<T>::value>::type
0085 trans(ublas::matrix<T, L, opencl::storage> const &m,
0086       ublas::matrix<T, L, opencl::storage> &result,
0087       compute::command_queue& queue)
0088 {
0089   assert(m.size1() == result.size2() && m.size2() == result.size1());
0090   assert(m.device() == result.device() && m.device() == queue.get_device());
0091   char const *kernel;
0092   if (std::is_same<T, float>::value)
0093     kernel = OPENCL_TRANSPOSITION_KERNEL(float);
0094   else if (std::is_same<T, double>::value)
0095     kernel = OPENCL_TRANSPOSITION_KERNEL(double);
0096   else if (std::is_same<T, std::complex<float>>::value)
0097     kernel = OPENCL_TRANSPOSITION_KERNEL(float2);
0098   else if (std::is_same<T, std::complex<double>>::value)
0099     kernel = OPENCL_TRANSPOSITION_KERNEL(double2);
0100   size_t len = strlen(kernel);
0101   cl_int err;
0102   cl_context c_context = queue.get_context().get();
0103   cl_program program = clCreateProgramWithSource(c_context, 1, &kernel, &len, &err);
0104   clBuildProgram(program, 1, &queue.get_device().get(), NULL, NULL, NULL);
0105   cl_kernel c_kernel = clCreateKernel(program, "transpose", &err);
0106   size_t width = std::is_same <L, ublas::basic_row_major<>>::value ? m.size2() : m.size1();
0107   size_t height = std::is_same <L, ublas::basic_row_major<>>::value ? m.size1() : m.size2();
0108   size_t global_size[2] = { width , height };
0109   clSetKernelArg(c_kernel, 0, sizeof(T*), &m.begin().get_buffer().get());
0110   clSetKernelArg(c_kernel, 1, sizeof(T*), &result.begin().get_buffer().get());
0111   clSetKernelArg(c_kernel, 2, sizeof(unsigned int), &width);
0112   clSetKernelArg(c_kernel, 3, sizeof(unsigned int), &height);
0113   cl_command_queue c_queue = queue.get();
0114   cl_event event = NULL;
0115   clEnqueueNDRangeKernel(c_queue, c_kernel, 2, NULL, global_size, NULL, 0, NULL, &event);
0116   clWaitForEvents(1, &event);
0117 }
0118 
0119 template<class T, class L, class A>
0120 typename std::enable_if<is_numeric<T>::value>::type
0121 trans(ublas::matrix<T, L, A> const &m,
0122       ublas::matrix<T, L, A> &result,
0123       compute::command_queue& queue)
0124 {
0125   ublas::matrix<T, L, opencl::storage> mdev(m, queue);
0126   ublas::matrix<T, L, opencl::storage> rdev(result.size1(), result.size2(), queue.get_context());
0127   trans(mdev, rdev, queue);
0128   rdev.to_host(result, queue);
0129 }
0130 
0131 template<class T, class L, class A>
0132 typename std::enable_if<is_numeric<T>::value, ublas::matrix<T, L, A>>::type
0133 trans(ublas::matrix<T, L, A>& m, compute::command_queue& queue)
0134 {
0135   ublas::matrix<T, L, A> result(m.size2(), m.size1());
0136   trans(m, result, queue);
0137   return result;
0138 }
0139 
0140 }}}}
0141 
0142 #endif