File indexing completed on 2025-02-21 10:15:50
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017 #include "internal/_deprecated_header_message_guard.h"
0018
0019 #if !defined(__TBB_show_deprecation_message_flow_graph_opencl_node_H) && defined(__TBB_show_deprecated_header_message)
0020 #define __TBB_show_deprecation_message_flow_graph_opencl_node_H
0021 #pragma message("TBB Warning: tbb/flow_graph_opencl_node.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
0022 #endif
0023
0024 #if defined(__TBB_show_deprecated_header_message)
0025 #undef __TBB_show_deprecated_header_message
0026 #endif
0027
0028 #ifndef __TBB_flow_graph_opencl_node_H
0029 #define __TBB_flow_graph_opencl_node_H
0030
0031 #define __TBB_flow_graph_opencl_node_H_include_area
0032 #include "internal/_warning_suppress_enable_notice.h"
0033
0034 #include "tbb/tbb_config.h"
0035 #if __TBB_PREVIEW_OPENCL_NODE
0036
0037 #include "flow_graph.h"
0038
0039 #include <vector>
0040 #include <string>
0041 #include <algorithm>
0042 #include <iostream>
0043 #include <fstream>
0044 #include <map>
0045 #include <mutex>
0046
0047 #ifdef __APPLE__
0048 #include <OpenCL/opencl.h>
0049 #else
0050 #include <CL/cl.h>
0051 #endif
0052
0053 namespace tbb {
0054 namespace flow {
0055
0056 namespace interface11 {
0057
0058 template <typename DeviceFilter>
0059 class opencl_factory;
0060
0061 namespace opencl_info {
0062 class default_opencl_factory;
0063 }
0064
0065 template <typename Factory>
0066 class opencl_program;
0067
0068 inline void enforce_cl_retcode(cl_int err, std::string msg) {
0069 if (err != CL_SUCCESS) {
0070 std::cerr << msg << "; error code: " << err << std::endl;
0071 throw msg;
0072 }
0073 }
0074
0075 template <typename T>
0076 T event_info(cl_event e, cl_event_info i) {
0077 T res;
0078 enforce_cl_retcode(clGetEventInfo(e, i, sizeof(res), &res, NULL), "Failed to get OpenCL event information");
0079 return res;
0080 }
0081
0082 template <typename T>
0083 T device_info(cl_device_id d, cl_device_info i) {
0084 T res;
0085 enforce_cl_retcode(clGetDeviceInfo(d, i, sizeof(res), &res, NULL), "Failed to get OpenCL device information");
0086 return res;
0087 }
0088
0089 template <>
0090 inline std::string device_info<std::string>(cl_device_id d, cl_device_info i) {
0091 size_t required;
0092 enforce_cl_retcode(clGetDeviceInfo(d, i, 0, NULL, &required), "Failed to get OpenCL device information");
0093
0094 char *buff = (char*)alloca(required);
0095 enforce_cl_retcode(clGetDeviceInfo(d, i, required, buff, NULL), "Failed to get OpenCL device information");
0096
0097 return buff;
0098 }
0099
0100 template <typename T>
0101 T platform_info(cl_platform_id p, cl_platform_info i) {
0102 T res;
0103 enforce_cl_retcode(clGetPlatformInfo(p, i, sizeof(res), &res, NULL), "Failed to get OpenCL platform information");
0104 return res;
0105 }
0106
0107 template <>
0108 inline std::string platform_info<std::string>(cl_platform_id p, cl_platform_info i) {
0109 size_t required;
0110 enforce_cl_retcode(clGetPlatformInfo(p, i, 0, NULL, &required), "Failed to get OpenCL platform information");
0111
0112 char *buff = (char*)alloca(required);
0113 enforce_cl_retcode(clGetPlatformInfo(p, i, required, buff, NULL), "Failed to get OpenCL platform information");
0114
0115 return buff;
0116 }
0117
0118
0119 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_device {
0120 public:
0121 typedef size_t device_id_type;
0122 enum : device_id_type {
0123 unknown = device_id_type( -2 ),
0124 host = device_id_type( -1 )
0125 };
0126
0127 opencl_device() : my_device_id( unknown ), my_cl_device_id( NULL ), my_cl_command_queue( NULL ) {}
0128
0129 opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ), my_cl_command_queue( NULL ) {}
0130
0131 opencl_device( cl_device_id cl_d_id, device_id_type device_id ) : my_device_id( device_id ), my_cl_device_id( cl_d_id ), my_cl_command_queue( NULL ) {}
0132
0133 std::string platform_profile() const {
0134 return platform_info<std::string>( platform_id(), CL_PLATFORM_PROFILE );
0135 }
0136 std::string platform_version() const {
0137 return platform_info<std::string>( platform_id(), CL_PLATFORM_VERSION );
0138 }
0139 std::string platform_name() const {
0140 return platform_info<std::string>( platform_id(), CL_PLATFORM_NAME );
0141 }
0142 std::string platform_vendor() const {
0143 return platform_info<std::string>( platform_id(), CL_PLATFORM_VENDOR );
0144 }
0145 std::string platform_extensions() const {
0146 return platform_info<std::string>( platform_id(), CL_PLATFORM_EXTENSIONS );
0147 }
0148
0149 template <typename T>
0150 void info( cl_device_info i, T &t ) const {
0151 t = device_info<T>( my_cl_device_id, i );
0152 }
0153 std::string version() const {
0154
0155 return device_info<std::string>( my_cl_device_id, CL_DEVICE_VERSION );
0156 }
0157 int major_version() const {
0158 int major;
0159 std::sscanf( version().c_str(), "OpenCL %d", &major );
0160 return major;
0161 }
0162 int minor_version() const {
0163 int major, minor;
0164 std::sscanf( version().c_str(), "OpenCL %d.%d", &major, &minor );
0165 return minor;
0166 }
0167 bool out_of_order_exec_mode_on_host_present() const {
0168 #if CL_VERSION_2_0
0169 if ( major_version() >= 2 )
0170 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
0171 else
0172 #endif
0173 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
0174 }
0175 bool out_of_order_exec_mode_on_device_present() const {
0176 #if CL_VERSION_2_0
0177 if ( major_version() >= 2 )
0178 return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
0179 else
0180 #endif
0181 return false;
0182 }
0183 std::array<size_t, 3> max_work_item_sizes() const {
0184 return device_info<std::array<size_t, 3>>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
0185 }
0186 size_t max_work_group_size() const {
0187 return device_info<size_t>( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
0188 }
0189 bool built_in_kernel_available( const std::string& k ) const {
0190 const std::string semi = ";";
0191
0192 return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
0193 }
0194 std::string built_in_kernels() const {
0195 return device_info<std::string>( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
0196 }
0197 std::string name() const {
0198 return device_info<std::string>( my_cl_device_id, CL_DEVICE_NAME );
0199 }
0200 cl_bool available() const {
0201 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_AVAILABLE );
0202 }
0203 cl_bool compiler_available() const {
0204 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
0205 }
0206 cl_bool linker_available() const {
0207 return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
0208 }
0209 bool extension_available( const std::string &ext ) const {
0210 const std::string space = " ";
0211
0212 return (space + extensions() + space).find( space + ext + space ) != std::string::npos;
0213 }
0214 std::string extensions() const {
0215 return device_info<std::string>( my_cl_device_id, CL_DEVICE_EXTENSIONS );
0216 }
0217
0218 cl_device_type type() const {
0219 return device_info<cl_device_type>( my_cl_device_id, CL_DEVICE_TYPE );
0220 }
0221
0222 std::string vendor() const {
0223 return device_info<std::string>( my_cl_device_id, CL_DEVICE_VENDOR );
0224 }
0225
0226 cl_uint address_bits() const {
0227 return device_info<cl_uint>( my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
0228 }
0229
0230 cl_device_id device_id() const {
0231 return my_cl_device_id;
0232 }
0233
0234 cl_command_queue command_queue() const {
0235 return my_cl_command_queue;
0236 }
0237
0238 void set_command_queue( cl_command_queue cmd_queue ) {
0239 my_cl_command_queue = cmd_queue;
0240 }
0241
0242 cl_platform_id platform_id() const {
0243 return device_info<cl_platform_id>( my_cl_device_id, CL_DEVICE_PLATFORM );
0244 }
0245
0246 private:
0247
0248 device_id_type my_device_id;
0249 cl_device_id my_cl_device_id;
0250 cl_command_queue my_cl_command_queue;
0251
0252 friend bool operator==(opencl_device d1, opencl_device d2) { return d1.my_cl_device_id == d2.my_cl_device_id; }
0253
0254 template <typename DeviceFilter>
0255 friend class opencl_factory;
0256 template <typename Factory>
0257 friend class opencl_memory;
0258 template <typename Factory>
0259 friend class opencl_program;
0260
0261 #if TBB_USE_ASSERT
0262 template <typename T, typename Factory>
0263 friend class opencl_buffer;
0264 #endif
0265 };
0266
0267 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_device_list {
0268 typedef std::vector<opencl_device> container_type;
0269 public:
0270 typedef container_type::iterator iterator;
0271 typedef container_type::const_iterator const_iterator;
0272 typedef container_type::size_type size_type;
0273
0274 opencl_device_list() {}
0275 opencl_device_list( std::initializer_list<opencl_device> il ) : my_container( il ) {}
0276
0277 void add( opencl_device d ) { my_container.push_back( d ); }
0278 size_type size() const { return my_container.size(); }
0279 bool empty() const { return my_container.empty(); }
0280 iterator begin() { return my_container.begin(); }
0281 iterator end() { return my_container.end(); }
0282 const_iterator begin() const { return my_container.begin(); }
0283 const_iterator end() const { return my_container.end(); }
0284 const_iterator cbegin() const { return my_container.cbegin(); }
0285 const_iterator cend() const { return my_container.cend(); }
0286
0287 private:
0288 container_type my_container;
0289 };
0290
0291 namespace internal {
0292
0293
0294 inline opencl_device_list find_available_devices() {
0295 opencl_device_list opencl_devices;
0296
0297 cl_uint num_platforms;
0298 enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms), "clGetPlatformIDs failed");
0299
0300 std::vector<cl_platform_id> platforms(num_platforms);
0301 enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL), "clGetPlatformIDs failed");
0302
0303 cl_uint num_devices;
0304 std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
0305 cl_uint num_all_devices = 0;
0306 while (platforms_it != platforms.end()) {
0307 cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
0308 if (err == CL_DEVICE_NOT_FOUND) {
0309 platforms_it = platforms.erase(platforms_it);
0310 }
0311 else {
0312 enforce_cl_retcode(err, "clGetDeviceIDs failed");
0313 num_all_devices += num_devices;
0314 ++platforms_it;
0315 }
0316 }
0317
0318 std::vector<cl_device_id> devices(num_all_devices);
0319 std::vector<cl_device_id>::iterator devices_it = devices.begin();
0320 for (auto p = platforms.begin(); p != platforms.end(); ++p) {
0321 enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices), "clGetDeviceIDs failed");
0322 devices_it += num_devices;
0323 }
0324
0325 for (auto d = devices.begin(); d != devices.end(); ++d) {
0326 opencl_devices.add(opencl_device((*d)));
0327 }
0328
0329 return opencl_devices;
0330 }
0331
0332 }
0333
0334
0335 namespace opencl_info {
0336
0337 inline const opencl_device_list& available_devices() {
0338
0339 static const opencl_device_list my_devices = internal::find_available_devices();
0340 return my_devices;
0341 }
0342
0343 }
0344
0345
0346 class callback_base : tbb::internal::no_copy {
0347 public:
0348 virtual void call() = 0;
0349 virtual ~callback_base() {}
0350 };
0351
0352 template <typename Callback, typename T>
0353 class callback : public callback_base {
0354 Callback my_callback;
0355 T my_data;
0356 public:
0357 callback( Callback c, const T& t ) : my_callback( c ), my_data( t ) {}
0358
0359 void call() __TBB_override {
0360 my_callback( my_data );
0361 }
0362 };
0363
0364 template <typename T, typename Factory = opencl_info::default_opencl_factory>
0365 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_async_msg : public async_msg<T> {
0366 public:
0367 typedef T value_type;
0368
0369 opencl_async_msg() : my_callback_flag_ptr( std::make_shared< tbb::atomic<bool>>() ) {
0370 my_callback_flag_ptr->store<tbb::relaxed>(false);
0371 }
0372
0373 explicit opencl_async_msg( const T& data ) : my_data(data), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
0374 my_callback_flag_ptr->store<tbb::relaxed>(false);
0375 }
0376
0377 opencl_async_msg( const T& data, cl_event event ) : my_data(data), my_event(event), my_is_event(true), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
0378 my_callback_flag_ptr->store<tbb::relaxed>(false);
0379 enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
0380 }
0381
0382 T& data( bool wait = true ) {
0383 if ( my_is_event && wait ) {
0384 enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
0385 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0386 my_is_event = false;
0387 }
0388 return my_data;
0389 }
0390
0391 const T& data( bool wait = true ) const {
0392 if ( my_is_event && wait ) {
0393 enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
0394 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0395 my_is_event = false;
0396 }
0397 return my_data;
0398 }
0399
0400 opencl_async_msg( const opencl_async_msg &dmsg ) : async_msg<T>(dmsg),
0401 my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ),
0402 my_callback_flag_ptr(dmsg.my_callback_flag_ptr)
0403 {
0404 if ( my_is_event )
0405 enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
0406 }
0407
0408 opencl_async_msg( opencl_async_msg &&dmsg ) : async_msg<T>(std::move(dmsg)),
0409 my_data(std::move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event),
0410 my_callback_flag_ptr( std::move(dmsg.my_callback_flag_ptr) )
0411 {
0412 dmsg.my_is_event = false;
0413 }
0414
0415 opencl_async_msg& operator=(const opencl_async_msg &dmsg) {
0416 async_msg<T>::operator =(dmsg);
0417
0418
0419 if ( my_is_event )
0420 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to retain an event" );
0421
0422 my_data = dmsg.my_data;
0423 my_event = dmsg.my_event;
0424 my_is_event = dmsg.my_is_event;
0425
0426
0427 if ( my_is_event )
0428 enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
0429
0430 my_callback_flag_ptr = dmsg.my_callback_flag_ptr;
0431 return *this;
0432 }
0433
0434 ~opencl_async_msg() {
0435 if ( my_is_event )
0436 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0437 }
0438
0439 cl_event const * get_event() const { return my_is_event ? &my_event : NULL; }
0440 void set_event( cl_event e ) const {
0441 if ( my_is_event ) {
0442 cl_command_queue cq = event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE );
0443 if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
0444 enforce_cl_retcode( clFlush( cq ), "Failed to flush an OpenCL command queue" );
0445 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0446 }
0447 my_is_event = true;
0448 my_event = e;
0449 clRetainEvent( my_event );
0450 }
0451
0452 void clear_event() const {
0453 if ( my_is_event ) {
0454 enforce_cl_retcode( clFlush( event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE ) ), "Failed to flush an OpenCL command queue" );
0455 enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0456 }
0457 my_is_event = false;
0458 }
0459
0460 template <typename Callback>
0461 void register_callback( Callback c ) const {
0462 __TBB_ASSERT( my_is_event, "The OpenCL event is not set" );
0463 enforce_cl_retcode( clSetEventCallback( my_event, CL_COMPLETE, register_callback_func, new callback<Callback, T>( c, my_data ) ), "Failed to set an OpenCL callback" );
0464 }
0465
0466 operator T&() { return data(); }
0467 operator const T&() const { return data(); }
0468
0469 protected:
0470
0471
0472 void finalize() const __TBB_override {
0473 receive_if_memory_object(*this);
0474 if (! my_callback_flag_ptr->fetch_and_store(true)) {
0475 opencl_async_msg a(*this);
0476 if (my_is_event) {
0477 register_callback([a](const T& t) mutable {
0478 a.set(t);
0479 });
0480 }
0481 else {
0482 a.set(my_data);
0483 }
0484 }
0485 clear_event();
0486 }
0487
0488 private:
0489 static void CL_CALLBACK register_callback_func( cl_event, cl_int event_command_exec_status, void *data ) {
0490 tbb::internal::suppress_unused_warning( event_command_exec_status );
0491 __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
0492 __TBB_ASSERT( data, NULL );
0493 callback_base *c = static_cast<callback_base*>(data);
0494 c->call();
0495 delete c;
0496 }
0497
0498 T my_data;
0499 mutable cl_event my_event;
0500 mutable bool my_is_event = false;
0501
0502 std::shared_ptr< tbb::atomic<bool> > my_callback_flag_ptr;
0503 };
0504
0505 template <typename K, typename T, typename Factory>
0506 K key_from_message( const opencl_async_msg<T, Factory> &dmsg ) {
0507 using tbb::flow::key_from_message;
0508 const T &t = dmsg.data( false );
0509 __TBB_STATIC_ASSERT( true, "" );
0510 return key_from_message<K, T>( t );
0511 }
0512
0513 template <typename Factory>
0514 class opencl_memory {
0515 public:
0516 opencl_memory() {}
0517 opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) {
0518 my_curr_device_id = my_factory->devices().begin()->my_device_id;
0519 }
0520
0521 virtual ~opencl_memory() {
0522 if ( my_sending_event_present ) enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event for the OpenCL buffer" );
0523 enforce_cl_retcode( clReleaseMemObject( my_cl_mem ), "Failed to release an memory object" );
0524 }
0525
0526 cl_mem get_cl_mem() const {
0527 return my_cl_mem;
0528 }
0529
0530 void* get_host_ptr() {
0531 if ( !my_host_ptr ) {
0532 opencl_async_msg<void*, Factory> d = receive( NULL );
0533 d.data();
0534 __TBB_ASSERT( d.data() == my_host_ptr, NULL );
0535 }
0536 return my_host_ptr;
0537 }
0538
0539 Factory *factory() const { return my_factory; }
0540
0541 opencl_async_msg<void*, Factory> receive(const cl_event *e) {
0542 opencl_async_msg<void*, Factory> d;
0543 if (e) {
0544 d = opencl_async_msg<void*, Factory>(my_host_ptr, *e);
0545 } else {
0546 d = opencl_async_msg<void*, Factory>(my_host_ptr);
0547 }
0548
0549
0550 if (my_curr_device_id.load<tbb::relaxed>() != opencl_device::host) {
0551 map_memory(*my_factory->devices().begin(), d);
0552 my_curr_device_id.store<tbb::relaxed>(opencl_device::host);
0553 my_host_ptr = d.data(false);
0554 }
0555
0556 if (my_sending_event_present) {
0557 enforce_cl_retcode(clReleaseEvent(my_sending_event), "Failed to release an event");
0558 my_sending_event_present = false;
0559 }
0560 return d;
0561 }
0562
0563 opencl_async_msg<void*, Factory> send(opencl_device device, const cl_event *e) {
0564 opencl_device::device_id_type device_id = device.my_device_id;
0565 if (!my_factory->is_same_context(my_curr_device_id.load<tbb::acquire>(), device_id)) {
0566 {
0567 tbb::spin_mutex::scoped_lock lock(my_sending_lock);
0568 if (!my_factory->is_same_context(my_curr_device_id.load<tbb::relaxed>(), device_id)) {
0569 __TBB_ASSERT(my_host_ptr, "The buffer has not been mapped");
0570 opencl_async_msg<void*, Factory> d(my_host_ptr);
0571 my_factory->enqueue_unmap_buffer(device, *this, d);
0572 my_sending_event = *d.get_event();
0573 my_sending_event_present = true;
0574 enforce_cl_retcode(clRetainEvent(my_sending_event), "Failed to retain an event");
0575 my_host_ptr = NULL;
0576 my_curr_device_id.store<tbb::release>(device_id);
0577 }
0578 }
0579 __TBB_ASSERT(my_sending_event_present, NULL);
0580 }
0581
0582
0583 if (!e && my_sending_event_present) e = &my_sending_event;
0584
0585 __TBB_ASSERT(!my_host_ptr, "The buffer has not been unmapped");
0586 return e ? opencl_async_msg<void*, Factory>(NULL, *e) : opencl_async_msg<void*, Factory>(NULL);
0587 }
0588
0589 virtual void map_memory( opencl_device, opencl_async_msg<void*, Factory> & ) = 0;
0590 protected:
0591 cl_mem my_cl_mem;
0592 tbb::atomic<opencl_device::device_id_type> my_curr_device_id;
0593 void* my_host_ptr;
0594 Factory *my_factory;
0595
0596 tbb::spin_mutex my_sending_lock;
0597 bool my_sending_event_present;
0598 cl_event my_sending_event;
0599 };
0600
0601 template <typename Factory>
0602 class opencl_buffer_impl : public opencl_memory<Factory> {
0603 size_t my_size;
0604 public:
0605 opencl_buffer_impl( size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
0606 cl_int err;
0607 this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR, size, NULL, &err );
0608 enforce_cl_retcode( err, "Failed to create an OpenCL buffer" );
0609 }
0610
0611
0612 opencl_buffer_impl( cl_mem m, size_t index, size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
0613 cl_int err;
0614 cl_buffer_region region = { index, size };
0615 this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &err );
0616 enforce_cl_retcode( err, "Failed to create an OpenCL subbuffer" );
0617 }
0618
0619 size_t size() const {
0620 return my_size;
0621 }
0622
0623 void map_memory( opencl_device device, opencl_async_msg<void*, Factory> &dmsg ) __TBB_override {
0624 this->my_factory->enqueue_map_buffer( device, *this, dmsg );
0625 }
0626
0627 #if TBB_USE_ASSERT
0628 template <typename, typename>
0629 friend class opencl_buffer;
0630 #endif
0631 };
0632
0633 enum access_type {
0634 read_write,
0635 write_only,
0636 read_only
0637 };
0638
0639 template <typename T, typename Factory = opencl_info::default_opencl_factory>
0640 class __TBB_DEPRECATED_IN_VERBOSE_MODE
0641 opencl_subbuffer;
0642
0643 template <typename T, typename Factory = opencl_info::default_opencl_factory>
0644 class __TBB_DEPRECATED_IN_VERBOSE_MODE
0645 opencl_buffer {
0646 public:
0647 typedef cl_mem native_object_type;
0648 typedef opencl_buffer memory_object_type;
0649 typedef Factory opencl_factory_type;
0650
0651 template<access_type a> using iterator = T*;
0652
0653 template <access_type a>
0654 iterator<a> access() const {
0655 T* ptr = (T*)my_impl->get_host_ptr();
0656 __TBB_ASSERT( ptr, NULL );
0657 return iterator<a>( ptr );
0658 }
0659
0660 T* data() const { return &access<read_write>()[0]; }
0661
0662 template <access_type a = read_write>
0663 iterator<a> begin() const { return access<a>(); }
0664
0665 template <access_type a = read_write>
0666 iterator<a> end() const { return access<a>()+my_impl->size()/sizeof(T); }
0667
0668 size_t size() const { return my_impl->size()/sizeof(T); }
0669
0670 T& operator[] ( ptrdiff_t k ) { return begin()[k]; }
0671
0672 opencl_buffer() {}
0673 opencl_buffer( size_t size );
0674 opencl_buffer( Factory &f, size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), f ) ) {}
0675
0676 cl_mem native_object() const {
0677 return my_impl->get_cl_mem();
0678 }
0679
0680 const opencl_buffer& memory_object() const {
0681 return *this;
0682 }
0683
0684 void send( opencl_device device, opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
0685 __TBB_ASSERT( dependency.data( false ) == *this, NULL );
0686 opencl_async_msg<void*, Factory> d = my_impl->send( device, dependency.get_event() );
0687 const cl_event *e = d.get_event();
0688 if ( e ) dependency.set_event( *e );
0689 else dependency.clear_event();
0690 }
0691 void receive( const opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
0692 __TBB_ASSERT( dependency.data( false ) == *this, NULL );
0693 opencl_async_msg<void*, Factory> d = my_impl->receive( dependency.get_event() );
0694 const cl_event *e = d.get_event();
0695 if ( e ) dependency.set_event( *e );
0696 else dependency.clear_event();
0697 }
0698
0699 opencl_subbuffer<T, Factory> subbuffer( size_t index, size_t size ) const;
0700 private:
0701
0702 opencl_buffer( Factory &f, cl_mem m, size_t index, size_t size ) : my_impl( std::make_shared<impl_type>( m, index*sizeof(T), size*sizeof(T), f ) ) {}
0703
0704 typedef opencl_buffer_impl<Factory> impl_type;
0705
0706 std::shared_ptr<impl_type> my_impl;
0707
0708 friend bool operator==(const opencl_buffer<T, Factory> &lhs, const opencl_buffer<T, Factory> &rhs) {
0709 return lhs.my_impl == rhs.my_impl;
0710 }
0711
0712 template <typename>
0713 friend class opencl_factory;
0714 template <typename, typename>
0715 friend class opencl_subbuffer;
0716 };
0717
0718 template <typename T, typename Factory>
0719 class __TBB_DEPRECATED_IN_VERBOSE_MODE
0720 opencl_subbuffer : public opencl_buffer<T, Factory> {
0721 opencl_buffer<T, Factory> my_owner;
0722 public:
0723 opencl_subbuffer() {}
0724 opencl_subbuffer( const opencl_buffer<T, Factory> &owner, size_t index, size_t size ) :
0725 opencl_buffer<T, Factory>( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {}
0726 };
0727
0728 template <typename T, typename Factory>
0729 opencl_subbuffer<T, Factory> opencl_buffer<T, Factory>::subbuffer( size_t index, size_t size ) const {
0730 return opencl_subbuffer<T, Factory>( *this, index, size );
0731 }
0732
0733
0734 #define is_typedef(type) \
0735 template <typename T> \
0736 struct is_##type { \
0737 template <typename C> \
0738 static std::true_type check( typename C::type* ); \
0739 template <typename C> \
0740 static std::false_type check( ... ); \
0741 \
0742 static const bool value = decltype(check<T>(0))::value; \
0743 }
0744
0745 is_typedef( native_object_type );
0746 is_typedef( memory_object_type );
0747
0748 template <typename T>
0749 typename std::enable_if<is_native_object_type<T>::value, typename T::native_object_type>::type get_native_object( const T &t ) {
0750 return t.native_object();
0751 }
0752
0753 template <typename T>
0754 typename std::enable_if<!is_native_object_type<T>::value, T>::type get_native_object( T t ) {
0755 return t;
0756 }
0757
0758
0759 template <typename T, typename Factory>
0760 typename std::enable_if<is_memory_object_type<T>::value>::type send_if_memory_object( opencl_device device, opencl_async_msg<T, Factory> &dmsg ) {
0761 const T &t = dmsg.data( false );
0762 typedef typename T::memory_object_type mem_obj_t;
0763 mem_obj_t mem_obj = t.memory_object();
0764 opencl_async_msg<mem_obj_t, Factory> d( mem_obj );
0765 if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
0766 mem_obj.send( device, d );
0767 if ( d.get_event() ) dmsg.set_event( *d.get_event() );
0768 }
0769
0770 template <typename T>
0771 typename std::enable_if<is_memory_object_type<T>::value>::type send_if_memory_object( opencl_device device, T &t ) {
0772 typedef typename T::memory_object_type mem_obj_t;
0773 mem_obj_t mem_obj = t.memory_object();
0774 opencl_async_msg<mem_obj_t, typename mem_obj_t::opencl_factory_type> dmsg( mem_obj );
0775 mem_obj.send( device, dmsg );
0776 }
0777
0778 template <typename T>
0779 typename std::enable_if<!is_memory_object_type<T>::value>::type send_if_memory_object( opencl_device, T& ) {};
0780
0781
0782 template <typename T, typename Factory>
0783 typename std::enable_if<is_memory_object_type<T>::value>::type receive_if_memory_object( const opencl_async_msg<T, Factory> &dmsg ) {
0784 const T &t = dmsg.data( false );
0785 typedef typename T::memory_object_type mem_obj_t;
0786 mem_obj_t mem_obj = t.memory_object();
0787 opencl_async_msg<mem_obj_t, Factory> d( mem_obj );
0788 if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
0789 mem_obj.receive( d );
0790 if ( d.get_event() ) dmsg.set_event( *d.get_event() );
0791 }
0792
0793 template <typename T>
0794 typename std::enable_if<!is_memory_object_type<T>::value>::type receive_if_memory_object( const T& ) {}
0795
0796 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_range {
0797 public:
0798 typedef size_t range_index_type;
0799 typedef std::array<range_index_type, 3> nd_range_type;
0800
0801 template <typename G = std::initializer_list<int>, typename L = std::initializer_list<int>,
0802 typename = typename std::enable_if<!std::is_same<typename std::decay<G>::type, opencl_range>::value>::type>
0803 opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
0804 auto g_it = global_work.begin();
0805 auto l_it = local_work.begin();
0806 my_global_work_size = { {size_t(-1), size_t(-1), size_t(-1)} };
0807
0808 for (int s = 0; s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++s) {
0809 __TBB_ASSERT(l_it != local_work.end(), "global_work & local_work must have same size");
0810 my_global_work_size[s] = *g_it;
0811 my_local_work_size[s] = *l_it;
0812 }
0813 }
0814
0815 const nd_range_type& global_range() const { return my_global_work_size; }
0816 const nd_range_type& local_range() const { return my_local_work_size; }
0817
0818 private:
0819 nd_range_type my_global_work_size;
0820 nd_range_type my_local_work_size;
0821 };
0822
0823 template <typename DeviceFilter>
0824 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_factory {
0825 public:
0826 template<typename T> using async_msg_type = opencl_async_msg<T, opencl_factory<DeviceFilter>>;
0827 typedef opencl_device device_type;
0828
0829 class kernel : tbb::internal::no_assign {
0830 public:
0831 kernel( const kernel& k ) : my_factory( k.my_factory ) {
0832
0833 size_t ret_size = 0;
0834
0835 std::vector<char> kernel_name;
0836 for ( size_t curr_size = 32;; curr_size <<= 1 ) {
0837 kernel_name.resize( curr_size <<= 1 );
0838 enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ), "Failed to get kernel info" );
0839 if ( ret_size < curr_size ) break;
0840 }
0841
0842 cl_program program;
0843 enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, &ret_size ), "Failed to get kernel info" );
0844 __TBB_ASSERT( ret_size == sizeof(program), NULL );
0845
0846 my_cl_kernel = opencl_program< factory_type >( my_factory, program ).get_cl_kernel( kernel_name.data() );
0847 }
0848
0849 ~kernel() {
0850 enforce_cl_retcode( clReleaseKernel( my_cl_kernel ), "Failed to release a kernel" );
0851 }
0852
0853 private:
0854 typedef opencl_factory<DeviceFilter> factory_type;
0855
0856 kernel( const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {}
0857
0858
0859 cl_kernel my_cl_kernel;
0860 factory_type& my_factory;
0861
0862 template <typename DeviceFilter_>
0863 friend class opencl_factory;
0864
0865 template <typename Factory>
0866 friend class opencl_program;
0867 };
0868
0869 typedef kernel kernel_type;
0870
0871
0872
0873 typedef opencl_range range_type;
0874
0875 opencl_factory() {}
0876 ~opencl_factory() {
0877 if ( my_devices.size() ) {
0878 for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
0879 enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
0880 }
0881 enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
0882 }
0883 }
0884
0885 bool init( const opencl_device_list &device_list ) {
0886 tbb::spin_mutex::scoped_lock lock( my_devices_mutex );
0887 if ( !my_devices.size() ) {
0888 my_devices = device_list;
0889 return true;
0890 }
0891 return false;
0892 }
0893
0894
0895 private:
0896 template <typename Factory>
0897 void enqueue_map_buffer( opencl_device device, opencl_buffer_impl<Factory> &buffer, opencl_async_msg<void*, Factory>& dmsg ) {
0898 cl_event const* e1 = dmsg.get_event();
0899 cl_event e2;
0900 cl_int err;
0901 void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
0902 e1 == NULL ? 0 : 1, e1, &e2, &err );
0903 enforce_cl_retcode( err, "Failed to map a buffer" );
0904 dmsg.data( false ) = ptr;
0905 dmsg.set_event( e2 );
0906 enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
0907 }
0908
0909
0910 template <typename Factory>
0911 void enqueue_unmap_buffer( opencl_device device, opencl_memory<Factory> &memory, opencl_async_msg<void*, Factory>& dmsg ) {
0912 cl_event const* e1 = dmsg.get_event();
0913 cl_event e2;
0914 enforce_cl_retcode(
0915 clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
0916 "Failed to unmap a buffer" );
0917 dmsg.set_event( e2 );
0918 enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
0919 }
0920
0921
0922 template <size_t NUM_ARGS, typename T>
0923 void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>&, int&, int& place, const T& t ) {
0924 auto p = get_native_object(t);
0925 enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
0926 }
0927
0928 template <size_t NUM_ARGS, typename T, typename F>
0929 void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const opencl_async_msg<T, F>& msg ) {
0930 __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
0931
0932 const cl_event * const e = msg.get_event();
0933 if (e != NULL) {
0934 events[num_events++] = *e;
0935 }
0936
0937 process_one_arg( kernel, events, num_events, place, msg.data(false) );
0938 }
0939
0940 template <size_t NUM_ARGS, typename T, typename ...Rest>
0941 void process_arg_list( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const T& t, const Rest&... args ) {
0942 process_one_arg( kernel, events, num_events, place, t );
0943 process_arg_list( kernel, events, num_events, place, args... );
0944 }
0945
0946 template <size_t NUM_ARGS>
0947 void process_arg_list( const kernel_type&, std::array<cl_event, NUM_ARGS>&, int&, int& ) {}
0948
0949 template <typename T>
0950 void update_one_arg( cl_event, T& ) {}
0951
0952 template <typename T, typename F>
0953 void update_one_arg( cl_event e, opencl_async_msg<T, F>& msg ) {
0954 msg.set_event( e );
0955 }
0956
0957 template <typename T, typename ...Rest>
0958 void update_arg_list( cl_event e, T& t, Rest&... args ) {
0959 update_one_arg( e, t );
0960 update_arg_list( e, args... );
0961 }
0962
0963 void update_arg_list( cl_event ) {}
0964
0965 public:
0966 template <typename ...Args>
0967 void send_kernel( opencl_device device, const kernel_type& kernel, const range_type& work_size, Args&... args ) {
0968 std::array<cl_event, sizeof...(Args)> events;
0969 int num_events = 0;
0970 int place = 0;
0971 process_arg_list( kernel, events, num_events, place, args... );
0972
0973 const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
0974
0975 update_arg_list(e, args...);
0976
0977
0978 enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
0979 }
0980
0981
0982 template <typename T, typename ...Rest>
0983 void send_data(opencl_device device, T& t, Rest&... args) {
0984 send_if_memory_object( device, t );
0985 send_data( device, args... );
0986 }
0987
0988 void send_data(opencl_device) {}
0989
0990
0991 private:
0992 cl_event send_kernel_impl( opencl_device device, const cl_kernel& kernel,
0993 const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
0994 const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
0995 const typename range_type::nd_range_type& g_size = work_size.global_range();
0996 const typename range_type::nd_range_type& l_size = work_size.local_range();
0997 cl_uint s;
0998 for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
0999 cl_event event;
1000 enforce_cl_retcode(
1001 clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
1002 g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
1003 "Failed to enqueue a kernel" );
1004 return event;
1005 }
1006
1007
1008 template <typename T>
1009 bool get_event_from_one_arg( cl_event&, const T& ) {
1010 return false;
1011 }
1012
1013 template <typename T, typename F>
1014 bool get_event_from_one_arg( cl_event& e, const opencl_async_msg<T, F>& msg) {
1015 cl_event const *e_ptr = msg.get_event();
1016
1017 if ( e_ptr != NULL ) {
1018 e = *e_ptr;
1019 return true;
1020 }
1021
1022 return false;
1023 }
1024
1025 template <typename T, typename ...Rest>
1026 bool get_event_from_args( cl_event& e, const T& t, const Rest&... args ) {
1027 if ( get_event_from_one_arg( e, t ) ) {
1028 return true;
1029 }
1030
1031 return get_event_from_args( e, args... );
1032 }
1033
1034 bool get_event_from_args( cl_event& ) {
1035 return false;
1036 }
1037
1038
1039 struct finalize_fn : tbb::internal::no_assign {
1040 virtual ~finalize_fn() {}
1041 virtual void operator() () {}
1042 };
1043
1044 template<typename Fn>
1045 struct finalize_fn_leaf : public finalize_fn {
1046 Fn my_fn;
1047 finalize_fn_leaf(Fn fn) : my_fn(fn) {}
1048 void operator() () __TBB_override { my_fn(); }
1049 };
1050
1051 static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data) {
1052 tbb::internal::suppress_unused_warning(event_command_exec_status);
1053 __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1054
1055 finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1056 __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1057 (*fn_ptr)();
1058
1059
1060 delete fn_ptr;
1061 }
1062 public:
1063 template <typename FinalizeFn, typename ...Args>
1064 void finalize( opencl_device device, FinalizeFn fn, Args&... args ) {
1065 cl_event e;
1066
1067 if ( get_event_from_args( e, args... ) ) {
1068 enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1069 new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1070 }
1071
1072 enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1073 }
1074
1075 const opencl_device_list& devices() {
1076 std::call_once( my_once_flag, &opencl_factory::init_once, this );
1077 return my_devices;
1078 }
1079
1080 private:
1081 bool is_same_context( opencl_device::device_id_type d1, opencl_device::device_id_type d2 ) {
1082 __TBB_ASSERT( d1 != opencl_device::unknown && d2 != opencl_device::unknown, NULL );
1083
1084 if ( d1 != opencl_device::host && d2 != opencl_device::host )
1085 return true;
1086 return d1 == d2;
1087 }
1088 private:
1089 opencl_factory( const opencl_factory& );
1090 opencl_factory& operator=(const opencl_factory&);
1091
1092 cl_context context() {
1093 std::call_once( my_once_flag, &opencl_factory::init_once, this );
1094 return my_cl_context;
1095 }
1096
1097 void init_once() {
1098 {
1099 tbb::spin_mutex::scoped_lock lock(my_devices_mutex);
1100 if (!my_devices.size())
1101 my_devices = DeviceFilter()( opencl_info::available_devices() );
1102 }
1103
1104 enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1105 cl_platform_id platform_id = my_devices.begin()->platform_id();
1106 for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1107 enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1108
1109 std::vector<cl_device_id> cl_device_ids;
1110 for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1111 cl_device_ids.push_back((*d).my_cl_device_id);
1112 }
1113
1114 cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1115 cl_int err;
1116 cl_context ctx = clCreateContext(context_properties,
1117 (cl_uint)cl_device_ids.size(),
1118 cl_device_ids.data(),
1119 NULL, NULL, &err);
1120 enforce_cl_retcode(err, "Failed to create context");
1121 my_cl_context = ctx;
1122
1123 size_t device_counter = 0;
1124 for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1125 (*d).my_device_id = device_counter++;
1126 cl_int err2;
1127 cl_command_queue cq;
1128 #if CL_VERSION_2_0
1129 if ((*d).major_version() >= 2) {
1130 if ((*d).out_of_order_exec_mode_on_host_present()) {
1131 cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1132 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1133 } else {
1134 cl_queue_properties props[] = { 0 };
1135 cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1136 }
1137 } else
1138 #endif
1139 {
1140 cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1141
1142 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1143 #pragma GCC diagnostic push
1144 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1145 #endif
1146 #if _MSC_VER || __INTEL_COMPILER
1147 #pragma warning( push )
1148 #if __INTEL_COMPILER
1149 #pragma warning (disable: 1478)
1150 #else
1151 #pragma warning (disable: 4996)
1152 #endif
1153 #endif
1154 cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1155 #if _MSC_VER || __INTEL_COMPILER
1156 #pragma warning( pop )
1157 #endif
1158 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1159 #pragma GCC diagnostic pop
1160 #endif
1161 }
1162 enforce_cl_retcode(err2, "Failed to create command queue");
1163 (*d).my_cl_command_queue = cq;
1164 }
1165 }
1166
1167 std::once_flag my_once_flag;
1168 opencl_device_list my_devices;
1169 cl_context my_cl_context;
1170
1171 tbb::spin_mutex my_devices_mutex;
1172
1173 template <typename Factory>
1174 friend class opencl_program;
1175 template <typename Factory>
1176 friend class opencl_buffer_impl;
1177 template <typename Factory>
1178 friend class opencl_memory;
1179 };
1180
1181
1182 namespace opencl_info {
1183
1184
1185
1186 template <typename Factory>
1187 struct default_device_selector {
1188 opencl_device operator()(Factory& f) {
1189 __TBB_ASSERT(!f.devices().empty(), "No available devices");
1190 return *(f.devices().begin());
1191 }
1192 };
1193
1194 struct default_device_filter {
1195 opencl_device_list operator()(const opencl_device_list &devices) {
1196 opencl_device_list dl;
1197 cl_platform_id platform_id = devices.begin()->platform_id();
1198 for (opencl_device_list::const_iterator it = devices.cbegin(); it != devices.cend(); ++it) {
1199 if (it->platform_id() == platform_id) {
1200 dl.add(*it);
1201 }
1202 }
1203 return dl;
1204 }
1205 };
1206
1207 class default_opencl_factory : public opencl_factory < default_device_filter >, tbb::internal::no_copy {
1208 public:
1209 template<typename T> using async_msg_type = opencl_async_msg<T, default_opencl_factory>;
1210
1211 friend default_opencl_factory& default_factory();
1212
1213 private:
1214 default_opencl_factory() = default;
1215 };
1216
1217 inline default_opencl_factory& default_factory() {
1218 static default_opencl_factory default_factory;
1219 return default_factory;
1220 }
1221
1222 }
1223
1224 template <typename T, typename Factory>
1225 opencl_buffer<T, Factory>::opencl_buffer( size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), opencl_info::default_factory() ) ) {}
1226
1227
1228 enum class opencl_program_type {
1229 SOURCE,
1230 PRECOMPILED,
1231 SPIR
1232 };
1233
1234 template <typename Factory = opencl_info::default_opencl_factory>
1235 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_program : tbb::internal::no_assign {
1236 public:
1237 typedef typename Factory::kernel_type kernel_type;
1238
1239 opencl_program( Factory& factory, opencl_program_type type, const std::string& program_name ) : my_factory( factory ), my_type(type) , my_arg_str( program_name) {}
1240 opencl_program( Factory& factory, const char* program_name ) : opencl_program( factory, std::string( program_name ) ) {}
1241 opencl_program( Factory& factory, const std::string& program_name ) : opencl_program( factory, opencl_program_type::SOURCE, program_name ) {}
1242
1243 opencl_program( opencl_program_type type, const std::string& program_name ) : opencl_program( opencl_info::default_factory(), type, program_name ) {}
1244 opencl_program( const char* program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1245 opencl_program( const std::string& program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1246 opencl_program( opencl_program_type type ) : opencl_program( opencl_info::default_factory(), type ) {}
1247
1248 opencl_program( const opencl_program &src ) : my_factory( src.my_factory ), my_type( src.type ), my_arg_str( src.my_arg_str ), my_cl_program( src.my_cl_program ) {
1249
1250 std::call_once( my_do_once_flag, [](){} );
1251 }
1252
1253 kernel_type get_kernel( const std::string& k ) const {
1254 return kernel_type( get_cl_kernel(k), my_factory );
1255 }
1256
1257 private:
1258 opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) {
1259
1260 std::call_once( my_do_once_flag, [](){} );
1261 }
1262
1263 cl_kernel get_cl_kernel( const std::string& k ) const {
1264 std::call_once( my_do_once_flag, [this, &k](){ this->init( k ); } );
1265 cl_int err;
1266 cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err );
1267 enforce_cl_retcode( err, std::string( "Failed to create kernel: " ) + k );
1268 return kernel;
1269 }
1270
1271 class file_reader {
1272 public:
1273 file_reader( const std::string& filepath ) {
1274 std::ifstream file_descriptor( filepath, std::ifstream::binary );
1275 if ( !file_descriptor.is_open() ) {
1276 std::string str = std::string( "Could not open file: " ) + filepath;
1277 std::cerr << str << std::endl;
1278 throw str;
1279 }
1280 file_descriptor.seekg( 0, file_descriptor.end );
1281 size_t length = size_t( file_descriptor.tellg() );
1282 file_descriptor.seekg( 0, file_descriptor.beg );
1283 my_content.resize( length );
1284 char* begin = &*my_content.begin();
1285 file_descriptor.read( begin, length );
1286 file_descriptor.close();
1287 }
1288 const char* content() { return &*my_content.cbegin(); }
1289 size_t length() { return my_content.length(); }
1290 private:
1291 std::string my_content;
1292 };
1293
1294 class opencl_program_builder {
1295 public:
1296 typedef void (CL_CALLBACK *cl_callback_type)(cl_program, void*);
1297 opencl_program_builder( Factory& f, const std::string& name, cl_program program,
1298 cl_uint num_devices, cl_device_id* device_list,
1299 const char* options, cl_callback_type callback,
1300 void* user_data ) {
1301 cl_int err = clBuildProgram( program, num_devices, device_list, options,
1302 callback, user_data );
1303 if( err == CL_SUCCESS )
1304 return;
1305 std::string str = std::string( "Failed to build program: " ) + name;
1306 if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1307 const opencl_device_list &devices = f.devices();
1308 for ( auto d = devices.begin(); d != devices.end(); ++d ) {
1309 std::cerr << "Build log for device: " << (*d).name() << std::endl;
1310 size_t log_size;
1311 cl_int query_err = clGetProgramBuildInfo(
1312 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1313 &log_size );
1314 enforce_cl_retcode( query_err, "Failed to get build log size" );
1315 if( log_size ) {
1316 std::vector<char> output;
1317 output.resize( log_size );
1318 query_err = clGetProgramBuildInfo(
1319 program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1320 output.size(), output.data(), NULL );
1321 enforce_cl_retcode( query_err, "Failed to get build output" );
1322 std::cerr << output.data() << std::endl;
1323 } else {
1324 std::cerr << "No build log available" << std::endl;
1325 }
1326 }
1327 }
1328 enforce_cl_retcode( err, str );
1329 }
1330 };
1331
1332 class opencl_device_filter {
1333 public:
1334 template<typename Filter>
1335 opencl_device_filter( cl_uint& num_devices, cl_device_id* device_list,
1336 Filter filter, const char* message ) {
1337 for ( cl_uint i = 0; i < num_devices; ++i )
1338 if ( filter(device_list[i]) ) {
1339 device_list[i--] = device_list[--num_devices];
1340 }
1341 if ( !num_devices )
1342 enforce_cl_retcode( CL_DEVICE_NOT_AVAILABLE, message );
1343 }
1344 };
1345
1346 void init( const std::string& ) const {
1347 cl_uint num_devices;
1348 enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES, sizeof( num_devices ), &num_devices, NULL ),
1349 "Failed to get OpenCL context info" );
1350 if ( !num_devices )
1351 enforce_cl_retcode( CL_DEVICE_NOT_FOUND, "No supported devices found" );
1352 cl_device_id *device_list = (cl_device_id *)alloca( num_devices*sizeof( cl_device_id ) );
1353 enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*sizeof( cl_device_id ), device_list, NULL ),
1354 "Failed to get OpenCL context info" );
1355 const char *options = NULL;
1356 switch ( my_type ) {
1357 case opencl_program_type::SOURCE: {
1358 file_reader fr( my_arg_str );
1359 const char *s[] = { fr.content() };
1360 const size_t l[] = { fr.length() };
1361 cl_int err;
1362 my_cl_program = clCreateProgramWithSource( my_factory.context(), 1, s, l, &err );
1363 enforce_cl_retcode( err, std::string( "Failed to create program: " ) + my_arg_str );
1364 opencl_device_filter(
1365 num_devices, device_list,
1366 []( const opencl_device& d ) -> bool {
1367 return !d.compiler_available() || !d.linker_available();
1368 }, "No one device supports building program from sources" );
1369 opencl_program_builder(
1370 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1371 options, NULL, NULL );
1372 break;
1373 }
1374 case opencl_program_type::SPIR:
1375 options = "-x spir";
1376 case opencl_program_type::PRECOMPILED: {
1377 file_reader fr( my_arg_str );
1378 std::vector<const unsigned char*> s(
1379 num_devices, reinterpret_cast<const unsigned char*>(fr.content()) );
1380 std::vector<size_t> l( num_devices, fr.length() );
1381 std::vector<cl_int> bin_statuses( num_devices, -1 );
1382 cl_int err;
1383 my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices,
1384 device_list, l.data(), s.data(),
1385 bin_statuses.data(), &err );
1386 if( err != CL_SUCCESS ) {
1387 std::string statuses_str;
1388 for (auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1389 statuses_str += std::to_string((*st));
1390 }
1391
1392 enforce_cl_retcode( err, std::string( "Failed to create program, error " + std::to_string( err ) + " : " ) + my_arg_str +
1393 std::string( ", binary_statuses = " ) + statuses_str );
1394 }
1395 opencl_program_builder(
1396 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1397 options, NULL, NULL );
1398 break;
1399 }
1400 default:
1401 __TBB_ASSERT( false, "Unsupported program type" );
1402 }
1403 }
1404
1405 Factory& my_factory;
1406 opencl_program_type my_type;
1407 std::string my_arg_str;
1408 mutable cl_program my_cl_program;
1409 mutable std::once_flag my_do_once_flag;
1410
1411 template <typename DeviceFilter>
1412 friend class opencl_factory;
1413
1414 friend class Factory::kernel;
1415 };
1416
1417 template<typename... Args>
1418 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_node;
1419
1420 template<typename JP, typename Factory, typename... Ports>
1421 class __TBB_DEPRECATED_IN_VERBOSE_MODE
1422 opencl_node< tuple<Ports...>, JP, Factory > : public streaming_node< tuple<Ports...>, JP, Factory > {
1423 typedef streaming_node < tuple<Ports...>, JP, Factory > base_type;
1424 public:
1425 typedef typename base_type::kernel_type kernel_type;
1426
1427 opencl_node( graph &g, const kernel_type& kernel )
1428 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1429 {
1430 tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1431 }
1432
1433 opencl_node( graph &g, const kernel_type& kernel, Factory &f )
1434 : base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1435 {
1436 tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1437 }
1438
1439 template <typename DeviceSelector>
1440 opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d, Factory &f)
1441 : base_type( g, kernel, d, f)
1442 {
1443 tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1444 }
1445 };
1446
1447 template<typename JP, typename... Ports>
1448 class __TBB_DEPRECATED_IN_VERBOSE_MODE
1449 opencl_node< tuple<Ports...>, JP > : public opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > {
1450 typedef opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > base_type;
1451 public:
1452 typedef typename base_type::kernel_type kernel_type;
1453
1454 opencl_node( graph &g, const kernel_type& kernel )
1455 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1456 {}
1457
1458 template <typename DeviceSelector>
1459 opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1460 : base_type( g, kernel, d, opencl_info::default_factory() )
1461 {}
1462 };
1463
1464 template<typename... Ports>
1465 class __TBB_DEPRECATED_IN_VERBOSE_MODE
1466 opencl_node< tuple<Ports...> > : public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1467 typedef opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > base_type;
1468 public:
1469 typedef typename base_type::kernel_type kernel_type;
1470
1471 opencl_node( graph &g, const kernel_type& kernel )
1472 : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1473 {}
1474
1475 template <typename DeviceSelector>
1476 opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1477 : base_type( g, kernel, d, opencl_info::default_factory() )
1478 {}
1479 };
1480
1481 }
1482
1483 using interface11::opencl_node;
1484 using interface11::read_only;
1485 using interface11::read_write;
1486 using interface11::write_only;
1487 using interface11::opencl_buffer;
1488 using interface11::opencl_subbuffer;
1489 using interface11::opencl_device;
1490 using interface11::opencl_device_list;
1491 using interface11::opencl_program;
1492 using interface11::opencl_program_type;
1493 using interface11::opencl_async_msg;
1494 using interface11::opencl_factory;
1495 using interface11::opencl_range;
1496
1497 }
1498 }
1499 #endif
1500
1501 #include "internal/_warning_suppress_disable_notice.h"
1502 #undef __TBB_flow_graph_opencl_node_H_include_area
1503
1504 #endif