Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-02-21 10:15:50

0001 /*
0002     Copyright (c) 2005-2020 Intel Corporation
0003 
0004     Licensed under the Apache License, Version 2.0 (the "License");
0005     you may not use this file except in compliance with the License.
0006     You may obtain a copy of the License at
0007 
0008         http://www.apache.org/licenses/LICENSE-2.0
0009 
0010     Unless required by applicable law or agreed to in writing, software
0011     distributed under the License is distributed on an "AS IS" BASIS,
0012     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
0013     See the License for the specific language governing permissions and
0014     limitations under the License.
0015 */
0016 
0017 #include "internal/_deprecated_header_message_guard.h"
0018 
0019 #if !defined(__TBB_show_deprecation_message_flow_graph_opencl_node_H) && defined(__TBB_show_deprecated_header_message)
0020 #define  __TBB_show_deprecation_message_flow_graph_opencl_node_H
0021 #pragma message("TBB Warning: tbb/flow_graph_opencl_node.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
0022 #endif
0023 
0024 #if defined(__TBB_show_deprecated_header_message)
0025 #undef __TBB_show_deprecated_header_message
0026 #endif
0027 
0028 #ifndef __TBB_flow_graph_opencl_node_H
0029 #define __TBB_flow_graph_opencl_node_H
0030 
0031 #define __TBB_flow_graph_opencl_node_H_include_area
0032 #include "internal/_warning_suppress_enable_notice.h"
0033 
0034 #include "tbb/tbb_config.h"
0035 #if __TBB_PREVIEW_OPENCL_NODE
0036 
0037 #include "flow_graph.h"
0038 
0039 #include <vector>
0040 #include <string>
0041 #include <algorithm>
0042 #include <iostream>
0043 #include <fstream>
0044 #include <map>
0045 #include <mutex>
0046 
0047 #ifdef __APPLE__
0048 #include <OpenCL/opencl.h>
0049 #else
0050 #include <CL/cl.h>
0051 #endif
0052 
0053 namespace tbb {
0054 namespace flow {
0055 
0056 namespace interface11 {
0057 
0058 template <typename DeviceFilter>
0059 class opencl_factory;
0060 
0061 namespace opencl_info {
0062 class default_opencl_factory;
0063 }
0064 
0065 template <typename Factory>
0066 class opencl_program;
0067 
0068 inline void enforce_cl_retcode(cl_int err, std::string msg) {
0069     if (err != CL_SUCCESS) {
0070         std::cerr << msg << "; error code: " << err << std::endl;
0071         throw msg;
0072     }
0073 }
0074 
0075 template <typename T>
0076 T event_info(cl_event e, cl_event_info i) {
0077     T res;
0078     enforce_cl_retcode(clGetEventInfo(e, i, sizeof(res), &res, NULL), "Failed to get OpenCL event information");
0079     return res;
0080 }
0081 
0082 template <typename T>
0083 T device_info(cl_device_id d, cl_device_info i) {
0084     T res;
0085     enforce_cl_retcode(clGetDeviceInfo(d, i, sizeof(res), &res, NULL), "Failed to get OpenCL device information");
0086     return res;
0087 }
0088 
0089 template <>
0090 inline std::string device_info<std::string>(cl_device_id d, cl_device_info i) {
0091     size_t required;
0092     enforce_cl_retcode(clGetDeviceInfo(d, i, 0, NULL, &required), "Failed to get OpenCL device information");
0093 
0094     char *buff = (char*)alloca(required);
0095     enforce_cl_retcode(clGetDeviceInfo(d, i, required, buff, NULL), "Failed to get OpenCL device information");
0096 
0097     return buff;
0098 }
0099 
0100 template <typename T>
0101 T platform_info(cl_platform_id p, cl_platform_info i) {
0102     T res;
0103     enforce_cl_retcode(clGetPlatformInfo(p, i, sizeof(res), &res, NULL), "Failed to get OpenCL platform information");
0104     return res;
0105 }
0106 
0107 template <>
0108 inline std::string platform_info<std::string>(cl_platform_id p, cl_platform_info  i) {
0109     size_t required;
0110     enforce_cl_retcode(clGetPlatformInfo(p, i, 0, NULL, &required), "Failed to get OpenCL platform information");
0111 
0112     char *buff = (char*)alloca(required);
0113     enforce_cl_retcode(clGetPlatformInfo(p, i, required, buff, NULL), "Failed to get OpenCL platform information");
0114 
0115     return buff;
0116 }
0117 
0118 
0119 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_device {
0120 public:
0121     typedef size_t device_id_type;
0122     enum : device_id_type {
0123         unknown = device_id_type( -2 ),
0124         host = device_id_type( -1 )
0125     };
0126 
0127     opencl_device() : my_device_id( unknown ), my_cl_device_id( NULL ), my_cl_command_queue( NULL ) {}
0128 
0129     opencl_device( cl_device_id d_id ) : my_device_id( unknown ), my_cl_device_id( d_id ), my_cl_command_queue( NULL ) {}
0130 
0131     opencl_device( cl_device_id cl_d_id, device_id_type device_id ) : my_device_id( device_id ), my_cl_device_id( cl_d_id ), my_cl_command_queue( NULL ) {}
0132 
0133     std::string platform_profile() const {
0134         return platform_info<std::string>( platform_id(), CL_PLATFORM_PROFILE );
0135     }
0136     std::string platform_version() const {
0137         return platform_info<std::string>( platform_id(), CL_PLATFORM_VERSION );
0138     }
0139     std::string platform_name() const {
0140         return platform_info<std::string>( platform_id(), CL_PLATFORM_NAME );
0141     }
0142     std::string platform_vendor() const {
0143         return platform_info<std::string>( platform_id(), CL_PLATFORM_VENDOR );
0144     }
0145     std::string platform_extensions() const {
0146         return platform_info<std::string>( platform_id(), CL_PLATFORM_EXTENSIONS );
0147     }
0148 
0149     template <typename T>
0150     void info( cl_device_info i, T &t ) const {
0151         t = device_info<T>( my_cl_device_id, i );
0152     }
0153     std::string version() const {
0154         // The version string format: OpenCL<space><major_version.minor_version><space><vendor-specific information>
0155         return device_info<std::string>( my_cl_device_id, CL_DEVICE_VERSION );
0156     }
0157     int major_version() const {
0158         int major;
0159         std::sscanf( version().c_str(), "OpenCL %d", &major );
0160         return major;
0161     }
0162     int minor_version() const {
0163         int major, minor;
0164         std::sscanf( version().c_str(), "OpenCL %d.%d", &major, &minor );
0165         return minor;
0166     }
0167     bool out_of_order_exec_mode_on_host_present() const {
0168 #if CL_VERSION_2_0
0169         if ( major_version() >= 2 )
0170             return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
0171         else
0172 #endif /* CL_VERSION_2_0 */
0173             return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
0174     }
0175     bool out_of_order_exec_mode_on_device_present() const {
0176 #if CL_VERSION_2_0
0177         if ( major_version() >= 2 )
0178             return (device_info<cl_command_queue_properties>( my_cl_device_id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES ) & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) != 0;
0179         else
0180 #endif /* CL_VERSION_2_0 */
0181             return false;
0182     }
0183     std::array<size_t, 3> max_work_item_sizes() const {
0184         return device_info<std::array<size_t, 3>>( my_cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES );
0185     }
0186     size_t max_work_group_size() const {
0187         return device_info<size_t>( my_cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE );
0188     }
0189     bool built_in_kernel_available( const std::string& k ) const {
0190         const std::string semi = ";";
0191         // Added semicolumns to force an exact match (to avoid a partial match, e.g. "add" is partly matched with "madd").
0192         return (semi + built_in_kernels() + semi).find( semi + k + semi ) != std::string::npos;
0193     }
0194     std::string built_in_kernels() const {
0195         return device_info<std::string>( my_cl_device_id, CL_DEVICE_BUILT_IN_KERNELS );
0196     }
0197     std::string name() const {
0198         return device_info<std::string>( my_cl_device_id, CL_DEVICE_NAME );
0199     }
0200     cl_bool available() const {
0201         return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_AVAILABLE );
0202     }
0203     cl_bool compiler_available() const {
0204         return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_COMPILER_AVAILABLE );
0205     }
0206     cl_bool linker_available() const {
0207         return device_info<cl_bool>( my_cl_device_id, CL_DEVICE_LINKER_AVAILABLE );
0208     }
0209     bool extension_available( const std::string &ext ) const {
0210         const std::string space = " ";
0211         // Added space to force an exact match (to avoid a partial match, e.g. "ext" is partly matched with "ext2").
0212         return (space + extensions() + space).find( space + ext + space ) != std::string::npos;
0213     }
0214     std::string extensions() const {
0215         return device_info<std::string>( my_cl_device_id, CL_DEVICE_EXTENSIONS );
0216     }
0217 
0218     cl_device_type type() const {
0219         return device_info<cl_device_type>( my_cl_device_id, CL_DEVICE_TYPE );
0220     }
0221 
0222     std::string vendor() const {
0223         return device_info<std::string>( my_cl_device_id, CL_DEVICE_VENDOR );
0224     }
0225 
0226     cl_uint address_bits() const {
0227         return device_info<cl_uint>( my_cl_device_id, CL_DEVICE_ADDRESS_BITS );
0228     }
0229 
0230     cl_device_id device_id() const {
0231         return my_cl_device_id;
0232     }
0233 
0234     cl_command_queue command_queue() const {
0235         return my_cl_command_queue;
0236     }
0237 
0238     void set_command_queue( cl_command_queue cmd_queue ) {
0239         my_cl_command_queue = cmd_queue;
0240     }
0241 
0242     cl_platform_id platform_id() const {
0243         return device_info<cl_platform_id>( my_cl_device_id, CL_DEVICE_PLATFORM );
0244     }
0245 
0246 private:
0247 
0248     device_id_type my_device_id;
0249     cl_device_id my_cl_device_id;
0250     cl_command_queue my_cl_command_queue;
0251 
0252     friend bool operator==(opencl_device d1, opencl_device d2) { return d1.my_cl_device_id == d2.my_cl_device_id; }
0253 
0254     template <typename DeviceFilter>
0255     friend class opencl_factory;
0256     template <typename Factory>
0257     friend class opencl_memory;
0258     template <typename Factory>
0259     friend class opencl_program;
0260 
0261 #if TBB_USE_ASSERT
0262     template <typename T, typename Factory>
0263     friend class opencl_buffer;
0264 #endif
0265 };
0266 
0267 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_device_list {
0268     typedef std::vector<opencl_device> container_type;
0269 public:
0270     typedef container_type::iterator iterator;
0271     typedef container_type::const_iterator const_iterator;
0272     typedef container_type::size_type size_type;
0273 
0274     opencl_device_list() {}
0275     opencl_device_list( std::initializer_list<opencl_device> il ) : my_container( il ) {}
0276 
0277     void add( opencl_device d ) { my_container.push_back( d ); }
0278     size_type size() const { return my_container.size(); }
0279     bool empty() const { return my_container.empty(); }
0280     iterator begin() { return my_container.begin(); }
0281     iterator end() { return my_container.end(); }
0282     const_iterator begin() const { return my_container.begin(); }
0283     const_iterator end() const { return my_container.end(); }
0284     const_iterator cbegin() const { return my_container.cbegin(); }
0285     const_iterator cend() const { return my_container.cend(); }
0286 
0287 private:
0288     container_type my_container;
0289 };
0290 
0291 namespace internal {
0292 
0293 // Retrieve all OpenCL devices from machine
0294 inline opencl_device_list find_available_devices() {
0295     opencl_device_list opencl_devices;
0296 
0297     cl_uint num_platforms;
0298     enforce_cl_retcode(clGetPlatformIDs(0, NULL, &num_platforms), "clGetPlatformIDs failed");
0299 
0300     std::vector<cl_platform_id> platforms(num_platforms);
0301     enforce_cl_retcode(clGetPlatformIDs(num_platforms, platforms.data(), NULL), "clGetPlatformIDs failed");
0302 
0303     cl_uint num_devices;
0304     std::vector<cl_platform_id>::iterator platforms_it = platforms.begin();
0305     cl_uint num_all_devices = 0;
0306     while (platforms_it != platforms.end()) {
0307         cl_int err = clGetDeviceIDs(*platforms_it, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
0308         if (err == CL_DEVICE_NOT_FOUND) {
0309             platforms_it = platforms.erase(platforms_it);
0310         }
0311         else {
0312             enforce_cl_retcode(err, "clGetDeviceIDs failed");
0313             num_all_devices += num_devices;
0314             ++platforms_it;
0315         }
0316     }
0317 
0318     std::vector<cl_device_id> devices(num_all_devices);
0319     std::vector<cl_device_id>::iterator devices_it = devices.begin();
0320     for (auto p = platforms.begin(); p != platforms.end(); ++p) {
0321         enforce_cl_retcode(clGetDeviceIDs((*p), CL_DEVICE_TYPE_ALL, (cl_uint)std::distance(devices_it, devices.end()), &*devices_it, &num_devices), "clGetDeviceIDs failed");
0322         devices_it += num_devices;
0323     }
0324 
0325     for (auto d = devices.begin(); d != devices.end(); ++d) {
0326         opencl_devices.add(opencl_device((*d)));
0327     }
0328 
0329     return opencl_devices;
0330 }
0331 
0332 } // namespace internal
0333 
0334 // TODO: consider this namespace as public API
0335 namespace opencl_info {
0336 
0337     inline const opencl_device_list& available_devices() {
0338         // Static storage for all available OpenCL devices on machine
0339         static const opencl_device_list my_devices = internal::find_available_devices();
0340         return my_devices;
0341     }
0342 
0343 } // namespace opencl_info
0344 
0345 
0346 class callback_base : tbb::internal::no_copy {
0347 public:
0348     virtual void call() = 0;
0349     virtual ~callback_base() {}
0350 };
0351 
0352 template <typename Callback, typename T>
0353 class callback : public callback_base {
0354     Callback my_callback;
0355     T my_data;
0356 public:
0357     callback( Callback c, const T& t ) : my_callback( c ), my_data( t ) {}
0358 
0359     void call() __TBB_override {
0360         my_callback( my_data );
0361     }
0362 };
0363 
0364 template <typename T, typename Factory = opencl_info::default_opencl_factory>
0365 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_async_msg : public async_msg<T> {
0366 public:
0367     typedef T value_type;
0368 
0369     opencl_async_msg() : my_callback_flag_ptr( std::make_shared< tbb::atomic<bool>>() ) {
0370         my_callback_flag_ptr->store<tbb::relaxed>(false);
0371     }
0372 
0373     explicit opencl_async_msg( const T& data ) : my_data(data), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
0374         my_callback_flag_ptr->store<tbb::relaxed>(false);
0375     }
0376 
0377     opencl_async_msg( const T& data, cl_event event ) : my_data(data), my_event(event), my_is_event(true), my_callback_flag_ptr( std::make_shared<tbb::atomic<bool>>() ) {
0378         my_callback_flag_ptr->store<tbb::relaxed>(false);
0379         enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
0380     }
0381 
0382     T& data( bool wait = true ) {
0383         if ( my_is_event && wait ) {
0384             enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
0385             enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0386             my_is_event = false;
0387         }
0388         return my_data;
0389     }
0390 
0391     const T& data( bool wait = true ) const {
0392         if ( my_is_event && wait ) {
0393             enforce_cl_retcode( clWaitForEvents( 1, &my_event ), "Failed to wait for an event" );
0394             enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0395             my_is_event = false;
0396         }
0397         return my_data;
0398     }
0399 
0400     opencl_async_msg( const opencl_async_msg &dmsg ) : async_msg<T>(dmsg),
0401         my_data(dmsg.my_data), my_event(dmsg.my_event), my_is_event( dmsg.my_is_event ),
0402         my_callback_flag_ptr(dmsg.my_callback_flag_ptr)
0403     {
0404         if ( my_is_event )
0405             enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
0406     }
0407 
0408     opencl_async_msg( opencl_async_msg &&dmsg ) : async_msg<T>(std::move(dmsg)),
0409         my_data(std::move(dmsg.my_data)), my_event(dmsg.my_event), my_is_event(dmsg.my_is_event),
0410         my_callback_flag_ptr( std::move(dmsg.my_callback_flag_ptr) )
0411     {
0412         dmsg.my_is_event = false;
0413     }
0414 
0415     opencl_async_msg& operator=(const opencl_async_msg &dmsg) {
0416         async_msg<T>::operator =(dmsg);
0417 
0418         // Release original event
0419         if ( my_is_event )
0420             enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to retain an event" );
0421 
0422         my_data = dmsg.my_data;
0423         my_event = dmsg.my_event;
0424         my_is_event = dmsg.my_is_event;
0425 
0426         // Retain copied event
0427         if ( my_is_event )
0428             enforce_cl_retcode( clRetainEvent( my_event ), "Failed to retain an event" );
0429 
0430         my_callback_flag_ptr = dmsg.my_callback_flag_ptr;
0431         return *this;
0432     }
0433 
0434     ~opencl_async_msg() {
0435         if ( my_is_event )
0436             enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0437     }
0438 
0439     cl_event const * get_event() const { return my_is_event ? &my_event : NULL; }
0440     void set_event( cl_event e ) const {
0441         if ( my_is_event ) {
0442             cl_command_queue cq = event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE );
0443             if ( cq != event_info<cl_command_queue>( e, CL_EVENT_COMMAND_QUEUE ) )
0444                 enforce_cl_retcode( clFlush( cq ), "Failed to flush an OpenCL command queue" );
0445             enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0446         }
0447         my_is_event = true;
0448         my_event = e;
0449         clRetainEvent( my_event );
0450     }
0451 
0452     void clear_event() const {
0453         if ( my_is_event ) {
0454             enforce_cl_retcode( clFlush( event_info<cl_command_queue>( my_event, CL_EVENT_COMMAND_QUEUE ) ), "Failed to flush an OpenCL command queue" );
0455             enforce_cl_retcode( clReleaseEvent( my_event ), "Failed to release an event" );
0456         }
0457         my_is_event = false;
0458     }
0459 
0460     template <typename Callback>
0461     void register_callback( Callback c ) const {
0462         __TBB_ASSERT( my_is_event, "The OpenCL event is not set" );
0463         enforce_cl_retcode( clSetEventCallback( my_event, CL_COMPLETE, register_callback_func, new callback<Callback, T>( c, my_data ) ), "Failed to set an OpenCL callback" );
0464     }
0465 
0466     operator T&() { return data(); }
0467     operator const T&() const { return data(); }
0468 
0469 protected:
0470     // Overridden in this derived class to inform that
0471     // async calculation chain is over
0472     void finalize() const __TBB_override {
0473         receive_if_memory_object(*this);
0474         if (! my_callback_flag_ptr->fetch_and_store(true)) {
0475             opencl_async_msg a(*this);
0476             if (my_is_event) {
0477                 register_callback([a](const T& t) mutable {
0478                     a.set(t);
0479                 });
0480             }
0481             else {
0482                 a.set(my_data);
0483             }
0484         }
0485         clear_event();
0486     }
0487 
0488 private:
0489     static void CL_CALLBACK register_callback_func( cl_event, cl_int event_command_exec_status, void *data ) {
0490         tbb::internal::suppress_unused_warning( event_command_exec_status );
0491         __TBB_ASSERT( event_command_exec_status == CL_COMPLETE, NULL );
0492         __TBB_ASSERT( data, NULL );
0493         callback_base *c = static_cast<callback_base*>(data);
0494         c->call();
0495         delete c;
0496     }
0497 
0498     T my_data;
0499     mutable cl_event my_event;
0500     mutable bool my_is_event = false;
0501 
0502     std::shared_ptr< tbb::atomic<bool> > my_callback_flag_ptr;
0503 };
0504 
0505 template <typename K, typename T, typename Factory>
0506 K key_from_message( const opencl_async_msg<T, Factory> &dmsg ) {
0507     using tbb::flow::key_from_message;
0508     const T &t = dmsg.data( false );
0509     __TBB_STATIC_ASSERT( true, "" );
0510     return key_from_message<K, T>( t );
0511 }
0512 
0513 template <typename Factory>
0514 class opencl_memory {
0515 public:
0516     opencl_memory() {}
0517     opencl_memory( Factory &f ) : my_host_ptr( NULL ), my_factory( &f ), my_sending_event_present( false ) {
0518         my_curr_device_id = my_factory->devices().begin()->my_device_id;
0519     }
0520 
0521     virtual ~opencl_memory() {
0522         if ( my_sending_event_present ) enforce_cl_retcode( clReleaseEvent( my_sending_event ), "Failed to release an event for the OpenCL buffer" );
0523         enforce_cl_retcode( clReleaseMemObject( my_cl_mem ), "Failed to release an memory object" );
0524     }
0525 
0526     cl_mem get_cl_mem() const {
0527         return my_cl_mem;
0528     }
0529 
0530     void* get_host_ptr() {
0531         if ( !my_host_ptr ) {
0532             opencl_async_msg<void*, Factory> d = receive( NULL );
0533             d.data();
0534             __TBB_ASSERT( d.data() == my_host_ptr, NULL );
0535         }
0536         return my_host_ptr;
0537     }
0538 
0539     Factory *factory() const { return my_factory; }
0540 
0541     opencl_async_msg<void*, Factory> receive(const cl_event *e) {
0542         opencl_async_msg<void*, Factory> d;
0543         if (e) {
0544             d = opencl_async_msg<void*, Factory>(my_host_ptr, *e);
0545         } else {
0546             d = opencl_async_msg<void*, Factory>(my_host_ptr);
0547         }
0548 
0549         // Concurrent receives are prohibited so we do not worry about synchronization.
0550         if (my_curr_device_id.load<tbb::relaxed>() != opencl_device::host) {
0551             map_memory(*my_factory->devices().begin(), d);
0552             my_curr_device_id.store<tbb::relaxed>(opencl_device::host);
0553             my_host_ptr = d.data(false);
0554         }
0555         // Release the sending event
0556         if (my_sending_event_present) {
0557             enforce_cl_retcode(clReleaseEvent(my_sending_event), "Failed to release an event");
0558             my_sending_event_present = false;
0559         }
0560         return d;
0561     }
0562 
0563     opencl_async_msg<void*, Factory> send(opencl_device device, const cl_event *e) {
0564         opencl_device::device_id_type device_id = device.my_device_id;
0565         if (!my_factory->is_same_context(my_curr_device_id.load<tbb::acquire>(), device_id)) {
0566             {
0567                 tbb::spin_mutex::scoped_lock lock(my_sending_lock);
0568                 if (!my_factory->is_same_context(my_curr_device_id.load<tbb::relaxed>(), device_id)) {
0569                     __TBB_ASSERT(my_host_ptr, "The buffer has not been mapped");
0570                     opencl_async_msg<void*, Factory> d(my_host_ptr);
0571                     my_factory->enqueue_unmap_buffer(device, *this, d);
0572                     my_sending_event = *d.get_event();
0573                     my_sending_event_present = true;
0574                     enforce_cl_retcode(clRetainEvent(my_sending_event), "Failed to retain an event");
0575                     my_host_ptr = NULL;
0576                     my_curr_device_id.store<tbb::release>(device_id);
0577                 }
0578             }
0579             __TBB_ASSERT(my_sending_event_present, NULL);
0580         }
0581 
0582         // !e means that buffer has come from the host
0583         if (!e && my_sending_event_present) e = &my_sending_event;
0584 
0585         __TBB_ASSERT(!my_host_ptr, "The buffer has not been unmapped");
0586         return e ? opencl_async_msg<void*, Factory>(NULL, *e) : opencl_async_msg<void*, Factory>(NULL);
0587     }
0588 
0589     virtual void map_memory( opencl_device, opencl_async_msg<void*, Factory> & ) = 0;
0590 protected:
0591     cl_mem my_cl_mem;
0592     tbb::atomic<opencl_device::device_id_type> my_curr_device_id;
0593     void* my_host_ptr;
0594     Factory *my_factory;
0595 
0596     tbb::spin_mutex my_sending_lock;
0597     bool my_sending_event_present;
0598     cl_event my_sending_event;
0599 };
0600 
0601 template <typename Factory>
0602 class opencl_buffer_impl : public opencl_memory<Factory> {
0603     size_t my_size;
0604 public:
0605     opencl_buffer_impl( size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
0606         cl_int err;
0607         this->my_cl_mem = clCreateBuffer( this->my_factory->context(), CL_MEM_ALLOC_HOST_PTR, size, NULL, &err );
0608         enforce_cl_retcode( err, "Failed to create an OpenCL buffer" );
0609     }
0610 
0611     // The constructor for subbuffers.
0612     opencl_buffer_impl( cl_mem m, size_t index, size_t size, Factory& f ) : opencl_memory<Factory>( f ), my_size( size ) {
0613         cl_int err;
0614         cl_buffer_region region = { index, size };
0615         this->my_cl_mem = clCreateSubBuffer( m, 0, CL_BUFFER_CREATE_TYPE_REGION, &region, &err );
0616         enforce_cl_retcode( err, "Failed to create an OpenCL subbuffer" );
0617     }
0618 
0619     size_t size() const {
0620         return my_size;
0621     }
0622 
0623     void map_memory( opencl_device device, opencl_async_msg<void*, Factory> &dmsg ) __TBB_override {
0624         this->my_factory->enqueue_map_buffer( device, *this, dmsg );
0625     }
0626 
0627 #if TBB_USE_ASSERT
0628     template <typename, typename>
0629     friend class opencl_buffer;
0630 #endif
0631 };
0632 
0633 enum access_type {
0634     read_write,
0635     write_only,
0636     read_only
0637 };
0638 
0639 template <typename T, typename Factory = opencl_info::default_opencl_factory>
0640 class __TBB_DEPRECATED_IN_VERBOSE_MODE
0641 opencl_subbuffer;
0642 
0643 template <typename T, typename Factory = opencl_info::default_opencl_factory>
0644 class __TBB_DEPRECATED_IN_VERBOSE_MODE
0645 opencl_buffer {
0646 public:
0647     typedef cl_mem native_object_type;
0648     typedef opencl_buffer memory_object_type;
0649     typedef Factory opencl_factory_type;
0650 
0651     template<access_type a> using iterator = T*;
0652 
0653     template <access_type a>
0654     iterator<a> access() const {
0655         T* ptr = (T*)my_impl->get_host_ptr();
0656         __TBB_ASSERT( ptr, NULL );
0657         return iterator<a>( ptr );
0658     }
0659 
0660     T* data() const { return &access<read_write>()[0]; }
0661 
0662     template <access_type a = read_write>
0663     iterator<a> begin() const { return access<a>(); }
0664 
0665     template <access_type a = read_write>
0666     iterator<a> end() const { return access<a>()+my_impl->size()/sizeof(T); }
0667 
0668     size_t size() const { return my_impl->size()/sizeof(T); }
0669 
0670     T& operator[] ( ptrdiff_t k ) { return begin()[k]; }
0671 
0672     opencl_buffer() {}
0673     opencl_buffer( size_t size );
0674     opencl_buffer( Factory &f, size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), f ) ) {}
0675 
0676     cl_mem native_object() const {
0677         return my_impl->get_cl_mem();
0678     }
0679 
0680     const opencl_buffer& memory_object() const {
0681         return *this;
0682     }
0683 
0684     void send( opencl_device device, opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
0685         __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
0686         opencl_async_msg<void*, Factory> d = my_impl->send( device, dependency.get_event() );
0687         const cl_event *e = d.get_event();
0688         if ( e ) dependency.set_event( *e );
0689         else dependency.clear_event();
0690     }
0691     void receive( const opencl_async_msg<opencl_buffer, Factory> &dependency ) const {
0692         __TBB_ASSERT( dependency.data( /*wait = */false ) == *this, NULL );
0693         opencl_async_msg<void*, Factory> d = my_impl->receive( dependency.get_event() );
0694         const cl_event *e = d.get_event();
0695         if ( e ) dependency.set_event( *e );
0696         else dependency.clear_event();
0697     }
0698 
0699     opencl_subbuffer<T, Factory> subbuffer( size_t index, size_t size ) const;
0700 private:
0701     // The constructor for subbuffers.
0702     opencl_buffer( Factory &f, cl_mem m, size_t index, size_t size ) : my_impl( std::make_shared<impl_type>( m, index*sizeof(T), size*sizeof(T), f ) ) {}
0703 
0704     typedef opencl_buffer_impl<Factory> impl_type;
0705 
0706     std::shared_ptr<impl_type> my_impl;
0707 
0708     friend bool operator==(const opencl_buffer<T, Factory> &lhs, const opencl_buffer<T, Factory> &rhs) {
0709         return lhs.my_impl == rhs.my_impl;
0710     }
0711 
0712     template <typename>
0713     friend class opencl_factory;
0714     template <typename, typename>
0715     friend class opencl_subbuffer;
0716 };
0717 
0718 template <typename T, typename Factory>
0719 class __TBB_DEPRECATED_IN_VERBOSE_MODE
0720 opencl_subbuffer : public opencl_buffer<T, Factory> {
0721     opencl_buffer<T, Factory> my_owner;
0722 public:
0723     opencl_subbuffer() {}
0724     opencl_subbuffer( const opencl_buffer<T, Factory> &owner, size_t index, size_t size ) :
0725         opencl_buffer<T, Factory>( *owner.my_impl->factory(), owner.native_object(), index, size ), my_owner( owner ) {}
0726 };
0727 
0728 template <typename T, typename Factory>
0729 opencl_subbuffer<T, Factory> opencl_buffer<T, Factory>::subbuffer( size_t index, size_t size ) const {
0730     return opencl_subbuffer<T, Factory>( *this, index, size );
0731 }
0732 
0733 
0734 #define is_typedef(type)                                                    \
0735     template <typename T>                                                   \
0736     struct is_##type {                                                      \
0737         template <typename C>                                               \
0738         static std::true_type check( typename C::type* );                   \
0739         template <typename C>                                               \
0740         static std::false_type check( ... );                                \
0741                                                                             \
0742         static const bool value = decltype(check<T>(0))::value;             \
0743     }
0744 
0745 is_typedef( native_object_type );
0746 is_typedef( memory_object_type );
0747 
0748 template <typename T>
0749 typename std::enable_if<is_native_object_type<T>::value, typename T::native_object_type>::type get_native_object( const T &t ) {
0750     return t.native_object();
0751 }
0752 
0753 template <typename T>
0754 typename std::enable_if<!is_native_object_type<T>::value, T>::type get_native_object( T t ) {
0755     return t;
0756 }
0757 
0758 // send_if_memory_object checks if the T type has memory_object_type and call the send method for the object.
0759 template <typename T, typename Factory>
0760 typename std::enable_if<is_memory_object_type<T>::value>::type send_if_memory_object( opencl_device device, opencl_async_msg<T, Factory> &dmsg ) {
0761     const T &t = dmsg.data( false );
0762     typedef typename T::memory_object_type mem_obj_t;
0763     mem_obj_t mem_obj = t.memory_object();
0764     opencl_async_msg<mem_obj_t, Factory> d( mem_obj );
0765     if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
0766     mem_obj.send( device, d );
0767     if ( d.get_event() ) dmsg.set_event( *d.get_event() );
0768 }
0769 
0770 template <typename T>
0771 typename std::enable_if<is_memory_object_type<T>::value>::type send_if_memory_object( opencl_device device, T &t ) {
0772     typedef typename T::memory_object_type mem_obj_t;
0773     mem_obj_t mem_obj = t.memory_object();
0774     opencl_async_msg<mem_obj_t, typename mem_obj_t::opencl_factory_type> dmsg( mem_obj );
0775     mem_obj.send( device, dmsg );
0776 }
0777 
0778 template <typename T>
0779 typename std::enable_if<!is_memory_object_type<T>::value>::type send_if_memory_object( opencl_device, T& ) {};
0780 
0781 // receive_if_memory_object checks if the T type has memory_object_type and call the receive method for the object.
0782 template <typename T, typename Factory>
0783 typename std::enable_if<is_memory_object_type<T>::value>::type receive_if_memory_object( const opencl_async_msg<T, Factory> &dmsg ) {
0784     const T &t = dmsg.data( false );
0785     typedef typename T::memory_object_type mem_obj_t;
0786     mem_obj_t mem_obj = t.memory_object();
0787     opencl_async_msg<mem_obj_t, Factory> d( mem_obj );
0788     if ( dmsg.get_event() ) d.set_event( *dmsg.get_event() );
0789     mem_obj.receive( d );
0790     if ( d.get_event() ) dmsg.set_event( *d.get_event() );
0791 }
0792 
0793 template <typename T>
0794 typename std::enable_if<!is_memory_object_type<T>::value>::type  receive_if_memory_object( const T& ) {}
0795 
0796 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_range {
0797 public:
0798     typedef size_t range_index_type;
0799     typedef std::array<range_index_type, 3> nd_range_type;
0800 
0801     template <typename G = std::initializer_list<int>, typename L = std::initializer_list<int>,
0802         typename = typename std::enable_if<!std::is_same<typename std::decay<G>::type, opencl_range>::value>::type>
0803     opencl_range(G&& global_work = std::initializer_list<int>({ 0 }), L&& local_work = std::initializer_list<int>({ 0, 0, 0 })) {
0804         auto g_it = global_work.begin();
0805         auto l_it = local_work.begin();
0806         my_global_work_size = { {size_t(-1), size_t(-1), size_t(-1)} };
0807         // my_local_work_size is still uninitialized
0808         for (int s = 0; s < 3 && g_it != global_work.end(); ++g_it, ++l_it, ++s) {
0809             __TBB_ASSERT(l_it != local_work.end(), "global_work & local_work must have same size");
0810             my_global_work_size[s] = *g_it;
0811             my_local_work_size[s] = *l_it;
0812         }
0813     }
0814 
0815     const nd_range_type& global_range() const { return my_global_work_size; }
0816     const nd_range_type& local_range() const { return my_local_work_size; }
0817 
0818 private:
0819     nd_range_type my_global_work_size;
0820     nd_range_type my_local_work_size;
0821 };
0822 
0823 template <typename DeviceFilter>
0824 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_factory {
0825 public:
0826     template<typename T> using async_msg_type = opencl_async_msg<T, opencl_factory<DeviceFilter>>;
0827     typedef opencl_device device_type;
0828 
0829     class kernel : tbb::internal::no_assign {
0830     public:
0831         kernel( const kernel& k ) : my_factory( k.my_factory ) {
0832             // Clone my_cl_kernel via opencl_program
0833             size_t ret_size = 0;
0834 
0835             std::vector<char> kernel_name;
0836             for ( size_t curr_size = 32;; curr_size <<= 1 ) {
0837                 kernel_name.resize( curr_size <<= 1 );
0838                 enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_FUNCTION_NAME, curr_size, kernel_name.data(), &ret_size ), "Failed to get kernel info" );
0839                 if ( ret_size < curr_size ) break;
0840             }
0841 
0842             cl_program program;
0843             enforce_cl_retcode( clGetKernelInfo( k.my_cl_kernel, CL_KERNEL_PROGRAM, sizeof(program), &program, &ret_size ), "Failed to get kernel info" );
0844             __TBB_ASSERT( ret_size == sizeof(program), NULL );
0845 
0846             my_cl_kernel = opencl_program< factory_type >( my_factory, program ).get_cl_kernel( kernel_name.data() );
0847         }
0848 
0849         ~kernel() {
0850             enforce_cl_retcode( clReleaseKernel( my_cl_kernel ), "Failed to release a kernel" );
0851         }
0852 
0853     private:
0854         typedef opencl_factory<DeviceFilter> factory_type;
0855 
0856         kernel( const cl_kernel& k, factory_type& f ) : my_cl_kernel( k ), my_factory( f ) {}
0857 
0858         // Data
0859         cl_kernel my_cl_kernel;
0860         factory_type& my_factory;
0861 
0862         template <typename DeviceFilter_>
0863         friend class opencl_factory;
0864 
0865         template <typename Factory>
0866         friend class opencl_program;
0867     };
0868 
0869     typedef kernel kernel_type;
0870 
0871     // 'range_type' enables kernel_executor with range support
0872     // it affects expectations for enqueue_kernel(.....) interface method
0873     typedef opencl_range range_type;
0874 
0875     opencl_factory() {}
0876     ~opencl_factory() {
0877         if ( my_devices.size() ) {
0878             for ( auto d = my_devices.begin(); d != my_devices.end(); ++d ) {
0879                 enforce_cl_retcode( clReleaseCommandQueue( (*d).my_cl_command_queue ), "Failed to release a command queue" );
0880             }
0881             enforce_cl_retcode( clReleaseContext( my_cl_context ), "Failed to release a context" );
0882         }
0883     }
0884 
0885     bool init( const opencl_device_list &device_list ) {
0886         tbb::spin_mutex::scoped_lock lock( my_devices_mutex );
0887         if ( !my_devices.size() ) {
0888             my_devices = device_list;
0889             return true;
0890         }
0891         return false;
0892     }
0893 
0894 
0895 private:
0896     template <typename Factory>
0897     void enqueue_map_buffer( opencl_device device, opencl_buffer_impl<Factory> &buffer, opencl_async_msg<void*, Factory>& dmsg ) {
0898         cl_event const* e1 = dmsg.get_event();
0899         cl_event e2;
0900         cl_int err;
0901         void *ptr = clEnqueueMapBuffer( device.my_cl_command_queue, buffer.get_cl_mem(), false, CL_MAP_READ | CL_MAP_WRITE, 0, buffer.size(),
0902             e1 == NULL ? 0 : 1, e1, &e2, &err );
0903         enforce_cl_retcode( err, "Failed to map a buffer" );
0904         dmsg.data( false ) = ptr;
0905         dmsg.set_event( e2 );
0906         enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
0907     }
0908 
0909 
0910     template <typename Factory>
0911     void enqueue_unmap_buffer( opencl_device device, opencl_memory<Factory> &memory, opencl_async_msg<void*, Factory>& dmsg ) {
0912         cl_event const* e1 = dmsg.get_event();
0913         cl_event e2;
0914         enforce_cl_retcode(
0915             clEnqueueUnmapMemObject( device.my_cl_command_queue, memory.get_cl_mem(), memory.get_host_ptr(), e1 == NULL ? 0 : 1, e1, &e2 ),
0916            "Failed to unmap a buffer" );
0917         dmsg.set_event( e2 );
0918         enforce_cl_retcode( clReleaseEvent( e2 ), "Failed to release an event" );
0919     }
0920 
0921     // --------- Kernel argument & event list helpers --------- //
0922     template <size_t NUM_ARGS, typename T>
0923     void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>&, int&, int& place, const T& t ) {
0924         auto p = get_native_object(t);
0925         enforce_cl_retcode( clSetKernelArg(kernel.my_cl_kernel, place++, sizeof(p), &p), "Failed to set a kernel argument" );
0926     }
0927 
0928     template <size_t NUM_ARGS, typename T, typename F>
0929     void process_one_arg( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const opencl_async_msg<T, F>& msg ) {
0930         __TBB_ASSERT((static_cast<typename std::array<cl_event, NUM_ARGS>::size_type>(num_events) < events.size()), NULL);
0931 
0932         const cl_event * const e = msg.get_event();
0933         if (e != NULL) {
0934             events[num_events++] = *e;
0935         }
0936 
0937         process_one_arg( kernel, events, num_events, place, msg.data(false) );
0938     }
0939 
0940     template <size_t NUM_ARGS, typename T, typename ...Rest>
0941     void process_arg_list( const kernel_type& kernel, std::array<cl_event, NUM_ARGS>& events, int& num_events, int& place, const T& t, const Rest&... args ) {
0942         process_one_arg( kernel, events, num_events, place, t );
0943         process_arg_list( kernel, events, num_events, place, args... );
0944     }
0945 
0946     template <size_t NUM_ARGS>
0947     void process_arg_list( const kernel_type&, std::array<cl_event, NUM_ARGS>&, int&, int& ) {}
0948     // ------------------------------------------- //
0949     template <typename T>
0950     void update_one_arg( cl_event, T& ) {}
0951 
0952     template <typename T, typename F>
0953     void update_one_arg( cl_event e, opencl_async_msg<T, F>& msg ) {
0954         msg.set_event( e );
0955     }
0956 
0957     template <typename T, typename ...Rest>
0958     void update_arg_list( cl_event e, T& t, Rest&... args ) {
0959         update_one_arg( e, t );
0960         update_arg_list( e, args... );
0961     }
0962 
0963     void update_arg_list( cl_event ) {}
0964     // ------------------------------------------- //
0965 public:
0966     template <typename ...Args>
0967     void send_kernel( opencl_device device, const kernel_type& kernel, const range_type& work_size, Args&... args ) {
0968         std::array<cl_event, sizeof...(Args)> events;
0969         int num_events = 0;
0970         int place = 0;
0971         process_arg_list( kernel, events, num_events, place, args... );
0972 
0973         const cl_event e = send_kernel_impl( device, kernel.my_cl_kernel, work_size, num_events, events.data() );
0974 
0975         update_arg_list(e, args...);
0976 
0977         // Release our own reference to cl_event
0978         enforce_cl_retcode( clReleaseEvent(e), "Failed to release an event" );
0979     }
0980 
0981     // ------------------------------------------- //
0982     template <typename T, typename ...Rest>
0983     void send_data(opencl_device device, T& t, Rest&... args) {
0984         send_if_memory_object( device, t );
0985         send_data( device, args... );
0986     }
0987 
0988     void send_data(opencl_device) {}
0989     // ------------------------------------------- //
0990 
0991 private:
0992     cl_event send_kernel_impl( opencl_device device, const cl_kernel& kernel,
0993         const range_type& work_size, cl_uint num_events, cl_event* event_list ) {
0994         const typename range_type::nd_range_type g_offset = { { 0, 0, 0 } };
0995         const typename range_type::nd_range_type& g_size = work_size.global_range();
0996         const typename range_type::nd_range_type& l_size = work_size.local_range();
0997         cl_uint s;
0998         for ( s = 1; s < 3 && g_size[s] != size_t(-1); ++s) {}
0999         cl_event event;
1000         enforce_cl_retcode(
1001             clEnqueueNDRangeKernel( device.my_cl_command_queue, kernel, s,
1002                 g_offset.data(), g_size.data(), l_size[0] ? l_size.data() : NULL, num_events, num_events ? event_list : NULL, &event ),
1003             "Failed to enqueue a kernel" );
1004         return event;
1005     }
1006 
1007     // ------------------------------------------- //
1008     template <typename T>
1009     bool get_event_from_one_arg( cl_event&, const T& ) {
1010         return false;
1011     }
1012 
1013     template <typename T, typename F>
1014     bool get_event_from_one_arg( cl_event& e, const opencl_async_msg<T, F>& msg) {
1015         cl_event const *e_ptr = msg.get_event();
1016 
1017         if ( e_ptr != NULL ) {
1018             e = *e_ptr;
1019             return true;
1020         }
1021 
1022         return false;
1023     }
1024 
1025     template <typename T, typename ...Rest>
1026     bool get_event_from_args( cl_event& e, const T& t, const Rest&... args ) {
1027         if ( get_event_from_one_arg( e, t ) ) {
1028             return true;
1029         }
1030 
1031         return get_event_from_args( e, args... );
1032     }
1033 
1034     bool get_event_from_args( cl_event& ) {
1035         return false;
1036     }
1037     // ------------------------------------------- //
1038 
1039     struct finalize_fn : tbb::internal::no_assign {
1040         virtual ~finalize_fn() {}
1041         virtual void operator() () {}
1042     };
1043 
1044     template<typename Fn>
1045     struct finalize_fn_leaf : public finalize_fn {
1046         Fn my_fn;
1047         finalize_fn_leaf(Fn fn) : my_fn(fn) {}
1048         void operator() () __TBB_override { my_fn(); }
1049     };
1050 
1051     static void CL_CALLBACK finalize_callback(cl_event, cl_int event_command_exec_status, void *data) {
1052         tbb::internal::suppress_unused_warning(event_command_exec_status);
1053         __TBB_ASSERT(event_command_exec_status == CL_COMPLETE, NULL);
1054 
1055         finalize_fn * const fn_ptr = static_cast<finalize_fn*>(data);
1056         __TBB_ASSERT(fn_ptr != NULL, "Invalid finalize function pointer");
1057         (*fn_ptr)();
1058 
1059         // Function pointer was created by 'new' & this callback must be called once only
1060         delete fn_ptr;
1061     }
1062 public:
1063     template <typename FinalizeFn, typename ...Args>
1064     void finalize( opencl_device device, FinalizeFn fn, Args&... args ) {
1065         cl_event e;
1066 
1067         if ( get_event_from_args( e, args... ) ) {
1068             enforce_cl_retcode( clSetEventCallback( e, CL_COMPLETE, finalize_callback,
1069                 new finalize_fn_leaf<FinalizeFn>(fn) ), "Failed to set a callback" );
1070         }
1071 
1072         enforce_cl_retcode( clFlush( device.my_cl_command_queue ), "Failed to flush an OpenCL command queue" );
1073     }
1074 
1075     const opencl_device_list& devices() {
1076         std::call_once( my_once_flag, &opencl_factory::init_once, this );
1077         return my_devices;
1078     }
1079 
1080 private:
1081     bool is_same_context( opencl_device::device_id_type d1, opencl_device::device_id_type d2 ) {
1082         __TBB_ASSERT( d1 != opencl_device::unknown && d2 != opencl_device::unknown, NULL );
1083         // Currently, factory supports only one context so if the both devices are not host it means the are in the same context.
1084         if ( d1 != opencl_device::host && d2 != opencl_device::host )
1085             return true;
1086         return d1 == d2;
1087     }
1088 private:
1089     opencl_factory( const opencl_factory& );
1090     opencl_factory& operator=(const opencl_factory&);
1091 
1092     cl_context context() {
1093         std::call_once( my_once_flag, &opencl_factory::init_once, this );
1094         return my_cl_context;
1095     }
1096 
1097     void init_once() {
1098         {
1099             tbb::spin_mutex::scoped_lock lock(my_devices_mutex);
1100             if (!my_devices.size())
1101                 my_devices = DeviceFilter()( opencl_info::available_devices() );
1102         }
1103 
1104         enforce_cl_retcode(my_devices.size() ? CL_SUCCESS : CL_INVALID_DEVICE, "No devices in the device list");
1105         cl_platform_id platform_id = my_devices.begin()->platform_id();
1106         for (opencl_device_list::iterator it = ++my_devices.begin(); it != my_devices.end(); ++it)
1107             enforce_cl_retcode(it->platform_id() == platform_id ? CL_SUCCESS : CL_INVALID_PLATFORM, "All devices should be in the same platform");
1108 
1109         std::vector<cl_device_id> cl_device_ids;
1110         for (auto d = my_devices.begin(); d != my_devices.end(); ++d) {
1111             cl_device_ids.push_back((*d).my_cl_device_id);
1112         }
1113 
1114         cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, (cl_context_properties)NULL };
1115         cl_int err;
1116         cl_context ctx = clCreateContext(context_properties,
1117             (cl_uint)cl_device_ids.size(),
1118             cl_device_ids.data(),
1119             NULL, NULL, &err);
1120         enforce_cl_retcode(err, "Failed to create context");
1121         my_cl_context = ctx;
1122 
1123         size_t device_counter = 0;
1124         for (auto d = my_devices.begin(); d != my_devices.end(); d++) {
1125             (*d).my_device_id = device_counter++;
1126             cl_int err2;
1127             cl_command_queue cq;
1128 #if CL_VERSION_2_0
1129             if ((*d).major_version() >= 2) {
1130                 if ((*d).out_of_order_exec_mode_on_host_present()) {
1131                     cl_queue_properties props[] = { CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0 };
1132                     cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1133                 } else {
1134                     cl_queue_properties props[] = { 0 };
1135                     cq = clCreateCommandQueueWithProperties(ctx, (*d).my_cl_device_id, props, &err2);
1136                 }
1137             } else
1138 #endif
1139             {
1140                 cl_command_queue_properties props = (*d).out_of_order_exec_mode_on_host_present() ? CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE : 0;
1141                 // Suppress "declared deprecated" warning for the next line.
1142 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1143 #pragma GCC diagnostic push
1144 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
1145 #endif
1146 #if _MSC_VER || __INTEL_COMPILER
1147 #pragma warning( push )
1148 #if __INTEL_COMPILER
1149 #pragma warning (disable: 1478)
1150 #else
1151 #pragma warning (disable: 4996)
1152 #endif
1153 #endif
1154                 cq = clCreateCommandQueue(ctx, (*d).my_cl_device_id, props, &err2);
1155 #if _MSC_VER || __INTEL_COMPILER
1156 #pragma warning( pop )
1157 #endif
1158 #if __TBB_GCC_WARNING_SUPPRESSION_PRESENT
1159 #pragma GCC diagnostic pop
1160 #endif
1161             }
1162             enforce_cl_retcode(err2, "Failed to create command queue");
1163             (*d).my_cl_command_queue = cq;
1164         }
1165     }
1166 
1167     std::once_flag my_once_flag;
1168     opencl_device_list my_devices;
1169     cl_context my_cl_context;
1170 
1171     tbb::spin_mutex my_devices_mutex;
1172 
1173     template <typename Factory>
1174     friend class opencl_program;
1175     template <typename Factory>
1176     friend class opencl_buffer_impl;
1177     template <typename Factory>
1178     friend class opencl_memory;
1179 }; // class opencl_factory
1180 
1181 // TODO: consider this namespace as public API
1182 namespace opencl_info {
1183 
1184 // Default types
1185 
1186 template <typename Factory>
1187 struct default_device_selector {
1188     opencl_device operator()(Factory& f) {
1189         __TBB_ASSERT(!f.devices().empty(), "No available devices");
1190         return *(f.devices().begin());
1191     }
1192 };
1193 
1194 struct default_device_filter {
1195     opencl_device_list operator()(const opencl_device_list &devices) {
1196         opencl_device_list dl;
1197         cl_platform_id platform_id = devices.begin()->platform_id();
1198         for (opencl_device_list::const_iterator it = devices.cbegin(); it != devices.cend(); ++it) {
1199             if (it->platform_id() == platform_id) {
1200                 dl.add(*it);
1201             }
1202         }
1203         return dl;
1204     }
1205 };
1206 
1207 class default_opencl_factory : public opencl_factory < default_device_filter >, tbb::internal::no_copy {
1208 public:
1209     template<typename T> using async_msg_type = opencl_async_msg<T, default_opencl_factory>;
1210 
1211     friend default_opencl_factory& default_factory();
1212 
1213 private:
1214     default_opencl_factory() = default;
1215 };
1216 
1217 inline default_opencl_factory& default_factory() {
1218     static default_opencl_factory default_factory;
1219     return default_factory;
1220 }
1221 
1222 } // namespace opencl_info
1223 
1224 template <typename T, typename Factory>
1225 opencl_buffer<T, Factory>::opencl_buffer( size_t size ) : my_impl( std::make_shared<impl_type>( size*sizeof(T), opencl_info::default_factory() ) ) {}
1226 
1227 
1228 enum class opencl_program_type {
1229     SOURCE,
1230     PRECOMPILED,
1231     SPIR
1232 };
1233 
1234 template <typename Factory = opencl_info::default_opencl_factory>
1235 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_program : tbb::internal::no_assign {
1236 public:
1237     typedef typename Factory::kernel_type kernel_type;
1238 
1239     opencl_program( Factory& factory, opencl_program_type type, const std::string& program_name ) : my_factory( factory ), my_type(type) , my_arg_str( program_name) {}
1240     opencl_program( Factory& factory, const char* program_name ) : opencl_program( factory, std::string( program_name ) ) {}
1241     opencl_program( Factory& factory, const std::string& program_name ) : opencl_program( factory, opencl_program_type::SOURCE, program_name ) {}
1242 
1243     opencl_program( opencl_program_type type, const std::string& program_name ) : opencl_program( opencl_info::default_factory(), type, program_name ) {}
1244     opencl_program( const char* program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1245     opencl_program( const std::string& program_name ) : opencl_program( opencl_info::default_factory(), program_name ) {}
1246     opencl_program( opencl_program_type type ) : opencl_program( opencl_info::default_factory(), type ) {}
1247 
1248     opencl_program( const opencl_program &src ) : my_factory( src.my_factory ), my_type( src.type ), my_arg_str( src.my_arg_str ), my_cl_program( src.my_cl_program ) {
1249         // Set my_do_once_flag to the called state.
1250         std::call_once( my_do_once_flag, [](){} );
1251     }
1252 
1253     kernel_type get_kernel( const std::string& k ) const {
1254         return kernel_type( get_cl_kernel(k), my_factory );
1255     }
1256 
1257 private:
1258     opencl_program( Factory& factory, cl_program program ) : my_factory( factory ), my_cl_program( program ) {
1259         // Set my_do_once_flag to the called state.
1260         std::call_once( my_do_once_flag, [](){} );
1261     }
1262 
1263     cl_kernel get_cl_kernel( const std::string& k ) const {
1264         std::call_once( my_do_once_flag, [this, &k](){ this->init( k ); } );
1265         cl_int err;
1266         cl_kernel kernel = clCreateKernel( my_cl_program, k.c_str(), &err );
1267         enforce_cl_retcode( err, std::string( "Failed to create kernel: " ) + k );
1268         return kernel;
1269     }
1270 
1271     class file_reader {
1272     public:
1273         file_reader( const std::string& filepath ) {
1274             std::ifstream file_descriptor( filepath, std::ifstream::binary );
1275             if ( !file_descriptor.is_open() ) {
1276                 std::string str = std::string( "Could not open file: " ) + filepath;
1277                 std::cerr << str << std::endl;
1278                 throw str;
1279             }
1280             file_descriptor.seekg( 0, file_descriptor.end );
1281             size_t length = size_t( file_descriptor.tellg() );
1282             file_descriptor.seekg( 0, file_descriptor.beg );
1283             my_content.resize( length );
1284             char* begin = &*my_content.begin();
1285             file_descriptor.read( begin, length );
1286             file_descriptor.close();
1287         }
1288         const char* content() { return &*my_content.cbegin(); }
1289         size_t length() { return my_content.length(); }
1290     private:
1291         std::string my_content;
1292     };
1293 
1294     class opencl_program_builder {
1295     public:
1296         typedef void (CL_CALLBACK *cl_callback_type)(cl_program, void*);
1297         opencl_program_builder( Factory& f, const std::string& name, cl_program program,
1298                                 cl_uint num_devices, cl_device_id* device_list,
1299                                 const char* options, cl_callback_type callback,
1300                                 void* user_data ) {
1301             cl_int err = clBuildProgram( program, num_devices, device_list, options,
1302                                          callback, user_data );
1303             if( err == CL_SUCCESS )
1304                 return;
1305             std::string str = std::string( "Failed to build program: " ) + name;
1306             if ( err == CL_BUILD_PROGRAM_FAILURE ) {
1307                 const opencl_device_list &devices = f.devices();
1308                 for ( auto d = devices.begin(); d != devices.end(); ++d ) {
1309                     std::cerr << "Build log for device: " << (*d).name() << std::endl;
1310                     size_t log_size;
1311                     cl_int query_err = clGetProgramBuildInfo(
1312                         program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
1313                         &log_size );
1314                     enforce_cl_retcode( query_err, "Failed to get build log size" );
1315                     if( log_size ) {
1316                         std::vector<char> output;
1317                         output.resize( log_size );
1318                         query_err = clGetProgramBuildInfo(
1319                             program, (*d).my_cl_device_id, CL_PROGRAM_BUILD_LOG,
1320                             output.size(), output.data(), NULL );
1321                         enforce_cl_retcode( query_err, "Failed to get build output" );
1322                         std::cerr << output.data() << std::endl;
1323                     } else {
1324                         std::cerr << "No build log available" << std::endl;
1325                     }
1326                 }
1327             }
1328             enforce_cl_retcode( err, str );
1329         }
1330     };
1331 
1332     class opencl_device_filter {
1333     public:
1334         template<typename Filter>
1335         opencl_device_filter( cl_uint& num_devices, cl_device_id* device_list,
1336                               Filter filter, const char* message ) {
1337             for ( cl_uint i = 0; i < num_devices; ++i )
1338                 if ( filter(device_list[i]) ) {
1339                     device_list[i--] = device_list[--num_devices];
1340                 }
1341             if ( !num_devices )
1342                 enforce_cl_retcode( CL_DEVICE_NOT_AVAILABLE, message );
1343         }
1344     };
1345 
1346     void init( const std::string& ) const {
1347         cl_uint num_devices;
1348         enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_NUM_DEVICES, sizeof( num_devices ), &num_devices, NULL ),
1349             "Failed to get OpenCL context info" );
1350         if ( !num_devices )
1351             enforce_cl_retcode( CL_DEVICE_NOT_FOUND, "No supported devices found" );
1352         cl_device_id *device_list = (cl_device_id *)alloca( num_devices*sizeof( cl_device_id ) );
1353         enforce_cl_retcode( clGetContextInfo( my_factory.context(), CL_CONTEXT_DEVICES, num_devices*sizeof( cl_device_id ), device_list, NULL ),
1354             "Failed to get OpenCL context info" );
1355         const char *options = NULL;
1356         switch ( my_type ) {
1357         case opencl_program_type::SOURCE: {
1358             file_reader fr( my_arg_str );
1359             const char *s[] = { fr.content() };
1360             const size_t l[] = { fr.length() };
1361             cl_int err;
1362             my_cl_program = clCreateProgramWithSource( my_factory.context(), 1, s, l, &err );
1363             enforce_cl_retcode( err, std::string( "Failed to create program: " ) + my_arg_str );
1364             opencl_device_filter(
1365                 num_devices, device_list,
1366                 []( const opencl_device& d ) -> bool {
1367                     return !d.compiler_available() || !d.linker_available();
1368                 }, "No one device supports building program from sources" );
1369             opencl_program_builder(
1370                 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1371                 options, /*callback*/ NULL, /*user data*/NULL );
1372             break;
1373         }
1374         case opencl_program_type::SPIR:
1375             options = "-x spir";
1376         case opencl_program_type::PRECOMPILED: {
1377             file_reader fr( my_arg_str );
1378             std::vector<const unsigned char*> s(
1379                 num_devices, reinterpret_cast<const unsigned char*>(fr.content()) );
1380             std::vector<size_t> l( num_devices, fr.length() );
1381             std::vector<cl_int> bin_statuses( num_devices, -1 );
1382             cl_int err;
1383             my_cl_program = clCreateProgramWithBinary( my_factory.context(), num_devices,
1384                                                        device_list, l.data(), s.data(),
1385                                                        bin_statuses.data(), &err );
1386             if( err != CL_SUCCESS ) {
1387                 std::string statuses_str;
1388                 for (auto st = bin_statuses.begin(); st != bin_statuses.end(); ++st) {
1389                     statuses_str += std::to_string((*st));
1390                 }
1391 
1392                 enforce_cl_retcode( err, std::string( "Failed to create program, error " + std::to_string( err ) + " : " ) + my_arg_str +
1393                                     std::string( ", binary_statuses = " ) + statuses_str );
1394             }
1395             opencl_program_builder(
1396                 my_factory, my_arg_str, my_cl_program, num_devices, device_list,
1397                 options, /*callback*/ NULL, /*user data*/NULL );
1398             break;
1399         }
1400         default:
1401             __TBB_ASSERT( false, "Unsupported program type" );
1402         }
1403     }
1404 
1405     Factory& my_factory;
1406     opencl_program_type my_type;
1407     std::string my_arg_str;
1408     mutable cl_program my_cl_program;
1409     mutable std::once_flag my_do_once_flag;
1410 
1411     template <typename DeviceFilter>
1412     friend class opencl_factory;
1413 
1414     friend class Factory::kernel;
1415 };
1416 
1417 template<typename... Args>
1418 class __TBB_DEPRECATED_IN_VERBOSE_MODE opencl_node;
1419 
1420 template<typename JP, typename Factory, typename... Ports>
1421 class __TBB_DEPRECATED_IN_VERBOSE_MODE
1422 opencl_node< tuple<Ports...>, JP, Factory > : public streaming_node< tuple<Ports...>, JP, Factory > {
1423     typedef streaming_node < tuple<Ports...>, JP, Factory > base_type;
1424 public:
1425     typedef typename base_type::kernel_type kernel_type;
1426 
1427     opencl_node( graph &g, const kernel_type& kernel )
1428         : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1429     {
1430         tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1431     }
1432 
1433     opencl_node( graph &g, const kernel_type& kernel, Factory &f )
1434         : base_type( g, kernel, opencl_info::default_device_selector <Factory >(), f )
1435     {
1436         tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1437     }
1438 
1439     template <typename DeviceSelector>
1440     opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d, Factory &f)
1441         : base_type( g, kernel, d, f)
1442     {
1443         tbb::internal::fgt_multiinput_multioutput_node( CODEPTR(), tbb::internal::FLOW_OPENCL_NODE, this, &this->my_graph );
1444     }
1445 };
1446 
1447 template<typename JP, typename... Ports>
1448 class __TBB_DEPRECATED_IN_VERBOSE_MODE
1449 opencl_node< tuple<Ports...>, JP > : public opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > {
1450     typedef opencl_node < tuple<Ports...>, JP, opencl_info::default_opencl_factory > base_type;
1451 public:
1452     typedef typename base_type::kernel_type kernel_type;
1453 
1454     opencl_node( graph &g, const kernel_type& kernel )
1455         : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1456     {}
1457 
1458     template <typename DeviceSelector>
1459     opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1460         : base_type( g, kernel, d, opencl_info::default_factory() )
1461     {}
1462 };
1463 
1464 template<typename... Ports>
1465 class __TBB_DEPRECATED_IN_VERBOSE_MODE
1466 opencl_node< tuple<Ports...> > : public opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > {
1467     typedef opencl_node < tuple<Ports...>, queueing, opencl_info::default_opencl_factory > base_type;
1468 public:
1469     typedef typename base_type::kernel_type kernel_type;
1470 
1471     opencl_node( graph &g, const kernel_type& kernel )
1472         : base_type( g, kernel, opencl_info::default_device_selector< opencl_info::default_opencl_factory >(), opencl_info::default_factory() )
1473     {}
1474 
1475     template <typename DeviceSelector>
1476     opencl_node( graph &g, const kernel_type& kernel, DeviceSelector d )
1477         : base_type( g, kernel, d, opencl_info::default_factory() )
1478     {}
1479 };
1480 
1481 } // namespace interfaceX
1482 
1483 using interface11::opencl_node;
1484 using interface11::read_only;
1485 using interface11::read_write;
1486 using interface11::write_only;
1487 using interface11::opencl_buffer;
1488 using interface11::opencl_subbuffer;
1489 using interface11::opencl_device;
1490 using interface11::opencl_device_list;
1491 using interface11::opencl_program;
1492 using interface11::opencl_program_type;
1493 using interface11::opencl_async_msg;
1494 using interface11::opencl_factory;
1495 using interface11::opencl_range;
1496 
1497 } // namespace flow
1498 } // namespace tbb
1499 #endif /* __TBB_PREVIEW_OPENCL_NODE */
1500 
1501 #include "internal/_warning_suppress_disable_notice.h"
1502 #undef __TBB_flow_graph_opencl_node_H_include_area
1503 
1504 #endif // __TBB_flow_graph_opencl_node_H