/*!
 *  Copyright (c) 2017 by Contributors
 * \file dlpack.h
 * \brief The common header of DLPack.
 */
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_

/**
 * \brief Compatibility with C++
 */
#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif

/*! \brief The current version of dlpack */
#define DLPACK_VERSION 80

/*! \brief The current ABI version of dlpack */
#define DLPACK_ABI_VERSION 1

/*! \brief DLPACK_DLL prefix for Windows */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif
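
/*
 * Illustrative usage (not part of this header): DLPACK_EXTERN_C and
 * DLPACK_DLL are meant to be combined when declaring an exported C API.
 * The function name below is hypothetical.
 *
 * \code{.c}
 * // Declares a C-linkage function that is exported from a Windows DLL
 * // when DLPACK_EXPORTS is defined, and imported otherwise; on other
 * // platforms both macros expand to nothing harmful.
 * DLPACK_EXTERN_C DLPACK_DLL void MyFrameworkExportTensor(void);
 * \endcode
 */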

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif
/*!
 * \brief The device type in DLDevice.
 */
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
  /*! \brief CPU device */
  kDLCPU = 1,
  /*! \brief CUDA GPU device */
  kDLCUDA = 2,
  /*!
   * \brief Pinned CUDA CPU memory allocated by cudaMallocHost
   */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next-generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer */
  kDLVPI = 9,
  /*! \brief ROCm for AMD GPUs */
  kDLROCM = 10,
  /*!
   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
   */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type,
   * used to quickly test extension devices.
   * The semantics can differ depending on the implementation.
   */
  kDLExtDev = 12,
  /*!
   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
   */
  kDLCUDAManaged = 13,
  /*!
   * \brief Unified shared memory allocated on a oneAPI non-partitioned
   * device. A call to the oneAPI runtime is required to determine the device
   * type, the USM allocation type, and the SYCL context it is bound to.
   */
  kDLOneAPI = 14,
  /*! \brief GPU support for the next-generation WebGPU standard. */
  kDLWebGPU = 15,
  /*! \brief Qualcomm Hexagon DSP */
  kDLHexagon = 16,
} DLDeviceType;

/*!
 * \brief A Device for Tensor and operator.
 */
typedef struct {
  /*! \brief The device type used in the device. */
  DLDeviceType device_type;
  /*!
   * \brief The device index.
   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
   */
  int32_t device_id;
} DLDevice;
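
/*
 * Illustrative example (not part of the DLPack API): constructing DLDevice
 * values for the host CPU and for the second CUDA GPU. Designated
 * initializers assume C99 or later.
 *
 * \code{.c}
 * DLDevice cpu_dev  = {.device_type = kDLCPU,  .device_id = 0};
 * DLDevice cuda_dev = {.device_type = kDLCUDA, .device_id = 1};
 * \endcode
 */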

/*!
 * \brief The type code options of DLDataType.
 */
typedef enum {
  /*! \brief signed integer */
  kDLInt = 0U,
  /*! \brief unsigned integer */
  kDLUInt = 1U,
  /*! \brief IEEE floating point */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16 */
  kDLBfloat = 4U,
  /*!
   * \brief complex number
   * (C/C++/Python layout: compact struct per complex number)
   */
  kDLComplex = 5U,
  /*! \brief boolean */
  kDLBool = 6U,
} DLDataTypeCode;

/*!
 * \brief The data type the tensor can hold. The data type is assumed to follow the
 * native endianness. An explicit error message should be raised when attempting to
 * export an array with non-native endianness.
 *
 *  Examples
 *   - float: type_code = 2, bits = 32, lanes = 1
 *   - float4 (vectorized 4x float): type_code = 2, bits = 32, lanes = 4
 *   - int8: type_code = 0, bits = 8, lanes = 1
 *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 *   - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
 *     convention, the underlying storage size of bool is 8 bits)
 */
typedef struct {
  /*!
   * \brief Type code of base types.
   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
   * footprint, but the value should be one of the DLDataTypeCode enum values.
   */
  uint8_t code;
  /*!
   * \brief Number of bits; common choices are 8, 16, 32.
   */
  uint8_t bits;
  /*! \brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DLDataType;
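
/*
 * Illustrative example (not part of the DLPack API): encoding the dtypes
 * listed in the examples above as DLDataType values, using positional
 * initialization in field order {code, bits, lanes}.
 *
 * \code{.c}
 * DLDataType f32   = {kDLFloat, 32, 1};    // float
 * DLDataType f32x4 = {kDLFloat, 32, 4};    // float4 (vectorized 4x float)
 * DLDataType i8    = {kDLInt, 8, 1};       // int8
 * DLDataType c64   = {kDLComplex, 64, 1};  // std::complex<float>
 * DLDataType b8    = {kDLBool, 8, 1};      // bool, stored in 8 bits
 * \endcode
 */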

/*!
 * \brief Plain C Tensor object, does not manage memory.
 */
typedef struct {
  /*!
   * \brief The data pointer points to the allocated data. This will be a CUDA
   * device pointer or cl_mem handle in OpenCL. It may be opaque on some device
   * types. This pointer is always aligned to 256 bytes as in CUDA. The
   * `byte_offset` field should be used to point to the beginning of the data.
   *
   * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
   * TVM, perhaps others) do not adhere to this 256 byte alignment requirement
   * on CPU/CUDA/ROCm, and always use `byte_offset=0`.  This must be fixed
   * (after which this note will be updated); at the moment it is recommended
   * not to rely on the data pointer being correctly aligned.
   *
   * For a given DLTensor, the size of memory required to store the contents of
   * data is calculated as follows:
   *
   * \code{.c}
   * static inline size_t GetDataSize(const DLTensor* t) {
   *   size_t size = 1;
   *   for (int32_t i = 0; i < t->ndim; ++i) {
   *     size *= t->shape[i];
   *   }
   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
   *   return size;
   * }
   * \endcode
   */
  void* data;
  /*! \brief The device of the tensor */
  DLDevice device;
  /*! \brief Number of dimensions */
  int32_t ndim;
  /*! \brief The data type of the pointer */
  DLDataType dtype;
  /*! \brief The shape of the tensor */
  int64_t* shape;
  /*!
   * \brief strides of the tensor (in number of elements, not bytes);
   *  can be NULL, indicating the tensor is compact and row-major.
   */
  int64_t* strides;
  /*! \brief The offset in bytes to the beginning pointer to data */
  uint64_t byte_offset;
} DLTensor;
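
/*
 * Illustrative example (not part of the DLPack API): wrapping an existing
 * 3x4 row-major float buffer in a DLTensor. The `shape` array must outlive
 * the tensor, and `strides = NULL` relies on the compact row-major
 * convention described above.
 *
 * \code{.c}
 * float buffer[3 * 4];
 * int64_t shape[2] = {3, 4};
 * DLTensor t;
 * t.data = buffer;               // note: a stack buffer is typically not
 *                                // 256-byte aligned; see the note above
 * t.device.device_type = kDLCPU;
 * t.device.device_id = 0;
 * t.ndim = 2;
 * t.dtype.code = kDLFloat;
 * t.dtype.bits = 32;
 * t.dtype.lanes = 1;
 * t.shape = shape;
 * t.strides = NULL;              // compact, row-major
 * t.byte_offset = 0;
 * // GetDataSize(&t) == 3 * 4 * sizeof(float) == 48 bytes
 * \endcode
 */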

/*!
 * \brief C Tensor object that manages the memory of a DLTensor. This data
 *  structure is intended to facilitate the borrowing of a DLTensor by another
 *  framework. It is not meant to transfer the tensor. When the borrowing
 *  framework no longer needs the tensor, it should call the deleter to notify
 *  the host that the resource is no longer needed.
 */
typedef struct DLManagedTensor {
  /*! \brief DLTensor which is being memory managed */
  DLTensor dl_tensor;
  /*! \brief The context of the original host framework in which this
   *   DLManagedTensor is used. It can also be NULL.
   */
  void * manager_ctx;
  /*! \brief Destructor signature void (*)(void*) - this should be called
   *   to destruct the manager_ctx which holds the DLManagedTensor. It can be
   *   NULL if there is no way for the caller to provide a reasonable
   *   destructor. The destructor deletes the argument self as well.
   */
  void (*deleter)(struct DLManagedTensor * self);
} DLManagedTensor;
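
/*
 * Illustrative sketch (not part of the DLPack API): a producer wrapping a
 * heap allocation in a DLManagedTensor, with a deleter that frees the data,
 * the shape array, and the struct itself. The consumer calls `deleter`
 * exactly once when it no longer needs the tensor. All names here are
 * hypothetical, and <stdlib.h> is assumed for malloc/free.
 *
 * \code{.c}
 * static void ExampleDeleter(struct DLManagedTensor* self) {
 *   free(self->dl_tensor.data);   // release the tensor storage
 *   free(self->dl_tensor.shape);  // release the shape array
 *   free(self);                   // the deleter deletes `self` as well
 * }
 *
 * // Producer side (error handling omitted):
 * //   DLManagedTensor* mt = malloc(sizeof(*mt));
 * //   ... fill mt->dl_tensor with heap-allocated data and shape ...
 * //   mt->manager_ctx = NULL;
 * //   mt->deleter = ExampleDeleter;
 * // Consumer side, when done with the tensor:
 * //   mt->deleter(mt);
 * \endcode
 */
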
#ifdef __cplusplus
}  // DLPACK_EXTERN_C
#endif
#endif  // DLPACK_DLPACK_H_