arrow/c/dlpack_abi.h

0001 // Taken from:
0002 // https://github.com/dmlc/dlpack/blob/ca4d00ad3e2e0f410eeab3264d21b8a39397f362/include/dlpack/dlpack.h
0003 /*!
0004  *  Copyright (c) 2017 by Contributors
0005  * \file dlpack.h
0006  * \brief The common header of DLPack.
0007  */
0008 #ifndef DLPACK_DLPACK_H_
0009 #define DLPACK_DLPACK_H_
0010
/*!
 * \brief Linkage specifier for declarations shared between C and C++.
 *
 * Expands to extern "C" when the header is compiled as C++ and to nothing
 * when it is compiled as C.
 */
#ifndef __cplusplus
#define DLPACK_EXTERN_C
#else
#define DLPACK_EXTERN_C extern "C"
#endif
0019
/*! \brief Major version of DLPack described by this header. */
#define DLPACK_MAJOR_VERSION 1

/*! \brief Minor version of DLPack described by this header. */
#define DLPACK_MINOR_VERSION 0
0025
/*!
 * \brief Symbol import/export decoration, only meaningful on Windows.
 *
 * On Windows this is __declspec(dllexport) when DLPACK_EXPORTS is defined
 * and __declspec(dllimport) otherwise. On every other platform it expands
 * to nothing.
 */
#if defined(_WIN32) && defined(DLPACK_EXPORTS)
#define DLPACK_DLL __declspec(dllexport)
#elif defined(_WIN32)
#define DLPACK_DLL __declspec(dllimport)
#else
#define DLPACK_DLL
#endif
0036
0037 #include <stddef.h>
0038 #include <stdint.h>
0039
0040 #ifdef __cplusplus
0041 extern "C" {
0042 #endif
0043
/*!
 * \brief Version stamp carried by DLPack exchange structures.
 *
 * The major number changes whenever the data layout of the ABI
 * (DLManagedTensorVersioned) changes.
 *
 * The minor number changes when new code is added, such as a new device
 * type, while the ABI is kept the same. Minor version updates indicate the
 * addition of enumeration values.
 *
 * Rules for a consumer that obtains a DLPack tensor:
 *  - major != DLPACK_MAJOR_VERSION: the memory layout will have changed, so
 *    it is not safe to access any other fields. The consumer must call the
 *    deleter (and it is safe to do so).
 *  - minor version mismatch only: the tensor can be safely used as long as
 *    the consumer knows how to interpret all fields.
 */
typedef struct {
  /*! \brief Major version number of DLPack. */
  uint32_t major;
  /*! \brief Minor version number of DLPack. */
  uint32_t minor;
} DLPackVersion;
0069
/*!
 * \brief Device kinds that can appear in DLDevice.
 *
 * When compiled as C++ the enumeration has a fixed int32_t underlying type.
 * The values 5 and 6 are not assigned in this header.
 */
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
  /*! \brief CPU device. */
  kDLCPU = 1,
  /*! \brief CUDA GPU device. */
  kDLCUDA = 2,
  /*! \brief Pinned CUDA CPU memory allocated by cudaMallocHost. */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer. */
  kDLVPI = 9,
  /*! \brief ROCm GPUs for AMD GPUs. */
  kDLROCM = 10,
  /*! \brief Pinned ROCm CPU memory allocated by hipMallocHost. */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type, used to quickly test an
   * extension device. The semantics can differ depending on the
   * implementation.
   */
  kDLExtDev = 12,
  /*! \brief CUDA managed/unified memory allocated by cudaMallocManaged. */
  kDLCUDAManaged = 13,
  /*!
   * \brief Unified shared memory allocated on a oneAPI non-partitioned
   * device. A call to the oneAPI runtime is required to determine the
   * device type, the USM allocation type and the sycl context it is bound
   * to.
   */
  kDLOneAPI = 14,
  /*! \brief GPU support for next generation WebGPU standard. */
  kDLWebGPU = 15,
  /*! \brief Qualcomm Hexagon DSP. */
  kDLHexagon = 16,
} DLDeviceType;
0122
0123 /*!
0124  * \brief A Device for Tensor and operator.
0125  */
0126 typedef struct {
0127   /*! \brief The device type used in the device. */
0128   DLDeviceType device_type;
0129   /*!
0130    * \brief The device index.
0131    * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
0132    */
0133   int32_t device_id;
0134 } DLDevice;
0135
/*!
 * \brief Type codes that may be stored in DLDataType::code.
 */
typedef enum {
  /*! \brief Signed integer. */
  kDLInt = 0U,
  /*! \brief Unsigned integer. */
  kDLUInt = 1U,
  /*! \brief IEEE floating point. */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   * The exchange is only well-defined if the frameworks involved agree on
   * the handle data type.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16. */
  kDLBfloat = 4U,
  /*!
   * \brief Complex number
   * (C/C++/Python layout: compact struct per complex number).
   */
  kDLComplex = 5U,
  /*! \brief Boolean. */
  kDLBool = 6U,
} DLDataTypeCode;
0161
/*!
 * \brief Describes the element type a tensor can hold.
 *
 * The data type is assumed to follow the native endian-ness. An explicit
 * error message should be raised when attempting to export an array with
 * non-native endianness.
 *
 * Examples:
 *   - float: type_code = 2, bits = 32, lanes = 1
 *   - float4 (vectorized 4 float): type_code = 2, bits = 32, lanes = 4
 *   - int8: type_code = 0, bits = 8, lanes = 1
 *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 *   - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
 *     convention, the underlying storage size of bool is 8 bits)
 */
typedef struct {
  /*!
   * \brief Type code of the base type.
   * Stored as uint8_t rather than DLDataTypeCode for minimal memory
   * footprint, but the value should be one of the DLDataTypeCode enum
   * values.
   */
  uint8_t code;
  /*! \brief Number of bits; common choices are 8, 16, 32. */
  uint8_t bits;
  /*! \brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DLDataType;
0189
0190 /*!
0191  * \brief Plain C Tensor object, does not manage memory.
0192  */
0193 typedef struct {
0194   /*!
0195    * \brief The data pointer points to the allocated data. This will be CUDA
0196    * device pointer or cl_mem handle in OpenCL. It may be opaque on some device
0197    * types. This pointer is always aligned to 256 bytes as in CUDA. The
0198    * `byte_offset` field should be used to point to the beginning of the data.
0199    *
0200    * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
0201    * TVM, perhaps others) do not adhere to this 256 byte aligment requirement
0202    * on CPU/CUDA/ROCm, and always use `byte_offset=0`.  This must be fixed
0203    * (after which this note will be updated); at the moment it is recommended
0204    * to not rely on the data pointer being correctly aligned.
0205    *
0206    * For given DLTensor, the size of memory required to store the contents of
0207    * data is calculated as follows:
0208    *
0209    * \code{.c}
0210    * static inline size_t GetDataSize(const DLTensor* t) {
0211    *   size_t size = 1;
0212    *   for (tvm_index_t i = 0; i < t->ndim; ++i) {
0213    *     size *= t->shape[i];
0214    *   }
0215    *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
0216    *   return size;
0217    * }
0218    * \endcode
0219    */
0220   void* data;
0221   /*! \brief The device of the tensor */
0222   DLDevice device;
0223   /*! \brief Number of dimensions */
0224   int32_t ndim;
0225   /*! \brief The data type of the pointer*/
0226   DLDataType dtype;
0227   /*! \brief The shape of the tensor */
0228   int64_t* shape;
0229   /*!
0230    * \brief strides of the tensor (in number of elements, not bytes)
0231    *  can be NULL, indicating tensor is compact and row-majored.
0232    */
0233   int64_t* strides;
0234   /*! \brief The offset in bytes to the beginning pointer to data */
0235   uint64_t byte_offset;
0236 } DLTensor;
0237
0238 /*!
0239  * \brief C Tensor object, manage memory of DLTensor. This data structure is
0240  *  intended to facilitate the borrowing of DLTensor by another framework. It is
0241  *  not meant to transfer the tensor. When the borrowing framework doesn't need
0242  *  the tensor, it should call the deleter to notify the host that the resource
0243  *  is no longer needed.
0244  *
0245  * \note This data structure is used as Legacy DLManagedTensor
0246  *       in DLPack exchange and is deprecated after DLPack v0.8
0247  *       Use DLManagedTensorVersioned instead.
0248  *       This data structure may get renamed or deleted in future versions.
0249  *
0250  * \sa DLManagedTensorVersioned
0251  */
0252 typedef struct DLManagedTensor {
0253   /*! \brief DLTensor which is being memory managed */
0254   DLTensor dl_tensor;
0255   /*! \brief the context of the original host framework of DLManagedTensor in
0256    *   which DLManagedTensor is used in the framework. It can also be NULL.
0257    */
0258   void* manager_ctx;
0259   /*!
0260    * \brief Destructor - this should be called
0261    * to destruct the manager_ctx  which backs the DLManagedTensor. It can be
0262    * NULL if there is no way for the caller to provide a reasonable destructor.
0263    * The destructors deletes the argument self as well.
0264    */
0265   void (*deleter)(struct DLManagedTensor* self);
0266 } DLManagedTensor;
0267
/* Bit masks used in the `flags` field of DLManagedTensorVersioned. */

/*!
 * \brief bit mask to indicate that the tensor is read only.
 *
 * The constant is built with UINT64_C so that it is 64 bits wide on every
 * platform, matching the uint64_t `flags` field. With a plain `1UL` the mask
 * is only 32 bits wide where `unsigned long` is 32 bits, and clearing the
 * flag with `flags &= ~DLPACK_FLAG_BITMASK_READ_ONLY` would then also clear
 * the upper 32 flag bits.
 */
#define DLPACK_FLAG_BITMASK_READ_ONLY (UINT64_C(1) << 0)
0272
0273 /*!
0274  * \brief A versioned and managed C Tensor object, manage memory of DLTensor.
0275  *
0276  * This data structure is intended to facilitate the borrowing of DLTensor by
0277  * another framework. It is not meant to transfer the tensor. When the borrowing
0278  * framework doesn't need the tensor, it should call the deleter to notify the
0279  * host that the resource is no longer needed.
0280  *
0281  * \note This is the current standard DLPack exchange data structure.
0282  */
0283 struct DLManagedTensorVersioned {
0284   /*!
0285    * \brief The API and ABI version of the current managed Tensor
0286    */
0287   DLPackVersion version;
0288   /*!
0289    * \brief the context of the original host framework.
0290    *
0291    * Stores DLManagedTensorVersioned is used in the
0292    * framework. It can also be NULL.
0293    */
0294   void* manager_ctx;
0295   /*!
0296    * \brief Destructor.
0297    *
0298    * This should be called to destruct manager_ctx which holds the
0299    * DLManagedTensorVersioned. It can be NULL if there is no way for the caller to provide
0300    * a reasonable destructor. The destructors deletes the argument self as well.
0301    */
0302   void (*deleter)(struct DLManagedTensorVersioned* self);
0303   /*!
0304    * \brief Additional bitmask flags information about the tensor.
0305    *
0306    * By default the flags should be set to 0.
0307    *
0308    * \note Future ABI changes should keep everything until this field
0309    *       stable, to ensure that deleter can be correctly called.
0310    *
0311    * \sa DLPACK_FLAG_BITMASK_READ_ONLY
0312    */
0313   uint64_t flags;
0314   /*! \brief DLTensor which is being memory managed */
0315   DLTensor dl_tensor;
0316 };
0317
0318 #ifdef __cplusplus
0319 }  // DLPACK_EXTERN_C
0320 #endif
0321 #endif  // DLPACK_DLPACK_H_