/*!
 *  Copyright (c) 2017 by Contributors
 * \file dlpack.h
 * \brief The common header of DLPack.
 */
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_

/**
 * \brief Compatibility with C++
 */
#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif

/*! \brief The current version of dlpack */
#define DLPACK_VERSION 80

/*! \brief The current ABI version of dlpack */
#define DLPACK_ABI_VERSION 1

/*! \brief DLPACK_DLL prefix for Windows */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif
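
/*
 * Illustrative usage (not part of this header): DLPACK_EXTERN_C and
 * DLPACK_DLL are meant to be combined when declaring an exported C API.
 * The function name below is hypothetical.
 *
 * \code{.c}
 * // Declares a C-linkage function that is exported from a Windows DLL
 * // when DLPACK_EXPORTS is defined, and imported otherwise; on other
 * // platforms both macros expand to nothing harmful.
 * DLPACK_EXTERN_C DLPACK_DLL void MyFrameworkExportTensor(void);
 * \endcode
 */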

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif
/*!
 * \brief The device type in DLDevice.
 */
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
  /*! \brief CPU device */
  kDLCPU = 1,
  /*! \brief CUDA GPU device */
  kDLCUDA = 2,
  /*!
   * \brief Pinned CUDA CPU memory allocated by cudaMallocHost
   */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next-generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer */
  kDLVPI = 9,
  /*! \brief ROCm for AMD GPUs */
  kDLROCM = 10,
  /*!
   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
   */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type,
   * used to quickly test extension devices.
   * The semantics can differ depending on the implementation.
   */
  kDLExtDev = 12,
  /*!
   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
   */
  kDLCUDAManaged = 13,
  /*!
   * \brief Unified shared memory allocated on a oneAPI non-partitioned
   * device. A call to the oneAPI runtime is required to determine the device
   * type, the USM allocation type, and the SYCL context it is bound to.
   */
  kDLOneAPI = 14,
  /*! \brief GPU support for the next-generation WebGPU standard. */
  kDLWebGPU = 15,
  /*! \brief Qualcomm Hexagon DSP */
  kDLHexagon = 16,
} DLDeviceType;

/*!
 * \brief A Device for Tensor and operator.
 */
typedef struct {
  /*! \brief The device type used in the device. */
  DLDeviceType device_type;
  /*!
   * \brief The device index.
   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
   */
  int32_t device_id;
} DLDevice;
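
/*
 * Illustrative example (not part of the DLPack API): constructing DLDevice
 * values for the host CPU and for the second CUDA GPU. Designated
 * initializers assume C99 or later.
 *
 * \code{.c}
 * DLDevice cpu_dev  = {.device_type = kDLCPU,  .device_id = 0};
 * DLDevice cuda_dev = {.device_type = kDLCUDA, .device_id = 1};
 * \endcode
 */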

/*!
 * \brief The type code options of DLDataType.
 */
typedef enum {
  /*! \brief signed integer */
  kDLInt = 0U,
  /*! \brief unsigned integer */
  kDLUInt = 1U,
  /*! \brief IEEE floating point */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16 */
  kDLBfloat = 4U,
  /*!
   * \brief complex number
   * (C/C++/Python layout: compact struct per complex number)
   */
  kDLComplex = 5U,
  /*! \brief boolean */
  kDLBool = 6U,
} DLDataTypeCode;

/*!
 * \brief The data type the tensor can hold. The data type is assumed to follow the
 * native endianness. An explicit error message should be raised when attempting to
 * export an array with non-native endianness.
 *
 *  Examples
 *   - float: type_code = 2, bits = 32, lanes = 1
 *   - float4 (vectorized 4x float): type_code = 2, bits = 32, lanes = 4
 *   - int8: type_code = 0, bits = 8, lanes = 1
 *   - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 *   - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
 *     convention, the underlying storage size of bool is 8 bits)
 */
typedef struct {
  /*!
   * \brief Type code of base types.
   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
   * footprint, but the value should be one of the DLDataTypeCode enum values.
   */
  uint8_t code;
  /*!
   * \brief Number of bits; common choices are 8, 16, 32.
   */
  uint8_t bits;
  /*! \brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DLDataType;
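
/*
 * Illustrative example (not part of the DLPack API): encoding the dtypes
 * listed in the examples above as DLDataType values, using positional
 * initialization in field order {code, bits, lanes}.
 *
 * \code{.c}
 * DLDataType f32   = {kDLFloat, 32, 1};    // float
 * DLDataType f32x4 = {kDLFloat, 32, 4};    // float4 (vectorized 4x float)
 * DLDataType i8    = {kDLInt, 8, 1};       // int8
 * DLDataType c64   = {kDLComplex, 64, 1};  // std::complex<float>
 * DLDataType b8    = {kDLBool, 8, 1};      // bool, stored in 8 bits
 * \endcode
 */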

/*!
 * \brief Plain C Tensor object, does not manage memory.
 */
typedef struct {
  /*!
   * \brief The data pointer points to the allocated data. This will be a CUDA
   * device pointer or cl_mem handle in OpenCL. It may be opaque on some device
   * types. This pointer is always aligned to 256 bytes as in CUDA. The
   * `byte_offset` field should be used to point to the beginning of the data.
   *
   * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
   * TVM, perhaps others) do not adhere to this 256 byte alignment requirement
   * on CPU/CUDA/ROCm, and always use `byte_offset=0`.  This must be fixed
   * (after which this note will be updated); at the moment it is recommended
   * not to rely on the data pointer being correctly aligned.
   *
   * For a given DLTensor, the size of memory required to store the contents of
   * data is calculated as follows:
   *
   * \code{.c}
   * static inline size_t GetDataSize(const DLTensor* t) {
   *   size_t size = 1;
   *   for (int32_t i = 0; i < t->ndim; ++i) {
   *     size *= t->shape[i];
   *   }
   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
   *   return size;
   * }
   * \endcode
   */
  void* data;
  /*! \brief The device of the tensor */
  DLDevice device;
  /*! \brief Number of dimensions */
  int32_t ndim;
  /*! \brief The data type of the pointer */
  DLDataType dtype;
  /*! \brief The shape of the tensor */
  int64_t* shape;
  /*!
   * \brief strides of the tensor (in number of elements, not bytes);
   *  can be NULL, indicating the tensor is compact and row-major.
   */
  int64_t* strides;
  /*! \brief The offset in bytes to the beginning pointer to data */
  uint64_t byte_offset;
} DLTensor;
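
/*
 * Illustrative example (not part of the DLPack API): wrapping an existing
 * 3x4 row-major float buffer in a DLTensor. The `shape` array must outlive
 * the tensor, and `strides = NULL` relies on the compact row-major
 * convention described above.
 *
 * \code{.c}
 * float buffer[3 * 4];
 * int64_t shape[2] = {3, 4};
 * DLTensor t;
 * t.data = buffer;               // note: a stack buffer is typically not
 *                                // 256-byte aligned; see the note above
 * t.device.device_type = kDLCPU;
 * t.device.device_id = 0;
 * t.ndim = 2;
 * t.dtype.code = kDLFloat;
 * t.dtype.bits = 32;
 * t.dtype.lanes = 1;
 * t.shape = shape;
 * t.strides = NULL;              // compact, row-major
 * t.byte_offset = 0;
 * // GetDataSize(&t) == 3 * 4 * sizeof(float) == 48 bytes
 * \endcode
 */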

/*!
 * \brief C Tensor object that manages the memory of a DLTensor. This data
 *  structure is intended to facilitate the borrowing of a DLTensor by another
 *  framework. It is not meant to transfer the tensor. When the borrowing
 *  framework no longer needs the tensor, it should call the deleter to notify
 *  the host that the resource is no longer needed.
 */
typedef struct DLManagedTensor {
  /*! \brief DLTensor which is being memory managed */
  DLTensor dl_tensor;
  /*! \brief The context of the original host framework in which this
   *   DLManagedTensor is used. It can also be NULL.
   */
  void * manager_ctx;
  /*! \brief Destructor signature void (*)(void*) - this should be called
   *   to destruct the manager_ctx which holds the DLManagedTensor. It can be
   *   NULL if there is no way for the caller to provide a reasonable
   *   destructor. The destructor deletes the argument self as well.
   */
  void (*deleter)(struct DLManagedTensor * self);
} DLManagedTensor;
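
/*
 * Illustrative sketch (not part of the DLPack API): a producer wrapping a
 * heap allocation in a DLManagedTensor, with a deleter that frees the data,
 * the shape array, and the struct itself. The consumer calls `deleter`
 * exactly once when it no longer needs the tensor. All names here are
 * hypothetical, and <stdlib.h> is assumed for malloc/free.
 *
 * \code{.c}
 * static void ExampleDeleter(struct DLManagedTensor* self) {
 *   free(self->dl_tensor.data);   // release the tensor storage
 *   free(self->dl_tensor.shape);  // release the shape array
 *   free(self);                   // the deleter deletes `self` as well
 * }
 *
 * // Producer side (error handling omitted):
 * //   DLManagedTensor* mt = malloc(sizeof(*mt));
 * //   ... fill mt->dl_tensor with heap-allocated data and shape ...
 * //   mt->manager_ctx = NULL;
 * //   mt->deleter = ExampleDeleter;
 * // Consumer side, when done with the tensor:
 * //   mt->deleter(mt);
 * \endcode
 */
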
#ifdef __cplusplus
}  // DLPACK_EXTERN_C
#endif
#endif  // DLPACK_DLPACK_H_