/*!
 * Copyright (c) 2017 by Contributors
 * \file dlpack.h
 * \brief The common header of DLPack.
 */
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_

/**
 * \brief Compatibility with C++
 */
#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif

/*! \brief The current version of dlpack */
#define DLPACK_VERSION 80

/*! \brief The current ABI version of dlpack */
#define DLPACK_ABI_VERSION 1

/*! \brief DLPACK_DLL prefix for windows */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif

#include <stdint.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif
/*!
 * \brief The device type in DLDevice.
 */
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
  /*! \brief CPU device */
  kDLCPU = 1,
  /*! \brief CUDA GPU device */
  kDLCUDA = 2,
  /*!
   * \brief Pinned CUDA CPU memory allocated by cudaMallocHost
   */
  kDLCUDAHost = 3,
  /*! \brief OpenCL devices. */
  kDLOpenCL = 4,
  /*! \brief Vulkan buffer for next generation graphics. */
  kDLVulkan = 7,
  /*! \brief Metal for Apple GPU. */
  kDLMetal = 8,
  /*! \brief Verilog simulator buffer */
  kDLVPI = 9,
  /*! \brief ROCm GPUs for AMD GPUs */
  kDLROCM = 10,
  /*!
   * \brief Pinned ROCm CPU memory allocated by hipMallocHost
   */
  kDLROCMHost = 11,
  /*!
   * \brief Reserved extension device type,
   * used for quickly testing extension devices.
   * The semantics can differ depending on the implementation.
   */
  kDLExtDev = 12,
  /*!
   * \brief CUDA managed/unified memory allocated by cudaMallocManaged
   */
  kDLCUDAManaged = 13,
  /*!
   * \brief Unified shared memory allocated on a oneAPI non-partitioned
   * device. A call to the oneAPI runtime is required to determine the device
   * type, the USM allocation type and the sycl context it is bound to.
   */
  kDLOneAPI = 14,
  /*! \brief GPU support for next generation WebGPU standard. */
  kDLWebGPU = 15,
  /*! \brief Qualcomm Hexagon DSP */
  kDLHexagon = 16,
} DLDeviceType;

/*!
 * \brief A Device for Tensor and operator.
 */
typedef struct {
  /*! \brief The device type used in the device. */
  DLDeviceType device_type;
  /*!
   * \brief The device index.
   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
   */
  int32_t device_id;
} DLDevice;

/*!
 * \brief The type code options of DLDataType.
 */
typedef enum {
  /*! \brief signed integer */
  kDLInt = 0U,
  /*! \brief unsigned integer */
  kDLUInt = 1U,
  /*! \brief IEEE floating point */
  kDLFloat = 2U,
  /*!
   * \brief Opaque handle type, reserved for testing purposes.
   * Frameworks need to agree on the handle data type for the exchange to be well-defined.
   */
  kDLOpaqueHandle = 3U,
  /*! \brief bfloat16 */
  kDLBfloat = 4U,
  /*!
   * \brief complex number
   * (C/C++/Python layout: compact struct per complex number)
   */
  kDLComplex = 5U,
  /*! \brief boolean */
  kDLBool = 6U,
} DLDataTypeCode;
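/*
 * Illustrative sketch (not part of the upstream header): a consumer that
 * receives a DLDevice might dispatch on device_type like this. The
 * copy_to_host() helper is hypothetical, standing in for whatever transfer
 * routine the backend actually provides:
 *
 *   void* fetch(void* data, size_t nbytes, DLDevice dev) {
 *     switch (dev.device_type) {
 *       case kDLCPU:      return data;   // already host-visible
 *       case kDLCUDAHost: return data;   // pinned host memory, host-visible
 *       case kDLCUDA:     return copy_to_host(data, nbytes, dev.device_id);
 *       default:          return NULL;   // backend not supported here
 *     }
 *   }
 */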
/*!
 * \brief The data type the tensor can hold. The data type is assumed to follow
 * the native endianness. An explicit error message should be raised when
 * attempting to export an array with non-native endianness.
 *
 * Examples
 * - float: type_code = 2, bits = 32, lanes = 1
 * - float4 (vectorized 4x float): type_code = 2, bits = 32, lanes = 4
 * - int8: type_code = 0, bits = 8, lanes = 1
 * - std::complex<float>: type_code = 5, bits = 64, lanes = 1
 * - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library
 *   convention, the underlying storage size of bool is 8 bits)
 */
typedef struct {
  /*!
   * \brief Type code of base types.
   * We keep it uint8_t instead of DLDataTypeCode for minimal memory
   * footprint, but the value should be one of DLDataTypeCode enum values.
   */
  uint8_t code;
  /*!
   * \brief Number of bits, common choices are 8, 16, 32.
   */
  uint8_t bits;
  /*! \brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DLDataType;

/*!
 * \brief Plain C Tensor object, does not manage memory.
 */
typedef struct {
  /*!
   * \brief The data pointer points to the allocated data. This will be a CUDA
   * device pointer or cl_mem handle in OpenCL. It may be opaque on some device
   * types. This pointer is always aligned to 256 bytes as in CUDA. The
   * `byte_offset` field should be used to point to the beginning of the data.
   *
   * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
   * TVM, perhaps others) do not adhere to this 256 byte alignment requirement
   * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
   * (after which this note will be updated); at the moment it is recommended
   * not to rely on the data pointer being correctly aligned.
   *
   * For a given DLTensor, the size of the memory required to store the
   * contents of data is calculated as follows:
   *
   * \code{.c}
   * static inline size_t GetDataSize(const DLTensor* t) {
   *   size_t size = 1;
   *   for (int32_t i = 0; i < t->ndim; ++i) {
   *     size *= t->shape[i];
   *   }
   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
   *   return size;
   * }
   * \endcode
   */
  void* data;
  /*! \brief The device of the tensor */
  DLDevice device;
  /*! \brief Number of dimensions */
  int32_t ndim;
  /*! \brief The data type of the pointer */
  DLDataType dtype;
  /*! \brief The shape of the tensor */
  int64_t* shape;
  /*!
   * \brief strides of the tensor (in number of elements, not bytes);
   * can be NULL, indicating the tensor is compact and row-major.
   */
  int64_t* strides;
  /*! \brief The offset in bytes to the beginning pointer to data */
  uint64_t byte_offset;
} DLTensor;
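/*
 * Illustrative sketch (not part of the upstream header): combining shape,
 * strides, byte_offset and dtype above, the address of one element can be
 * computed as below. When strides is NULL the tensor is compact row-major,
 * so each stride is derived from the shape. Dereferencing the result is
 * only meaningful when data is host-addressable (e.g. kDLCPU, kDLCUDAHost):
 *
 *   static inline void* ElemPtr(const DLTensor* t, const int64_t* idx) {
 *     int64_t offset = 0;          // offset in elements
 *     int64_t compact_stride = 1;  // running row-major stride
 *     for (int32_t i = t->ndim - 1; i >= 0; --i) {
 *       int64_t stride = t->strides ? t->strides[i] : compact_stride;
 *       offset += idx[i] * stride;
 *       compact_stride *= t->shape[i];
 *     }
 *     size_t elem_bytes = (t->dtype.bits * t->dtype.lanes + 7) / 8;
 *     return (char*)(t->data) + t->byte_offset + offset * elem_bytes;
 *   }
 */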
/*!
 * \brief C Tensor object that manages the memory of a DLTensor. This data
 * structure is intended to facilitate the borrowing of DLTensor by another
 * framework. It is not meant to transfer the tensor. When the borrowing
 * framework doesn't need the tensor, it should call the deleter to notify
 * the host that the resource is no longer needed.
 */
typedef struct DLManagedTensor {
  /*! \brief DLTensor which is being memory managed */
  DLTensor dl_tensor;
  /*! \brief the context of the original host framework in which this
   * DLManagedTensor is used. It can also be NULL.
   */
  void* manager_ctx;
  /*! \brief Destructor signature void (*)(void*) - this should be called
   * to destruct the manager_ctx which holds the DLManagedTensor. It can be
   * NULL if there is no way for the caller to provide a reasonable
   * destructor. The destructor deletes the argument self as well.
   */
  void (*deleter)(struct DLManagedTensor* self);
} DLManagedTensor;
#ifdef __cplusplus
}  // DLPACK_EXTERN_C
#endif
#endif  // DLPACK_DLPACK_H_
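Usage note: the following is a minimal sketch of the producer side of the
exchange protocol described by DLManagedTensor, assuming a plain CPU
allocation. MakeFloatTensor and ExampleDeleter are illustrative names, not
part of DLPack; real producers typically stash their own bookkeeping object
in manager_ctx and release it in the deleter.

#include <stdlib.h>
/* #include "dlpack.h" */

/* Deleter: frees the buffers this producer allocated, then the wrapper. */
static void ExampleDeleter(struct DLManagedTensor* self) {
  free(self->dl_tensor.data);
  free(self->dl_tensor.shape);
  free(self);
}

/* Produce a zero-filled 1-D float32 CPU tensor of n elements for export. */
DLManagedTensor* MakeFloatTensor(int64_t n) {
  DLManagedTensor* mt = (DLManagedTensor*)malloc(sizeof(DLManagedTensor));
  mt->dl_tensor.data = calloc((size_t)n, sizeof(float));
  mt->dl_tensor.device.device_type = kDLCPU;
  mt->dl_tensor.device.device_id = 0;
  mt->dl_tensor.ndim = 1;
  mt->dl_tensor.dtype.code = (uint8_t)kDLFloat;
  mt->dl_tensor.dtype.bits = 32;
  mt->dl_tensor.dtype.lanes = 1;
  mt->dl_tensor.shape = (int64_t*)malloc(sizeof(int64_t));
  mt->dl_tensor.shape[0] = n;
  mt->dl_tensor.strides = NULL;  /* compact, row-major */
  mt->dl_tensor.byte_offset = 0;
  mt->manager_ctx = NULL;        /* nothing extra to track in this sketch */
  mt->deleter = ExampleDeleter;
  return mt;
}

A consumer that has finished with the tensor hands it back by calling
mt->deleter(mt); after that call, neither the wrapper nor the data pointer
may be touched again.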