|
|
|||
File indexing completed on 2026-05-10 08:45:13
0001 /* 0002 * SPDX-FileCopyrightText: Copyright (c) 2009-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 0003 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 0004 * 0005 * Licensed under the Apache License, Version 2.0 (the "License"); 0006 * you may not use this file except in compliance with the License. 0007 * You may obtain a copy of the License at 0008 * 0009 * http://www.apache.org/licenses/LICENSE-2.0 0010 * 0011 * Unless required by applicable law or agreed to in writing, software 0012 * distributed under the License is distributed on an "AS IS" BASIS, 0013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 0014 * See the License for the specific language governing permissions and 0015 * limitations under the License. 0016 * 0017 * Licensed under the Apache License v2.0 with LLVM Exceptions. 0018 * See https://nvidia.github.io/NVTX/LICENSE.txt for license information. 0019 */ 0020 0021 #if defined(NVTX_AS_SYSTEM_HEADER) 0022 #if defined(__clang__) 0023 #pragma clang system_header 0024 #elif defined(__GNUC__) || defined(__NVCOMPILER) 0025 #pragma GCC system_header 0026 #elif defined(_MSC_VER) 0027 #pragma system_header 0028 #endif 0029 #endif 0030 0031 #include "nvToolsExtMem.h" 0032 0033 #include "cuda.h" 0034 #include "cuda_runtime.h" 0035 0036 #ifdef __cplusplus 0037 extern "C" { 0038 #endif /* __cplusplus */ 0039 0040 #ifndef NVTX_MEM_CUDART_CONTENTS_V1 0041 #define NVTX_MEM_CUDART_CONTENTS_V1 0042 0043 /** \defgroup MEMORY_CUDART Memory CUDA Runtime 0044 * See page \ref PAGE_MEMORY_CUDART. 0045 * @{ 0046 */ 0047 0048 /** \brief The memory is from a CUDA runtime array. 0049 * 0050 * Relevant functions: cudaMallocArray, cudaMalloc3DArray 0051 * Also cudaArray_t from other types such as cudaMipmappedArray_t 0052 * 0053 * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported 0054 * 0055 * nvtxMemHeapRegister receives a heapDesc of type cudaArray_t because the description can be retrieved by tools through cudaArrayGetInfo() 0056 * nvtxMemRegionRegisterEx receives a regionDesc of type nvtxMemCudaArrayRangeDesc_t 0057 */ 0058 #define NVTX_MEM_TYPE_CUDA_ARRAY 0x11 0059 0060 /** \brief structure to describe memory in a CUDA array object 0061 */ 0062 typedef struct nvtxMemCudaArrayRangeDesc_v1 0063 { 0064 uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */ 0065 uint16_t structSize; /* Size of the structure. */ 0066 uint32_t reserved0; 0067 cudaArray_t src; 0068 size_t offset[3]; 0069 size_t extent[3]; 0070 } nvtxMemCudaArrayRangeDesc_v1; 0071 typedef nvtxMemCudaArrayRangeDesc_v1 nvtxMemCudaArrayRangeDesc_t; 0072 0073 0074 /** \brief The memory is from a CUDA device array. 0075 * 0076 * Relevant functions: cuArrayCreate, cuArray3DCreate 0077 * Also CUarray from other types such as CUmipmappedArray 0078 * 0079 * NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE is not supported 0080 * 0081 * nvtxMemHeapRegister receives a heapDesc of type cudaArray_t because the description can be retrieved by tools through cudaArrayGetInfo() 0082 * nvtxMemRegionRegisterEx receives a regionDesc of type nvtxMemCuArrayRangeDesc_t 0083 */ 0084 #define NVTX_MEM_TYPE_CU_ARRAY 0x12 0085 0086 /** \brief structure to describe memory in a CUDA array object 0087 */ 0088 typedef struct nvtxMemCuArrayRangeDesc_v1 0089 { 0090 uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */ 0091 uint16_t structSize; /* Size of the structure. */ 0092 uint32_t reserved0; 0093 CUarray src; 0094 size_t offset[3]; 0095 size_t extent[3]; 0096 } nvtxMemCuArrayRangeDesc_v1; 0097 typedef nvtxMemCuArrayRangeDesc_v1 nvtxMemCuArrayRangeDesc_t; 0098 0099 /* Reserving 0x2-0xF for more common types */ 0100 0101 #define NVTX_MEM_CUDA_PEER_ALL_DEVICES -1 0102 0103 /** \brief Get the permission object that represent the CUDA runtime device 0104 * or cuda driver context 0105 * 0106 * This object will allow developers to adjust permissions applied to work executed 0107 * on the GPU. It may be inherited or overridden by permissions object bound 0108 * with NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM, depending on the binding flags. 0109 * 0110 * Ex. change the peer to peer access permissions between devices in entirety 0111 * or punch through special holes 0112 * 0113 * By default, all memory is accessible that naturally would be to a CUDA kernel until 0114 * modified otherwise by nvtxMemCudaSetPeerAccess or changing regions. 0115 * 0116 * This object should also represent the CUDA driver API level context. 0117 */ 0118 NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetProcessWidePermissions( 0119 nvtxDomainHandle_t domain); 0120 0121 /** \brief Get the permission object that represent the CUDA runtime device 0122 * or cuda driver context 0123 * 0124 * This object will allow developers to adjust permissions applied to work executed 0125 * on the GPU. It may be inherited or overridden by permissions object bound 0126 * with NVTX_MEM_PERMISSIONS_BIND_SCOPE_CUDA_STREAM, depending on the binding flags. 0127 * 0128 * Ex. change the peer to peer access permissions between devices in entirety 0129 * or punch through special holes 0130 * 0131 * By default, all memory is accessible that naturally would be to a CUDA kernel until 0132 * modified otherwise by nvtxMemCudaSetPeerAccess or changing regions. 0133 * 0134 * This object should also represent the CUDA driver API level context. 0135 */ 0136 NVTX_DECLSPEC nvtxMemPermissionsHandle_t NVTX_API nvtxMemCudaGetDeviceWidePermissions( 0137 nvtxDomainHandle_t domain, 0138 int device); 0139 0140 /** \brief Change the default behavior for all memory mapped in from a particular device. 0141 * 0142 * While typically all memory defaults to readable and writable, users may desire to limit 0143 * access to reduced default permissions such as read-only and a per-device basis. 0144 * 0145 * Regions can used to further override smaller windows of memory. 0146 * 0147 * devicePeer can be NVTX_MEM_CUDA_PEER_ALL_DEVICES 0148 * 0149 */ 0150 NVTX_DECLSPEC void NVTX_API nvtxMemCudaSetPeerAccess( 0151 nvtxDomainHandle_t domain, 0152 nvtxMemPermissionsHandle_t permissions, 0153 int devicePeer, /* device number such as from cudaGetDevice() or NVTX_MEM_CUDA_PEER_ALL_DEVICES */ 0154 uint32_t flags); /* NVTX_MEM_PERMISSIONS_REGION_FLAGS_* */ 0155 0156 /** \brief Mark memory ranges as initialized. 0157 * 0158 * The heap refers the the heap within which the region resides. 0159 * This can be from nvtxMemHeapRegister, NVTX_MEM_HEAP_HANDLE_PROCESS_WIDE, or one provided from other extension API. 0160 * 0161 * The regionType arg will define which type is used in regionDescArray. 0162 * The most commonly used type is NVTX_MEM_TYPE_VIRTUAL_ADDRESS. 0163 * 0164 * The regionCount arg is how many element are in regionDescArray and regionHandleArrayOut. 0165 * 0166 * The regionHandleArrayOut arg points to an array where the tool will provide region handles. 0167 * If a pointer if provided, it is expected to have regionCount elements. 0168 * This pointer can be NULL if regionType is NVTX_MEM_TYPE_VIRTUAL_ADDRESS. In this case, 0169 * the user can use the pointer to the virtual memory to reference the region in other 0170 * related functions which accept a nvtxMemRegionRef_t. 0171 */ 0172 typedef struct nvtxMemMarkInitializedBatch_v1 0173 { 0174 uint16_t extCompatID; /* Set to NVTX_EXT_COMPATID_MEM */ 0175 uint16_t structSize; /* Size of the structure. */ 0176 0177 uint32_t regionType; /* NVTX_MEM_TYPE_* */ 0178 0179 size_t regionDescCount; 0180 size_t regionDescElementSize; 0181 void const* regionDescElements; /* this will also become the handle for this region */ 0182 0183 } nvtxMemMarkInitializedBatch_v1; 0184 typedef nvtxMemMarkInitializedBatch_v1 nvtxMemMarkInitializedBatch_t; 0185 0186 /** \brief Register a region of memory inside of a heap of linear process virtual memory 0187 * 0188 * stream is the CUDA stream where the range was accessed and initialized. 0189 */ 0190 NVTX_DECLSPEC void NVTX_API nvtxMemCudaMarkInitialized( 0191 nvtxDomainHandle_t domain, 0192 cudaStream_t stream, 0193 uint8_t isPerThreadStream, /* 0 for false, otherwise true */ 0194 nvtxMemMarkInitializedBatch_t const* desc); 0195 0196 /** @} */ 0197 0198 #endif /* NVTX_MEM_CUDART_CONTENTS_V1 */ 0199 0200 #ifdef __GNUC__ 0201 #pragma GCC visibility push(internal) 0202 #endif 0203 0204 #ifndef NVTX_NO_IMPL 0205 #define NVTX_EXT_IMPL_MEM_CUDART_GUARD /* Ensure other headers cannot be included directly */ 0206 #include "nvtxDetail/nvtxExtImplMemCudaRt_v1.h" 0207 #undef NVTX_EXT_IMPL_MEM_CUDART_GUARD 0208 #endif /*NVTX_NO_IMPL*/ 0209 0210 #ifdef __GNUC__ 0211 #pragma GCC visibility pop 0212 #endif 0213 0214 0215 #ifdef __cplusplus 0216 } 0217 #endif /* __cplusplus */
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|