Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:01:15

0001 /*
0002  * Copyright © 2010-2023 Inria.  All rights reserved.
0003  * Copyright © 2010-2011 Université Bordeaux
0004  * Copyright © 2011 Cisco Systems, Inc.  All rights reserved.
0005  * See COPYING in top-level directory.
0006  */
0007 
0008 /** \file
0009  * \brief Inline helpers to ease interaction between hwloc and the CUDA Driver API.
0010  *
0011  * Applications that use both hwloc and the CUDA Driver API may want to
0012  * include this file so as to get topology information for CUDA devices.
0013  *
0014  */
0015 
0016 #ifndef HWLOC_CUDA_H
0017 #define HWLOC_CUDA_H
0018 
0019 #include "hwloc.h"
0020 #include "hwloc/autogen/config.h"
0021 #include "hwloc/helper.h"
0022 #ifdef HWLOC_LINUX_SYS
0023 #include "hwloc/linux.h"
0024 #endif
0025 
0026 #include <cuda.h>
0027 
0028 
0029 #ifdef __cplusplus
0030 extern "C" {
0031 #endif
0032 
0033 
0034 /** \defgroup hwlocality_cuda Interoperability with the CUDA Driver API
0035  *
0036  * This interface offers ways to retrieve topology information about
0037  * CUDA devices when using the CUDA Driver API.
0038  *
0039  * @{
0040  */
0041 
0042 /** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
0043  *
0044  * Device \p cudevice must match the local machine.
0045  *
0046  * \return 0 on success.
0047  * \return -1 on error, for instance if device information could not be found.
0048  */
0049 static __hwloc_inline int
0050 hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
0051                   CUdevice cudevice, int *domain, int *bus, int *dev)
0052 {
0053   CUresult cres;
0054 
0055 #if CUDA_VERSION >= 4000
0056   cres = cuDeviceGetAttribute(domain, CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID, cudevice);
0057   if (cres != CUDA_SUCCESS) {
0058     errno = ENOSYS;
0059     return -1;
0060   }
0061 #else
0062   *domain = 0;
0063 #endif
0064   cres = cuDeviceGetAttribute(bus, CU_DEVICE_ATTRIBUTE_PCI_BUS_ID, cudevice);
0065   if (cres != CUDA_SUCCESS) {
0066     errno = ENOSYS;
0067     return -1;
0068   }
0069   cres = cuDeviceGetAttribute(dev, CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID, cudevice);
0070   if (cres != CUDA_SUCCESS) {
0071     errno = ENOSYS;
0072     return -1;
0073   }
0074 
0075   return 0;
0076 }
0077 
0078 /** \brief Get the CPU set of processors that are physically
0079  * close to device \p cudevice.
0080  *
0081  * Store in \p set the CPU-set describing the locality of the CUDA device \p cudevice.
0082  *
0083  * Topology \p topology and device \p cudevice must match the local machine.
0084  * I/O devices detection and the CUDA component are not needed in the topology.
0085  *
0086  * The function only returns the locality of the device.
0087  * If more information about the device is needed, OS objects should
0088  * be used instead, see hwloc_cuda_get_device_osdev()
0089  * and hwloc_cuda_get_device_osdev_by_index().
0090  *
0091  * This function is currently only implemented in a meaningful way for
0092  * Linux; other systems will simply get a full cpuset.
0093  *
0094  * \return 0 on success.
0095  * \return -1 on error, for instance if device information could not be found.
0096  */
0097 static __hwloc_inline int
0098 hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
0099                  CUdevice cudevice, hwloc_cpuset_t set)
0100 {
0101 #ifdef HWLOC_LINUX_SYS
0102   /* If we're on Linux, use the sysfs mechanism to get the local cpus */
0103 #define HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX 128
0104   char path[HWLOC_CUDA_DEVICE_SYSFS_PATH_MAX];
0105   int domainid, busid, deviceid;
0106 
0107   if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domainid, &busid, &deviceid))
0108     return -1;
0109 
0110   if (!hwloc_topology_is_thissystem(topology)) {
0111     errno = EINVAL;
0112     return -1;
0113   }
0114 
0115   sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domainid, busid, deviceid);
0116   if (hwloc_linux_read_path_as_cpumask(path, set) < 0
0117       || hwloc_bitmap_iszero(set))
0118     hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
0119 #else
0120   /* Non-Linux systems simply get a full cpuset */
0121   hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
0122 #endif
0123   return 0;
0124 }
0125 
0126 /** \brief Get the hwloc PCI device object corresponding to the
0127  * CUDA device \p cudevice.
0128  *
0129  * \return The hwloc PCI device object describing the CUDA device \p cudevice.
0130  * \return \c NULL if none could be found.
0131  *
0132  * Topology \p topology and device \p cudevice must match the local machine.
0133  * I/O devices detection must be enabled in topology \p topology.
0134  * The CUDA component is not needed in the topology.
0135  */
0136 static __hwloc_inline hwloc_obj_t
0137 hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice)
0138 {
0139   int domain, bus, dev;
0140 
0141   if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
0142     return NULL;
0143 
0144   return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0);
0145 }
0146 
0147 /** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice.
0148  *
0149  * \return The hwloc OS device object that describes the given CUDA device \p cudevice.
0150  * \return \c NULL if none could be found.
0151  *
0152  * Topology \p topology and device \p cudevice must match the local machine.
0153  * I/O devices detection and the CUDA component must be enabled in the topology.
0154  * If not, the locality of the object may still be found using
0155  * hwloc_cuda_get_device_cpuset().
0156  *
0157  * \note This function cannot work if PCI devices are filtered out.
0158  *
0159  * \note The corresponding hwloc PCI device may be found by looking
0160  * at the result parent pointer (unless PCI devices are filtered out).
0161  */
0162 static __hwloc_inline hwloc_obj_t
0163 hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice)
0164 {
0165     hwloc_obj_t osdev = NULL;
0166     int domain, bus, dev;
0167 
0168     if (hwloc_cuda_get_device_pci_ids(topology, cudevice, &domain, &bus, &dev))
0169         return NULL;
0170 
0171     osdev = NULL;
0172     while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
0173         hwloc_obj_t pcidev = osdev->parent;
0174         if (strncmp(osdev->name, "cuda", 4))
0175             continue;
0176         if (pcidev
0177             && pcidev->type == HWLOC_OBJ_PCI_DEVICE
0178             && (int) pcidev->attr->pcidev.domain == domain
0179             && (int) pcidev->attr->pcidev.bus == bus
0180             && (int) pcidev->attr->pcidev.dev == dev
0181             && pcidev->attr->pcidev.func == 0)
0182             return osdev;
0183         /* if PCI are filtered out, we need a info attr to match on */
0184     }
0185 
0186     return NULL;
0187 }
0188 
0189 /** \brief Get the hwloc OS device object corresponding to the
0190  * CUDA device whose index is \p idx.
0191  *
0192  * \return The hwloc OS device object describing the CUDA device whose index is \p idx.
0193  * \return \c NULL if none could be found.
0194  *
0195  * The topology \p topology does not necessarily have to match the current
0196  * machine. For instance the topology may be an XML import of a remote host.
0197  * I/O devices detection and the CUDA component must be enabled in the topology.
0198  *
0199  * \note The corresponding PCI device object can be obtained by looking
0200  * at the OS device parent object (unless PCI devices are filtered out).
0201  *
0202  * \note This function is identical to hwloc_cudart_get_device_osdev_by_index().
0203  */
0204 static __hwloc_inline hwloc_obj_t
0205 hwloc_cuda_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
0206 {
0207     hwloc_obj_t osdev = NULL;
0208     while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
0209         if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
0210             && osdev->name
0211             && !strncmp("cuda", osdev->name, 4)
0212             && atoi(osdev->name + 4) == (int) idx)
0213             return osdev;
0214     }
0215     return NULL;
0216 }
0217 
0218 /** @} */
0219 
0220 
0221 #ifdef __cplusplus
0222 } /* extern "C" */
0223 #endif
0224 
0225 
0226 #endif /* HWLOC_CUDA_H */