Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:01:16

0001 /*
0002  * Copyright © 2012-2023 Inria.  All rights reserved.
0003  * See COPYING in top-level directory.
0004  */
0005 
0006 /** \file
0007  * \brief Macros to help interaction between hwloc and the NVIDIA Management Library.
0008  *
0009  * Applications that use both hwloc and the NVIDIA Management Library may want to
0010  * include this file so as to get topology information for NVML devices.
0011  */
0012 
0013 #ifndef HWLOC_NVML_H
0014 #define HWLOC_NVML_H
0015 
0016 #include "hwloc.h"
0017 #include "hwloc/autogen/config.h"
0018 #include "hwloc/helper.h"
0019 #ifdef HWLOC_LINUX_SYS
0020 #include "hwloc/linux.h"
0021 #endif
0022 
0023 #include <nvml.h>
0024 
0025 
0026 #ifdef __cplusplus
0027 extern "C" {
0028 #endif
0029 
0030 
0031 /** \defgroup hwlocality_nvml Interoperability with the NVIDIA Management Library
0032  *
0033  * This interface offers ways to retrieve topology information about
0034  * devices managed by the NVIDIA Management Library (NVML).
0035  *
0036  * @{
0037  */
0038 
0039 /** \brief Get the CPU set of processors that are physically
0040  * close to NVML device \p device.
0041  *
0042  * Store in \p set the CPU-set describing the locality of the NVML device \p device.
0043  *
0044  * Topology \p topology and device \p device must match the local machine.
0045  * I/O devices detection and the NVML component are not needed in the topology.
0046  *
0047  * The function only returns the locality of the device.
0048  * If more information about the device is needed, OS objects should
0049  * be used instead, see hwloc_nvml_get_device_osdev()
0050  * and hwloc_nvml_get_device_osdev_by_index().
0051  *
0052  * This function is currently only implemented in a meaningful way for
0053  * Linux; other systems will simply get a full cpuset.
0054  *
0055  * \return 0 on success.
0056  * \return -1 on error, for instance if device information could not be found.
0057  */
0058 static __hwloc_inline int
0059 hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
0060                  nvmlDevice_t device, hwloc_cpuset_t set)
0061 {
0062 #ifdef HWLOC_LINUX_SYS
0063   /* If we're on Linux, use the sysfs mechanism to get the local cpus */
0064 #define HWLOC_NVML_DEVICE_SYSFS_PATH_MAX 128
0065   char path[HWLOC_NVML_DEVICE_SYSFS_PATH_MAX];
0066   nvmlReturn_t nvres;
0067   nvmlPciInfo_t pci;
0068 
0069   if (!hwloc_topology_is_thissystem(topology)) {
0070     errno = EINVAL;
0071     return -1;
0072   }
0073 
0074   nvres = nvmlDeviceGetPciInfo(device, &pci);
0075   if (NVML_SUCCESS != nvres) {
0076     errno = EINVAL;
0077     return -1;
0078   }
0079 
0080   sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", pci.domain, pci.bus, pci.device);
0081   if (hwloc_linux_read_path_as_cpumask(path, set) < 0
0082       || hwloc_bitmap_iszero(set))
0083     hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
0084 #else
0085   /* Non-Linux systems simply get a full cpuset */
0086   hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
0087 #endif
0088   return 0;
0089 }
0090 
0091 /** \brief Get the hwloc OS device object corresponding to the
0092  * NVML device whose index is \p idx.
0093  *
0094  * \return The hwloc OS device object describing the NVML device whose index is \p idx.
0095  * \return \c NULL if none could be found.
0096  *
0097  * The topology \p topology does not necessarily have to match the current
0098  * machine. For instance the topology may be an XML import of a remote host.
0099  * I/O devices detection and the NVML component must be enabled in the topology.
0100  *
0101  * \note The corresponding PCI device object can be obtained by looking
0102  * at the OS device parent object (unless PCI devices are filtered out).
0103  */
0104 static __hwloc_inline hwloc_obj_t
0105 hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
0106 {
0107     hwloc_obj_t osdev = NULL;
0108     while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
0109                 if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
0110                     && osdev->name
0111             && !strncmp("nvml", osdev->name, 4)
0112             && atoi(osdev->name + 4) == (int) idx)
0113                         return osdev;
0114         }
0115         return NULL;
0116 }
0117 
0118 /** \brief Get the hwloc OS device object corresponding to NVML device \p device.
0119  *
0120  * \return The hwloc OS device object that describes the given NVML device \p device.
0121  * \return \c NULL if none could be found.
0122  *
0123  * Topology \p topology and device \p device must match the local machine.
0124  * I/O devices detection and the NVML component must be enabled in the topology.
0125  * If not, the locality of the object may still be found using
0126  * hwloc_nvml_get_device_cpuset().
0127  *
0128  * \note The corresponding hwloc PCI device may be found by looking
0129  * at the result parent pointer (unless PCI devices are filtered out).
0130  */
0131 static __hwloc_inline hwloc_obj_t
0132 hwloc_nvml_get_device_osdev(hwloc_topology_t topology, nvmlDevice_t device)
0133 {
0134     hwloc_obj_t osdev;
0135     nvmlReturn_t nvres;
0136     nvmlPciInfo_t pci;
0137     char uuid[64];
0138 
0139     if (!hwloc_topology_is_thissystem(topology)) {
0140         errno = EINVAL;
0141         return NULL;
0142     }
0143 
0144     nvres = nvmlDeviceGetPciInfo(device, &pci);
0145     if (NVML_SUCCESS != nvres)
0146         return NULL;
0147 
0148     nvres = nvmlDeviceGetUUID(device, uuid, sizeof(uuid));
0149     if (NVML_SUCCESS != nvres)
0150         uuid[0] = '\0';
0151 
0152     osdev = NULL;
0153     while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
0154         hwloc_obj_t pcidev = osdev->parent;
0155         const char *info;
0156 
0157         if (strncmp(osdev->name, "nvml", 4))
0158             continue;
0159 
0160         if (pcidev
0161             && pcidev->type == HWLOC_OBJ_PCI_DEVICE
0162             && pcidev->attr->pcidev.domain == pci.domain
0163             && pcidev->attr->pcidev.bus == pci.bus
0164             && pcidev->attr->pcidev.dev == pci.device
0165             && pcidev->attr->pcidev.func == 0)
0166             return osdev;
0167 
0168         info = hwloc_obj_get_info_by_name(osdev, "NVIDIAUUID");
0169         if (info && !strcmp(info, uuid))
0170             return osdev;
0171     }
0172 
0173     return NULL;
0174 }
0175 
0176 /** @} */
0177 
0178 
0179 #ifdef __cplusplus
0180 } /* extern "C" */
0181 #endif
0182 
0183 
0184 #endif /* HWLOC_NVML_H */