Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-06-07 08:28:42

0001 /*
0002  * SPDX-License-Identifier: BSD-3-Clause
0003  * Copyright © 2012-2023 Inria.  All rights reserved.
0004  * See COPYING in top-level directory.
0005  */
0006 
0007 /** \file
0008  * \brief Macros to help interaction between hwloc and the NVIDIA Management Library.
0009  *
0010  * Applications that use both hwloc and the NVIDIA Management Library may want to
0011  * include this file so as to get topology information for NVML devices.
0012  */
0013 
0014 #ifndef HWLOC_NVML_H
0015 #define HWLOC_NVML_H
0016 
0017 #include "hwloc.h"
0018 #include "hwloc/autogen/config.h"
0019 #include "hwloc/helper.h"
0020 #ifdef HWLOC_LINUX_SYS
0021 #include "hwloc/linux.h"
0022 #endif
0023 
0024 #include <nvml.h>
0025 
0026 
0027 #ifdef __cplusplus
0028 extern "C" {
0029 #endif
0030 
0031 
0032 /** \defgroup hwlocality_nvml Interoperability with the NVIDIA Management Library
0033  *
0034  * This interface offers ways to retrieve topology information about
0035  * devices managed by the NVIDIA Management Library (NVML).
0036  *
0037  * @{
0038  */
0039 
0040 /** \brief Get the CPU set of processors that are physically
0041  * close to NVML device \p device.
0042  *
0043  * Store in \p set the CPU-set describing the locality of the NVML device \p device.
0044  *
0045  * Topology \p topology and device \p device must match the local machine.
0046  * I/O devices detection and the NVML component are not needed in the topology.
0047  *
0048  * The function only returns the locality of the device.
0049  * If more information about the device is needed, OS objects should
0050  * be used instead, see hwloc_nvml_get_device_osdev()
0051  * and hwloc_nvml_get_device_osdev_by_index().
0052  *
0053  * This function is currently only implemented in a meaningful way for
0054  * Linux; other systems will simply get a full cpuset.
0055  *
0056  * \return 0 on success.
0057  * \return -1 on error, for instance if device information could not be found.
0058  */
0059 static __hwloc_inline int
0060 hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
0061                  nvmlDevice_t device, hwloc_cpuset_t set)
0062 {
0063 #ifdef HWLOC_LINUX_SYS
0064   /* If we're on Linux, use the sysfs mechanism to get the local cpus */
0065 #define HWLOC_NVML_DEVICE_SYSFS_PATH_MAX 128
0066   char path[HWLOC_NVML_DEVICE_SYSFS_PATH_MAX];
0067   nvmlReturn_t nvres;
0068   nvmlPciInfo_t pci;
0069 
0070   if (!hwloc_topology_is_thissystem(topology)) {
0071     errno = EINVAL;
0072     return -1;
0073   }
0074 
0075   nvres = nvmlDeviceGetPciInfo(device, &pci);
0076   if (NVML_SUCCESS != nvres) {
0077     errno = EINVAL;
0078     return -1;
0079   }
0080 
0081   sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", pci.domain, pci.bus, pci.device);
0082   if (hwloc_linux_read_path_as_cpumask(path, set) < 0
0083       || hwloc_bitmap_iszero(set))
0084     hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
0085 #else
0086   /* Non-Linux systems simply get a full cpuset */
0087   hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
0088 #endif
0089   return 0;
0090 }
0091 
0092 /** \brief Get the hwloc OS device object corresponding to the
0093  * NVML device whose index is \p idx.
0094  *
0095  * \return The hwloc OS device object describing the NVML device whose index is \p idx.
0096  * \return \c NULL if none could be found.
0097  *
0098  * The topology \p topology does not necessarily have to match the current
0099  * machine. For instance the topology may be an XML import of a remote host.
0100  * I/O devices detection and the NVML component must be enabled in the topology.
0101  *
0102  * \note The corresponding PCI device object can be obtained by looking
0103  * at the OS device parent object (unless PCI devices are filtered out).
0104  */
0105 static __hwloc_inline hwloc_obj_t
0106 hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
0107 {
0108     hwloc_obj_t osdev = NULL;
0109     while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
0110                 if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
0111                     && osdev->name
0112             && !strncmp("nvml", osdev->name, 4)
0113             && atoi(osdev->name + 4) == (int) idx)
0114                         return osdev;
0115         }
0116         return NULL;
0117 }
0118 
0119 /** \brief Get the hwloc OS device object corresponding to NVML device \p device.
0120  *
0121  * \return The hwloc OS device object that describes the given NVML device \p device.
0122  * \return \c NULL if none could be found.
0123  *
0124  * Topology \p topology and device \p device must match the local machine.
0125  * I/O devices detection and the NVML component must be enabled in the topology.
0126  * If not, the locality of the object may still be found using
0127  * hwloc_nvml_get_device_cpuset().
0128  *
0129  * \note The corresponding hwloc PCI device may be found by looking
0130  * at the result parent pointer (unless PCI devices are filtered out).
0131  */
0132 static __hwloc_inline hwloc_obj_t
0133 hwloc_nvml_get_device_osdev(hwloc_topology_t topology, nvmlDevice_t device)
0134 {
0135     hwloc_obj_t osdev;
0136     nvmlReturn_t nvres;
0137     nvmlPciInfo_t pci;
0138     char uuid[64];
0139 
0140     if (!hwloc_topology_is_thissystem(topology)) {
0141         errno = EINVAL;
0142         return NULL;
0143     }
0144 
0145     nvres = nvmlDeviceGetPciInfo(device, &pci);
0146     if (NVML_SUCCESS != nvres)
0147         return NULL;
0148 
0149     nvres = nvmlDeviceGetUUID(device, uuid, sizeof(uuid));
0150     if (NVML_SUCCESS != nvres)
0151         uuid[0] = '\0';
0152 
0153     osdev = NULL;
0154     while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
0155         hwloc_obj_t pcidev = osdev->parent;
0156         const char *info;
0157 
0158         if (strncmp(osdev->name, "nvml", 4))
0159             continue;
0160 
0161         if (pcidev
0162             && pcidev->type == HWLOC_OBJ_PCI_DEVICE
0163             && pcidev->attr->pcidev.domain == pci.domain
0164             && pcidev->attr->pcidev.bus == pci.bus
0165             && pcidev->attr->pcidev.dev == pci.device
0166             && pcidev->attr->pcidev.func == 0)
0167             return osdev;
0168 
0169         info = hwloc_obj_get_info_by_name(osdev, "NVIDIAUUID");
0170         if (info && !strcmp(info, uuid))
0171             return osdev;
0172     }
0173 
0174     return NULL;
0175 }
0176 
0177 /** @} */
0178 
0179 
0180 #ifdef __cplusplus
0181 } /* extern "C" */
0182 #endif
0183 
0184 
0185 #endif /* HWLOC_NVML_H */