File indexing completed on 2025-01-18 10:01:17
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015 #ifndef HWLOC_RSMI_H
0016 #define HWLOC_RSMI_H
0017
0018 #include "hwloc.h"
0019 #include "hwloc/autogen/config.h"
0020 #include "hwloc/helper.h"
0021 #ifdef HWLOC_LINUX_SYS
0022 #include "hwloc/linux.h"
0023 #endif
0024
0025 #include <rocm_smi/rocm_smi.h>
0026
0027
0028 #ifdef __cplusplus
0029 extern "C" {
0030 #endif
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045
0046
0047
0048
0049
0050
0051
0052
0053
0054
0055
0056
0057
0058
0059
0060
0061
0062 static __hwloc_inline int
0063 hwloc_rsmi_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
0064 uint32_t dv_ind, hwloc_cpuset_t set)
0065 {
0066 #ifdef HWLOC_LINUX_SYS
0067
0068 #define HWLOC_RSMI_DEVICE_SYSFS_PATH_MAX 128
0069 char path[HWLOC_RSMI_DEVICE_SYSFS_PATH_MAX];
0070 rsmi_status_t ret;
0071 uint64_t bdfid = 0;
0072 unsigned domain, device, bus;
0073
0074 if (!hwloc_topology_is_thissystem(topology)) {
0075 errno = EINVAL;
0076 return -1;
0077 }
0078
0079 ret = rsmi_dev_pci_id_get(dv_ind, &bdfid);
0080 if (RSMI_STATUS_SUCCESS != ret) {
0081 errno = EINVAL;
0082 return -1;
0083 }
0084 domain = (bdfid>>32) & 0xffffffff;
0085 bus = ((bdfid & 0xffff)>>8) & 0xff;
0086 device = ((bdfid & 0xff)>>3) & 0x1f;
0087
0088 sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", domain, bus, device);
0089 if (hwloc_linux_read_path_as_cpumask(path, set) < 0
0090 || hwloc_bitmap_iszero(set))
0091 hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
0092 #else
0093
0094 hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
0095 #endif
0096 return 0;
0097 }
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114 static __hwloc_inline hwloc_obj_t
0115 hwloc_rsmi_get_device_osdev_by_index(hwloc_topology_t topology, uint32_t dv_ind)
0116 {
0117 hwloc_obj_t osdev = NULL;
0118 while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
0119 if (HWLOC_OBJ_OSDEV_GPU == osdev->attr->osdev.type
0120 && osdev->name
0121 && !strncmp("rsmi", osdev->name, 4)
0122 && atoi(osdev->name + 4) == (int) dv_ind)
0123 return osdev;
0124 }
0125 return NULL;
0126 }
0127
0128
0129
0130
0131
0132
0133
0134
0135
0136
0137
0138
0139
0140
0141
0142
0143 static __hwloc_inline hwloc_obj_t
0144 hwloc_rsmi_get_device_osdev(hwloc_topology_t topology, uint32_t dv_ind)
0145 {
0146 hwloc_obj_t osdev;
0147 rsmi_status_t ret;
0148 uint64_t bdfid = 0;
0149 unsigned domain, device, bus, func;
0150 uint64_t id;
0151 char uuid[64];
0152
0153 if (!hwloc_topology_is_thissystem(topology)) {
0154 errno = EINVAL;
0155 return NULL;
0156 }
0157
0158 ret = rsmi_dev_pci_id_get(dv_ind, &bdfid);
0159 if (RSMI_STATUS_SUCCESS != ret) {
0160 errno = EINVAL;
0161 return NULL;
0162 }
0163 domain = (bdfid>>32) & 0xffffffff;
0164 bus = ((bdfid & 0xffff)>>8) & 0xff;
0165 device = ((bdfid & 0xff)>>3) & 0x1f;
0166 func = bdfid & 0x7;
0167
0168 ret = rsmi_dev_unique_id_get(dv_ind, &id);
0169 if (RSMI_STATUS_SUCCESS != ret)
0170 uuid[0] = '\0';
0171 else
0172 sprintf(uuid, "%lx", id);
0173
0174 osdev = NULL;
0175 while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
0176 hwloc_obj_t pcidev = osdev->parent;
0177 const char *info;
0178
0179 if (strncmp(osdev->name, "rsmi", 4))
0180 continue;
0181
0182 if (pcidev
0183 && pcidev->type == HWLOC_OBJ_PCI_DEVICE
0184 && pcidev->attr->pcidev.domain == domain
0185 && pcidev->attr->pcidev.bus == bus
0186 && pcidev->attr->pcidev.dev == device
0187 && pcidev->attr->pcidev.func == func)
0188 return osdev;
0189
0190 info = hwloc_obj_get_info_by_name(osdev, "AMDUUID");
0191 if (info && !strcmp(info, uuid))
0192 return osdev;
0193 }
0194
0195 return NULL;
0196 }
0197
0198
0199
0200
0201 #ifdef __cplusplus
0202 }
0203 #endif
0204
0205
0206 #endif