|
||||
File indexing completed on 2025-01-18 10:01:15
0001 /* 0002 * Copyright © 2010-2024 Inria. All rights reserved. 0003 * See COPYING in top-level directory. 0004 */ 0005 0006 /** \file 0007 * \brief Object distances. 0008 */ 0009 0010 #ifndef HWLOC_DISTANCES_H 0011 #define HWLOC_DISTANCES_H 0012 0013 #ifndef HWLOC_H 0014 #error Please include the main hwloc.h instead 0015 #endif 0016 0017 0018 #ifdef __cplusplus 0019 extern "C" { 0020 #elif 0 0021 } 0022 #endif 0023 0024 0025 /** \defgroup hwlocality_distances_get Retrieve distances between objects 0026 * @{ 0027 */ 0028 0029 /** \brief Matrix of distances between a set of objects. 0030 * 0031 * The most common matrix contains latencies between NUMA nodes 0032 * (as reported in the System Locality Distance Information Table (SLIT) 0033 * in the ACPI specification), which may or may not be physically accurate. 0034 * It corresponds to the latency for accessing the memory of one node 0035 * from a core in another node. 0036 * The corresponding kind is ::HWLOC_DISTANCES_KIND_MEANS_LATENCY | ::HWLOC_DISTANCES_KIND_FROM_USER. 0037 * The name of this distances structure is "NUMALatency". 0038 * 0039 * The matrix may also contain bandwidths between random sets of objects, 0040 * possibly provided by the user, as specified in the \p kind attribute. 0041 * Others common distance structures include and "XGMIBandwidth", "XGMIHops", 0042 * "XeLinkBandwidth" and "NVLinkBandwidth". 0043 * 0044 * Pointers \p objs and \p values should not be replaced, reallocated, freed, etc. 0045 * However callers are allowed to modify \p kind as well as the contents 0046 * of \p objs and \p values arrays. 0047 * For instance, if there is a single NUMA node per Package, 0048 * hwloc_get_obj_with_same_locality() may be used to convert between them 0049 * and replace NUMA nodes in the \p objs array with the corresponding Packages. 0050 * See also hwloc_distances_transform() for applying some transformations 0051 * to the structure. 0052 */ 0053 struct hwloc_distances_s { 0054 unsigned nbobjs; /**< \brief Number of objects described by the distance matrix. */ 0055 hwloc_obj_t *objs; /**< \brief Array of objects described by the distance matrix. 0056 * These objects are not in any particular order, 0057 * see hwloc_distances_obj_index() and hwloc_distances_obj_pair_values() 0058 * for easy ways to find objects in this array and their corresponding values. 0059 */ 0060 unsigned long kind; /**< \brief OR'ed set of ::hwloc_distances_kind_e. */ 0061 hwloc_uint64_t *values; /**< \brief Matrix of distances between objects, stored as a one-dimension array. 0062 * 0063 * Distance from i-th to j-th object is stored in slot i*nbobjs+j. 0064 * The meaning of the value depends on the \p kind attribute. 0065 */ 0066 }; 0067 0068 /** \brief Kinds of distance matrices. 0069 * 0070 * The \p kind attribute of struct hwloc_distances_s is a OR'ed set 0071 * of kinds. 0072 * 0073 * Each distance matrix may have only one kind among HWLOC_DISTANCES_KIND_FROM_* 0074 * specifying where distance information comes from, 0075 * and one kind among HWLOC_DISTANCES_KIND_MEANS_* specifying 0076 * whether values are latencies or bandwidths. 0077 */ 0078 enum hwloc_distances_kind_e { 0079 /** \brief These distances were obtained from the operating system or hardware. 0080 * \hideinitializer 0081 */ 0082 HWLOC_DISTANCES_KIND_FROM_OS = (1UL<<0), 0083 /** \brief These distances were provided by the user. 0084 * \hideinitializer 0085 */ 0086 HWLOC_DISTANCES_KIND_FROM_USER = (1UL<<1), 0087 0088 /** \brief Distance values are similar to latencies between objects. 0089 * Values are smaller for closer objects, hence minimal on the diagonal 0090 * of the matrix (distance between an object and itself). 0091 * It could also be the number of network hops between objects, etc. 0092 * \hideinitializer 0093 */ 0094 HWLOC_DISTANCES_KIND_MEANS_LATENCY = (1UL<<2), 0095 /** \brief Distance values are similar to bandwidths between objects. 0096 * Values are higher for closer objects, hence maximal on the diagonal 0097 * of the matrix (distance between an object and itself). 0098 * Such values are currently ignored for distance-based grouping. 0099 * \hideinitializer 0100 */ 0101 HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3), 0102 0103 /** \brief This distances structure covers objects of different types. 0104 * This may apply to the "NVLinkBandwidth" structure in presence 0105 * of a NVSwitch or POWER processor NVLink port. 0106 * \hideinitializer 0107 */ 0108 HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4) 0109 }; 0110 0111 /** \brief Retrieve distance matrices. 0112 * 0113 * Retrieve distance matrices from the topology into the \p distances array. 0114 * 0115 * \p flags is currently unused, should be \c 0. 0116 * 0117 * \p kind serves as a filter. If \c 0, all distance matrices are returned. 0118 * If it contains some HWLOC_DISTANCES_KIND_FROM_*, only distance matrices 0119 * whose kind matches one of these are returned. 0120 * If it contains some HWLOC_DISTANCES_KIND_MEANS_*, only distance matrices 0121 * whose kind matches one of these are returned. 0122 * 0123 * On input, \p nr points to the number of distance matrices that may be stored 0124 * in \p distances. 0125 * On output, \p nr points to the number of distance matrices that were actually 0126 * found, even if some of them couldn't be stored in \p distances. 0127 * Distance matrices that couldn't be stored are ignored, but the function still 0128 * returns success (\c 0). The caller may find out by comparing the value pointed 0129 * by \p nr before and after the function call. 0130 * 0131 * Each distance matrix returned in the \p distances array should be released 0132 * by the caller using hwloc_distances_release(). 0133 * 0134 * \return 0 on success, -1 on error. 0135 */ 0136 HWLOC_DECLSPEC int 0137 hwloc_distances_get(hwloc_topology_t topology, 0138 unsigned *nr, struct hwloc_distances_s **distances, 0139 unsigned long kind, unsigned long flags); 0140 0141 /** \brief Retrieve distance matrices for object at a specific depth in the topology. 0142 * 0143 * Identical to hwloc_distances_get() with the additional \p depth filter. 0144 * 0145 * \return 0 on success, -1 on error. 0146 */ 0147 HWLOC_DECLSPEC int 0148 hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, 0149 unsigned *nr, struct hwloc_distances_s **distances, 0150 unsigned long kind, unsigned long flags); 0151 0152 /** \brief Retrieve distance matrices for object of a specific type. 0153 * 0154 * Identical to hwloc_distances_get() with the additional \p type filter. 0155 * 0156 * \return 0 on success, -1 on error. 0157 */ 0158 HWLOC_DECLSPEC int 0159 hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, 0160 unsigned *nr, struct hwloc_distances_s **distances, 0161 unsigned long kind, unsigned long flags); 0162 0163 /** \brief Retrieve a distance matrix with the given name. 0164 * 0165 * Usually only one distances structure may match a given name. 0166 * 0167 * The name of the most common structure is "NUMALatency". 0168 * Others include "XGMIBandwidth", "XGMIHops", "XeLinkBandwidth", 0169 * and "NVLinkBandwidth". 0170 * 0171 * \return 0 on success, -1 on error. 0172 */ 0173 HWLOC_DECLSPEC int 0174 hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, 0175 unsigned *nr, struct hwloc_distances_s **distances, 0176 unsigned long flags); 0177 0178 /** \brief Get a description of what a distances structure contains. 0179 * 0180 * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT), 0181 * or \c NULL if unknown. 0182 * 0183 * \return the constant string with the name of the distance structure. 0184 * 0185 * \note The returned name should not be freed by the caller, 0186 * it belongs to the hwloc library. 0187 */ 0188 HWLOC_DECLSPEC const char * 0189 hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances); 0190 0191 /** \brief Release a distance matrix structure previously returned by hwloc_distances_get(). 0192 * 0193 * \note This function is not required if the structure is removed with hwloc_distances_release_remove(). 0194 */ 0195 HWLOC_DECLSPEC void 0196 hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances); 0197 0198 /** \brief Transformations of distances structures. */ 0199 enum hwloc_distances_transform_e { 0200 /** \brief Remove \c NULL objects from the distances structure. 0201 * 0202 * Every object that was replaced with \c NULL in the \p objs array 0203 * is removed and the \p values array is updated accordingly. 0204 * 0205 * At least \c 2 objects must remain, otherwise hwloc_distances_transform() 0206 * will return \c -1 with \p errno set to \c EINVAL. 0207 * 0208 * \p kind will be updated with or without ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES 0209 * according to the remaining objects. 0210 * 0211 * \hideinitializer 0212 */ 0213 HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL = 0, 0214 0215 /** \brief Replace bandwidth values with a number of links. 0216 * 0217 * Usually all values will be either \c 0 (no link) or \c 1 (one link). 0218 * However some matrices could get larger values if some pairs of 0219 * peers are connected by different numbers of links. 0220 * 0221 * Values on the diagonal are set to \c 0. 0222 * 0223 * This transformation only applies to bandwidth matrices. 0224 * 0225 * \hideinitializer 0226 */ 0227 HWLOC_DISTANCES_TRANSFORM_LINKS = 1, 0228 0229 /** \brief Merge switches with multiple ports into a single object. 0230 * This currently only applies to NVSwitches where GPUs seem connected to different 0231 * separate switch ports in the NVLinkBandwidth matrix. This transformation will 0232 * replace all of them with the same port connected to all GPUs. 0233 * Other ports are removed by applying ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally. 0234 * \hideinitializer 0235 */ 0236 HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2, 0237 0238 /** \brief Apply a transitive closure to the matrix to connect objects across switches. 0239 * This currently only applies to GPUs and NVSwitches in the NVLinkBandwidth matrix. 0240 * All pairs of GPUs will be reported as directly connected. 0241 * \hideinitializer 0242 */ 0243 HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3 0244 }; 0245 0246 /** \brief Apply a transformation to a distances structure. 0247 * 0248 * Modify a distances structure that was previously obtained with 0249 * hwloc_distances_get() or one of its variants. 0250 * 0251 * This modifies the local copy of the distances structures but does 0252 * not modify the distances information stored inside the topology 0253 * (retrieved by another call to hwloc_distances_get() or exported to XML). 0254 * To do so, one should add a new distances structure with same 0255 * name, kind, objects and values (see \ref hwlocality_distances_add) 0256 * and then remove this old one with hwloc_distances_release_remove(). 0257 * 0258 * \p transform must be one of the transformations listed 0259 * in ::hwloc_distances_transform_e. 0260 * 0261 * These transformations may modify the contents of the \p objs or \p values arrays. 0262 * 0263 * \p transform_attr must be \c NULL for now. 0264 * 0265 * \p flags must be \c 0 for now. 0266 * 0267 * \return 0 on success, -1 on error for instance if flags are invalid. 0268 * 0269 * \note Objects in distances array \p objs may be directly modified 0270 * in place without using hwloc_distances_transform(). 0271 * One may use hwloc_get_obj_with_same_locality() to easily convert 0272 * between similar objects of different types. 0273 */ 0274 HWLOC_DECLSPEC int hwloc_distances_transform(hwloc_topology_t topology, struct hwloc_distances_s *distances, 0275 enum hwloc_distances_transform_e transform, 0276 void *transform_attr, 0277 unsigned long flags); 0278 0279 /** @} */ 0280 0281 0282 0283 /** \defgroup hwlocality_distances_consult Helpers for consulting distance matrices 0284 * @{ 0285 */ 0286 0287 /** \brief Find the index of an object in a distances structure. 0288 * 0289 * \return the index of the object in the distances structure if any. 0290 * \return -1 if object \p obj is not involved in structure \p distances. 0291 */ 0292 static __hwloc_inline int 0293 hwloc_distances_obj_index(struct hwloc_distances_s *distances, hwloc_obj_t obj) 0294 { 0295 unsigned i; 0296 for(i=0; i<distances->nbobjs; i++) 0297 if (distances->objs[i] == obj) 0298 return (int)i; 0299 return -1; 0300 } 0301 0302 /** \brief Find the values between two objects in a distance matrices. 0303 * 0304 * The distance from \p obj1 to \p obj2 is stored in the value pointed by 0305 * \p value1to2 and reciprocally. 0306 * 0307 * \return 0 on success. 0308 * \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances. 0309 */ 0310 static __hwloc_inline int 0311 hwloc_distances_obj_pair_values(struct hwloc_distances_s *distances, 0312 hwloc_obj_t obj1, hwloc_obj_t obj2, 0313 hwloc_uint64_t *value1to2, hwloc_uint64_t *value2to1) 0314 { 0315 int i1 = hwloc_distances_obj_index(distances, obj1); 0316 int i2 = hwloc_distances_obj_index(distances, obj2); 0317 if (i1 < 0 || i2 < 0) 0318 return -1; 0319 *value1to2 = distances->values[i1 * distances->nbobjs + i2]; 0320 *value2to1 = distances->values[i2 * distances->nbobjs + i1]; 0321 return 0; 0322 } 0323 0324 /** @} */ 0325 0326 0327 0328 /** \defgroup hwlocality_distances_add Add distances between objects 0329 * 0330 * The usual way to add distances is: 0331 * \code 0332 * hwloc_distances_add_handle_t handle; 0333 * int err = -1; 0334 * handle = hwloc_distances_add_create(topology, "name", kind, 0); 0335 * if (handle) { 0336 * err = hwloc_distances_add_values(topology, handle, nbobjs, objs, values, 0); 0337 * if (!err) 0338 * err = hwloc_distances_add_commit(topology, handle, flags); 0339 * } 0340 * \endcode 0341 * If \p err is \c 0 at the end, then addition was successful. 0342 * 0343 * @{ 0344 */ 0345 0346 /** \brief Handle to a new distances structure during its addition to the topology. */ 0347 typedef void * hwloc_distances_add_handle_t; 0348 0349 /** \brief Create a new empty distances structure. 0350 * 0351 * Create an empty distances structure 0352 * to be filled with hwloc_distances_add_values() 0353 * and then committed with hwloc_distances_add_commit(). 0354 * 0355 * Parameter \p name is optional, it may be \c NULL. 0356 * Otherwise, it will be copied internally and may later be freed by the caller. 0357 * 0358 * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e. 0359 * Only one kind of meaning and one kind of provenance may be given if appropriate 0360 * (e.g. ::HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH and ::HWLOC_DISTANCES_KIND_FROM_USER). 0361 * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically set 0362 * according to objects having different types in hwloc_distances_add_values(). 0363 * 0364 * \p flags must be \c 0 for now. 0365 * 0366 * \return A hwloc_distances_add_handle_t that should then be passed 0367 * to hwloc_distances_add_values() and hwloc_distances_add_commit(). 0368 * 0369 * \return \c NULL on error. 0370 */ 0371 HWLOC_DECLSPEC hwloc_distances_add_handle_t 0372 hwloc_distances_add_create(hwloc_topology_t topology, 0373 const char *name, unsigned long kind, 0374 unsigned long flags); 0375 0376 /** \brief Specify the objects and values in a new empty distances structure. 0377 * 0378 * Specify the objects and values for a new distances structure 0379 * that was returned as a handle by hwloc_distances_add_create(). 0380 * The structure must then be committed with hwloc_distances_add_commit(). 0381 * 0382 * The number of objects is \p nbobjs and the array of objects is \p objs. 0383 * Distance values are stored as a one-dimension array in \p values. 0384 * The distance from object i to object j is in slot i*nbobjs+j. 0385 * 0386 * \p nbobjs must be at least 2. 0387 * 0388 * Arrays \p objs and \p values will be copied internally, 0389 * they may later be freed by the caller. 0390 * 0391 * On error, the temporary distances structure and its content are destroyed. 0392 * 0393 * \p flags must be \c 0 for now. 0394 * 0395 * \return 0 on success. 0396 * \return -1 on error. 0397 */ 0398 HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology, 0399 hwloc_distances_add_handle_t handle, 0400 unsigned nbobjs, hwloc_obj_t *objs, 0401 hwloc_uint64_t *values, 0402 unsigned long flags); 0403 0404 /** \brief Flags for adding a new distances to a topology. */ 0405 enum hwloc_distances_add_flag_e { 0406 /** \brief Try to group objects based on the newly provided distance information. 0407 * Grouping is only performed when the distances structure contains latencies, 0408 * and when all objects are of the same type. 0409 * \hideinitializer 0410 */ 0411 HWLOC_DISTANCES_ADD_FLAG_GROUP = (1UL<<0), 0412 /** \brief If grouping, consider the distance values as inaccurate and relax the 0413 * comparisons during the grouping algorithms. The actual accuracy may be modified 0414 * through the HWLOC_GROUPING_ACCURACY environment variable (see \ref envvar). 0415 * \hideinitializer 0416 */ 0417 HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE = (1UL<<1) 0418 }; 0419 0420 /** \brief Commit a new distances structure. 0421 * 0422 * This function finalizes the distances structure and inserts in it the topology. 0423 * 0424 * Parameter \p handle was previously returned by hwloc_distances_add_create(). 0425 * Then objects and values were specified with hwloc_distances_add_values(). 0426 * 0427 * \p flags configures the behavior of the function using an optional OR'ed set of 0428 * ::hwloc_distances_add_flag_e. 0429 * It may be used to request the grouping of existing objects based on distances. 0430 * 0431 * On error, the temporary distances structure and its content are destroyed. 0432 * 0433 * \return 0 on success. 0434 * \return -1 on error. 0435 */ 0436 HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology, 0437 hwloc_distances_add_handle_t handle, 0438 unsigned long flags); 0439 0440 /** @} */ 0441 0442 0443 0444 /** \defgroup hwlocality_distances_remove Remove distances between objects 0445 * @{ 0446 */ 0447 0448 /** \brief Remove all distance matrices from a topology. 0449 * 0450 * Remove all distance matrices, either provided by the user or 0451 * gathered through the OS. 0452 * 0453 * If these distances were used to group objects, these additional 0454 * Group objects are not removed from the topology. 0455 * 0456 * \return 0 on success, -1 on error. 0457 */ 0458 HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology); 0459 0460 /** \brief Remove distance matrices for objects at a specific depth in the topology. 0461 * 0462 * Identical to hwloc_distances_remove() but only applies to one level of the topology. 0463 * 0464 * \return 0 on success, -1 on error. 0465 */ 0466 HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth); 0467 0468 /** \brief Remove distance matrices for objects of a specific type in the topology. 0469 * 0470 * Identical to hwloc_distances_remove() but only applies to one level of the topology. 0471 * 0472 * \return 0 on success, -1 on error. 0473 */ 0474 static __hwloc_inline int 0475 hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type) 0476 { 0477 int depth = hwloc_get_type_depth(topology, type); 0478 if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE) 0479 return 0; 0480 return hwloc_distances_remove_by_depth(topology, depth); 0481 } 0482 0483 /** \brief Release and remove the given distance matrice from the topology. 0484 * 0485 * This function includes a call to hwloc_distances_release(). 0486 * 0487 * \return 0 on success, -1 on error. 0488 */ 0489 HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances); 0490 0491 /** @} */ 0492 0493 0494 #ifdef __cplusplus 0495 } /* extern "C" */ 0496 #endif 0497 0498 0499 #endif /* HWLOC_DISTANCES_H */
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |