Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 10:01:15

0001 /*
0002  * Copyright © 2010-2024 Inria.  All rights reserved.
0003  * See COPYING in top-level directory.
0004  */
0005 
0006 /** \file
0007  * \brief Object distances.
0008  */
0009 
0010 #ifndef HWLOC_DISTANCES_H
0011 #define HWLOC_DISTANCES_H
0012 
0013 #ifndef HWLOC_H
0014 #error Please include the main hwloc.h instead
0015 #endif
0016 
0017 
0018 #ifdef __cplusplus
0019 extern "C" {
0020 #elif 0
0021 }
0022 #endif
0023 
0024 
0025 /** \defgroup hwlocality_distances_get Retrieve distances between objects
0026  * @{
0027  */
0028 
0029 /** \brief Matrix of distances between a set of objects.
0030  *
0031  * The most common matrix contains latencies between NUMA nodes
0032  * (as reported in the System Locality Distance Information Table (SLIT)
0033  * in the ACPI specification), which may or may not be physically accurate.
0034  * It corresponds to the latency for accessing the memory of one node
0035  * from a core in another node.
0036  * The corresponding kind is ::HWLOC_DISTANCES_KIND_MEANS_LATENCY | ::HWLOC_DISTANCES_KIND_FROM_USER.
0037  * The name of this distances structure is "NUMALatency".
0038  *
0039  * The matrix may also contain bandwidths between random sets of objects,
0040  * possibly provided by the user, as specified in the \p kind attribute.
0041  * Others common distance structures include and "XGMIBandwidth", "XGMIHops",
0042  * "XeLinkBandwidth" and "NVLinkBandwidth".
0043  *
0044  * Pointers \p objs and \p values should not be replaced, reallocated, freed, etc.
0045  * However callers are allowed to modify \p kind as well as the contents
0046  * of \p objs and \p values arrays.
0047  * For instance, if there is a single NUMA node per Package,
0048  * hwloc_get_obj_with_same_locality() may be used to convert between them
0049  * and replace NUMA nodes in the \p objs array with the corresponding Packages.
0050  * See also hwloc_distances_transform() for applying some transformations
0051  * to the structure.
0052  */
0053 struct hwloc_distances_s {
0054   unsigned nbobjs;      /**< \brief Number of objects described by the distance matrix. */
0055   hwloc_obj_t *objs;        /**< \brief Array of objects described by the distance matrix.
0056                  * These objects are not in any particular order,
0057                  * see hwloc_distances_obj_index() and hwloc_distances_obj_pair_values()
0058                  * for easy ways to find objects in this array and their corresponding values.
0059                  */
0060   unsigned long kind;       /**< \brief OR'ed set of ::hwloc_distances_kind_e. */
0061   hwloc_uint64_t *values;   /**< \brief Matrix of distances between objects, stored as a one-dimension array.
0062                  *
0063                  * Distance from i-th to j-th object is stored in slot i*nbobjs+j.
0064                  * The meaning of the value depends on the \p kind attribute.
0065                  */
0066 };
0067 
/** \brief Kinds of distance matrices.
 *
 * The \p kind attribute of struct hwloc_distances_s is an OR'ed set
 * of the kinds listed here.
 *
 * Each distance matrix may have only one kind among HWLOC_DISTANCES_KIND_FROM_*,
 * which tells where the distance information comes from,
 * and one kind among HWLOC_DISTANCES_KIND_MEANS_*, which tells
 * whether values are latencies or bandwidths.
 */
enum hwloc_distances_kind_e {
  /** \brief The operating system or the hardware provided these distances.
   * \hideinitializer
   */
  HWLOC_DISTANCES_KIND_FROM_OS = (1UL << 0),

  /** \brief The user provided these distances.
   * \hideinitializer
   */
  HWLOC_DISTANCES_KIND_FROM_USER = (1UL << 1),

  /** \brief Distance values are similar to latencies between objects.
   *
   * Closer objects get smaller values, hence values are minimal on the
   * diagonal of the matrix (distance between an object and itself).
   * The values could also be the number of network hops between objects, etc.
   * \hideinitializer
   */
  HWLOC_DISTANCES_KIND_MEANS_LATENCY = (1UL << 2),

  /** \brief Distance values are similar to bandwidths between objects.
   *
   * Closer objects get higher values, hence values are maximal on the
   * diagonal of the matrix (distance between an object and itself).
   * Such values are currently ignored for distance-based grouping.
   * \hideinitializer
   */
  HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL << 3),

  /** \brief This distances structure covers objects of different types.
   *
   * This may apply to the "NVLinkBandwidth" structure in presence
   * of a NVSwitch or POWER processor NVLink port.
   * \hideinitializer
   */
  HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL << 4)
};
0110 
0111 /** \brief Retrieve distance matrices.
0112  *
0113  * Retrieve distance matrices from the topology into the \p distances array.
0114  *
0115  * \p flags is currently unused, should be \c 0.
0116  *
0117  * \p kind serves as a filter. If \c 0, all distance matrices are returned.
0118  * If it contains some HWLOC_DISTANCES_KIND_FROM_*, only distance matrices
0119  * whose kind matches one of these are returned.
0120  * If it contains some HWLOC_DISTANCES_KIND_MEANS_*, only distance matrices
0121  * whose kind matches one of these are returned.
0122  *
0123  * On input, \p nr points to the number of distance matrices that may be stored
0124  * in \p distances.
0125  * On output, \p nr points to the number of distance matrices that were actually
0126  * found, even if some of them couldn't be stored in \p distances.
0127  * Distance matrices that couldn't be stored are ignored, but the function still
0128  * returns success (\c 0). The caller may find out by comparing the value pointed
0129  * by \p nr before and after the function call.
0130  *
0131  * Each distance matrix returned in the \p distances array should be released
0132  * by the caller using hwloc_distances_release().
0133  *
0134  * \return 0 on success, -1 on error.
0135  */
0136 HWLOC_DECLSPEC int
0137 hwloc_distances_get(hwloc_topology_t topology,
0138             unsigned *nr, struct hwloc_distances_s **distances,
0139             unsigned long kind, unsigned long flags);
0140 
0141 /** \brief Retrieve distance matrices for objects at a specific depth in the topology.
0142  *
0143  * Identical to hwloc_distances_get() with the additional \p depth filter.
0144  *
0145  * \return 0 on success, -1 on error.
0146  */
0147 HWLOC_DECLSPEC int
0148 hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
0149                  unsigned *nr, struct hwloc_distances_s **distances,
0150                  unsigned long kind, unsigned long flags);
0151 
0152 /** \brief Retrieve distance matrices for objects of a specific type.
0153  *
0154  * Identical to hwloc_distances_get() with the additional \p type filter.
0155  *
0156  * \return 0 on success, -1 on error.
0157  */
0158 HWLOC_DECLSPEC int
0159 hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
0160                 unsigned *nr, struct hwloc_distances_s **distances,
0161                 unsigned long kind, unsigned long flags);
0162 
0163 /** \brief Retrieve a distance matrix with the given name.
0164  *
0165  * Usually only one distances structure may match a given name.
0166  *
0167  * The name of the most common structure is "NUMALatency".
0168  * Others include "XGMIBandwidth", "XGMIHops", "XeLinkBandwidth",
0169  * and "NVLinkBandwidth".
0170  *
0171  * \return 0 on success, -1 on error.
0172  */
0173 HWLOC_DECLSPEC int
0174 hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
0175                 unsigned *nr, struct hwloc_distances_s **distances,
0176                 unsigned long flags);
0177 
0178 /** \brief Get a description of what a distances structure contains.
0179  *
0180  * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT),
0181  * or \c NULL if unknown.
0182  *
0183  * \return the constant string with the name of the distance structure.
0184  *
0185  * \note The returned name should not be freed by the caller,
0186  * it belongs to the hwloc library.
0187  */
0188 HWLOC_DECLSPEC const char *
0189 hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances);
0190 
0191 /** \brief Release a distance matrix structure previously returned by hwloc_distances_get().
0192  *
0193  * \note This function is not required if the structure is removed with hwloc_distances_release_remove().
0194  */
0195 HWLOC_DECLSPEC void
0196 hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances);
0197 
/** \brief Transformations of distances structures. */
enum hwloc_distances_transform_e {
  /** \brief Remove \c NULL objects from the distances structure.
   *
   * Each entry of the \p objs array that was replaced with \c NULL
   * is removed, and the \p values array is updated accordingly.
   *
   * At least \c 2 objects must remain, otherwise hwloc_distances_transform()
   * will return \c -1 with \p errno set to \c EINVAL.
   *
   * ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES is set or cleared in \p kind
   * according to the remaining objects.
   *
   * \hideinitializer
   */
  HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL = 0,

  /** \brief Replace bandwidth values with a number of links.
   *
   * Usually all values will be either \c 0 (no link) or \c 1 (one link).
   * However some matrices could get larger values if some pairs of
   * peers are connected by different numbers of links.
   *
   * Values on the diagonal are set to \c 0.
   *
   * This transformation only applies to bandwidth matrices.
   *
   * \hideinitializer
   */
  HWLOC_DISTANCES_TRANSFORM_LINKS = 1,

  /** \brief Merge switches with multiple ports into a single object.
   *
   * This currently only applies to NVSwitches where GPUs seem connected
   * to different separate switch ports in the NVLinkBandwidth matrix.
   * This transformation will replace all of them with the same port
   * connected to all GPUs.
   * Other ports are removed by applying
   * ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally.
   * \hideinitializer
   */
  HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2,

  /** \brief Apply a transitive closure to the matrix to connect objects across switches.
   *
   * This currently only applies to GPUs and NVSwitches in the
   * NVLinkBandwidth matrix.
   * All pairs of GPUs will be reported as directly connected.
   * \hideinitializer
   */
  HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3
};
0245 
0246 /** \brief Apply a transformation to a distances structure.
0247  *
0248  * Modify a distances structure that was previously obtained with
0249  * hwloc_distances_get() or one of its variants.
0250  *
0251  * This modifies the local copy of the distances structures but does
0252  * not modify the distances information stored inside the topology
0253  * (retrieved by another call to hwloc_distances_get() or exported to XML).
0254  * To do so, one should add a new distances structure with same
0255  * name, kind, objects and values (see \ref hwlocality_distances_add)
0256  * and then remove this old one with hwloc_distances_release_remove().
0257  *
0258  * \p transform must be one of the transformations listed
0259  * in ::hwloc_distances_transform_e.
0260  *
0261  * These transformations may modify the contents of the \p objs or \p values arrays.
0262  *
0263  * \p transform_attr must be \c NULL for now.
0264  *
0265  * \p flags must be \c 0 for now.
0266  *
0267  * \return 0 on success, -1 on error, for instance if flags are invalid.
0268  *
0269  * \note Objects in distances array \p objs may be directly modified
0270  * in place without using hwloc_distances_transform().
0271  * One may use hwloc_get_obj_with_same_locality() to easily convert
0272  * between similar objects of different types.
0273  */
0274 HWLOC_DECLSPEC int hwloc_distances_transform(hwloc_topology_t topology, struct hwloc_distances_s *distances,
0275                                              enum hwloc_distances_transform_e transform,
0276                                              void *transform_attr,
0277                                              unsigned long flags);
0278 
0279 /** @} */
0280 
0281 
0282 
0283 /** \defgroup hwlocality_distances_consult Helpers for consulting distance matrices
0284  * @{
0285  */
0286 
0287 /** \brief Find the index of an object in a distances structure.
0288  *
0289  * \return the index of the object in the distances structure if any.
0290  * \return -1 if object \p obj is not involved in structure \p distances.
0291  */
0292 static __hwloc_inline int
0293 hwloc_distances_obj_index(struct hwloc_distances_s *distances, hwloc_obj_t obj)
0294 {
0295   unsigned i;
0296   for(i=0; i<distances->nbobjs; i++)
0297     if (distances->objs[i] == obj)
0298       return (int)i;
0299   return -1;
0300 }
0301 
0302 /** \brief Find the values between two objects in a distance matrices.
0303  *
0304  * The distance from \p obj1 to \p obj2 is stored in the value pointed by
0305  * \p value1to2 and reciprocally.
0306  *
0307  * \return 0 on success.
0308  * \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances.
0309  */
0310 static __hwloc_inline int
0311 hwloc_distances_obj_pair_values(struct hwloc_distances_s *distances,
0312                 hwloc_obj_t obj1, hwloc_obj_t obj2,
0313                 hwloc_uint64_t *value1to2, hwloc_uint64_t *value2to1)
0314 {
0315   int i1 = hwloc_distances_obj_index(distances, obj1);
0316   int i2 = hwloc_distances_obj_index(distances, obj2);
0317   if (i1 < 0 || i2 < 0)
0318     return -1;
0319   *value1to2 = distances->values[i1 * distances->nbobjs + i2];
0320   *value2to1 = distances->values[i2 * distances->nbobjs + i1];
0321   return 0;
0322 }
0323 
0324 /** @} */
0325 
0326 
0327 
0328 /** \defgroup hwlocality_distances_add Add distances between objects
0329  *
0330  * The usual way to add distances is:
0331  * \code
0332  * hwloc_distances_add_handle_t handle;
0333  * int err = -1;
0334  * handle = hwloc_distances_add_create(topology, "name", kind, 0);
0335  * if (handle) {
0336  *   err = hwloc_distances_add_values(topology, handle, nbobjs, objs, values, 0);
0337  *   if (!err)
0338  *     err = hwloc_distances_add_commit(topology, handle, flags);
0339  * }
0340  * \endcode
0341  * If \p err is \c 0 at the end, then addition was successful.
0342  *
0343  * @{
0344  */
0345 
/** \brief Handle to a new distances structure while it is being added to the topology. */
typedef void *hwloc_distances_add_handle_t;
0348 
0349 /** \brief Create a new empty distances structure.
0350  *
0351  * Create an empty distances structure
0352  * to be filled with hwloc_distances_add_values()
0353  * and then committed with hwloc_distances_add_commit().
0354  *
0355  * Parameter \p name is optional, it may be \c NULL.
0356  * Otherwise, it will be copied internally and may later be freed by the caller.
0357  *
0358  * \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e.
0359  * Only one kind of meaning and one kind of provenance may be given if appropriate
0360  * (e.g. ::HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH and ::HWLOC_DISTANCES_KIND_FROM_USER).
0361  * Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically set
0362  * according to objects having different types in hwloc_distances_add_values().
0363  *
0364  * \p flags must be \c 0 for now.
0365  *
0366  * \return A hwloc_distances_add_handle_t that should then be passed
0367  * to hwloc_distances_add_values() and hwloc_distances_add_commit().
0368  *
0369  * \return \c NULL on error.
0370  */
0371 HWLOC_DECLSPEC hwloc_distances_add_handle_t
0372 hwloc_distances_add_create(hwloc_topology_t topology,
0373                            const char *name, unsigned long kind,
0374                            unsigned long flags);
0375 
0376 /** \brief Specify the objects and values in a new empty distances structure.
0377  *
0378  * Specify the objects and values for a new distances structure
0379  * that was returned as a handle by hwloc_distances_add_create().
0380  * The structure must then be committed with hwloc_distances_add_commit().
0381  *
0382  * The number of objects is \p nbobjs and the array of objects is \p objs.
0383  * Distance values are stored as a one-dimension array in \p values.
0384  * The distance from object i to object j is in slot i*nbobjs+j.
0385  *
0386  * \p nbobjs must be at least 2.
0387  *
0388  * Arrays \p objs and \p values will be copied internally,
0389  * they may later be freed by the caller.
0390  *
0391  * On error, the temporary distances structure and its content are destroyed.
0392  *
0393  * \p flags must be \c 0 for now.
0394  *
0395  * \return 0 on success.
0396  * \return -1 on error.
0397  */
0398 HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology,
0399                                               hwloc_distances_add_handle_t handle,
0400                                               unsigned nbobjs, hwloc_obj_t *objs,
0401                                               hwloc_uint64_t *values,
0402                                               unsigned long flags);
0403 
/** \brief Flags for adding a new distances structure to a topology. */
enum hwloc_distances_add_flag_e {
  /** \brief Try to group objects based on the newly provided distance information.
   *
   * Grouping is only performed when the distances structure contains latencies,
   * and when all objects are of the same type.
   * \hideinitializer
   */
  HWLOC_DISTANCES_ADD_FLAG_GROUP = (1UL << 0),

  /** \brief If grouping, consider the distance values as inaccurate and relax the
   * comparisons during the grouping algorithms.
   *
   * The actual accuracy may be modified through the HWLOC_GROUPING_ACCURACY
   * environment variable (see \ref envvar).
   * \hideinitializer
   */
  HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE = (1UL << 1)
};
0419 
0420 /** \brief Commit a new distances structure.
0421  *
0422  * This function finalizes the distances structure and inserts it in the topology.
0423  *
0424  * Parameter \p handle was previously returned by hwloc_distances_add_create().
0425  * Then objects and values were specified with hwloc_distances_add_values().
0426  *
0427  * \p flags configures the behavior of the function using an optional OR'ed set of
0428  * ::hwloc_distances_add_flag_e.
0429  * It may be used to request the grouping of existing objects based on distances.
0430  *
0431  * On error, the temporary distances structure and its content are destroyed.
0432  *
0433  * \return 0 on success.
0434  * \return -1 on error.
0435  */
0436 HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology,
0437                                               hwloc_distances_add_handle_t handle,
0438                                               unsigned long flags);
0439 
0440 /** @} */
0441 
0442 
0443 
0444 /** \defgroup hwlocality_distances_remove Remove distances between objects
0445  * @{
0446  */
0447 
0448 /** \brief Remove all distance matrices from a topology.
0449  *
0450  * Remove all distance matrices, either provided by the user or
0451  * gathered through the OS.
0452  *
0453  * If these distances were used to group objects, these additional
0454  * Group objects are not removed from the topology.
0455  *
0456  * \return 0 on success, -1 on error.
0457  */
0458 HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology);
0459 
0460 /** \brief Remove distance matrices for objects at a specific depth in the topology.
0461  *
0462  * Identical to hwloc_distances_remove() but only applies to one level of the topology.
0463  *
0464  * \return 0 on success, -1 on error.
0465  */
0466 HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth);
0467 
0468 /** \brief Remove distance matrices for objects of a specific type in the topology.
0469  *
0470  * Identical to hwloc_distances_remove() but only applies to one level of the topology.
0471  *
0472  * \return 0 on success, -1 on error.
0473  */
0474 static __hwloc_inline int
0475 hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
0476 {
0477   int depth = hwloc_get_type_depth(topology, type);
0478   if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
0479     return 0;
0480   return hwloc_distances_remove_by_depth(topology, depth);
0481 }
0482 
0483 /** \brief Release and remove the given distance matrix from the topology.
0484  *
0485  * This function includes a call to hwloc_distances_release().
0486  *
0487  * \return 0 on success, -1 on error.
0488  */
0489 HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances);
0490 
0491 /** @} */
0492 
0493 
0494 #ifdef __cplusplus
0495 } /* extern "C" */
0496 #endif
0497 
0498 
0499 #endif /* HWLOC_DISTANCES_H */