|
|
|||
File indexing completed on 2026-05-10 08:44:34
//===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares helper functions for running LLVM in a multi-threaded
// environment.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_THREADING_H
#define LLVM_SUPPORT_THREADING_H

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/Support/Compiler.h"
#include <optional>

// Select the call_once implementation: LLVM_THREADING_USE_STD_CALL_ONCE is 1
// when std::call_once is trusted on this platform, and 0 when the hand-rolled
// atomic fallback defined later in this header must be used instead.
#if defined(_MSC_VER)
// MSVC's call_once implementation worked since VS 2015, which is the minimum
// supported version as of this writing.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    (defined(_LIBCPP_VERSION) ||                                               \
     !(defined(__NetBSD__) || defined(__OpenBSD__) || defined(__powerpc__)))
// std::call_once from libc++ is used on all Unix platforms. Other
// implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    (defined(__powerpc__) && defined(__LITTLE_ENDIAN__))
// Little-endian PowerPC is additionally accepted even without libc++.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#else
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
#endif

#if LLVM_THREADING_USE_STD_CALL_ONCE
#include <mutex>
#else
#include "llvm/Support/Atomic.h"
#endif

namespace llvm {
class Twine;

/// Returns true if LLVM is compiled with support for multi-threading, and
/// false otherwise.
constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }

#if LLVM_THREADING_USE_STD_CALL_ONCE

typedef std::once_flag once_flag;

#else

/// States a once_flag can be in when the atomic fallback is used:
/// no thread has started the call yet, a thread is currently running it,
/// or the call has completed.
enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };

/// The llvm::once_flag structure
///
/// This type is modeled after std::once_flag to use with llvm::call_once.
/// This structure must be used as an opaque object. It is a struct to force
/// autoinitialization and behave like std::once_flag.
struct once_flag {
  volatile sys::cas_flag status = Uninitialized;
};

#endif

/// Execute the function specified as a parameter once.
///
/// Typical usage:
/// \code
///   void foo() {...};
///   ...
///   static once_flag flag;
///   call_once(flag, foo);
/// \endcode
///
/// \param flag Flag used for tracking whether or not this has run.
/// \param F Function to call once.
template <typename Function, typename... Args>
void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
#if LLVM_THREADING_USE_STD_CALL_ONCE
  std::call_once(flag, std::forward<Function>(F),
                 std::forward<Args>(ArgList)...);
#else
  // For other platforms we use a generic (if brittle) version based on our
  // atomics.
  // Attempt to claim the flag: Uninitialized -> Wait. Exactly one thread
  // observes Uninitialized and becomes the caller.
  sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
  if (old_val == Uninitialized) {
    // This thread won the race: invoke the callable, then publish completion
    // with a fence so other threads see the callable's writes before Done.
    std::forward<Function>(F)(std::forward<Args>(ArgList)...);
    sys::MemoryFence();
    TsanIgnoreWritesBegin();
    TsanHappensBefore(&flag.status);
    flag.status = Done;
    TsanIgnoreWritesEnd();
  } else {
    // Wait until any thread doing the call has finished.
    sys::cas_flag tmp = flag.status;
    sys::MemoryFence();
    while (tmp != Done) { // spin until the winning thread publishes Done
      tmp = flag.status;
      sys::MemoryFence();
    }
  }
  TsanHappensAfter(&flag.status);
#endif
}

/// This tells how a thread pool will be used
class ThreadPoolStrategy {
public:
  // The default value (0) means all available threads should be used,
  // taking the affinity mask into account. If set, this value only represents
  // a suggested high bound, the runtime might choose a lower value (not
  // higher).
  unsigned ThreadsRequested = 0;

  // If SMT is active, use hyper threads. If false, there will be only one
  // std::thread per core.
  bool UseHyperThreads = true;

  // If set, will constrain 'ThreadsRequested' to the number of hardware
  // threads, or hardware cores.
  bool Limit = false;

  /// Retrieves the max available threads for the current strategy. This
  /// accounts for affinity masks and takes advantage of all CPU sockets.
  unsigned compute_thread_count() const;

  /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
  /// multi-socket system, this ensures threads are assigned to all CPU
  /// sockets. \p ThreadPoolNum represents a number bounded by [0,
  /// compute_thread_count()).
  void apply_thread_strategy(unsigned ThreadPoolNum) const;

  /// Finds the CPU socket where a thread should go. Returns 'std::nullopt' if
  /// the thread shall remain on the actual CPU socket.
  std::optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
};

/// Build a strategy from a number of threads as a string provided in \p Num.
/// When Num is above the max number of threads specified by the \p Default
/// strategy, we attempt to equally allocate the threads on all CPU sockets.
/// "0" or an empty string will return the \p Default strategy.
0150 /// "all" for using all hardware threads. 0151 std::optional<ThreadPoolStrategy> 0152 get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {}); 0153 0154 /// Returns a thread strategy for tasks requiring significant memory or other 0155 /// resources. To be used for workloads where hardware_concurrency() proves to 0156 /// be less efficient. Avoid this strategy if doing lots of I/O. Currently 0157 /// based on physical cores, if available for the host system, otherwise falls 0158 /// back to hardware_concurrency(). Returns 1 when LLVM is configured with 0159 /// LLVM_ENABLE_THREADS = OFF. 0160 inline ThreadPoolStrategy 0161 heavyweight_hardware_concurrency(unsigned ThreadCount = 0) { 0162 ThreadPoolStrategy S; 0163 S.UseHyperThreads = false; 0164 S.ThreadsRequested = ThreadCount; 0165 return S; 0166 } 0167 0168 /// Like heavyweight_hardware_concurrency() above, but builds a strategy 0169 /// based on the rules described for get_threadpool_strategy(). 0170 /// If \p Num is invalid, returns a default strategy where one thread per 0171 /// hardware core is used. 0172 inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) { 0173 std::optional<ThreadPoolStrategy> S = 0174 get_threadpool_strategy(Num, heavyweight_hardware_concurrency()); 0175 if (S) 0176 return *S; 0177 return heavyweight_hardware_concurrency(); 0178 } 0179 0180 /// Returns a default thread strategy where all available hardware resources 0181 /// are to be used, except for those initially excluded by an affinity mask. 0182 /// This function takes affinity into consideration. Returns 1 when LLVM is 0183 /// configured with LLVM_ENABLE_THREADS=OFF. 0184 inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) { 0185 ThreadPoolStrategy S; 0186 S.ThreadsRequested = ThreadCount; 0187 return S; 0188 } 0189 0190 /// Like hardware_concurrency() above, but builds a strategy 0191 /// based on the rules described for get_threadpool_strategy(). 
0192 /// If \p Num is invalid, returns a default strategy where one thread per 0193 /// hardware core is used. 0194 inline ThreadPoolStrategy hardware_concurrency(StringRef Num) { 0195 std::optional<ThreadPoolStrategy> S = 0196 get_threadpool_strategy(Num, hardware_concurrency()); 0197 if (S) 0198 return *S; 0199 return hardware_concurrency(); 0200 } 0201 0202 /// Returns an optimal thread strategy to execute specified amount of tasks. 0203 /// This strategy should prevent us from creating too many threads if we 0204 /// occasionaly have an unexpectedly small amount of tasks. 0205 inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) { 0206 ThreadPoolStrategy S; 0207 S.Limit = true; 0208 S.ThreadsRequested = TaskCount; 0209 return S; 0210 } 0211 0212 /// Return the current thread id, as used in various OS system calls. 0213 /// Note that not all platforms guarantee that the value returned will be 0214 /// unique across the entire system, so portable code should not assume 0215 /// this. 0216 uint64_t get_threadid(); 0217 0218 /// Get the maximum length of a thread name on this platform. 0219 /// A value of 0 means there is no limit. 0220 uint32_t get_max_thread_name_length(); 0221 0222 /// Set the name of the current thread. Setting a thread's name can 0223 /// be helpful for enabling useful diagnostics under a debugger or when 0224 /// logging. The level of support for setting a thread's name varies 0225 /// wildly across operating systems, and we only make a best effort to 0226 /// perform the operation on supported platforms. No indication of success 0227 /// or failure is returned. 0228 void set_thread_name(const Twine &Name); 0229 0230 /// Get the name of the current thread. The level of support for 0231 /// getting a thread's name varies wildly across operating systems, and it 0232 /// is not even guaranteed that if you can successfully set a thread's name 0233 /// that you can later get it back. 
This function is intended for diagnostic 0234 /// purposes, and as with setting a thread's name no indication of whether 0235 /// the operation succeeded or failed is returned. 0236 void get_thread_name(SmallVectorImpl<char> &Name); 0237 0238 /// Returns a mask that represents on which hardware thread, core, CPU, NUMA 0239 /// group, the calling thread can be executed. On Windows, threads cannot 0240 /// cross CPU sockets boundaries. 0241 llvm::BitVector get_thread_affinity_mask(); 0242 0243 /// Returns how many physical CPUs or NUMA groups the system has. 0244 unsigned get_cpus(); 0245 0246 /// Returns how many physical cores (as opposed to logical cores returned from 0247 /// thread::hardware_concurrency(), which includes hyperthreads). 0248 /// Returns -1 if unknown for the current host system. 0249 int get_physical_cores(); 0250 0251 enum class ThreadPriority { 0252 /// Lower the current thread's priority as much as possible. Can be used 0253 /// for long-running tasks that are not time critical; more energy- 0254 /// efficient than Low. 0255 Background = 0, 0256 0257 /// Lower the current thread's priority such that it does not affect 0258 /// foreground tasks significantly. This is a good default for long- 0259 /// running, latency-insensitive tasks to make sure cpu is not hogged 0260 /// by this task. 0261 Low = 1, 0262 0263 /// Restore the current thread's priority to default scheduling priority. 0264 Default = 2, 0265 }; 0266 enum class SetThreadPriorityResult { FAILURE, SUCCESS }; 0267 SetThreadPriorityResult set_thread_priority(ThreadPriority Priority); 0268 } 0269 0270 #endif
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|