Frontend/OpenMP/OMPGridValues.h

0001 //====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 ///
0009 /// \file
0010 /// \brief Provides definitions for Target specific Grid Values
0011 ///
0012 //===----------------------------------------------------------------------===//
0013
0014 #ifndef LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
0015 #define LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H
0016
0017 namespace llvm {
0018
0019 namespace omp {
0020
0021 /// \brief Defines various target-specific GPU grid values that must be
0022 ///        consistent between host RTL (plugin), device RTL, and clang.
0023 ///        We can change grid values for a "fat" binary so that different
0024 ///        passes get the correct values when generating code for a
0025 ///        multi-target binary. Both amdgcn and nvptx values are stored in
0026 ///        this file. In the future, should there be differences between GPUs
0027 ///        of the same architecture, then simply make a different array and
0028 ///        use the new array name.
0029 ///
0030 /// Example usage in clang:
0031 ///   const unsigned slot_size =
0032 ///   ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
0033 ///
0034 /// Example usage in libomptarget/deviceRTLs:
0035 ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
0036 ///   #ifdef __AMDGPU__
0037 ///     #define GRIDVAL AMDGPUGridValues
0038 ///   #else
0039 ///     #define GRIDVAL NVPTXGridValues
0040 ///   #endif
0041 ///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
0042 ///   llvm::omp::GRIDVAL().GV_Warp_Size
0043 ///
0044 /// Example usage in libomptarget hsa plugin:
0045 ///   #include "llvm/Frontend/OpenMP/OMPGridValues.h"
0046 ///   #define GRIDVAL AMDGPUGridValues
0047 ///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
0048 ///   llvm::omp::GRIDVAL().GV_Warp_Size
0049 ///
0050 /// Example usage in libomptarget cuda plugin:
0051 ///    #include "llvm/Frontend/OpenMP/OMPGridValues.h"
0052 ///    #define GRIDVAL NVPTXGridValues
0053 ///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
0054 ///    llvm::omp::GRIDVAL().GV_Warp_Size
0055 ///
0056
0057 struct GV {
0058   /// The size reserved for data in a shared memory slot.
0059   unsigned GV_Slot_Size;
0060   /// The default value of maximum number of threads in a worker warp.
0061   unsigned GV_Warp_Size;
0062
0063   constexpr unsigned warpSlotSize() const {
0064     return GV_Warp_Size * GV_Slot_Size;
0065   }
0066
0067   /// the maximum number of teams.
0068   unsigned GV_Max_Teams;
0069   // The default number of teams in the absence of any other information.
0070   unsigned GV_Default_Num_Teams;
0071
0072   // An alternative to the heavy data sharing infrastructure that uses global
0073   // memory is one that uses device __shared__ memory.  The amount of such space
0074   // (in bytes) reserved by the OpenMP runtime is noted here.
0075   unsigned GV_SimpleBufferSize;
0076   // The absolute maximum team size for a working group
0077   unsigned GV_Max_WG_Size;
0078   // The default maximum team size for a working group
0079   unsigned GV_Default_WG_Size;
0080
0081   constexpr unsigned maxWarpNumber() const {
0082     return GV_Max_WG_Size / GV_Warp_Size;
0083   }
0084 };
0085
0086 /// For AMDGPU GPUs
0087 static constexpr GV AMDGPUGridValues64 = {
0088     256,       // GV_Slot_Size
0089     64,        // GV_Warp_Size
0090     (1 << 16), // GV_Max_Teams
0091     440,       // GV_Default_Num_Teams
0092     896,       // GV_SimpleBufferSize
0093     1024,      // GV_Max_WG_Size,
0094     256,       // GV_Default_WG_Size
0095 };
0096
0097 static constexpr GV AMDGPUGridValues32 = {
0098     256,       // GV_Slot_Size
0099     32,        // GV_Warp_Size
0100     (1 << 16), // GV_Max_Teams
0101     440,       // GV_Default_Num_Teams
0102     896,       // GV_SimpleBufferSize
0103     1024,      // GV_Max_WG_Size,
0104     256,       // GV_Default_WG_Size
0105 };
0106
0107 template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
0108   static_assert(wavesize == 32 || wavesize == 64, "Unexpected wavesize");
0109   return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
0110 }
0111
0112 /// For Nvidia GPUs
0113 static constexpr GV NVPTXGridValues = {
0114     256,       // GV_Slot_Size
0115     32,        // GV_Warp_Size
0116     (1 << 16), // GV_Max_Teams
0117     3200,      // GV_Default_Num_Teams
0118     896,       // GV_SimpleBufferSize
0119     1024,      // GV_Max_WG_Size
0120     128,       // GV_Default_WG_Size
0121 };
0122
0123 } // namespace omp
0124 } // namespace llvm
0125
0126 #endif // LLVM_FRONTEND_OPENMP_OMPGRIDVALUES_H