backend/cuda/Backend.h

0001 /// \file cuda/Backend.h
0002 /// \author Johannes de Fine Licht (johannes.definelicht@cern.ch)
0003
0004 #ifndef VECGEOM_BACKEND_CUDABACKEND_H_
0005 #define VECGEOM_BACKEND_CUDABACKEND_H_
0006
0007 #include "VecGeom/base/Config.h"
0008 #include "VecGeom/base/Global.h"
0009
0010 #include "VecGeom/backend/scalar/Backend.h"
0011 #include "VecGeom/backend/cuda/Interface.h"
0012
0013 namespace vecgeom {
0014 #ifdef VECCORE_CUDA
0015 inline
0016 #endif
0017     namespace cuda {
0018
0019 struct kCuda {
0020   typedef int int_v;
0021   typedef Precision precision_v;
0022   typedef bool bool_v;
0023   typedef Inside_t inside_v;
0024   static constexpr precision_v kOne  = 1.0;
0025   static constexpr precision_v kZero = 0.0;
0026   const static bool_v kTrue          = true;
0027   const static bool_v kFalse         = false;
0028   // alternative typedefs ( might supercede above typedefs )
0029   typedef int Int_t;
0030   typedef Precision Double_t;
0031   typedef bool Bool_t;
0032   typedef int Index_t;
0033 };
0034
0035 typedef kCuda::int_v CudaInt;
0036 typedef kCuda::precision_v CudaPrecision;
0037 typedef kCuda::bool_v CudaBool;
0038
// Default backend selection: when CUDA support is enabled and no backend has
// been chosen yet, the VECGEOM_BACKEND_* macros are mapped onto the scalar
// backend and the vector size is fixed to one.
#ifdef VECGEOM_ENABLE_CUDA
#ifndef VECGEOM_BACKEND_TYPE
constexpr size_t kVectorSize = 1;
#define VECGEOM_BACKEND_TYPE vecgeom::kScalar
#define VECGEOM_BACKEND_BOOL vecgeom::ScalarBool
#define VECGEOM_BACKEND_INSIDE vecgeom::kScalar::inside_v
#define VECGEOM_BACKEND_PRECISION_TYPE Precision
#define VECGEOM_BACKEND_PRECISION_TYPE_SIZE 1
// Loading a precision value from a pointer is a plain dereference.
#define VECGEOM_BACKEND_PRECISION_FROM_PTR(P) (*(P))
// VECGEOM_BACKEND_PRECISION_NOT_SCALAR is left undefined in this configuration:
//#define VECGEOM_BACKEND_PRECISION_NOT_SCALAR
#endif // !VECGEOM_BACKEND_TYPE
#endif // VECGEOM_ENABLE_CUDA
0049
/// Upper bound on the number of threads per block used by LaunchParameters.
static const unsigned int kThreadsPerBlock = 256u;
0051
0052 // Auxiliary GPU functions
0053 #ifdef VECCORE_CUDA
0054
0055 VECCORE_ATT_DEVICE
0056 VECGEOM_FORCE_INLINE
0057 int ThreadIndex()
0058 {
0059   return blockDim.x * blockIdx.x + threadIdx.x;
0060 }
0061
0062 VECCORE_ATT_DEVICE
0063 VECGEOM_FORCE_INLINE
0064 int ThreadOffset()
0065 {
0066   return blockDim.x * gridDim.x;
0067 }
0068
0069 #endif
0070
0071 /**
0072  * Initialize with the number of threads required to construct the necessary
0073  * block and grid dimensions to accommodate all threads.
0074  */
0075 struct LaunchParameters {
0076   dim3 block_size;
0077   dim3 grid_size;
0078   LaunchParameters(const unsigned threads)
0079   {
0080     // Blocks always one dimensional
0081     block_size.x                                 = kThreadsPerBlock;
0082     if (threads < kThreadsPerBlock) block_size.x = threads;
0083     block_size.y                                 = 1;
0084     block_size.z                                 = 1;
0085     // Grid becomes two dimensional at large sizes
0086     const unsigned blocks = 1 + (threads - 1) / kThreadsPerBlock;
0087     grid_size.z           = 1;
0088     if (blocks <= 1 << 16) {
0089       grid_size.x = blocks;
0090       grid_size.y = 1;
0091     } else {
0092       int dim     = static_cast<int>(sqrt(static_cast<double>(blocks)) + 0.5);
0093       grid_size.x = dim;
0094       grid_size.y = dim;
0095     }
0096   }
0097 };
0098 }
0099 } // End global namespace
0100
0101 #endif // VECGEOM_BACKEND_CUDABACKEND_H_