VecGeom/management/CudaManager.h

0001 /// \file CudaManager.h
0002 /// \author Johannes de Fine Licht (johannes.definelicht@cern.ch); Sandro Wenzel(sandro.wenzel@cern.ch)
0003
0004 #ifndef VECGEOM_MANAGEMENT_CUDAMANAGER_H_
0005 #define VECGEOM_MANAGEMENT_CUDAMANAGER_H_
0006
0007 #include "VecGeom/base/Cuda.h"
0008 #include "VecGeom/base/Global.h"
0009
0010 #include "VecGeom/base/Vector.h"
0011 #include "VecGeom/volumes/Box.h"
0012
0013 #ifdef VECGEOM_CUDA_INTERFACE
0014 #include "VecGeom/backend/cuda/Interface.h"
0015 #endif
0016
0017 #include <list>
0018 #include <map>
0019 #include <set>
0020
0021 // Compile for vecgeom namespace to work as interface
0022 namespace vecgeom {
0023
0024 VECGEOM_DEVICE_FORWARD_DECLARE(class VPlacedVolume;);
0025 VECGEOM_DEVICE_FORWARD_DECLARE(void CudaManagerPrintGeometry(vecgeom::cuda::VPlacedVolume const *const world););
0026 VECGEOM_DEVICE_FORWARD_DECLARE(void InitDeviceCompactPlacedVolBufferPtr(void *););
0027
0028 // we put some global data into a separate namespace
0029 // this is done since CUDA does not support static const members in class definitions
0030 namespace globaldevicegeomdata {
0031 inline VECCORE_ATT_DEVICE VPlacedVolume *gCompactPlacedVolBuffer = nullptr;
0032 inline VECCORE_ATT_DEVICE NavIndex_t *gNavIndex = nullptr; // address of navigation index table
0033 inline VECCORE_ATT_DEVICE int gMaxDepth = 0;
0034 } // namespace globaldevicegeomdata
0035
0036 #ifndef VECCORE_CUDA
0037 inline
0038 #endif
0039     namespace cxx {
0040
0041 #ifdef VECCORE_CUDA
0042 // Forward declarations for NVCC compilation
0043 class VUnplacedVolume;
0044 class VPlacedVolume;
0045 class LogicalVolume;
0046 class Transformation3D;
0047 template <typename Type>
0048 class Vector;
0049 #endif
0050
0051 class CudaManager {
0052
0053 private:
0054   bool synchronized_;
0055   int verbose_;
0056   int total_volumes_;
0057
0058   using Daughter_t        = VPlacedVolume const *;
0059   using CudaDaughter_t    = cuda::VPlacedVolume const *;
0060   using CudaDaughterPtr_t = DevicePtr<cuda::VPlacedVolume>;
0061
0062   std::set<VUnplacedVolume const *> unplaced_volumes_;
0063   std::set<LogicalVolume const *> logical_volumes_;
0064   std::set<VPlacedVolume const *> placed_volumes_;
0065   std::set<Transformation3D const *> transformations_;
0066   std::set<Vector<Daughter_t> *> daughters_;
0067
0068   typedef void const *CpuAddress;
0069   typedef DevicePtr<char> GpuAddress;
0070   typedef std::map<const CpuAddress, GpuAddress> MemoryMap;
0071   typedef std::map<GpuAddress, CpuAddress> PlacedVolumeMemoryMap;
0072   typedef std::map<GpuAddress, GpuAddress> GpuMemoryMap;
0073
0074   VPlacedVolume const *world_;
0075   DevicePtr<vecgeom::cuda::VPlacedVolume> world_gpu_;
0076   DevicePtr<vecgeom::cuda::VPlacedVolume> fPlacedVolumeBufferOnDevice;
0077   DevicePtr<NavIndex_t> fNavTableOnDevice;
0078
0079 private:
0080   /**
0081    * Contains a mapping between objects stored in host memory and pointers to
0082    * equivalent objects stored on the GPU. Stored GPU pointers are pointing to
0083    * allocated memory, but do not necessary have meaningful data stored at the
0084    * addresses yet.
0085    * \sa AllocateGeometry()
0086    * \sa CleanGpu()
0087    */
0088   MemoryMap memory_map_;
0089   GpuMemoryMap gpu_memory_map_;
0090   /**
0091    * inverse memory_map for fast GPU pointer to CPU conversion
0092    *
0093    */
0094   PlacedVolumeMemoryMap fGPUtoCPUmapForPlacedVolumes_;
0095
0096   std::list<GpuAddress> allocated_memory_;
0097
0098 public:
0099   /**
0100    * Retrieve singleton instance.
0101    */
0102   static CudaManager &Instance()
0103   {
0104     static CudaManager instance;
0105     return instance;
0106   }
0107
0108   VPlacedVolume const *world() const;
0109
0110   vecgeom::cuda::VPlacedVolume const *world_gpu() const;
0111
0112   /**
0113    * Stages a new geometry to be copied to the GPU.
0114    */
0115   void LoadGeometry(VPlacedVolume const *const volume);
0116
0117   void LoadGeometry();
0118
0119   /**
0120    * Synchronizes the loaded geometry to the GPU by allocating space,
0121    * creating new objects with correct pointers, then copying them to the GPU.
0122    * \return Pointer to top volume on the GPU.
0123    */
0124   DevicePtr<const vecgeom::cuda::VPlacedVolume> Synchronize();
0125
0126   /**
0127    * Deallocates all GPU pointers stored in the memory table.
0128    */
0129   void CleanGpu();
0130
0131   /**
0132    * Forget the geometry (to prepare for a new call to LoadGeomtry)
0133    */
0134    void Clear();
0135
0136   /**
0137    * Launch a CUDA kernel that recursively outputs the geometry loaded onto the
0138    * device.
0139    */
0140   void PrintGeometry() const;
0141
0142   // /**
0143   //  * Launch a CUDA kernel that will locate points in the geometry
0144   //  */
0145   // void LocatePoints(SOA3D<Precision> const &container, const int depth,
0146   //                   int *const output) const;
0147
0148   void set_verbose(const int verbose) { verbose_ = verbose; }
0149
0150   template <typename Type>
0151   GpuAddress Lookup(Type const *const key) const;
0152
0153   template <typename Type>
0154   GpuAddress Lookup(DevicePtr<Type> key) const;
0155
0156   DevicePtr<cuda::VUnplacedVolume> LookupUnplaced(VUnplacedVolume const *const host_ptr) const;
0157
0158   DevicePtr<cuda::LogicalVolume> LookupLogical(LogicalVolume const *const host_ptr) const;
0159
0160   DevicePtr<cuda::VPlacedVolume> LookupPlaced(VPlacedVolume const *const host_ptr) const;
0161   VPlacedVolume const *LookupPlacedCPUPtr(const void *address);
0162
0163   DevicePtr<cuda::Transformation3D> LookupTransformation(Transformation3D const *const host_ptr) const;
0164
0165   DevicePtr<cuda::Vector<CudaDaughter_t>> LookupDaughters(Vector<Daughter_t> *const host_ptr) const;
0166
0167   DevicePtr<CudaDaughter_t> LookupDaughterArray(Vector<Daughter_t> *const host_ptr) const;
0168
0169 private:
0170   CudaManager();
0171   CudaManager(CudaManager const &);
0172   CudaManager &operator=(CudaManager const &);
0173
0174   /**
0175    * Recursively scans placed volumes to retrieve all unique objects
0176    * for copying to the GPU.
0177    */
0178   void ScanGeometry(VPlacedVolume const *const volume);
0179
0180   /**
0181    * Allocates all objects retrieved by ScanGeometry() on the GPU, storing
0182    * pointers in the memory table for future reference.
0183    */
0184   void AllocateGeometry();
0185
0186   /**
0187    * Converts object pointers to void pointers so they can be used as lookup in
0188    * the memory table.
0189    */
0190   template <typename Type>
0191   static CpuAddress ToCpuAddress(Type const *const ptr)
0192   {
0193     return static_cast<CpuAddress>(ptr);
0194   }
0195
0196   /**
0197    * Helper routine allocate GPU memory for a collection of object
0198    */
0199   template <typename Coll>
0200   bool AllocateCollectionOnCoproc(const char *verbose_title, const Coll &data, bool isplaced = false);
0201
0202   /**
0203    * Helper routine allocate GPU memory for placed volume objects
0204    */
0205   bool AllocatePlacedVolumesOnCoproc();
0206
0207   /** Allocator method for the navigation index table */
0208   bool AllocateNavIndexOnCoproc();
0209
0210   // template <typename TrackContainer>
0211   // void LocatePointsTemplate(TrackContainer const &container, const int n,
0212   //                           const int depth, int *const output) const;
0213
0214   /// Copy all placed volumes to the device.
0215   void CopyPlacedVolumes() const;
0216 };
0217
0218 // void CudaManagerLocatePoints(VPlacedVolume const *const world,
0219 //                              SOA3D<Precision> const *const points,
0220 //                              const int n, const int depth, int *const output);
0221
0222 inline VPlacedVolume const *CudaManager::LookupPlacedCPUPtr(const void *address)
0223 {
0224   const VPlacedVolume *cpu_ptr =
0225       (const VPlacedVolume *)fGPUtoCPUmapForPlacedVolumes_[GpuAddress(const_cast<void *>(address))];
0226   assert(cpu_ptr != NULL);
0227   return cpu_ptr;
0228 }
0229 } // namespace cxx
0230 } // namespace vecgeom
0231
0232 #endif // VECGEOM_MANAGEMENT_CUDAMANAGER_H_