Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:54:49

0001 //----------------------------------*-C++-*----------------------------------//
0002 // Copyright 2020-2024 UT-Battelle, LLC, and other Celeritas developers.
0003 // See the top-level COPYRIGHT file for details.
0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0005 //---------------------------------------------------------------------------//
0006 //! \file corecel/math/Atomics.hh
0007 //! \brief Atomics for use in kernel code (CUDA/HIP/OpenMP).
0008 //---------------------------------------------------------------------------//
0009 #pragma once
0010 
0011 #include "corecel/Assert.hh"
0012 #include "corecel/Macros.hh"
0013 #include "corecel/Types.hh"
0014 
0015 #include "Algorithms.hh"
0016 
0017 namespace celeritas
0018 {
0019 //---------------------------------------------------------------------------//
0020 /*!
0021  * Add to a value, returning the original value.
0022  *
0023  * Note that on CPU, this assumes the atomic add is being done in with \em
0024  * track-level parallelism rather than \em event-level because these utilities
0025  * are meant for "kernel" code.
0026  *
0027  * \warning Multiple events must not use this function to simultaneously modify
0028  * shared data.
0029  */
0030 template<class T>
0031 CELER_FORCEINLINE_FUNCTION T atomic_add(T* address, T value)
0032 {
0033 #if CELER_DEVICE_COMPILE
0034     return atomicAdd(address, value);
0035 #else
0036     CELER_EXPECT(address);
0037     T initial;
0038 #    if defined(_OPENMP) && CELERITAS_OPENMP == CELERITAS_OPENMP_TRACK
0039 #        pragma omp atomic capture
0040 #    endif
0041     {
0042         initial = *address;
0043         *address += value;
0044     }
0045     return initial;
0046 #endif
0047 }
0048 
0049 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600)
0050 //---------------------------------------------------------------------------//
0051 /*!
0052  * Atomic addition specialization for double-precision on older platforms.
0053  *
0054  * From CUDA C Programming guide v10.1 p127
0055  */
0056 inline __device__ double atomic_add(double* address, double val)
0057 {
0058     CELER_EXPECT(address);
0059     ull_int* address_as_ull = reinterpret_cast<ull_int*>(address);
0060     ull_int old = *address_as_ull;
0061     ull_int assumed;
0062     do
0063     {
0064         assumed = old;
0065         old = atomicCAS(
0066             address_as_ull,
0067             assumed,
0068             __double_as_longlong(val + __longlong_as_double(assumed)));
0069         // Note: uses integer comparison to avoid hang in case of NaN (since
0070         // NaN != NaN)
0071     } while (assumed != old);
0072     return __longlong_as_double(old);
0073 }
0074 #endif
0075 
0076 //---------------------------------------------------------------------------//
0077 /*!
0078  * Set the value to the minimum of the actual and given, returning old.
0079  */
0080 template<class T>
0081 CELER_FORCEINLINE_FUNCTION T atomic_min(T* address, T value)
0082 {
0083 #if CELER_DEVICE_COMPILE
0084     return atomicMin(address, value);
0085 #else
0086     CELER_EXPECT(address);
0087     T initial;
0088 #    if defined(_OPENMP) && CELERITAS_OPENMP == CELERITAS_OPENMP_TRACK
0089 #        pragma omp atomic capture
0090 #    endif
0091     {
0092         initial = *address;
0093         *address = celeritas::min(initial, value);
0094     }
0095     return initial;
0096 #endif
0097 }
0098 
0099 //---------------------------------------------------------------------------//
0100 /*!
0101  * Set the value to the maximum of the actual and given, returning old.
0102  */
0103 template<class T>
0104 CELER_FORCEINLINE_FUNCTION T atomic_max(T* address, T value)
0105 {
0106 #if CELER_DEVICE_COMPILE
0107     return atomicMax(address, value);
0108 #else
0109     CELER_EXPECT(address);
0110     T initial;
0111 #    if defined(_OPENMP) && CELERITAS_OPENMP == CELERITAS_OPENMP_TRACK
0112 #        pragma omp atomic capture
0113 #    endif
0114     {
0115         initial = *address;
0116         *address = celeritas::max(initial, value);
0117     }
0118     return initial;
0119 #endif
0120 }
0121 
0122 #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ <= 300)
0123 //---------------------------------------------------------------------------//
0124 /*!
0125  * Software emulation of atomic max for older systems.
0126  *
0127  * This is a modification of the "software double-precision add" algorithm.
0128  * TODO: combine this algorithm with the atomic_add and genericize on operation
0129  * if we ever need to implement the atomics for other types.
0130  */
0131 inline __device__ ull_int atomic_max(ull_int* address, ull_int val)
0132 {
0133     CELER_EXPECT(address);
0134     ull_int old = *address;
0135     ull_int assumed;
0136     do
0137     {
0138         assumed = old;
0139         old = atomicCAS(address, assumed, celeritas::max(val, assumed));
0140     } while (assumed != old);
0141     return old;
0142 }
0143 #endif
0144 
0145 //---------------------------------------------------------------------------//
0146 }  // namespace celeritas