Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:54:46

0001 //---------------------------------*-CUDA-*----------------------------------//
0002 // Copyright 2023-2024 UT-Battelle, LLC, and other Celeritas developers.
0003 // See the top-level COPYRIGHT file for details.
0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0005 //---------------------------------------------------------------------------//
0006 //! \file corecel/data/detail/LdgIteratorImpl.hh
0007 //---------------------------------------------------------------------------//
0008 #pragma once
0009 
0010 #include <type_traits>
0011 
0012 #include "corecel/Macros.hh"
0013 #include "corecel/OpaqueId.hh"
0014 #include "corecel/Types.hh"
0015 #include "corecel/math/Quantity.hh"
0016 
0017 #include "TypeTraits.hh"
0018 
0019 namespace celeritas
0020 {
0021 namespace detail
0022 {
0023 //---------------------------------------------------------------------------//
0024 /*!
0025  * Wrap the low-level CUDA/HIP "load global memory" function.
0026  *
0027  * This low-level capability allows improved caching because we're \em
0028  * promising that no other thread can modify its value while the kernel is
0029  * active.
0030  */
0031 template<class T>
0032 CELER_CONSTEXPR_FUNCTION T ldg(T const* ptr)
0033 {
0034     static_assert(std::is_arithmetic_v<T>,
0035                   "Only const arithmetic types are supported by __ldg");
0036 #if CELER_DEVICE_COMPILE
0037     return __ldg(ptr);
0038 #else
0039     return *ptr;
0040 #endif
0041 }
0042 
0043 //---------------------------------------------------------------------------//
0044 /*!
0045  * Reads a value T using __ldg builtin and return a copy of it
0046  */
0047 template<class T, typename = void>
0048 struct LdgLoader
0049 {
0050     static_assert(std::is_const_v<T>, "Only const data are supported by __ldg");
0051 
0052     using value_type = std::remove_const_t<T>;
0053     using pointer = std::add_pointer_t<value_type const>;
0054     using reference = value_type;
0055 
0056     CELER_CONSTEXPR_FUNCTION static reference read(pointer p)
0057     {
0058         return ldg(p);
0059     }
0060 };
0061 
0062 /*!
0063  * Specialization when T == OpaqueId.
0064  * Wraps the underlying index in a OpaqueId when returning it.
0065  */
0066 template<class I, class T>
0067 struct LdgLoader<OpaqueId<I, T> const, void>
0068 {
0069     using value_type = OpaqueId<I, T>;
0070     using pointer = std::add_pointer_t<value_type const>;
0071     using reference = value_type;
0072 
0073     CELER_CONSTEXPR_FUNCTION static reference read(pointer p)
0074     {
0075         return value_type{ldg(p->data())};
0076     }
0077 };
0078 
0079 /*!
0080  * Specialization when T == Quantity.
0081  * Wraps the underlying value in a Quantity when returning it.
0082  */
0083 template<class I, class T>
0084 struct LdgLoader<Quantity<I, T> const, void>
0085 {
0086     using value_type = Quantity<I, T>;
0087     using pointer = std::add_pointer_t<value_type const>;
0088     using reference = value_type;
0089 
0090     CELER_CONSTEXPR_FUNCTION static reference read(pointer p)
0091     {
0092         return value_type{ldg(p->data())};
0093     }
0094 };
0095 
0096 template<class T>
0097 struct LdgLoader<T const, std::enable_if_t<std::is_enum_v<T>>>
0098 {
0099     using value_type = T;
0100     using pointer = std::add_pointer_t<value_type const>;
0101     using reference = value_type;
0102     using underlying_type = std::underlying_type_t<T>;
0103 
0104     CELER_CONSTEXPR_FUNCTION static reference read(pointer p)
0105     {
0106 #if CELER_DEVICE_COMPILE
0107         // Technically breaks aliasing rule but it's not an issue:
0108         // the compiler doesn't derive any optimization and the pointer doesn't
0109         // escape the function
0110         return value_type{ldg(reinterpret_cast<underlying_type const*>(p))};
0111 #else
0112         return *p;
0113 #endif
0114     }
0115 };
0116 
0117 //---------------------------------------------------------------------------//
0118 }  // namespace detail
0119 }  // namespace celeritas