Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:54:47

0001 //----------------------------------*-C++-*----------------------------------//
0002 // Copyright 2020-2024 UT-Battelle, LLC, and other Celeritas developers.
0003 // See the top-level COPYRIGHT file for details.
0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0005 //---------------------------------------------------------------------------//
0006 //! \file corecel/data/StackAllocator.hh
0007 //---------------------------------------------------------------------------//
0008 #pragma once
0009 
0010 #include <new>
0011 
0012 #include "corecel/math/Atomics.hh"
0013 
0014 #include "StackAllocatorData.hh"
0015 
0016 namespace celeritas
0017 {
0018 //---------------------------------------------------------------------------//
0019 /*!
0020  * Dynamically allocate arbitrary data on a stack.
0021  *
0022  * The stack allocator view acts as a functor and accessor to the allocated
0023  * data. It enables very fast on-device dynamic allocation of data, such as
0024  * secondaries or detector hits. As an example, inside a hypothetical physics
0025  * Interactor class, you could create two particles with the following code:
0026  * \code
0027 
0028  struct Interactor
0029  {
0030     StackAllocator<Secondary> allocate;
0031 
0032     // Sample an interaction
0033     template<class Engine>
0034     Interaction operator()(Engine&)
0035     {
0036        // Create 2 secondary particles
0037        Secondary* allocated = this->allocate(2);
0038        if (!allocated)
0039        {
0040            return Interaction::from_failure();
0041        }
0042        Interaction result;
0043        result.secondaries = Span<Secondary>{allocated, 2};
0044        return result;
0045     };
0046  };
0047  \endcode
0048  *
0049  * A later kernel could then iterate over the secondaries to apply cutoffs:
0050  * \code
0051  using SecondaryRef
0052      = StackAllocatorData<Secondary, Ownership::reference, MemSpace::device>;
0053 
0054  __global__ apply_cutoff(const SecondaryRef ptrs)
0055  {
0056      auto thread_idx = celeritas::KernelParamCalculator::thread_id().get();
0057      StackAllocator<Secondary> allocate(ptrs);
0058      auto secondaries = allocate.get();
0059      if (thread_idx < secondaries.size())
0060      {
0061          Secondary& sec = secondaries[thread_idx];
0062          if (sec.energy < 100 * units::kilo_electron_volts)
0063          {
0064              sec.energy = 0;
0065          }
0066      }
0067  }
0068  * \endcode
0069  *
0070  * You *cannot* safely access the current size of the stack in the same kernel
0071  * that's modifying it -- if the stack attempts to allocate beyond the end,
0072  * then the \c size() call will reflect that overflowed state, rather than the
0073  * corrected size reflecting the failed allocation.
0074  *
0075  * A third kernel with a single thread would then be responsible for clearing
0076  * the data:
0077  * \code
0078  __global__ clear_stack(const SecondaryRef ptrs)
0079  {
0080      StackAllocator<Secondary> allocate(ptrs);
0081      auto thread_idx = celeritas::KernelParamCalculator::thread_id().get();
0082      if (thread_idx == 0)
0083      {
0084          allocate.clear();
0085      }
0086  }
0087  * \endcode
0088  *
0089  * These separate kernel launches are needed as grid-level synchronization
0090  * points.
0091  *
0092  * \todo Instead of returning a pointer, return IdRange<T>. Rename
0093  * StackAllocatorData to StackAllocation and have it look like a collection so
0094  * that *it* will provide access to the data. Better yet, have a
0095  * StackAllocation that can be a `const_reference` to the StackAllocatorData.
0096  * Then the rule will be "you can't create a StackAllocator in the same kernel
0097  * that you directly access a StackAllocation".
0098  */
template<class T>
class StackAllocator
{
  public:
    //!@{
    //! \name Type aliases
    using value_type = T;
    //! Allocation returns a raw pointer (nullptr on failure)
    using result_type = value_type*;
    //! Non-owning reference to allocator state in the native memory space
    using Data = StackAllocatorData<T, Ownership::reference, MemSpace::native>;
    //!@}

  public:
    // Construct with shared data
    explicit inline CELER_FUNCTION StackAllocator(Data const& data);

    // Total storage capacity (always safe)
    inline CELER_FUNCTION size_type capacity() const;

    //// INITIALIZE ////

    // Reset storage
    inline CELER_FUNCTION void clear();

    //// ALLOCATE ////

    // Allocate space for this many data
    inline CELER_FUNCTION result_type operator()(size_type count);

    //// ACCESS ////

    // Current size
    inline CELER_FUNCTION size_type size() const;

    // View all allocated data
    inline CELER_FUNCTION Span<value_type> get();
    inline CELER_FUNCTION Span<value_type const> get() const;

  private:
    Data const& data_;

    //// HELPER FUNCTIONS ////

    using SizeId = ItemId<size_type>;
    using StorageId = ItemId<T>;
    // The shared stack size is stored as the sole element of the "size"
    // collection in the allocator data
    static CELER_CONSTEXPR_FUNCTION SizeId size_id() { return SizeId{0}; }
};
0145 
0146 //---------------------------------------------------------------------------//
0147 // INLINE DEFINITIONS
0148 //---------------------------------------------------------------------------//
/*!
 * Construct with a reference to shared allocator data.
 *
 * The referenced data must be assigned (valid) and must outlive this view:
 * only the reference is stored, not a copy.
 */
template<class T>
CELER_FUNCTION StackAllocator<T>::StackAllocator(Data const& shared)
    : data_(shared)
{
    CELER_EXPECT(shared);
}
0158 
0159 //---------------------------------------------------------------------------//
/*!
 * Get the maximum number of values that can be allocated.
 *
 * Unlike \c size(), this value is fixed by the underlying storage and is
 * always safe to call, even while other threads are allocating.
 */
template<class T>
CELER_FUNCTION auto StackAllocator<T>::capacity() const -> size_type
{
    return data_.storage.size();
}
0168 
0169 //---------------------------------------------------------------------------//
/*!
 * Clear the stack allocator.
 *
 * This sets the size to zero. It should ideally *only* be called by a single
 * thread (though multiple threads resetting it should also be OK), but
 * *cannot be used in the same kernel that is allocating or viewing it*. This
 * is because the access times between different threads or thread-blocks is
 * indeterminate inside of a single kernel.
 */
template<class T>
CELER_FUNCTION void StackAllocator<T>::clear()
{
    // Plain (non-atomic) store: correctness relies on the caller guaranteeing
    // no concurrent allocation or access, as described above
    data_.size[this->size_id()] = 0;
}
0184 
0185 //---------------------------------------------------------------------------//
/*!
 * Allocate space for a given number of items.
 *
 * \param count Number of items to allocate; must be positive
 * \return Pointer to the first of \c count default-initialized items, or
 *         null if insufficient capacity remains.
 *
 * Returns NULL if allocation failed due to out-of-memory. Ensures that the
 * shared size reflects the amount of data allocated.
 */
template<class T>
CELER_FUNCTION auto
StackAllocator<T>::operator()(size_type count) -> result_type
{
    CELER_EXPECT(count > 0);

    // Atomic add 'count' to the shared size: 'start' is the pre-increment
    // value, i.e. the first index reserved for this thread.
    // NOTE(review): if size_type is unsigned, 'start + count' could in
    // principle wrap for pathologically large counts and falsely pass the
    // bounds check -- presumably count is always small relative to capacity;
    // TODO confirm
    size_type start = atomic_add(&data_.size[this->size_id()], count);
    if (CELER_UNLIKELY(start + count > data_.storage.size()))
    {
        // Out of memory: restore the old value so that another thread can
        // potentially use it. Multiple threads are likely to exceed the
        // capacity simultaneously. Only one has a "start" value less than or
        // equal to the total capacity: the remainder are (arbitrarily) higher
        // than that.
        if (start <= this->capacity())
        {
            // We were the first thread to exceed capacity, even though other
            // threads might have failed (and might still be failing) to
            // allocate. Restore the actual allocated size to the start value.
            // This might allow another thread with a smaller allocation to
            // succeed, but it also guarantees that at the end of the kernel,
            // the size reflects the actual capacity.
            data_.size[this->size_id()] = start;
        }

        /*!
         * \todo It might be useful to set an "out of memory" flag to make it
         * easier for host code to detect whether a failure occurred, rather
         * than looping through primaries and testing for failure.
         */

        // Return null pointer, indicating failure to allocate.
        return nullptr;
    }

    // Initialize the data at the newly "allocated" address: placement new
    // default-initializes each T in the reserved storage slots
    value_type* result = new (&data_.storage[StorageId{start}]) value_type;
    for (size_type i = 1; i < count; ++i)
    {
        // Initialize remaining values; the assertion verifies the storage is
        // contiguous so 'result' can be used as an array of 'count' items
        CELER_ASSERT(&data_.storage[StorageId{start + i}] == result + i);
        new (&data_.storage[StorageId{start + i}]) value_type;
    }
    return result;
}
0238 
0239 //---------------------------------------------------------------------------//
0240 /*!
0241  * Get the number of items currently present.
0242  *
0243  * This value may not be meaningful (may be less than "actual" size) if
0244  * called in the same kernel as other threads that are allocating.
0245  */
0246 template<class T>
0247 CELER_FUNCTION auto StackAllocator<T>::size() const -> size_type
0248 {
0249     size_type result = data_.size[this->size_id()];
0250     CELER_ENSURE(result <= this->capacity());
0251     return result;
0252 }
0253 
0254 //---------------------------------------------------------------------------//
0255 /*!
0256  * View all allocated data.
0257  *
0258  * This cannot be called while any running kernel could be modifiying the size.
0259  */
0260 template<class T>
0261 CELER_FUNCTION auto StackAllocator<T>::get() -> Span<value_type>
0262 {
0263     return data_.storage[ItemRange<T>{StorageId{0}, StorageId{this->size()}}];
0264 }
0265 
0266 //---------------------------------------------------------------------------//
0267 /*!
0268  * View all allocated data (const).
0269  *
0270  * This cannot be called while any running kernel could be modifiying the size.
0271  */
0272 template<class T>
0273 CELER_FUNCTION auto StackAllocator<T>::get() const -> Span<value_type const>
0274 {
0275     return data_.storage[ItemRange<T>{StorageId{0}, StorageId{this->size()}}];
0276 }
0277 
0278 //---------------------------------------------------------------------------//
0279 }  // namespace celeritas