//------------------------------- -*- C++ -*- -------------------------------//
// Copyright Celeritas contributors: see top-level COPYRIGHT file for details
// SPDX-License-Identifier: (Apache-2.0 OR MIT)
//---------------------------------------------------------------------------//
//! \file corecel/data/Collection.hh
//---------------------------------------------------------------------------//
#pragma once

#include "corecel/Assert.hh"
#include "corecel/OpaqueId.hh"
#include "corecel/Types.hh"
#include "corecel/cont/Range.hh"
#include "corecel/sys/ThreadId.hh"

#include "ObserverPtr.hh"

#include "detail/CollectionImpl.hh"

namespace celeritas
{
//---------------------------------------------------------------------------//
/*!
 * \page collections Collection: a data portability class
 *
 * The \c Collection manages data allocation and transfer between CPU and GPU.
 * Its primary design goal is facilitating construction of deeply hierarchical
 * data on host at setup time and seamlessly copying to device.
 * The templated \c T must be trivially copyable: either a fundamental data
 * type or a struct of such types.
 *
 * An individual item in a \c Collection<T> can be accessed with \c ItemId<T>,
 * a contiguous subset of items is accessed with \c ItemRange<T>, and the
 * entirety of the data is accessed with \c AllItems<T>. All three of these
 * classes are trivially copyable, so they can be embedded in structs that can
 * be managed by a Collection. A group of Collections, one for each data type,
 * can therefore be trivially copied to the GPU to enable arbitrarily deep and
 * complex data hierarchies.
 *
 * By convention, groups of Collections comprising the data for a single class
 * or subsystem (such as RayleighInteractor or Physics) are stored in a helper
 * struct suffixed with \c Data . For cases where there is both persistent data
 * (problem-specific parameters) and transient data (track-specific states),
 * the collections must be grouped into two separate classes. \c StateData is
 * meant to be mutable and never directly copied between host and device; its
 * data collections are typically accessed by thread ID. \c ParamsData is
 * immutable and always "mirrored" on both host and device. Sometimes it's
 * sensible to partition \c ParamsData into discrete helper structs (stored by
 * value), each with a group of collections, and perhaps another struct that
 * has non-templated scalars (since the default assignment operator is less
 * work than manually copying scalars in a templated assignment operator).
 *
 * A collection group has the following requirements to be compatible with the
 * \c CollectionMirror, \c CollectionStateStore, and other such helper classes:
 * - Be a struct templated with \c template<Ownership W, MemSpace M>
 * - Contain only Collection objects and trivially copyable structs
 * - Define an operator bool that is true if and only if the class data is
 *   assigned and consistent
 * - Define a templated assignment operator on "other" Ownership and MemSpace
 *   which assigns every member to the right-hand side's member
 *
 * Additionally, a \c StateData collection group must define
 * - A member function \c size() returning the number of entries (i.e. number
 *   of threads)
 * - A free function \c resize with one of the following signatures:
 * \code
   void resize(
       StateData<Ownership::value, M>* data,
       HostCRef<ParamsData> const& params,
       StreamId stream,
       size_type size);
   // or...
   void resize(
       StateData<Ownership::value, M>* data,
       HostCRef<ParamsData> const& params,
       size_type size);
   // or...
   void resize(
       StateData<Ownership::value, M>* data,
       size_type size);
 * \endcode
 *
 * By convention, related groups of collections are stored in a header file
 * named \c Data.hh .
 *
 * See ParticleParamsData and ParticleStateData for minimal examples of using
 * collections. The MaterialParamsData demonstrates additional complexity
 * by having a multi-level data hierarchy, and MaterialStateData has a resize
 * function that uses params data. PhysicsParamsData is a very complex example,
 * and GeoParamsData demonstrates how to use template specialization to adapt
 * Collections to another codebase with a different convention for host-device
 * portability.
 */
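
//---------------------------------------------------------------------------//
/*!
 * Example: a minimal collection group following the conventions above. This
 * is an illustrative sketch rather than production code: the \c FooParamsData
 * and \c FooStateData names and members are hypothetical, and the
 * free-function \c resize for value collections is assumed to be available
 * from \c CollectionBuilder.hh .
 *
 * \code
   template<Ownership W, MemSpace M>
   struct FooParamsData
   {
       Collection<real_type, W, M> reals;

       //! Whether the data are assigned
       explicit CELER_FUNCTION operator bool() const { return !reals.empty(); }

       //! Assign from another set of data
       template<Ownership W2, MemSpace M2>
       FooParamsData& operator=(FooParamsData<W2, M2> const& other)
       {
           CELER_EXPECT(other);
           reals = other.reals;
           return *this;
       }
   };

   template<Ownership W, MemSpace M>
   struct FooStateData
   {
       StateCollection<real_type, W, M> vals;

       //! Number of state entries (i.e. threads/track slots)
       CELER_FUNCTION size_type size() const { return vals.size(); }

       //! Whether the data are assigned
       explicit CELER_FUNCTION operator bool() const { return !vals.empty(); }

       //! Assign from another set of data (mutable)
       template<Ownership W2, MemSpace M2>
       FooStateData& operator=(FooStateData<W2, M2>& other)
       {
           CELER_EXPECT(other);
           vals = other.vals;
           return *this;
       }
   };

   //! Resize the state (simplest of the signatures above)
   template<MemSpace M>
   void resize(FooStateData<Ownership::value, M>* state, size_type size)
   {
       CELER_EXPECT(size > 0);
       resize(&state->vals, size);
   }
 * \endcode
 */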

//! Opaque ID representing a single element of a container.
template<class T>
using ItemId = OpaqueId<T, size_type>;

//---------------------------------------------------------------------------//
/*!
 * Reference a contiguous range of IDs corresponding to a slice of items.
 *
 * \tparam T The value type of items to represent.
 *
 * An ItemRange is a range of \c OpaqueId<T> that references a range of values
 * of type \c T in a \c Collection . The ItemRange acts like a \c slice object
 * in Python when used on a Collection, returning a Span<T> of the underlying
 * data.
 *
 * An ItemRange is only meaningful in connection with a particular Collection
 * of type T. It doesn't have any persistent connection to its associated
 * collection and thus must be used carefully.
 *
 * \code
   struct MyMaterial
   {
       real_type number_density;
       ItemRange<ElementComponents> components;
   };

   template<Ownership W, MemSpace M>
   struct MyData
   {
       Collection<ElementComponents, W, M> components;
       Collection<MyMaterial, W, M> materials;
   };
 * \endcode
 */
template<class T, class Size = size_type>
using ItemRange = Range<OpaqueId<T, Size>>;
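
//---------------------------------------------------------------------------//
/*!
 * Example: slicing a Collection with an ItemRange, continuing the
 * hypothetical \c MyData / \c MyMaterial sketch above. The
 * \c ElementComponents member \c fraction is invented for illustration.
 *
 * \code
   inline CELER_FUNCTION real_type total_fraction(
       MyData<Ownership::const_reference, MemSpace::native> const& data,
       ItemId<MyMaterial> mat_id)
   {
       MyMaterial const& mat = data.materials[mat_id];
       // Subscripting a Collection with an ItemRange returns a Span
       auto components = data.components[mat.components];
       real_type sum = 0;
       for (ElementComponents const& ec : components)
       {
           sum += ec.fraction;
       }
       return sum;
   }
 * \endcode
 */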

//---------------------------------------------------------------------------//
/*!
 * Access data in a Range<T2> with an index of type T1.
 *
 * Here, T1 and T2 are expected to be OpaqueId types. This is simply a
 * type-safe "offset" with range checking.
 */
template<class T1, class T2>
class ItemMap
{
    static_assert(detail::is_opaque_id_v<T1>, "T1 is not an OpaqueId");
    static_assert(detail::is_opaque_id_v<T2>, "T2 is not an OpaqueId");

  public:
    //!@{
    //! \name Type aliases
    using key_type = T1;
    using mapped_type = T2;
    //!@}

  public:
    //// CONSTRUCTION ////

    ItemMap() = default;

    //! Construct from an existing Range<T2>
    explicit CELER_FUNCTION ItemMap(Range<T2> range) : range_(range) {}

    //// ACCESS ////

    //! Access Range via OpaqueId of type T1
    CELER_FORCEINLINE_FUNCTION T2 operator[](T1 id) const
    {
        CELER_EXPECT(id < this->size());
        return range_[id.unchecked_get()];
    }

    //! Whether the underlying Range<T2> is empty
    CELER_FORCEINLINE_FUNCTION bool empty() const { return range_.empty(); }

    //! Size of the underlying Range<T2>
    CELER_FORCEINLINE_FUNCTION size_type size() const { return range_.size(); }

  private:
    //// DATA ////
    Range<T2> range_;
};
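
//---------------------------------------------------------------------------//
/*!
 * Example: translating one OpaqueId type into another with ItemMap. This is
 * an illustrative sketch; the \c FooId and \c BarId tag types are
 * hypothetical.
 *
 * \code
   using FooId = OpaqueId<struct Foo_>;
   using BarId = OpaqueId<struct Bar_>;

   // Foo indices [0, 4) map onto Bar indices [10, 14)
   ItemMap<FooId, BarId> foo_to_bar{Range<BarId>{BarId{10}, BarId{14}}};
   BarId b = foo_to_bar[FooId{2}];  // -> BarId{12}
 * \endcode
 */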

// Forward-declare collection builder, needed for GCC7
template<class T2, MemSpace M2, class Id2>
class CollectionBuilder;

//---------------------------------------------------------------------------//
/*!
 * Sentinel class for obtaining a view to all items of a collection.
 */
template<class T, MemSpace M = MemSpace::native>
struct AllItems
{
};

//---------------------------------------------------------------------------//
/*!
 * Manage generic array-like data ownership and transfer from host to device.
 *
 * Data are constructed incrementally on the host, then copied (along with
 * their associated ItemRange) to device. A Collection can act as a
 * \c std::vector<T>, \c DeviceVector<T>, \c Span<T>, or \c Span<const T>. The
 * Spans can point to host or device memory, but the \c MemSpace template
 * argument protects against accidental accesses from the wrong memory space.
 *
 * Each Collection object is usually accessed with an ItemRange, which
 * references a contiguous set of elements in the Collection.
 * For example, setup code on the host would extend the Collection with a
 * series of vectors, the addition of which returns an ItemRange that refers
 * to the equivalent data on host or device.
 * This methodology allows complex nested data structures to be built
 * up quickly at setup time without knowing the size requirements beforehand.
 *
 * Host-device functions and classes should use \c Collection with a reference
 * or const_reference Ownership, and the \c MemSpace::native type, which
 * expects device memory when compiled inside a CUDA file and host memory when
 * used inside a C++ source or test. (This design choice prevents a single CUDA
 * file from compiling separate host-compatible and device-compatible compute
 * kernels, but in the case of Celeritas this situation won't arise, because
 * we always want to build host code in C++ files for development ease and to
 * allow testing when CUDA is disabled.)
 *
 * A \c MemSpace::mapped collection will be accessible on both host and
 * device. Unified addressing must be supported by the current device, or an
 * exception will be thrown when initializing the collection. Memory pages
 * reside in "pinned" memory on the host, and each access from device code to
 * a changed page requires a slow memory transfer.
 * Allocating pinned memory is slow and reduces the memory available to the
 * system, so only allocate the smallest amount needed with the longest
 * possible lifetime.
 * Frequently accessing data from device code will result in low performance.
 * Use cases for mapped memory are:
 * - as a source or destination memory space for asynchronous operations,
 * - on integrated GPU architectures, or
 * - a [single coalesced read or write from device code](
 *   https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#zero-copy).
 *
 * Accessing a \c const_reference collection in \c device memory will return a
 * wrapper container that accesses the low-level data through the \c __ldg
 * primitive, which can accelerate random access by telling the compiler
 * <em>the memory will not be changed during the lifetime of the kernel</em>.
 * Therefore it is important to \em only use const Collections for shared,
 * constant "params" data.
 */
template<class T, Ownership W, MemSpace M, class I = ItemId<T>>
class Collection
{
    // rocrand states have nontrivial destructors
    static_assert(std::is_trivially_copyable<T>::value || CELERITAS_USE_HIP,
                  "Collection element is not trivially copyable");
    static_assert(std::is_trivially_destructible<T>::value || CELERITAS_USE_HIP,
                  "Collection element is not trivially destructible");

    using CollectionTraitsT = detail::CollectionTraits<T, W, M>;
    using const_value_type = typename CollectionTraitsT::const_type;

  public:
    //!@{
    //! \name Type aliases
    using value_type = typename CollectionTraitsT::type;
    using SpanT = typename CollectionTraitsT::SpanT;
    using SpanConstT = typename CollectionTraitsT::SpanConstT;
    using pointer = ObserverPtr<value_type, M>;
    using const_pointer = ObserverPtr<const_value_type, M>;
    using reference_type = typename CollectionTraitsT::reference_type;
    using const_reference_type =
        typename CollectionTraitsT::const_reference_type;
    using size_type = typename I::size_type;
    using ItemIdT = I;
    using ItemRangeT = Range<ItemIdT>;
    using AllItemsT = AllItems<T, M>;
    //!@}

    static constexpr Ownership ownership = W;
    static constexpr MemSpace memspace = M;

  public:
    //// CONSTRUCTION ////

    //!@{
    //! Default constructors
    Collection() = default;
    Collection(Collection const&) = default;
    Collection(Collection&&) = default;
    //!@}

    ~Collection() = default;

    // Construct from another collection
    template<Ownership W2, MemSpace M2>
    explicit inline Collection(Collection<T, W2, M2, I> const& other);

    // Construct from another collection (mutable)
    template<Ownership W2, MemSpace M2>
    explicit inline Collection(Collection<T, W2, M2, I>& other);

    //!@{
    //! Default assignment
    Collection& operator=(Collection const& other) = default;
    Collection& operator=(Collection&& other) = default;
    //!@}

    // Assign from another collection
    template<Ownership W2, MemSpace M2>
    inline Collection& operator=(Collection<T, W2, M2, I> const& other);

    // Assign (mutable!) from another collection
    template<Ownership W2, MemSpace M2>
    inline Collection& operator=(Collection<T, W2, M2, I>& other);

    //// ACCESS ////

    // Access a single element
    CELER_FORCEINLINE_FUNCTION reference_type operator[](ItemIdT i);
    CELER_FORCEINLINE_FUNCTION const_reference_type operator[](ItemIdT i) const;

    // Access a subset of the data with a slice
    CELER_FORCEINLINE_FUNCTION SpanT operator[](ItemRangeT ps);
    CELER_FORCEINLINE_FUNCTION SpanConstT operator[](ItemRangeT ps) const;

    // Access all data
    CELER_FORCEINLINE_FUNCTION SpanT operator[](AllItemsT);
    CELER_FORCEINLINE_FUNCTION SpanConstT operator[](AllItemsT) const;

    //!@{
    //! Direct accessors to underlying data
    CELER_FORCEINLINE_FUNCTION size_type size() const
    {
        return static_cast<size_type>(this->storage().size());
    }
    CELER_FORCEINLINE_FUNCTION bool empty() const
    {
        return this->storage().empty();
    }
    CELER_FORCEINLINE_FUNCTION pointer data()
    {
        return pointer{this->storage().data()};
    }
    CELER_FORCEINLINE_FUNCTION const_pointer data() const
    {
        return const_pointer{this->storage().data()};
    }
    //!@}

  private:
    //// DATA ////

    detail::CollectionStorage<T, W, M> storage_{};

  protected:
    //// FRIENDS ////

    template<class T2, Ownership W2, MemSpace M2, class Id2>
    friend class Collection;

    template<class T2, MemSpace M2, class Id2>
    friend class CollectionBuilder;

    template<class T2, class Id2>
    friend class DedupeCollectionBuilder;

    //!@{
    // Private accessors for collection construction/access
    using StorageT = typename detail::CollectionStorage<T, W, M>::type;
    CELER_FORCEINLINE_FUNCTION StorageT const& storage() const
    {
        return storage_.data;
    }
    CELER_FORCEINLINE_FUNCTION StorageT& storage() { return storage_.data; }
    //!@}
};

//! Collection for data of type T but indexed by TrackSlotId for use in States
template<class T, Ownership W, MemSpace M>
using StateCollection = Collection<T, W, M, TrackSlotId>;
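
//---------------------------------------------------------------------------//
/*!
 * Example: mirroring host-built data to device and creating a lightweight
 * view for kernels. This is an illustrative sketch; the variable names are
 * hypothetical, and host collections are normally filled with the
 * CollectionBuilder helpers declared above (and defined elsewhere).
 *
 * \code
   // Host-side setup with value ownership
   Collection<real_type, Ownership::value, MemSpace::host> host_vals;
   // ... fill host_vals during setup ...

   // Deep-copy host -> device with the converting constructor
   Collection<real_type, Ownership::value, MemSpace::device> device_vals{
       host_vals};

   // Non-owning, trivially copyable view suitable for kernel arguments;
   // const access on device may go through __ldg
   Collection<real_type, Ownership::const_reference, MemSpace::device>
       device_cref{device_vals};

   // Inside device code (where MemSpace::native is MemSpace::device):
   //   real_type r = device_cref[ItemId<real_type>{i}];
   //   auto all = device_cref[AllItems<real_type>{}];  // Span of everything
 * \endcode
 */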

//---------------------------------------------------------------------------//
// INLINE DEFINITIONS
//---------------------------------------------------------------------------//
//!@{
/*!
 * Construct or assign from another collection.
 *
 * These are generally used to create "references" to "values" (same memory
 * space) but can also be used to copy from device to host. The \c
 * detail::CollectionAssigner class statically checks for allowable
 * transformations and memory moves.
 *
 * TODO: add optimization to do an in-place copy (rather than a new
 * allocation) if the source and destination are the same size.
 */
template<class T, Ownership W, MemSpace M, class I>
template<Ownership W2, MemSpace M2>
Collection<T, W, M, I>::Collection(Collection<T, W2, M2, I> const& other)
{
    detail::copy_collection(other.storage_, &storage_);
    detail::CollectionStorageValidator<W2>()(this->size(),
                                             other.storage().size());
}

template<class T, Ownership W, MemSpace M, class I>
template<Ownership W2, MemSpace M2>
Collection<T, W, M, I>::Collection(Collection<T, W2, M2, I>& other)
{
    detail::copy_collection(other.storage_, &storage_);
    detail::CollectionStorageValidator<W2>()(this->size(),
                                             other.storage().size());
}

template<class T, Ownership W, MemSpace M, class I>
template<Ownership W2, MemSpace M2>
Collection<T, W, M, I>&
Collection<T, W, M, I>::operator=(Collection<T, W2, M2, I> const& other)
{
    detail::copy_collection(other.storage_, &storage_);
    detail::CollectionStorageValidator<W2>()(this->size(),
                                             other.storage().size());
    return *this;
}

template<class T, Ownership W, MemSpace M, class I>
template<Ownership W2, MemSpace M2>
Collection<T, W, M, I>&
Collection<T, W, M, I>::operator=(Collection<T, W2, M2, I>& other)
{
    detail::copy_collection(other.storage_, &storage_);
    detail::CollectionStorageValidator<W2>()(this->size(),
                                             other.storage().size());
    return *this;
}
//!@}
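
//---------------------------------------------------------------------------//
/*!
 * Example: the converting constructor above can also deep-copy device data
 * back to host, e.g. to inspect state results at the end of a run. This is
 * an illustrative sketch; \c device_states is hypothetical and must have
 * been resized and filled before the copy.
 *
 * \code
   StateCollection<real_type, Ownership::value, MemSpace::device> device_states;
   // ... resize device_states and launch kernels that fill it ...

   // Device -> host deep copy
   StateCollection<real_type, Ownership::value, MemSpace::host> host_states{
       device_states};
   real_type first = host_states[TrackSlotId{0}];
 * \endcode
 */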

//---------------------------------------------------------------------------//
/*!
 * Access a single element.
 */
template<class T, Ownership W, MemSpace M, class I>
CELER_FUNCTION auto
Collection<T, W, M, I>::operator[](ItemIdT i) -> reference_type
{
    CELER_EXPECT(i < this->size());
    return this->storage()[i.unchecked_get()];
}

//---------------------------------------------------------------------------//
/*!
 * Access a single element (const).
 */
template<class T, Ownership W, MemSpace M, class I>
CELER_FUNCTION auto
Collection<T, W, M, I>::operator[](ItemIdT i) const -> const_reference_type
{
    CELER_EXPECT(i < this->size());
    return this->storage()[i.unchecked_get()];
}

//---------------------------------------------------------------------------//
/*!
 * Access a subset of the data as a Span.
 */
template<class T, Ownership W, MemSpace M, class I>
CELER_FUNCTION auto Collection<T, W, M, I>::operator[](ItemRangeT ps) -> SpanT
{
    CELER_EXPECT(*ps.begin() <= *ps.end());
    CELER_EXPECT(*ps.end() < this->size() + 1);
    auto* data = this->storage().data();
    return {data + ps.begin()->unchecked_get(),
            data + ps.end()->unchecked_get()};
}

//---------------------------------------------------------------------------//
/*!
 * Access a subset of the data as a Span (const).
 */
template<class T, Ownership W, MemSpace M, class I>
CELER_FUNCTION auto
Collection<T, W, M, I>::operator[](ItemRangeT ps) const -> SpanConstT
{
    CELER_EXPECT(*ps.begin() <= *ps.end());
    CELER_EXPECT(*ps.end() < this->size() + 1);
    auto* data = this->storage().data();
    return {data + ps.begin()->unchecked_get(),
            data + ps.end()->unchecked_get()};
}

//---------------------------------------------------------------------------//
/*!
 * Access all of the data as a Span.
 */
template<class T, Ownership W, MemSpace M, class I>
CELER_FUNCTION auto Collection<T, W, M, I>::operator[](AllItemsT) -> SpanT
{
    return {this->storage().data(), this->storage().size()};
}

//---------------------------------------------------------------------------//
/*!
 * Access all of the data as a Span (const).
 */
template<class T, Ownership W, MemSpace M, class I>
CELER_FUNCTION auto
Collection<T, W, M, I>::operator[](AllItemsT) const -> SpanConstT
{
    return {this->storage().data(), this->storage().size()};
}

//---------------------------------------------------------------------------//
}  // namespace celeritas