|
||||
File indexing completed on 2025-01-18 09:54:47
0001 //----------------------------------*-C++-*----------------------------------// 0002 // Copyright 2021-2024 UT-Battelle, LLC, and other Celeritas developers. 0003 // See the top-level COPYRIGHT file for details. 0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT) 0005 //---------------------------------------------------------------------------// 0006 //! \file corecel/data/Collection.hh 0007 //---------------------------------------------------------------------------// 0008 #pragma once 0009 0010 #include "corecel/Assert.hh" 0011 #include "corecel/OpaqueId.hh" 0012 #include "corecel/Types.hh" 0013 #include "corecel/cont/Range.hh" 0014 #include "corecel/sys/ThreadId.hh" 0015 0016 #include "ObserverPtr.hh" 0017 0018 #include "detail/CollectionImpl.hh" 0019 0020 namespace celeritas 0021 { 0022 //---------------------------------------------------------------------------// 0023 /*! 0024 * \page collections Collection: a data portability class 0025 * 0026 * The \c Collection manages data allocation and transfer between CPU and GPU. 0027 * Its primary design goal is facilitating construction of deeply hierarchical 0028 * data on host at setup time and seamlessly copying to device. 0029 * The templated \c T must be trivially copyable---either a fundamental data 0030 * type or a struct of such types. 0031 * 0032 * An individual item in a \c Collection<T> can be accessed with \c ItemId<T>, 0033 * a contiguous subset of items are accessed with \c ItemRange<T>, and the 0034 * entirety of the data are accessed with \c AllItems<T>. All three of these 0035 * classes are trivially copyable, so they can be embedded in structs that can 0036 * be managed by a Collection. A group of Collections, one for each data type, 0037 * can therefore be trivially copied to the GPU to enable arbitrarily deep and 0038 * complex data hierarchies. 0039 * 0040 * By convention, groups of Collections comprising the data for a single class 0041 * or subsystem (such as RayleighInteractor or Physics) are stored in a helper 0042 * struct suffixed with \c Data . For cases where there is both persistent data 0043 * (problem-specific parameters) and transient data (track-specific states), 0044 * the collections must be grouped into two separate classes. \c StateData are 0045 * meant to be mutable and never directly copied between host and device; its 0046 * data collections are typically accessed by thread ID. \c ParamsData are 0047 * immutable and always "mirrored" on both host and device. Sometimes it's 0048 * sensible to partition \c ParamsData into discrete helper structs (stored by 0049 * value), each with a group of collections, and perhaps another struct that 0050 * has non-templated scalars (since the default assignment operator is less 0051 * work than manually copying scalars in a templated assignment operator. 0052 * 0053 * A collection group has the following requirements to be compatible with the 0054 \c 0055 * CollectionMirror, \c CollectionStateStore, and other such helper classes: 0056 * - Be a struct templated with \c template<Ownership W, MemSpace M> 0057 * - Contain only Collection objects and trivially copyable structs 0058 * - Define an operator bool that is true if and only if the class data is 0059 * assigned and consistent 0060 * - Define a templated assignment operator on "other" Ownership and MemSpace 0061 * which assigns every member to the right-hand-side's member 0062 * 0063 * Additionally, a \c StateData collection group must define 0064 * - A member function \c size() returning the number of entries (i.e. number 0065 * of threads) 0066 * - A free function \c resize with one of two signatures: 0067 * \code 0068 void resize( 0069 StateData<Ownership::value, M>* data, 0070 HostCRef<ParamsData> const& params, 0071 StreamId stream, 0072 size_type size); 0073 // or... 0074 void resize( 0075 StateData<Ownership::value, M>* data, 0076 const HostCRef<ParamsData>& params, 0077 size_type size); 0078 // or... 0079 void resize( 0080 StateData<Ownership::value, M>* data, 0081 size_type size); 0082 * \endcode 0083 * 0084 * By convention, related groups of collections are stored in a header file 0085 * named \c Data.hh . 0086 * 0087 * See ParticleParamsData and ParticleStateData for minimal examples of using 0088 * collections. The MaterialParamsData demonstrates additional complexity 0089 * by having a multi-level data hierarchy, and MaterialStateData has a resize 0090 * function that uses params data. PhysicsParamsData is a very complex example, 0091 * and GeoParamsData demonstates how to use template specialization to adapt 0092 * Collections to another codebase with a different convention for host-device 0093 * portability. 0094 */ 0095 0096 //! Opaque ID representing a single element of a container. 0097 template<class T> 0098 using ItemId = OpaqueId<T, size_type>; 0099 0100 //---------------------------------------------------------------------------// 0101 /*! 0102 * Reference a contiguous range of IDs corresponding to a slice of items. 0103 * 0104 * \tparam T The value type of items to represent. 0105 * 0106 * An ItemRange is a range of \c OpaqueId<T> that reference a range of values 0107 * of type \c T in a \c Collection . The ItemRange acts like a \c slice object 0108 * in Python when used on a Collection, returning a Span<T> of the underlying 0109 * data. 0110 * 0111 * An ItemRange is only meaningful in connection with a particular Collection 0112 * of type T. It doesn't have any persistent connection to its associated 0113 * collection and thus must be used carefully. 0114 * 0115 * \code 0116 struct MyMaterial 0117 { 0118 real_type number_density; 0119 ItemRange<ElementComponents> components; 0120 }; 0121 0122 template<Ownership W, MemSpace M> 0123 struct MyData 0124 { 0125 Collection<ElementComponents, W, M> components; 0126 Collection<MyMaterial, W, M> materials; 0127 }; 0128 * \endcode 0129 */ 0130 template<class T, class Size = size_type> 0131 using ItemRange = Range<OpaqueId<T, Size>>; 0132 0133 //---------------------------------------------------------------------------// 0134 /*! 0135 * Access data in a Range<T2> with an index of type T1. 0136 * 0137 * Here, T1 and T2 are expected to be OpaqueId types. This is simply a 0138 * type-safe "offset" with range checking. 0139 */ 0140 template<class T1, class T2> 0141 class ItemMap 0142 { 0143 static_assert(detail::is_opaque_id_v<T1>, "T1 is not OpaqueID"); 0144 static_assert(detail::is_opaque_id_v<T2>, "T2 is not OpaqueID"); 0145 0146 public: 0147 //!@{ 0148 //! \name Type aliases 0149 using key_type = T1; 0150 using mapped_type = T2; 0151 //!@} 0152 0153 public: 0154 //// CONSTRUCTION //// 0155 0156 ItemMap() = default; 0157 0158 //! Contruct from an exising Range<T2> 0159 explicit CELER_FUNCTION ItemMap(Range<T2> range) : range_(range) {} 0160 0161 //// ACCESS //// 0162 0163 //! Access Range via OpaqueId of type T1 0164 CELER_FORCEINLINE_FUNCTION T2 operator[](T1 id) const 0165 { 0166 CELER_EXPECT(id < this->size()); 0167 return range_[id.unchecked_get()]; 0168 } 0169 0170 //! Whether the underlying Range<T2> is empty 0171 CELER_FORCEINLINE_FUNCTION bool empty() const { return range_.empty(); } 0172 0173 //! Size of the underlying Range<T2> 0174 CELER_FORCEINLINE_FUNCTION size_type size() const { return range_.size(); } 0175 0176 private: 0177 //// DATA //// 0178 Range<T2> range_; 0179 }; 0180 0181 // Forward-declare collection builder, needed for GCC7 0182 template<class T2, MemSpace M2, class Id2> 0183 class CollectionBuilder; 0184 0185 //---------------------------------------------------------------------------// 0186 /*! 0187 * Sentinel class for obtaining a view to all items of a collection. 0188 */ 0189 template<class T, MemSpace M = MemSpace::native> 0190 struct AllItems 0191 { 0192 }; 0193 0194 //---------------------------------------------------------------------------// 0195 /*! 0196 * Manage generic array-like data ownership and transfer from host to device. 0197 * 0198 * Data are constructed incrementally on the host, then copied (along with 0199 * their associated ItemRange) to device. A Collection can act as a 0200 * std::vector<T>, DeviceVector<T>, Span<T>, or Span<const T>. The Spans can 0201 * point to host or device memory, but the MemSpace template argument protects 0202 * against accidental accesses from the wrong memory space. 0203 * 0204 * Each Collection object is usually accessed with an ItemRange, which 0205 * references a 0206 * contiguous set of elements in the Collection. For example, setup code on the 0207 * host would extend the Collection with a series of vectors, the addition of 0208 * which returns a ItemRange that returns the equivalent data on host or 0209 * device. This methodology allows complex nested data structures to be built 0210 * up quickly at setup time without knowing the size requirements beforehand. 0211 * 0212 * Host-device functions and classes should use \c Collection with a reference 0213 * or const_reference Ownership, and the \c MemSpace::native type, which 0214 * expects device memory when compiled inside a CUDA file and host memory when 0215 * used inside a C++ source or test. (This design choice prevents a single CUDA 0216 * file from compiling separate host-compatible and device-compatible compute 0217 * kernels, but in the case of Celeritas this situation won't arise, because 0218 * we always want to build host code in C++ files for development ease and to 0219 * allow testing when CUDA is disabled.) 0220 * 0221 * A \c MemSpace::Mapped collection will be accessible on the host and the 0222 * device. Unified addressing must be supported by the current device or an 0223 * exception will be thrown when using the collection. Mapped pinned memory 0224 * (i.e. zero-copy memory) is allocated, pages will always reside on host 0225 * memory and each access from device code will require a slow memory transfer. 0226 * Allocating pinned memory is slow and reduce the memory available to the 0227 * system: only allocate the smallest amount needed with the longest possible 0228 * lifetime. Frequently accessing data from device code will result in low 0229 * performance. Usecase for this MemSapce are: as a src / dst memory space for 0230 * asynchronous operations, on integrated GPU architecture, or a single 0231 * coalesced read or write from device code. 0232 * https://docs.nvidia.com/cuda/cuda-c-best-practices-guide/index.html#zero-copy 0233 * 0234 * Accessing a \c const_reference collection in \c device memory will return a 0235 * wrapper container that accesses the low-level data through the \c __ldg 0236 * primitive, which can accelerate random access by telling the compiler 0237 * <em>the memory will not be changed during the lifetime of the kernel</em>. 0238 * Therefore it is important to \em only use Collections for shared, 0239 * constant "params" data. 0240 */ 0241 template<class T, Ownership W, MemSpace M, class I = ItemId<T>> 0242 class Collection 0243 { 0244 // rocrand states have nontrivial destructors 0245 static_assert(std::is_trivially_copyable<T>::value || CELERITAS_USE_HIP, 0246 "Collection element is not trivially copyable"); 0247 static_assert(std::is_trivially_destructible<T>::value || CELERITAS_USE_HIP, 0248 "Collection element is not trivially destructible"); 0249 0250 using CollectionTraitsT = detail::CollectionTraits<T, W, M>; 0251 using const_value_type = typename CollectionTraitsT::const_type; 0252 0253 public: 0254 //!@{ 0255 //! \name Type aliases 0256 using value_type = typename CollectionTraitsT::type; 0257 using SpanT = typename CollectionTraitsT::SpanT; 0258 using SpanConstT = typename CollectionTraitsT::SpanConstT; 0259 using pointer = ObserverPtr<value_type, M>; 0260 using const_pointer = ObserverPtr<const_value_type, M>; 0261 using reference_type = typename CollectionTraitsT::reference_type; 0262 using const_reference_type = 0263 typename CollectionTraitsT::const_reference_type; 0264 using size_type = typename I::size_type; 0265 using ItemIdT = I; 0266 using ItemRangeT = Range<ItemIdT>; 0267 using AllItemsT = AllItems<T, M>; 0268 //!@} 0269 0270 public: 0271 //// CONSTRUCTION //// 0272 0273 //!@{ 0274 //! Default constructors 0275 Collection() = default; 0276 Collection(Collection const&) = default; 0277 Collection(Collection&&) = default; 0278 //!@} 0279 0280 // Construct from another collection 0281 template<Ownership W2, MemSpace M2> 0282 explicit inline Collection(Collection<T, W2, M2, I> const& other); 0283 0284 // Construct from another collection (mutable) 0285 template<Ownership W2, MemSpace M2> 0286 explicit inline Collection(Collection<T, W2, M2, I>& other); 0287 0288 //!@{ 0289 //! Default assignment 0290 Collection& operator=(Collection const& other) = default; 0291 Collection& operator=(Collection&& other) = default; 0292 //!@} 0293 0294 // Assign from another collection 0295 template<Ownership W2, MemSpace M2> 0296 inline Collection& operator=(Collection<T, W2, M2, I> const& other); 0297 0298 // Assign (mutable!) from another collection 0299 template<Ownership W2, MemSpace M2> 0300 inline Collection& operator=(Collection<T, W2, M2, I>& other); 0301 0302 //// ACCESS //// 0303 0304 // Access a single element 0305 CELER_FORCEINLINE_FUNCTION reference_type operator[](ItemIdT i); 0306 CELER_FORCEINLINE_FUNCTION const_reference_type operator[](ItemIdT i) const; 0307 0308 // Access a subset of the data with a slice 0309 CELER_FORCEINLINE_FUNCTION SpanT operator[](ItemRangeT ps); 0310 CELER_FORCEINLINE_FUNCTION SpanConstT operator[](ItemRangeT ps) const; 0311 0312 // Access all data. 0313 CELER_FORCEINLINE_FUNCTION SpanT operator[](AllItemsT); 0314 CELER_FORCEINLINE_FUNCTION SpanConstT operator[](AllItemsT) const; 0315 0316 //!@{ 0317 //! Direct accesors to underlying data 0318 CELER_FORCEINLINE_FUNCTION size_type size() const 0319 { 0320 return static_cast<size_type>(this->storage().size()); 0321 } 0322 CELER_FORCEINLINE_FUNCTION bool empty() const 0323 { 0324 return this->storage().empty(); 0325 } 0326 CELER_FORCEINLINE_FUNCTION pointer data() 0327 { 0328 return pointer{this->storage().data()}; 0329 } 0330 CELER_FORCEINLINE_FUNCTION const_pointer data() const 0331 { 0332 return const_pointer{this->storage().data()}; 0333 } 0334 //!@} 0335 0336 private: 0337 //// DATA //// 0338 0339 detail::CollectionStorage<T, W, M> storage_{}; 0340 0341 protected: 0342 //// FRIENDS //// 0343 0344 template<class T2, Ownership W2, MemSpace M2, class Id2> 0345 friend class Collection; 0346 0347 template<class T2, MemSpace M2, class Id2> 0348 friend class CollectionBuilder; 0349 0350 template<class T2, class Id2> 0351 friend class DedupeCollectionBuilder; 0352 0353 //!@{ 0354 // Private accessors for collection construction/access 0355 using StorageT = typename detail::CollectionStorage<T, W, M>::type; 0356 CELER_FORCEINLINE_FUNCTION StorageT const& storage() const 0357 { 0358 return storage_.data; 0359 } 0360 CELER_FORCEINLINE_FUNCTION StorageT& storage() { return storage_.data; } 0361 //@} 0362 }; 0363 0364 //! Collection for data of type T but indexed by TrackSlotId for use in States 0365 template<class T, Ownership W, MemSpace M> 0366 using StateCollection = Collection<T, W, M, TrackSlotId>; 0367 0368 //---------------------------------------------------------------------------// 0369 // INLINE DEFINITIONS 0370 //---------------------------------------------------------------------------// 0371 //!@{ 0372 /*! 0373 * Construct or assign from another collection. 0374 * 0375 * These are generally used to create "references" to "values" (same memory 0376 * space) but can also be used to copy from device to host. The \c 0377 * detail::CollectionAssigner class statically checks for allowable 0378 * transformations and memory moves. 0379 * 0380 * TODO: add optimization to do an in-place copy (rather than a new allocation) 0381 * if the host and destination are the same size. 0382 */ 0383 template<class T, Ownership W, MemSpace M, class I> 0384 template<Ownership W2, MemSpace M2> 0385 Collection<T, W, M, I>::Collection(Collection<T, W2, M2, I> const& other) 0386 { 0387 detail::copy_collection(other.storage_, &storage_); 0388 detail::CollectionStorageValidator<W2>()(this->size(), 0389 other.storage().size()); 0390 } 0391 0392 template<class T, Ownership W, MemSpace M, class I> 0393 template<Ownership W2, MemSpace M2> 0394 Collection<T, W, M, I>::Collection(Collection<T, W2, M2, I>& other) 0395 { 0396 detail::copy_collection(other.storage_, &storage_); 0397 detail::CollectionStorageValidator<W2>()(this->size(), 0398 other.storage().size()); 0399 } 0400 0401 template<class T, Ownership W, MemSpace M, class I> 0402 template<Ownership W2, MemSpace M2> 0403 Collection<T, W, M, I>& 0404 Collection<T, W, M, I>::operator=(Collection<T, W2, M2, I> const& other) 0405 { 0406 detail::copy_collection(other.storage_, &storage_); 0407 detail::CollectionStorageValidator<W2>()(this->size(), 0408 other.storage().size()); 0409 return *this; 0410 } 0411 0412 template<class T, Ownership W, MemSpace M, class I> 0413 template<Ownership W2, MemSpace M2> 0414 Collection<T, W, M, I>& 0415 Collection<T, W, M, I>::operator=(Collection<T, W2, M2, I>& other) 0416 { 0417 detail::copy_collection(other.storage_, &storage_); 0418 detail::CollectionStorageValidator<W2>()(this->size(), 0419 other.storage().size()); 0420 return *this; 0421 } 0422 //!@} 0423 0424 //---------------------------------------------------------------------------// 0425 /*! 0426 * Access a single element. 0427 */ 0428 template<class T, Ownership W, MemSpace M, class I> 0429 CELER_FUNCTION auto 0430 Collection<T, W, M, I>::operator[](ItemIdT i) -> reference_type 0431 { 0432 CELER_EXPECT(i < this->size()); 0433 return this->storage()[i.unchecked_get()]; 0434 } 0435 0436 //---------------------------------------------------------------------------// 0437 /*! 0438 * Access a single element (const). 0439 */ 0440 template<class T, Ownership W, MemSpace M, class I> 0441 CELER_FUNCTION auto 0442 Collection<T, W, M, I>::operator[](ItemIdT i) const -> const_reference_type 0443 { 0444 CELER_EXPECT(i < this->size()); 0445 return this->storage()[i.unchecked_get()]; 0446 } 0447 0448 //---------------------------------------------------------------------------// 0449 /*! 0450 * Access a subset of the data as a Span. 0451 */ 0452 template<class T, Ownership W, MemSpace M, class I> 0453 CELER_FUNCTION auto Collection<T, W, M, I>::operator[](ItemRangeT ps) -> SpanT 0454 { 0455 CELER_EXPECT(*ps.begin() <= *ps.end()); 0456 CELER_EXPECT(*ps.end() < this->size() + 1); 0457 auto* data = this->storage().data(); 0458 return {data + ps.begin()->unchecked_get(), 0459 data + ps.end()->unchecked_get()}; 0460 } 0461 0462 //---------------------------------------------------------------------------// 0463 /*! 0464 * Access a subset of the data as a Span (const). 0465 */ 0466 template<class T, Ownership W, MemSpace M, class I> 0467 CELER_FUNCTION auto 0468 Collection<T, W, M, I>::operator[](ItemRangeT ps) const -> SpanConstT 0469 { 0470 CELER_EXPECT(*ps.begin() <= *ps.end()); 0471 CELER_EXPECT(*ps.end() < this->size() + 1); 0472 auto* data = this->storage().data(); 0473 return {data + ps.begin()->unchecked_get(), 0474 data + ps.end()->unchecked_get()}; 0475 } 0476 0477 //---------------------------------------------------------------------------// 0478 /*! 0479 * Access all of the data as a Span. 0480 */ 0481 template<class T, Ownership W, MemSpace M, class I> 0482 CELER_FUNCTION auto Collection<T, W, M, I>::operator[](AllItemsT) -> SpanT 0483 { 0484 return {this->storage().data(), this->storage().size()}; 0485 } 0486 0487 //---------------------------------------------------------------------------// 0488 /*! 0489 * Access all of the data as a Span (const). 0490 */ 0491 template<class T, Ownership W, MemSpace M, class I> 0492 CELER_FUNCTION auto 0493 Collection<T, W, M, I>::operator[](AllItemsT) const -> SpanConstT 0494 { 0495 return {this->storage().data(), this->storage().size()}; 0496 } 0497 0498 //---------------------------------------------------------------------------// 0499 } // namespace celeritas
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |