celeritas/inp/Control.hh

0001 //------------------------------- -*- C++ -*- -------------------------------//
0002 // Copyright Celeritas contributors: see top-level COPYRIGHT file for details
0003 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0004 //---------------------------------------------------------------------------//
0005 //! \file celeritas/inp/Control.hh
0006 //---------------------------------------------------------------------------//
0007 #pragma once
0008
0009 #include <optional>
0010
0011 #include "celeritas/Types.hh"
0012
0013 namespace celeritas
0014 {
0015 namespace inp
0016 {
0017 //---------------------------------------------------------------------------//
0018 /*!
0019  * Set up per-process state/buffer capacities.
0020  *
0021  * Increasing these values increases resource requirements with the trade-off
0022  * of (usually!) improving performance. A larger number of \c tracks in flight
0023  * means improved performance on GPU because the standard kernel size
0024  * increases, but it also means higher memory usage because of the larger
0025  * number of full states. More \c initializers are necessary for more (and
0026  * higher-energy) tracks when lots of particles are in flight and producing new
0027  * child particles. More \c secondaries may be necessary if physical processes
0028  * that produce many daughters (e.g., atomic relaxation or Bertini cascade) are
0029  * active. The number of \c events in flight primarily increases the number of
0030  * active tracks, possible initializers, and produced secondaries (NOTE: see
0031  * [#1233](https://github.com/celeritas-project/celeritas/issues/1233) ).
0032  * Finally, the number of \c primaries is the maximum number of pending tracks
0033  * from an external application before running a kernel to construct \c
0034  * initializers and execute the stpeping loop.
0035  *
0036  * Capacities are defined as the number per application process (task): this
0037  * means that in a multithreaded context it implies "strong scaling" (i.e., the
0038  * allocations are divided among threads), and in a multiprocess context it
0039  * implies "weak scaling" (the problem size grows with the number of
0040  * processes).
0041  * In other words, if used in a multithread "event-parallel" context, each
0042  * state gets the specified \c tracks divided by the number of threads.  When
0043  * used in MPI parallel (e.g., one process per GPU), each process \em rank has
0044  * \c tracks total threads.
0045  *
0046  * \note The \c primaries was previously named \c auto_flush .
0047  * \note Previously, \c SetupOptions and \c celer-g4 treated these quantities
0048  * as "per stream" whereas \c celer-sim used "per process".
0049  *
0050  * \todo Some of these parameters will be more automated in the future.
0051  */
0052 struct StateCapacity
0053 {
0054     //! Maximum number of primaries that can be buffered before stepping
0055     size_type primaries{};
0056     //! Maximum number of queued primaries+secondaries
0057     size_type initializers{};
0058     //! Maximum number of track slots to be simultaneously stepped
0059     size_type tracks{};
0060 };
0061
0062 //---------------------------------------------------------------------------//
0063 /*!
0064  * Set up per-process state/buffer capacities for the main tracking loop.
0065  *
0066  * \note The \c primaries was previously named \c auto_flush .
0067  * \note Previously, \c SetupOptions and \c celer-g4 treated these quantities
0068  * as "per stream" whereas \c celer-sim used "per process".
0069  *
0070  * Defaults:
0071  * - \c secondaries: twice the number of track slots
0072  * - \c events: single event runs at a time
0073  */
0074 struct CoreStateCapacity : StateCapacity
0075 {
0076     //! Maximum number of secondaries created per step
0077     std::optional<size_type> secondaries;
0078
0079     //! Maximum number of simultaneous events (zero for doing one event at a
0080     //! time)
0081     std::optional<size_type> events;
0082 };
0083
0084 //---------------------------------------------------------------------------//
0085 /*!
0086  * Set up per-process state/buffer capacities for the optical tracking loop.
0087  *
0088  * \note \c generators was previously named \c buffer_capacity .
0089  */
0090 struct OpticalStateCapacity : StateCapacity
0091 {
0092     //! Maximum number of queued photon-generating steps
0093     size_type generators{};
0094 };
0095
0096 //---------------------------------------------------------------------------//
/*!
 * Execution options that make debugging easier when running on GPU.
 *
 * Defaults:
 * - \c sync_stream: \c false unless \c timers.diagnostics.action is \c true.
 */
struct DeviceDebug
{
    //! Whether to synchronize the stream after every kernel launch
    std::optional<bool> sync_stream{};
};
0108
0109 //---------------------------------------------------------------------------//
0110 /*!
0111  * Set up control/tuning parameters that do not affect physics.
0112  *
0113  * Defaults:
0114  * - \c device_debug: absent unless device is enabled
0115  * - \c optical_capacity: absent unless optical physics is enabled
0116  * - \c track_order: \c init_charge on GPU, \c none on CPU
0117  */
0118 struct Control
0119 {
0120     //! Per-process state sizes
0121     CoreStateCapacity capacity;
0122
0123     //! Per-process state sizes for *optical* tracking loop
0124     std::optional<OpticalStateCapacity> optical_capacity;
0125
0126     //! Number of streams
0127     size_type num_streams{};
0128
0129     //! Track sorting and initialization
0130     std::optional<TrackOrder> track_order;
0131
0132     //! Debug options for device
0133     std::optional<DeviceDebug> device_debug;
0134
0135     //! Perform a no-op step at the beginning to improve timing measurements
0136     bool warm_up{false};
0137
0138     //! Random number generator seed
0139     unsigned int seed{};
0140 };
0141
0142 //---------------------------------------------------------------------------//
0143 }  // namespace inp
0144 }  // namespace celeritas