Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:54:50

0001 //----------------------------------*-C++-*----------------------------------//
0002 // Copyright 2020-2024 UT-Battelle, LLC, and other Celeritas developers.
0003 // See the top-level COPYRIGHT file for details.
0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0005 //---------------------------------------------------------------------------//
0006 /*!
0007  * \file Assert.hh
0008  * \brief Macros, exceptions, and helpers for assertions and error handling.
0009  *
0010  * This defines host- and device-compatible assertion macros that are toggled
0011  * on the \c CELERITAS_DEBUG and \c CELERITAS_DEVICE_DEBUG configure macros.
0012  */
0013 //---------------------------------------------------------------------------//
0014 #pragma once
0015 
0016 #include <stdexcept>
0017 #include <string>
0018 #if defined(__HIP__)
0019 #    include <hip/hip_runtime.h>
0020 #elif defined(__CUDA_ARCH__)
0021 // No assert header needed for CUDA
0022 #else
0023 #    include <ostream>  // IWYU pragma: export
0024 #    include <sstream>  // IWYU pragma: keep
0025 #endif
0026 
0027 #include "corecel/Config.hh"
0028 
0029 #include "Macros.hh"
0030 
0031 //---------------------------------------------------------------------------//
0032 // MACROS
0033 //---------------------------------------------------------------------------//
0034 /*!
0035  * \def CELER_EXPECT
0036  *
0037  * Precondition debug assertion macro. We "expect" that the input values
0038  * or initial state satisfy a precondition, and we throw exception in
0039  * debug mode if they do not.
0040  */
0041 /*!
0042  * \def CELER_ASSERT
0043  *
0044  * Internal debug assertion macro. This replaces standard \c assert usage.
0045  */
0046 /*!
0047  * \def CELER_ENSURE
0048  *
0049  * Postcondition debug assertion macro. Use to "ensure" that return values or
0050  * side effects are as expected when leaving a function.
0051  */
0052 /*!
0053  * \def CELER_ASSUME
0054  *
0055  * Always-on compiler assumption. This should be used very rarely: you should
0056  * make sure the resulting assembly is simplified in optimize mode from using
0057  * the assumption. For example, sometimes informing the compiler of an
0058  * assumption can reduce code bloat by skipping standard library exception
0059  * handling code (e.g. in \c std::visit by assuming \c
0060  * !var_obj.valueless_by_exception() ).
0061  */
0062 /*!
0063  * \def CELER_VALIDATE
0064  *
0065  * Always-on runtime assertion macro. This can check user input and input data
0066  * consistency, and will raise RuntimeError on failure with a descriptive error
0067  * message that is streamed as the second argument. This macro cannot be used
0068  * in \c __device__ -annotated code.
0069  *
0070  * The error message should read: \verbatim
0071    "<PROBLEM> (<WHY IT'S A PROBLEM>) <SUGGESTION>?"
0072   \endverbatim
0073  *
0074  * Examples with correct casing and punctuation:
0075  * - "failed to open '{filename}' (should contain relaxation data)"
0076  * - "unexpected end of file '{filename}' (data is inconsistent with
0077  *   boundaries)"
0078  * - "MPI was not initialized (needed to construct a communicator). Maybe set
0079  *   the environment variable CELER_DISABLE_PARALLEL=1 to disable
0080  *   externally?"
0081  * - "invalid min_range={opts.min_range} (must be positive)"
0082  *
0083  * This looks in pracice like:
0084  * \code
0085    CELER_VALIDATE(file_stream,
0086                   << "failed to open '" << filename
0087                   << "' (should contain relaxation data)");
0088  * \endcode
0089  *
0090  * An always-on debug-type assertion without a detailed message can be
0091  * constructed by omitting the stream (but leaving the comma):
0092  * \code
0093     CELER_VALIDATE(file_stream,);
0094  * \endcode
0095  */
0096 /*!
0097  * \def CELER_DEBUG_FAIL
0098  *
0099  * Throw a debug assertion regardless of the \c CELERITAS_DEBUG setting. This
0100  * is used internally but is also useful for catching subtle programming errors
0101  * in downstream code.
0102  */
0103 /*!
0104  * \def CELER_ASSERT_UNREACHABLE
0105  *
0106  * Throw an assertion if the code point is reached. When debug assertions are
0107  * turned off, this changes to a compiler hint that improves optimization (and
0108  * may force the coded to exit uncermoniously if the point is encountered,
0109  * rather than continuing on with undefined behavior).
0110  */
0111 /*!
0112  * \def CELER_NOT_CONFIGURED
0113  *
0114  * Assert if the code point is reached because an optional feature is disabled.
0115  * This generally should be used for the constructors of dummy class
0116  * definitions in, e.g., \c Foo.nocuda.cc:
0117  * \code
0118     Foo::Foo()
0119     {
0120         CELER_NOT_CONFIGURED("CUDA");
0121     }
0122  * \endcode
0123  */
0124 /*!
0125  * \def CELER_NOT_IMPLEMENTED
0126  *
0127  * Assert if the code point is reached because a feature has yet to be fully
0128  * implemented.
0129  *
0130  * This placeholder is so that code paths can be "declared but not defined" and
0131  * implementations safely postponed in a greppable manner. This should \em not
0132  * be used to define "unused" overrides for virtual classes. A correct use case
0133  * would be:
0134  * \code
0135    if (z > AtomicNumber{26})
0136    {
0137        CELER_NOT_IMPLEMENTED("physics for heavy nuclides");
0138    }
0139  * \endcode
0140  */
0141 
0142 //! \cond
0143 
0144 #if !defined(__HIP__) && !defined(__CUDA_ARCH__)
0145 // Throw in host code
0146 #    define CELER_DEBUG_THROW_(MSG, WHICH) \
0147         throw ::celeritas::DebugError(     \
0148             {::celeritas::DebugErrorType::WHICH, MSG, __FILE__, __LINE__})
0149 #elif defined(__CUDA_ARCH__) && !defined(NDEBUG)
0150 // Use the assert macro for CUDA when supported
0151 #    define CELER_DEBUG_THROW_(MSG, WHICH) \
0152         assert(false && sizeof(#WHICH ": " MSG))
0153 #else
0154 // Use a special device function to emulate assertion failure if HIP
0155 // (assertion from multiple threads simultaeously can cause unexpected device
0156 // failures on AMD hardware) or if NDEBUG is in use with CUDA
0157 #    define CELER_DEBUG_THROW_(MSG, WHICH) \
0158         ::celeritas::device_debug_error(   \
0159             ::celeritas::DebugErrorType::WHICH, MSG, __FILE__, __LINE__)
0160 #endif
0161 
0162 #define CELER_DEBUG_ASSERT_(COND, WHICH)      \
0163     do                                        \
0164     {                                         \
0165         if (CELER_UNLIKELY(!(COND)))          \
0166         {                                     \
0167             CELER_DEBUG_THROW_(#COND, WHICH); \
0168         }                                     \
0169     } while (0)
0170 #define CELER_NDEBUG_ASSUME_(COND)      \
0171     do                                  \
0172     {                                   \
0173         if (CELER_UNLIKELY(!(COND)))    \
0174         {                               \
0175             ::celeritas::unreachable(); \
0176         }                               \
0177     } while (0)
0178 #define CELER_NOASSERT_(COND)   \
0179     do                          \
0180     {                           \
0181         if (false && (COND)) {} \
0182     } while (0)
0183 //! \endcond
0184 
0185 #define CELER_DEBUG_FAIL(MSG, WHICH)    \
0186     do                                  \
0187     {                                   \
0188         CELER_DEBUG_THROW_(MSG, WHICH); \
0189         ::celeritas::unreachable();     \
0190     } while (0)
0191 
0192 #if !CELER_DEVICE_COMPILE
0193 #    define CELER_RUNTIME_THROW(WHICH, WHAT, COND) \
0194         throw ::celeritas::RuntimeError({          \
0195             WHICH,                                 \
0196             WHAT,                                  \
0197             COND,                                  \
0198             __FILE__,                              \
0199             __LINE__,                              \
0200         })
0201 #elif CELERITAS_DEBUG
0202 #    define CELER_RUNTIME_THROW(WHICH, WHAT, COND)                           \
0203         CELER_DEBUG_FAIL("Runtime errors cannot be thrown from device code", \
0204                          unreachable);
0205 #else
0206 // Avoid printf statements which can add substantially to local memory
0207 #    define CELER_RUNTIME_THROW(WHICH, WHAT, COND) ::celeritas::unreachable()
0208 #endif
0209 
0210 #if (CELERITAS_DEBUG && !CELER_DEVICE_COMPILE) \
0211     || (CELERITAS_DEVICE_DEBUG && CELER_DEVICE_COMPILE)
0212 #    define CELER_EXPECT(COND) CELER_DEBUG_ASSERT_(COND, precondition)
0213 #    define CELER_ASSERT(COND) CELER_DEBUG_ASSERT_(COND, internal)
0214 #    define CELER_ENSURE(COND) CELER_DEBUG_ASSERT_(COND, postcondition)
0215 #    define CELER_ASSUME(COND) CELER_DEBUG_ASSERT_(COND, assumption)
0216 #    define CELER_ASSERT_UNREACHABLE() \
0217         CELER_DEBUG_FAIL("unreachable code point encountered", unreachable)
0218 #else
0219 #    define CELER_EXPECT(COND) CELER_NOASSERT_(COND)
0220 #    define CELER_ASSERT(COND) CELER_NOASSERT_(COND)
0221 #    define CELER_ENSURE(COND) CELER_NOASSERT_(COND)
0222 #    define CELER_ASSUME(COND) CELER_NDEBUG_ASSUME_(COND)
0223 #    define CELER_ASSERT_UNREACHABLE() ::celeritas::unreachable()
0224 #endif
0225 
0226 #if !CELER_DEVICE_COMPILE
0227 #    define CELER_VALIDATE(COND, MSG)                            \
0228         do                                                       \
0229         {                                                        \
0230             if (CELER_UNLIKELY(!(COND)))                         \
0231             {                                                    \
0232                 std::ostringstream celer_runtime_msg_;           \
0233                 celer_runtime_msg_ MSG;                          \
0234                 CELER_RUNTIME_THROW(                             \
0235                     ::celeritas::RuntimeError::validate_err_str, \
0236                     celer_runtime_msg_.str(),                    \
0237                     #COND);                                      \
0238             }                                                    \
0239         } while (0)
0240 #else
0241 #    define CELER_VALIDATE(COND, MSG) CELER_RUNTIME_THROW(nullptr, "", #COND)
0242 #endif
0243 
0244 #define CELER_NOT_CONFIGURED(WHAT) \
0245     CELER_RUNTIME_THROW(           \
0246         ::celeritas::RuntimeError::not_config_err_str, WHAT, {})
0247 #define CELER_NOT_IMPLEMENTED(WHAT) \
0248     CELER_RUNTIME_THROW(::celeritas::RuntimeError::not_impl_err_str, WHAT, {})
0249 
0250 /*!
0251  * \def CELER_CUDA_CALL
0252  *
0253  * When CUDA support is enabled, execute the wrapped statement and throw a
0254  * RuntimeError if it fails. If CUDA is disabled, throw an unconfigured
0255  * assertion.
0256  *
0257  * If it fails, we call \c cudaGetLastError to clear the error code. Note that
0258  * this will \em not clear the code in a few fatal error cases (kernel
0259  * assertion failure, invalid memory access) and all subsequent CUDA calls will
0260  * fail.
0261  *
0262  * \code
0263    CELER_CUDA_CALL(cudaMalloc(&ptr_gpu, 100 * sizeof(float)));
0264    CELER_CUDA_CALL(cudaDeviceSynchronize());
0265  * \endcode
0266  *
0267  * \note A file that uses this macro must include \c
0268  * corecel/DeviceRuntimeApi.hh .
0269  */
0270 #if CELERITAS_USE_CUDA
0271 #    define CELER_CUDA_CALL(STATEMENT)                                     \
0272         do                                                                 \
0273         {                                                                  \
0274             cudaError_t cuda_result_ = (STATEMENT);                        \
0275             if (CELER_UNLIKELY(cuda_result_ != cudaSuccess))               \
0276             {                                                              \
0277                 CELER_RUNTIME_THROW(                                       \
0278                     "CUDA", cudaGetErrorString(cuda_result_), #STATEMENT); \
0279             }                                                              \
0280         } while (0)
0281 #else
0282 #    define CELER_CUDA_CALL(STATEMENT)               \
0283         do                                           \
0284         {                                            \
0285             CELER_NOT_CONFIGURED("CUDA");            \
0286             CELER_DISCARD(CorecelDeviceRuntimeApiHh) \
0287         } while (0)
0288 #endif
0289 
0290 /*!
0291  * \def CELER_HIP_CALL
0292  *
0293  * When HIP support is enabled, execute the wrapped statement and throw a
0294  * RuntimeError if it fails. If HIP is disabled, throw an unconfigured
0295  * assertion.
0296  *
0297  * If it fails, we call \c hipGetLastError to clear the error code.
0298  *
0299  * \code
0300    CELER_HIP_CALL(hipMalloc(&ptr_gpu, 100 * sizeof(float)));
0301    CELER_HIP_CALL(hipDeviceSynchronize());
0302  * \endcode
0303  *
0304  * \note A file that uses this macro must include \c
0305  * corecel/DeviceRuntimeApi.hh . The \c CorecelDeviceRuntimeApiHh
0306  * declaration enforces this when HIP is disabled.
0307  */
0308 #if CELERITAS_USE_HIP
0309 #    define CELER_HIP_CALL(STATEMENT)                                   \
0310         do                                                              \
0311         {                                                               \
0312             hipError_t hip_result_ = (STATEMENT);                       \
0313             if (CELER_UNLIKELY(hip_result_ != hipSuccess))              \
0314             {                                                           \
0315                 hip_result_ = hipGetLastError();                        \
0316                 CELER_RUNTIME_THROW(                                    \
0317                     "HIP", hipGetErrorString(hip_result_), #STATEMENT); \
0318             }                                                           \
0319         } while (0)
0320 #else
0321 #    define CELER_HIP_CALL(STATEMENT)                \
0322         do                                           \
0323         {                                            \
0324             CELER_NOT_CONFIGURED("HIP");             \
0325             CELER_DISCARD(CorecelDeviceRuntimeApiHh) \
0326         } while (0)
0327 #endif
0328 
0329 /*!
0330  * \def CELER_DEVICE_CALL_PREFIX
0331  *
0332  * Prepend the argument with "cuda" or "hip" and call with the appropriate
0333  * checking statement as above.
0334  *
0335  * Example:
0336  *
0337  * \code
0338    CELER_DEVICE_CALL_PREFIX(Malloc(&ptr_gpu, 100 * sizeof(float)));
0339    CELER_DEVICE_CALL_PREFIX(DeviceSynchronize());
0340  * \endcode
0341  *
0342  * \note A file that uses this macro must include \c
0343  * corecel/DeviceRuntimeApi.hh . The \c CorecelDeviceRuntimeApiHh
0344  * declaration enforces this when CUDA/HIP are disabled.
0345  */
0346 #if CELERITAS_USE_CUDA
0347 #    define CELER_DEVICE_CALL_PREFIX(STMT) CELER_CUDA_CALL(cuda##STMT)
0348 #elif CELERITAS_USE_HIP
0349 #    define CELER_DEVICE_CALL_PREFIX(STMT) CELER_HIP_CALL(hip##STMT)
0350 #else
0351 #    define CELER_DEVICE_CALL_PREFIX(STMT)           \
0352         do                                           \
0353         {                                            \
0354             CELER_NOT_CONFIGURED("CUDA or HIP");     \
0355             CELER_DISCARD(CorecelDeviceRuntimeApiHh) \
0356         } while (0)
0357 #endif
0358 
0359 /*!
0360  * \def CELER_DEVICE_CHECK_ERROR
0361  *
0362  * After a kernel launch or other call, check that no CUDA errors have
0363  * occurred. This is also useful for checking success after external CUDA
0364  * libraries have been called.
0365  */
0366 #define CELER_DEVICE_CHECK_ERROR() CELER_DEVICE_CALL_PREFIX(PeekAtLastError())
0367 
0368 /*!
0369  * \def CELER_MPI_CALL
0370  *
0371  * When MPI support is enabled, execute the wrapped statement and throw a
0372  * RuntimeError if it fails. If MPI is disabled, throw an unconfigured
0373  * assertion.
0374  *
0375  * \note A file that uses this macro must include \c mpi.h.
0376  */
0377 #if CELERITAS_USE_MPI
0378 #    define CELER_MPI_CALL(STATEMENT)                                     \
0379         do                                                                \
0380         {                                                                 \
0381             int mpi_result_ = (STATEMENT);                                \
0382             if (CELER_UNLIKELY(mpi_result_ != MPI_SUCCESS))               \
0383             {                                                             \
0384                 CELER_RUNTIME_THROW(                                      \
0385                     "MPI", mpi_error_to_string(mpi_result_), #STATEMENT); \
0386             }                                                             \
0387         } while (0)
0388 #else
0389 #    define CELER_MPI_CALL(STATEMENT)    \
0390         do                               \
0391         {                                \
0392             CELER_NOT_CONFIGURED("MPI"); \
0393         } while (0)
0394 #endif
0395 
0396 //---------------------------------------------------------------------------//
0397 // ENUMERATIONS
0398 //---------------------------------------------------------------------------//
0399 
0400 namespace celeritas
0401 {
0402 //---------------------------------------------------------------------------//
0403 // ENUMERATIONS
0404 //---------------------------------------------------------------------------//
0405 enum class DebugErrorType
0406 {
0407     precondition,  //!< Precondition contract violation
0408     internal,  //!< Internal assertion check failure
0409     unreachable,  //!< Internal assertion: unreachable code path
0410     postcondition,  //!< Postcondition contract violation
0411     assumption,  //!< "Assume" violation
0412 };
0413 
0414 //! Detailed properties of a debug assertion failure
0415 struct DebugErrorDetails
0416 {
0417     DebugErrorType which;
0418     char const* condition;
0419     char const* file;
0420     int line;
0421 };
0422 
0423 //! Detailed properties of a runtime error
0424 struct RuntimeErrorDetails
0425 {
0426     char const* which{nullptr};  //!< Type of error (runtime, Geant4, MPI)
0427     std::string what{};  //!< Descriptive message
0428     std::string condition{};  //!< Code/test that failed
0429     std::string file{};  //!< Source file
0430     int line{0};  //!< Source line
0431 };
0432 
0433 //---------------------------------------------------------------------------//
0434 // FUNCTIONS
0435 //---------------------------------------------------------------------------//
0436 
0437 //! Invoke undefined behavior
0438 [[noreturn]] inline CELER_FUNCTION void unreachable()
0439 {
0440     CELER_UNREACHABLE;
0441 }
0442 
0443 // Get a pretty string version of a debug error
0444 char const* to_cstring(DebugErrorType which);
0445 
0446 // Get an MPI error string
0447 std::string mpi_error_to_string(int);
0448 
0449 //---------------------------------------------------------------------------//
0450 // TYPES
0451 //---------------------------------------------------------------------------//
0452 // Forward declaration of simple struct with pointer to an NLJSON object
0453 struct JsonPimpl;
0454 
0455 //---------------------------------------------------------------------------//
0456 /*!
0457  * Error thrown by Celeritas assertions.
0458  */
0459 class DebugError : public std::logic_error
0460 {
0461   public:
0462     // Construct from debug attributes
0463     explicit DebugError(DebugErrorDetails&&);
0464 
0465     //! Access the debug data
0466     DebugErrorDetails const& details() const { return details_; }
0467 
0468   private:
0469     DebugErrorDetails details_;
0470 };
0471 
0472 //---------------------------------------------------------------------------//
0473 /*!
0474  * Error thrown by working code from unexpected runtime conditions.
0475  */
0476 class RuntimeError : public std::runtime_error
0477 {
0478   public:
0479     // Construct from details
0480     explicit RuntimeError(RuntimeErrorDetails&&);
0481 
0482     //! Access detailed information
0483     RuntimeErrorDetails const& details() const { return details_; }
0484 
0485     //!@{
0486     //! String constants for "which" error message
0487     static char const validate_err_str[];
0488     static char const not_config_err_str[];
0489     static char const not_impl_err_str[];
0490     //!@}
0491 
0492   private:
0493     RuntimeErrorDetails details_;
0494 };
0495 
0496 //---------------------------------------------------------------------------//
0497 /*!
0498  * Base class for writing arbitrary exception context to JSON.
0499  *
0500  * This can be overridden in higher-level parts of the code for specific needs
0501  * (e.g., writing thread, event, and track contexts in Celeritas solver
0502  * kernels). Note that in order for derived classes to work with
0503  * `std::throw_with_nested`, they *MUST NOT* be `final`.
0504  */
0505 class RichContextException : public std::exception
0506 {
0507   public:
0508     //! Write output to the given JSON object
0509     virtual void output(JsonPimpl*) const = 0;
0510 
0511     //! Provide the name for this exception class
0512     virtual char const* type() const = 0;
0513 };
0514 
0515 //---------------------------------------------------------------------------//
0516 // INLINE FUNCTION DEFINITIONS
0517 //---------------------------------------------------------------------------//
0518 
0519 #if defined(__CUDA_ARCH__) && defined(NDEBUG)
0520 //! Host+device definition for CUDA when \c assert is unavailable
0521 inline __attribute__((noinline)) __host__ __device__ void device_debug_error(
0522     DebugErrorType, char const* condition, char const* file, int line)
0523 {
0524     printf("%s:%u:\nceleritas: internal assertion failed: %s\n",
0525            file,
0526            line,
0527            condition);
0528     __trap();
0529 }
0530 #elif defined(__HIP__)
0531 //! Host-only HIP call (whether or not NDEBUG is in use)
0532 inline __host__ void device_debug_error(DebugErrorType which,
0533                                         char const* condition,
0534                                         char const* file,
0535                                         int line)
0536 {
0537     throw DebugError({which, condition, file, line});
0538 }
0539 
0540 //! Device-only call for HIP (must always be declared; only used if
0541 //! NDEBUG)
0542 inline __attribute__((noinline)) __device__ void device_debug_error(
0543     DebugErrorType, char const* condition, char const* file, int line)
0544 {
0545     printf("%s:%u:\nceleritas: internal assertion failed: %s\n",
0546            file,
0547            line,
0548            condition);
0549     abort();
0550 }
0551 #endif
0552 
0553 //---------------------------------------------------------------------------//
0554 }  // namespace celeritas