|
||||
File indexing completed on 2025-01-18 09:54:50
0001 //----------------------------------*-C++-*----------------------------------// 0002 // Copyright 2020-2024 UT-Battelle, LLC, and other Celeritas developers. 0003 // See the top-level COPYRIGHT file for details. 0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT) 0005 //---------------------------------------------------------------------------// 0006 /*! 0007 * \file Assert.hh 0008 * \brief Macros, exceptions, and helpers for assertions and error handling. 0009 * 0010 * This defines host- and device-compatible assertion macros that are toggled 0011 * on the \c CELERITAS_DEBUG and \c CELERITAS_DEVICE_DEBUG configure macros. 0012 */ 0013 //---------------------------------------------------------------------------// 0014 #pragma once 0015 0016 #include <stdexcept> 0017 #include <string> 0018 #if defined(__HIP__) 0019 # include <hip/hip_runtime.h> 0020 #elif defined(__CUDA_ARCH__) 0021 // No assert header needed for CUDA 0022 #else 0023 # include <ostream> // IWYU pragma: export 0024 # include <sstream> // IWYU pragma: keep 0025 #endif 0026 0027 #include "corecel/Config.hh" 0028 0029 #include "Macros.hh" 0030 0031 //---------------------------------------------------------------------------// 0032 // MACROS 0033 //---------------------------------------------------------------------------// 0034 /*! 0035 * \def CELER_EXPECT 0036 * 0037 * Precondition debug assertion macro. We "expect" that the input values 0038 * or initial state satisfy a precondition, and we throw exception in 0039 * debug mode if they do not. 0040 */ 0041 /*! 0042 * \def CELER_ASSERT 0043 * 0044 * Internal debug assertion macro. This replaces standard \c assert usage. 0045 */ 0046 /*! 0047 * \def CELER_ENSURE 0048 * 0049 * Postcondition debug assertion macro. Use to "ensure" that return values or 0050 * side effects are as expected when leaving a function. 0051 */ 0052 /*! 0053 * \def CELER_ASSUME 0054 * 0055 * Always-on compiler assumption. This should be used very rarely: you should 0056 * make sure the resulting assembly is simplified in optimize mode from using 0057 * the assumption. For example, sometimes informing the compiler of an 0058 * assumption can reduce code bloat by skipping standard library exception 0059 * handling code (e.g. in \c std::visit by assuming \c 0060 * !var_obj.valueless_by_exception() ). 0061 */ 0062 /*! 0063 * \def CELER_VALIDATE 0064 * 0065 * Always-on runtime assertion macro. This can check user input and input data 0066 * consistency, and will raise RuntimeError on failure with a descriptive error 0067 * message that is streamed as the second argument. This macro cannot be used 0068 * in \c __device__ -annotated code. 0069 * 0070 * The error message should read: \verbatim 0071 "<PROBLEM> (<WHY IT'S A PROBLEM>) <SUGGESTION>?" 0072 \endverbatim 0073 * 0074 * Examples with correct casing and punctuation: 0075 * - "failed to open '{filename}' (should contain relaxation data)" 0076 * - "unexpected end of file '{filename}' (data is inconsistent with 0077 * boundaries)" 0078 * - "MPI was not initialized (needed to construct a communicator). Maybe set 0079 * the environment variable CELER_DISABLE_PARALLEL=1 to disable 0080 * externally?" 0081 * - "invalid min_range={opts.min_range} (must be positive)" 0082 * 0083 * This looks in pracice like: 0084 * \code 0085 CELER_VALIDATE(file_stream, 0086 << "failed to open '" << filename 0087 << "' (should contain relaxation data)"); 0088 * \endcode 0089 * 0090 * An always-on debug-type assertion without a detailed message can be 0091 * constructed by omitting the stream (but leaving the comma): 0092 * \code 0093 CELER_VALIDATE(file_stream,); 0094 * \endcode 0095 */ 0096 /*! 0097 * \def CELER_DEBUG_FAIL 0098 * 0099 * Throw a debug assertion regardless of the \c CELERITAS_DEBUG setting. This 0100 * is used internally but is also useful for catching subtle programming errors 0101 * in downstream code. 0102 */ 0103 /*! 0104 * \def CELER_ASSERT_UNREACHABLE 0105 * 0106 * Throw an assertion if the code point is reached. When debug assertions are 0107 * turned off, this changes to a compiler hint that improves optimization (and 0108 * may force the coded to exit uncermoniously if the point is encountered, 0109 * rather than continuing on with undefined behavior). 0110 */ 0111 /*! 0112 * \def CELER_NOT_CONFIGURED 0113 * 0114 * Assert if the code point is reached because an optional feature is disabled. 0115 * This generally should be used for the constructors of dummy class 0116 * definitions in, e.g., \c Foo.nocuda.cc: 0117 * \code 0118 Foo::Foo() 0119 { 0120 CELER_NOT_CONFIGURED("CUDA"); 0121 } 0122 * \endcode 0123 */ 0124 /*! 0125 * \def CELER_NOT_IMPLEMENTED 0126 * 0127 * Assert if the code point is reached because a feature has yet to be fully 0128 * implemented. 0129 * 0130 * This placeholder is so that code paths can be "declared but not defined" and 0131 * implementations safely postponed in a greppable manner. This should \em not 0132 * be used to define "unused" overrides for virtual classes. A correct use case 0133 * would be: 0134 * \code 0135 if (z > AtomicNumber{26}) 0136 { 0137 CELER_NOT_IMPLEMENTED("physics for heavy nuclides"); 0138 } 0139 * \endcode 0140 */ 0141 0142 //! \cond 0143 0144 #if !defined(__HIP__) && !defined(__CUDA_ARCH__) 0145 // Throw in host code 0146 # define CELER_DEBUG_THROW_(MSG, WHICH) \ 0147 throw ::celeritas::DebugError( \ 0148 {::celeritas::DebugErrorType::WHICH, MSG, __FILE__, __LINE__}) 0149 #elif defined(__CUDA_ARCH__) && !defined(NDEBUG) 0150 // Use the assert macro for CUDA when supported 0151 # define CELER_DEBUG_THROW_(MSG, WHICH) \ 0152 assert(false && sizeof(#WHICH ": " MSG)) 0153 #else 0154 // Use a special device function to emulate assertion failure if HIP 0155 // (assertion from multiple threads simultaeously can cause unexpected device 0156 // failures on AMD hardware) or if NDEBUG is in use with CUDA 0157 # define CELER_DEBUG_THROW_(MSG, WHICH) \ 0158 ::celeritas::device_debug_error( \ 0159 ::celeritas::DebugErrorType::WHICH, MSG, __FILE__, __LINE__) 0160 #endif 0161 0162 #define CELER_DEBUG_ASSERT_(COND, WHICH) \ 0163 do \ 0164 { \ 0165 if (CELER_UNLIKELY(!(COND))) \ 0166 { \ 0167 CELER_DEBUG_THROW_(#COND, WHICH); \ 0168 } \ 0169 } while (0) 0170 #define CELER_NDEBUG_ASSUME_(COND) \ 0171 do \ 0172 { \ 0173 if (CELER_UNLIKELY(!(COND))) \ 0174 { \ 0175 ::celeritas::unreachable(); \ 0176 } \ 0177 } while (0) 0178 #define CELER_NOASSERT_(COND) \ 0179 do \ 0180 { \ 0181 if (false && (COND)) {} \ 0182 } while (0) 0183 //! \endcond 0184 0185 #define CELER_DEBUG_FAIL(MSG, WHICH) \ 0186 do \ 0187 { \ 0188 CELER_DEBUG_THROW_(MSG, WHICH); \ 0189 ::celeritas::unreachable(); \ 0190 } while (0) 0191 0192 #if !CELER_DEVICE_COMPILE 0193 # define CELER_RUNTIME_THROW(WHICH, WHAT, COND) \ 0194 throw ::celeritas::RuntimeError({ \ 0195 WHICH, \ 0196 WHAT, \ 0197 COND, \ 0198 __FILE__, \ 0199 __LINE__, \ 0200 }) 0201 #elif CELERITAS_DEBUG 0202 # define CELER_RUNTIME_THROW(WHICH, WHAT, COND) \ 0203 CELER_DEBUG_FAIL("Runtime errors cannot be thrown from device code", \ 0204 unreachable); 0205 #else 0206 // Avoid printf statements which can add substantially to local memory 0207 # define CELER_RUNTIME_THROW(WHICH, WHAT, COND) ::celeritas::unreachable() 0208 #endif 0209 0210 #if (CELERITAS_DEBUG && !CELER_DEVICE_COMPILE) \ 0211 || (CELERITAS_DEVICE_DEBUG && CELER_DEVICE_COMPILE) 0212 # define CELER_EXPECT(COND) CELER_DEBUG_ASSERT_(COND, precondition) 0213 # define CELER_ASSERT(COND) CELER_DEBUG_ASSERT_(COND, internal) 0214 # define CELER_ENSURE(COND) CELER_DEBUG_ASSERT_(COND, postcondition) 0215 # define CELER_ASSUME(COND) CELER_DEBUG_ASSERT_(COND, assumption) 0216 # define CELER_ASSERT_UNREACHABLE() \ 0217 CELER_DEBUG_FAIL("unreachable code point encountered", unreachable) 0218 #else 0219 # define CELER_EXPECT(COND) CELER_NOASSERT_(COND) 0220 # define CELER_ASSERT(COND) CELER_NOASSERT_(COND) 0221 # define CELER_ENSURE(COND) CELER_NOASSERT_(COND) 0222 # define CELER_ASSUME(COND) CELER_NDEBUG_ASSUME_(COND) 0223 # define CELER_ASSERT_UNREACHABLE() ::celeritas::unreachable() 0224 #endif 0225 0226 #if !CELER_DEVICE_COMPILE 0227 # define CELER_VALIDATE(COND, MSG) \ 0228 do \ 0229 { \ 0230 if (CELER_UNLIKELY(!(COND))) \ 0231 { \ 0232 std::ostringstream celer_runtime_msg_; \ 0233 celer_runtime_msg_ MSG; \ 0234 CELER_RUNTIME_THROW( \ 0235 ::celeritas::RuntimeError::validate_err_str, \ 0236 celer_runtime_msg_.str(), \ 0237 #COND); \ 0238 } \ 0239 } while (0) 0240 #else 0241 # define CELER_VALIDATE(COND, MSG) CELER_RUNTIME_THROW(nullptr, "", #COND) 0242 #endif 0243 0244 #define CELER_NOT_CONFIGURED(WHAT) \ 0245 CELER_RUNTIME_THROW( \ 0246 ::celeritas::RuntimeError::not_config_err_str, WHAT, {}) 0247 #define CELER_NOT_IMPLEMENTED(WHAT) \ 0248 CELER_RUNTIME_THROW(::celeritas::RuntimeError::not_impl_err_str, WHAT, {}) 0249 0250 /*! 0251 * \def CELER_CUDA_CALL 0252 * 0253 * When CUDA support is enabled, execute the wrapped statement and throw a 0254 * RuntimeError if it fails. If CUDA is disabled, throw an unconfigured 0255 * assertion. 0256 * 0257 * If it fails, we call \c cudaGetLastError to clear the error code. Note that 0258 * this will \em not clear the code in a few fatal error cases (kernel 0259 * assertion failure, invalid memory access) and all subsequent CUDA calls will 0260 * fail. 0261 * 0262 * \code 0263 CELER_CUDA_CALL(cudaMalloc(&ptr_gpu, 100 * sizeof(float))); 0264 CELER_CUDA_CALL(cudaDeviceSynchronize()); 0265 * \endcode 0266 * 0267 * \note A file that uses this macro must include \c 0268 * corecel/DeviceRuntimeApi.hh . 0269 */ 0270 #if CELERITAS_USE_CUDA 0271 # define CELER_CUDA_CALL(STATEMENT) \ 0272 do \ 0273 { \ 0274 cudaError_t cuda_result_ = (STATEMENT); \ 0275 if (CELER_UNLIKELY(cuda_result_ != cudaSuccess)) \ 0276 { \ 0277 CELER_RUNTIME_THROW( \ 0278 "CUDA", cudaGetErrorString(cuda_result_), #STATEMENT); \ 0279 } \ 0280 } while (0) 0281 #else 0282 # define CELER_CUDA_CALL(STATEMENT) \ 0283 do \ 0284 { \ 0285 CELER_NOT_CONFIGURED("CUDA"); \ 0286 CELER_DISCARD(CorecelDeviceRuntimeApiHh) \ 0287 } while (0) 0288 #endif 0289 0290 /*! 0291 * \def CELER_HIP_CALL 0292 * 0293 * When HIP support is enabled, execute the wrapped statement and throw a 0294 * RuntimeError if it fails. If HIP is disabled, throw an unconfigured 0295 * assertion. 0296 * 0297 * If it fails, we call \c hipGetLastError to clear the error code. 0298 * 0299 * \code 0300 CELER_HIP_CALL(hipMalloc(&ptr_gpu, 100 * sizeof(float))); 0301 CELER_HIP_CALL(hipDeviceSynchronize()); 0302 * \endcode 0303 * 0304 * \note A file that uses this macro must include \c 0305 * corecel/DeviceRuntimeApi.hh . The \c CorecelDeviceRuntimeApiHh 0306 * declaration enforces this when HIP is disabled. 0307 */ 0308 #if CELERITAS_USE_HIP 0309 # define CELER_HIP_CALL(STATEMENT) \ 0310 do \ 0311 { \ 0312 hipError_t hip_result_ = (STATEMENT); \ 0313 if (CELER_UNLIKELY(hip_result_ != hipSuccess)) \ 0314 { \ 0315 hip_result_ = hipGetLastError(); \ 0316 CELER_RUNTIME_THROW( \ 0317 "HIP", hipGetErrorString(hip_result_), #STATEMENT); \ 0318 } \ 0319 } while (0) 0320 #else 0321 # define CELER_HIP_CALL(STATEMENT) \ 0322 do \ 0323 { \ 0324 CELER_NOT_CONFIGURED("HIP"); \ 0325 CELER_DISCARD(CorecelDeviceRuntimeApiHh) \ 0326 } while (0) 0327 #endif 0328 0329 /*! 0330 * \def CELER_DEVICE_CALL_PREFIX 0331 * 0332 * Prepend the argument with "cuda" or "hip" and call with the appropriate 0333 * checking statement as above. 0334 * 0335 * Example: 0336 * 0337 * \code 0338 CELER_DEVICE_CALL_PREFIX(Malloc(&ptr_gpu, 100 * sizeof(float))); 0339 CELER_DEVICE_CALL_PREFIX(DeviceSynchronize()); 0340 * \endcode 0341 * 0342 * \note A file that uses this macro must include \c 0343 * corecel/DeviceRuntimeApi.hh . The \c CorecelDeviceRuntimeApiHh 0344 * declaration enforces this when CUDA/HIP are disabled. 0345 */ 0346 #if CELERITAS_USE_CUDA 0347 # define CELER_DEVICE_CALL_PREFIX(STMT) CELER_CUDA_CALL(cuda##STMT) 0348 #elif CELERITAS_USE_HIP 0349 # define CELER_DEVICE_CALL_PREFIX(STMT) CELER_HIP_CALL(hip##STMT) 0350 #else 0351 # define CELER_DEVICE_CALL_PREFIX(STMT) \ 0352 do \ 0353 { \ 0354 CELER_NOT_CONFIGURED("CUDA or HIP"); \ 0355 CELER_DISCARD(CorecelDeviceRuntimeApiHh) \ 0356 } while (0) 0357 #endif 0358 0359 /*! 0360 * \def CELER_DEVICE_CHECK_ERROR 0361 * 0362 * After a kernel launch or other call, check that no CUDA errors have 0363 * occurred. This is also useful for checking success after external CUDA 0364 * libraries have been called. 0365 */ 0366 #define CELER_DEVICE_CHECK_ERROR() CELER_DEVICE_CALL_PREFIX(PeekAtLastError()) 0367 0368 /*! 0369 * \def CELER_MPI_CALL 0370 * 0371 * When MPI support is enabled, execute the wrapped statement and throw a 0372 * RuntimeError if it fails. If MPI is disabled, throw an unconfigured 0373 * assertion. 0374 * 0375 * \note A file that uses this macro must include \c mpi.h. 0376 */ 0377 #if CELERITAS_USE_MPI 0378 # define CELER_MPI_CALL(STATEMENT) \ 0379 do \ 0380 { \ 0381 int mpi_result_ = (STATEMENT); \ 0382 if (CELER_UNLIKELY(mpi_result_ != MPI_SUCCESS)) \ 0383 { \ 0384 CELER_RUNTIME_THROW( \ 0385 "MPI", mpi_error_to_string(mpi_result_), #STATEMENT); \ 0386 } \ 0387 } while (0) 0388 #else 0389 # define CELER_MPI_CALL(STATEMENT) \ 0390 do \ 0391 { \ 0392 CELER_NOT_CONFIGURED("MPI"); \ 0393 } while (0) 0394 #endif 0395 0396 //---------------------------------------------------------------------------// 0397 // ENUMERATIONS 0398 //---------------------------------------------------------------------------// 0399 0400 namespace celeritas 0401 { 0402 //---------------------------------------------------------------------------// 0403 // ENUMERATIONS 0404 //---------------------------------------------------------------------------// 0405 enum class DebugErrorType 0406 { 0407 precondition, //!< Precondition contract violation 0408 internal, //!< Internal assertion check failure 0409 unreachable, //!< Internal assertion: unreachable code path 0410 postcondition, //!< Postcondition contract violation 0411 assumption, //!< "Assume" violation 0412 }; 0413 0414 //! Detailed properties of a debug assertion failure 0415 struct DebugErrorDetails 0416 { 0417 DebugErrorType which; 0418 char const* condition; 0419 char const* file; 0420 int line; 0421 }; 0422 0423 //! Detailed properties of a runtime error 0424 struct RuntimeErrorDetails 0425 { 0426 char const* which{nullptr}; //!< Type of error (runtime, Geant4, MPI) 0427 std::string what{}; //!< Descriptive message 0428 std::string condition{}; //!< Code/test that failed 0429 std::string file{}; //!< Source file 0430 int line{0}; //!< Source line 0431 }; 0432 0433 //---------------------------------------------------------------------------// 0434 // FUNCTIONS 0435 //---------------------------------------------------------------------------// 0436 0437 //! Invoke undefined behavior 0438 [[noreturn]] inline CELER_FUNCTION void unreachable() 0439 { 0440 CELER_UNREACHABLE; 0441 } 0442 0443 // Get a pretty string version of a debug error 0444 char const* to_cstring(DebugErrorType which); 0445 0446 // Get an MPI error string 0447 std::string mpi_error_to_string(int); 0448 0449 //---------------------------------------------------------------------------// 0450 // TYPES 0451 //---------------------------------------------------------------------------// 0452 // Forward declaration of simple struct with pointer to an NLJSON object 0453 struct JsonPimpl; 0454 0455 //---------------------------------------------------------------------------// 0456 /*! 0457 * Error thrown by Celeritas assertions. 0458 */ 0459 class DebugError : public std::logic_error 0460 { 0461 public: 0462 // Construct from debug attributes 0463 explicit DebugError(DebugErrorDetails&&); 0464 0465 //! Access the debug data 0466 DebugErrorDetails const& details() const { return details_; } 0467 0468 private: 0469 DebugErrorDetails details_; 0470 }; 0471 0472 //---------------------------------------------------------------------------// 0473 /*! 0474 * Error thrown by working code from unexpected runtime conditions. 0475 */ 0476 class RuntimeError : public std::runtime_error 0477 { 0478 public: 0479 // Construct from details 0480 explicit RuntimeError(RuntimeErrorDetails&&); 0481 0482 //! Access detailed information 0483 RuntimeErrorDetails const& details() const { return details_; } 0484 0485 //!@{ 0486 //! String constants for "which" error message 0487 static char const validate_err_str[]; 0488 static char const not_config_err_str[]; 0489 static char const not_impl_err_str[]; 0490 //!@} 0491 0492 private: 0493 RuntimeErrorDetails details_; 0494 }; 0495 0496 //---------------------------------------------------------------------------// 0497 /*! 0498 * Base class for writing arbitrary exception context to JSON. 0499 * 0500 * This can be overridden in higher-level parts of the code for specific needs 0501 * (e.g., writing thread, event, and track contexts in Celeritas solver 0502 * kernels). Note that in order for derived classes to work with 0503 * `std::throw_with_nested`, they *MUST NOT* be `final`. 0504 */ 0505 class RichContextException : public std::exception 0506 { 0507 public: 0508 //! Write output to the given JSON object 0509 virtual void output(JsonPimpl*) const = 0; 0510 0511 //! Provide the name for this exception class 0512 virtual char const* type() const = 0; 0513 }; 0514 0515 //---------------------------------------------------------------------------// 0516 // INLINE FUNCTION DEFINITIONS 0517 //---------------------------------------------------------------------------// 0518 0519 #if defined(__CUDA_ARCH__) && defined(NDEBUG) 0520 //! Host+device definition for CUDA when \c assert is unavailable 0521 inline __attribute__((noinline)) __host__ __device__ void device_debug_error( 0522 DebugErrorType, char const* condition, char const* file, int line) 0523 { 0524 printf("%s:%u:\nceleritas: internal assertion failed: %s\n", 0525 file, 0526 line, 0527 condition); 0528 __trap(); 0529 } 0530 #elif defined(__HIP__) 0531 //! Host-only HIP call (whether or not NDEBUG is in use) 0532 inline __host__ void device_debug_error(DebugErrorType which, 0533 char const* condition, 0534 char const* file, 0535 int line) 0536 { 0537 throw DebugError({which, condition, file, line}); 0538 } 0539 0540 //! Device-only call for HIP (must always be declared; only used if 0541 //! NDEBUG) 0542 inline __attribute__((noinline)) __device__ void device_debug_error( 0543 DebugErrorType, char const* condition, char const* file, int line) 0544 { 0545 printf("%s:%u:\nceleritas: internal assertion failed: %s\n", 0546 file, 0547 line, 0548 condition); 0549 abort(); 0550 } 0551 #endif 0552 0553 //---------------------------------------------------------------------------// 0554 } // namespace celeritas
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |