corecel/math/Algorithms.hh

0001 //----------------------------------*-C++-*----------------------------------//
0002 // Copyright 2020-2024 UT-Battelle, LLC, and other Celeritas developers.
0003 // See the top-level COPYRIGHT file for details.
0004 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0005 //---------------------------------------------------------------------------//
0006 //! \file corecel/math/Algorithms.hh
0007 //---------------------------------------------------------------------------//
0008 #pragma once
0009
0010 #include <cmath>
0011 #include <type_traits>
0012
0013 #include "corecel/Config.hh"
0014
0015 #include "corecel/Assert.hh"
0016 #include "corecel/Macros.hh"
0017
0018 #include "detail/AlgorithmsImpl.hh"
0019
0020 #if !defined(CELER_DEVICE_SOURCE) && !defined(CELERITAS_SINCOSPI_PREFIX)
0021 #    include "detail/Sincospi.hh"
0022 #endif
0023
0024 namespace celeritas
0025 {
0026 //---------------------------------------------------------------------------//
0027 // Replace/extend <utility>
0028 //---------------------------------------------------------------------------//
0029 //! Implement perfect forwarding with device-friendly functions.
0030 template<class T>
0031 CELER_CONSTEXPR_FUNCTION T&&
0032 forward(typename std::remove_reference<T>::type& v) noexcept
0033 {
0034     return static_cast<T&&>(v);
0035 }
0036
0037 //! \cond (CELERITAS_DOC_DEV)
0038 template<class T>
0039 CELER_CONSTEXPR_FUNCTION T&&
0040 forward(typename std::remove_reference<T>::type&& v) noexcept
0041 {
0042     return static_cast<T&&>(v);
0043 }
0044 //! \endcond
0045
0046 //---------------------------------------------------------------------------//
0047 /*!
0048  * Cast a value as an rvalue reference to allow move construction.
0049  */
0050 template<class T>
0051 CELER_CONSTEXPR_FUNCTION auto move(T&& v) noexcept ->
0052     typename std::remove_reference<T>::type&&
0053 {
0054     return static_cast<typename std::remove_reference<T>::type&&>(v);
0055 }
0056
0057 //---------------------------------------------------------------------------//
0058 /*!
0059  * Support swapping of trivial types.
0060  */
0061 template<class T>
0062 CELER_FORCEINLINE_FUNCTION void trivial_swap(T& a, T& b) noexcept
0063 {
0064     static_assert(std::is_trivially_move_constructible<T>::value,
0065                   "Value is not trivially copyable");
0066     static_assert(std::is_trivially_move_assignable<T>::value,
0067                   "Value is not trivially movable");
0068     static_assert(std::is_trivially_destructible<T>::value,
0069                   "Value is not trivially destructible");
0070     T temp{::celeritas::move(a)};
0071     a = ::celeritas::move(b);
0072     b = ::celeritas::move(temp);
0073 }
0074
0075 //---------------------------------------------------------------------------//
0076 /*!
0077  * Exchange values on host or device.
0078  */
0079 template<class T, class U = T>
0080 CELER_FORCEINLINE_FUNCTION T exchange(T& dst, U&& src)
0081 {
0082     T orig = std::move(dst);
0083     dst = std::forward<U>(src);
0084     return orig;
0085 }
0086
0087 //---------------------------------------------------------------------------//
0088 // Replace/extend <functional>
0089 //---------------------------------------------------------------------------//
0090 /*!
0091  * Evaluator for the first argument being less than the second.
0092  */
0093 template<class T = void>
0094 struct Less
0095 {
0096     CELER_CONSTEXPR_FUNCTION auto
0097     operator()(T const& lhs, T const& rhs) const noexcept -> decltype(auto)
0098     {
0099         return lhs < rhs;
0100     }
0101 };
0102
0103 //! Specialization of less with template deduction
0104 template<>
0105 struct Less<void>
0106 {
0107     template<class T, class U>
0108     CELER_CONSTEXPR_FUNCTION auto
0109     operator()(T&& lhs, U&& rhs) const -> decltype(auto)
0110     {
0111         return ::celeritas::forward<T>(lhs) < ::celeritas::forward<U>(rhs);
0112     }
0113 };
0114
0115 //---------------------------------------------------------------------------//
0116 // Replace/extend <algorithm>
0117 //---------------------------------------------------------------------------//
0118 /*!
0119  * Whether the predicate is true for all items.
0120  */
0121 template<class InputIt, class Predicate>
0122 inline CELER_FUNCTION bool all_of(InputIt iter, InputIt last, Predicate p)
0123 {
0124     for (; iter != last; ++iter)
0125     {
0126         if (!p(*iter))
0127             return false;
0128     }
0129     return true;
0130 }
0131
0132 //---------------------------------------------------------------------------//
0133 /*!
0134  * Whether the predicate is true for any item.
0135  */
0136 template<class InputIt, class Predicate>
0137 inline CELER_FUNCTION bool any_of(InputIt iter, InputIt last, Predicate p)
0138 {
0139     for (; iter != last; ++iter)
0140     {
0141         if (p(*iter))
0142             return true;
0143     }
0144     return false;
0145 }
0146
0147 //---------------------------------------------------------------------------//
0148 /*!
0149  * Whether the predicate is true for pairs of consecutive items.
0150  */
0151 template<class InputIt, class Predicate>
0152 inline CELER_FUNCTION bool all_adjacent(InputIt iter, InputIt last, Predicate p)
0153 {
0154     if (iter == last)
0155         return true;
0156
0157     auto prev = *iter++;
0158     while (iter != last)
0159     {
0160         if (!p(prev, *iter))
0161             return false;
0162         prev = *iter++;
0163     }
0164     return true;
0165 }
0166
0167 //---------------------------------------------------------------------------//
0168 /*!
0169  * Clamp the value between lo and hi values.
0170  *
0171  * If the value is between lo and hi, return the value. Otherwise, return lo if
0172  * it's below it, or hi above it.
0173  *
0174  * This replaces:
0175  * \code
0176    min(hi, max(lo, v))
0177  * \endcode
0178  * or
0179  * \code
0180    max(v, min(v, lo))
0181  * \endcode
0182  * assuming that the relationship between \c lo and \c hi holds.
0183  */
0184 template<class T>
0185 inline CELER_FUNCTION T const& clamp(T const& v, T const& lo, T const& hi)
0186 {
0187     CELER_EXPECT(!(hi < lo));
0188     return v < lo ? lo : hi < v ? hi : v;
0189 }
0190
0191 //---------------------------------------------------------------------------//
0192 /*!
0193  * Return the value or (if it's negative) then zero.
0194  *
0195  * This is constructed to correctly propagate \c NaN.
0196  */
0197 template<class T>
0198 CELER_CONSTEXPR_FUNCTION T clamp_to_nonneg(T v) noexcept
0199 {
0200     return (v < 0) ? 0 : v;
0201 }
0202
0203 //---------------------------------------------------------------------------//
0204 /*!
0205  * Find the insertion point for a value in a sorted list using a binary search.
0206  */
0207 template<class ForwardIt, class T, class Compare>
0208 CELER_FORCEINLINE_FUNCTION ForwardIt
0209 lower_bound(ForwardIt first, ForwardIt last, T const& value, Compare comp)
0210 {
0211     using CompareRef = std::add_lvalue_reference_t<Compare>;
0212     return ::celeritas::detail::lower_bound_impl<CompareRef>(
0213         first, last, value, comp);
0214 }
0215
0216 //! \cond (CELERITAS_DOC_DEV)
0217 //---------------------------------------------------------------------------//
0218 /*!
0219  * Find the insertion point for a value in a sorted list using a binary search.
0220  */
0221 template<class ForwardIt, class T>
0222 CELER_FORCEINLINE_FUNCTION ForwardIt lower_bound(ForwardIt first,
0223                                                  ForwardIt last,
0224                                                  T const& value)
0225 {
0226     return ::celeritas::lower_bound(first, last, value, Less<>{});
0227 }
0228 //! \endcond
0229
0230 //---------------------------------------------------------------------------//
0231 /*!
0232  * Find the insertion point for a value in a sorted list using a linear search.
0233  */
0234 template<class ForwardIt, class T, class Compare>
0235 CELER_FORCEINLINE_FUNCTION ForwardIt lower_bound_linear(ForwardIt first,
0236                                                         ForwardIt last,
0237                                                         T const& value,
0238                                                         Compare comp)
0239 {
0240     using CompareRef = std::add_lvalue_reference_t<Compare>;
0241     return ::celeritas::detail::lower_bound_linear_impl<CompareRef>(
0242         first, last, value, comp);
0243 }
0244
0245 //---------------------------------------------------------------------------//
0246 /*!
0247  * Find the insertion point for a value in a sorted list using a linear search.
0248  */
0249 template<class ForwardIt, class T>
0250 CELER_FORCEINLINE_FUNCTION ForwardIt lower_bound_linear(ForwardIt first,
0251                                                         ForwardIt last,
0252                                                         T const& value)
0253 {
0254     return ::celeritas::lower_bound_linear(first, last, value, Less<>{});
0255 }
0256
0257 //---------------------------------------------------------------------------//
0258 /*!
0259  * Find the first element which is greater than <value>
0260  */
0261 template<class ForwardIt, class T, class Compare>
0262 CELER_FORCEINLINE_FUNCTION ForwardIt
0263 upper_bound(ForwardIt first, ForwardIt last, T const& value, Compare comp)
0264 {
0265     using CompareRef = std::add_lvalue_reference_t<Compare>;
0266     return ::celeritas::detail::upper_bound_impl<CompareRef>(
0267         first, last, value, comp);
0268 }
0269
0270 //! \cond (CELERITAS_DOC_DEV)
0271 //---------------------------------------------------------------------------//
0272 /*!
0273  * Find the first element which is greater than <value>
0274  */
0275 template<class ForwardIt, class T>
0276 CELER_FORCEINLINE_FUNCTION ForwardIt upper_bound(ForwardIt first,
0277                                                  ForwardIt last,
0278                                                  T const& value)
0279 {
0280     return ::celeritas::upper_bound(first, last, value, Less<>{});
0281 }
0282 //!\endcond
0283
0284 //---------------------------------------------------------------------------//
0285 /*!
0286  * Find the given element in a sorted range.
0287  */
0288 template<class ForwardIt, class T, class Compare>
0289 inline CELER_FUNCTION ForwardIt
0290 find_sorted(ForwardIt first, ForwardIt last, T const& value, Compare comp)
0291 {
0292     auto iter = ::celeritas::lower_bound(first, last, value, comp);
0293     if (iter == last || comp(*iter, value) || comp(value, *iter))
0294     {
0295         // Insertion point is off the end, or value is not equal
0296         return last;
0297     }
0298     return iter;
0299 }
0300
0301 //!\cond (CELERITAS_DOC_DEV)
0302 //---------------------------------------------------------------------------//
0303 /*!
0304  * Find the given element in a sorted range.
0305  */
0306 template<class ForwardIt, class T>
0307 CELER_FORCEINLINE_FUNCTION ForwardIt find_sorted(ForwardIt first,
0308                                                  ForwardIt last,
0309                                                  T const& value)
0310 {
0311     return ::celeritas::find_sorted(first, last, value, Less<>{});
0312 }
0313 //!\endcond
0314
0315 //---------------------------------------------------------------------------//
0316 /*!
0317  * Partition elements in the given range, "true" before "false".
0318  *
0319  * This is done by swapping elements until the range is partitioned.
0320  */
0321 template<class ForwardIt, class Predicate>
0322 CELER_FORCEINLINE_FUNCTION ForwardIt partition(ForwardIt first,
0323                                                ForwardIt last,
0324                                                Predicate pred)
0325 {
0326     using PredicateRef = std::add_lvalue_reference_t<Predicate>;
0327     return ::celeritas::detail::partition_impl<PredicateRef>(first, last, pred);
0328 }
0329
0330 //---------------------------------------------------------------------------//
0331 /*!
0332  * Sort an array on a single thread.
0333  *
0334  * This implementation is not thread-safe nor cooperative, but it can be called
0335  * from CUDA code.
0336  */
0337 template<class RandomAccessIt, class Compare>
0338 CELER_FORCEINLINE_FUNCTION void
0339 sort(RandomAccessIt first, RandomAccessIt last, Compare comp)
0340 {
0341     using CompareRef = std::add_lvalue_reference_t<Compare>;
0342     return ::celeritas::detail::heapsort_impl<CompareRef>(first, last, comp);
0343 }
0344
0345 //! \cond (CELERITAS_DOC_DEV)
0346 //---------------------------------------------------------------------------//
0347 /*!
0348  * Sort an array on a single thread.
0349  */
0350 template<class RandomAccessIt>
0351 CELER_FORCEINLINE_FUNCTION void sort(RandomAccessIt first, RandomAccessIt last)
0352 {
0353     ::celeritas::sort(first, last, Less<>{});
0354 }
0355 //! \endcond
0356
0357 //---------------------------------------------------------------------------//
0358 /*!
0359  * Return the higher of two values.
0360  *
0361  * This function is specialized when building CUDA device code, which has
0362  * special intrinsics for max.
0363  */
0364 template<class T, std::enable_if_t<!std::is_floating_point<T>::value, bool> = true>
0365 CELER_CONSTEXPR_FUNCTION T const& max(T const& a, T const& b) noexcept
0366 {
0367     return (b > a) ? b : a;
0368 }
0369
0370 //!\cond (CELERITAS_DOC_DEV)
0371 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0372 CELER_CONSTEXPR_FUNCTION T max(T a, T b) noexcept
0373 {
0374     return std::fmax(a, b);
0375 }
0376 //!\endcond
0377
0378 //---------------------------------------------------------------------------//
0379 /*!
0380  * Return the lower of two values.
0381  *
0382  * This function is specialized when building CUDA device code, which has
0383  * special intrinsics for min.
0384  */
0385 template<class T, std::enable_if_t<!std::is_floating_point<T>::value, bool> = true>
0386 CELER_CONSTEXPR_FUNCTION T const& min(T const& a, T const& b) noexcept
0387 {
0388     return (b < a) ? b : a;
0389 }
0390
0391 //!\cond (CELERITAS_DOC_DEV)
0392 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0393 CELER_CONSTEXPR_FUNCTION T min(T a, T b) noexcept
0394 {
0395     return std::fmin(a, b);
0396 }
0397 //!\endcond
0398
0399 //---------------------------------------------------------------------------//
0400 /*!
0401  * Return an iterator to the lowest value in the range as defined by Compare.
0402  */
0403 template<class ForwardIt, class Compare>
0404 inline CELER_FUNCTION ForwardIt min_element(ForwardIt iter,
0405                                             ForwardIt last,
0406                                             Compare comp)
0407 {
0408     // Avoid incrementing past the end
0409     if (iter == last)
0410         return last;
0411
0412     ForwardIt result = iter++;
0413     for (; iter != last; ++iter)
0414     {
0415         if (comp(*iter, *result))
0416             result = iter;
0417     }
0418     return result;
0419 }
0420
0421 //!\cond (CELERITAS_DOC_DEV)
0422 //---------------------------------------------------------------------------//
0423 /*!
0424  * Return an iterator to the lowest value in the range.
0425  */
0426 template<class ForwardIt>
0427 CELER_FORCEINLINE_FUNCTION ForwardIt min_element(ForwardIt first,
0428                                                  ForwardIt last)
0429 {
0430     return ::celeritas::min_element(first, last, Less<decltype(*first)>{});
0431 }
0432 //!\endcond
0433
0434 //---------------------------------------------------------------------------//
0435 // Replace/extend <cmath>
0436 //---------------------------------------------------------------------------//
0437 /*!
0438  * Return an integer power of the input value.
0439  *
0440  * Example: \code
0441   assert(9.0 == ipow<2>(3.0));
0442   assert(256 == ipow<8>(2));
0443   static_assert(256 == ipow<8>(2));
0444  \endcode
0445  */
0446 template<unsigned int N, class T>
0447 CELER_CONSTEXPR_FUNCTION T ipow(T v) noexcept
0448 {
0449     if constexpr (N == 0)
0450     {
0451         CELER_DISCARD(v)  // Suppress warning in older compilers
0452         return 1;
0453     }
0454     else if constexpr (N % 2 == 0)
0455     {
0456         return ipow<N / 2>(v) * ipow<N / 2>(v);
0457     }
0458     else
0459     {
0460         return v * ipow<(N - 1) / 2>(v) * ipow<(N - 1) / 2>(v);
0461     }
0462 #if (__CUDACC_VER_MAJOR__ < 11) \
0463     || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ < 5)
0464     // "error: missing return statement at end of non-void function"
0465     return T{};
0466 #endif
0467 }
0468
0469 //---------------------------------------------------------------------------//
0470 /*!
0471  * Raise a number to a power with simplifying assumptions.
0472  *
0473  * This should be faster than `std::pow` because we don't worry about
0474  * exceptions for zeros, infinities, or negative values for a.
0475  *
0476  * Example: \code
0477   assert(9.0 == fastpow(3.0, 2.0));
0478  \endcode
0479  */
0480 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0481 inline CELER_FUNCTION T fastpow(T a, T b)
0482 {
0483     CELER_EXPECT(a > 0 || (a == 0 && b != 0));
0484     return std::exp(b * std::log(a));
0485 }
0486
0487 //---------------------------------------------------------------------------//
0488 /*!
0489  * Use fused multiply-add for generic calculations.
0490  *
0491  * This provides a floating point specialization so that \c fma can be used in
0492  * code that is accelerated for floating point calculations but still works
0493  * correctly with integer arithmetic.
0494  *
0495  * Because of the single template parameter, it may be easier to use \c
0496  * std::fma directly in most cases.
0497  */
0498 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0499 CELER_FORCEINLINE_FUNCTION T fma(T a, T b, T y)
0500 {
0501     return std::fma(a, b, y);
0502 }
0503
0504 //---------------------------------------------------------------------------//
0505 /*!
0506  * Provide an FMA-like interface for integers.
0507  */
0508 template<class T, std::enable_if_t<!std::is_floating_point<T>::value, bool> = true>
0509 CELER_CONSTEXPR_FUNCTION T fma(T a, T b, T y)
0510 {
0511     return a * b + y;
0512 }
0513
0514 //---------------------------------------------------------------------------//
0515 /*!
0516  * Integer division, rounding up, for positive numbers.
0517  */
0518 template<class T>
0519 CELER_CONSTEXPR_FUNCTION T ceil_div(T top, T bottom)
0520 {
0521     static_assert(std::is_unsigned<T>::value, "Value is not an unsigned int");
0522     return (top / bottom) + (top % bottom != 0);
0523 }
0524
0525 //---------------------------------------------------------------------------//
0526 /*!
0527  * Calculate local work for a given worker ID.
0528  *
0529  * This calculates the amount of local work for the given worker ID when
0530  * dividing \c total_work tasks over \c num_workers workers.
0531  */
0532 template<class T>
0533 struct LocalWorkCalculator
0534 {
0535     static_assert(std::is_unsigned<T>::value, "Value is not an unsigned int");
0536
0537     T total_work;
0538     T num_workers;
0539
0540     CELER_CONSTEXPR_FUNCTION T operator()(T local_id)
0541     {
0542         CELER_EXPECT(local_id < num_workers);
0543         return total_work / num_workers + (local_id < total_work % num_workers);
0544     }
0545 };
0546
0547 //---------------------------------------------------------------------------//
0548 /*!
0549  * Negation that won't return signed zeros.
0550  */
0551 template<class T>
0552 [[nodiscard]] CELER_CONSTEXPR_FUNCTION T negate(T value)
0553 {
0554     return T{0} - value;
0555 }
0556
0557 //---------------------------------------------------------------------------//
0558 /*!
0559  * Calculate the difference of squares \f$ a^2 - b^2 \f$.
0560  *
0561  * This calculation exchanges one multiplication for one addition, but it does
0562  * not increase the accuracy of the computed result. It is used
0563  * occasionally in Geant4 but is likely a premature optimization... see
0564  * https://github.com/celeritas-project/celeritas/pull/1082
0565  */
0566 template<class T>
0567 CELER_CONSTEXPR_FUNCTION T diffsq(T a, T b)
0568 {
0569     return (a - b) * (a + b);
0570 }
0571
0572 //---------------------------------------------------------------------------//
0573 /*!
0574  * Calculate the Euclidian modulus of two numbers.
0575  *
0576  * If both numbers are positive, this should be the same as fmod. If the
0577  * sign of the remainder and denominator don't match, the remainder will be
0578  * remapped so that it is between zero and the denominator.
0579  *
0580  * This function is useful for normalizing user-provided angles.
0581  */
0582 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0583 CELER_CONSTEXPR_FUNCTION T eumod(T numer, T denom)
0584 {
0585     T r = std::fmod(numer, denom);
0586     if (r < 0)
0587     {
0588         if (denom >= 0)
0589         {
0590             r += denom;
0591         }
0592         else
0593         {
0594             r -= denom;
0595         }
0596     }
0597     return r;
0598 }
0599
0600 //---------------------------------------------------------------------------//
0601 /*!
0602  * Calculate the sign of a number.
0603  *
0604  * \return -1 if negative, 0 if exactly zero (or NaN), 1 if positive
0605  */
0606 template<class T>
0607 CELER_CONSTEXPR_FUNCTION int signum(T x)
0608 {
0609     return (0 < x) - (x < 0);
0610 }
0611
0612 //---------------------------------------------------------------------------//
0613 /*!
0614  * Double-precision math constant (POSIX derivative).
0615  *
0616  * These should be used in *host* or *type-dependent* circumstances because, if
0617  * using \c CELERITAS_REAL_TYPE=float, this could have more accuracy than
0618  * \c celeritas::constants::pi .
0619  */
0620 inline constexpr double m_pi{3.14159265358979323846};
0621
0622 //---------------------------------------------------------------------------//
0623 //!@{
0624 //! \name CUDA/HIP equivalent routines
0625
0626 /*!
0627  * Calculate an inverse square root.
0628  */
0629 CELER_FORCEINLINE_FUNCTION float rsqrt(float value)
0630 {
0631 #ifdef __CUDACC__
0632     return ::rsqrtf(value);
0633 #else
0634     // NOTE: some C++ library implementations (GCC?) don't define
0635     // single-precision functions in std namespace
0636     return 1.0f / sqrtf(value);
0637 #endif
0638 }
0639
0640 /*!
0641  * Calculate an inverse square root.
0642  */
0643 CELER_FORCEINLINE_FUNCTION double rsqrt(double value)
0644 {
0645 #ifdef __CUDACC__
0646     return ::rsqrt(value);
0647 #else
0648     return 1.0 / std::sqrt(value);
0649 #endif
0650 }
0651
0652 #if CELER_DEVICE_SOURCE
0653 // CUDA/HIP define sinpi, cospi, sinpif, cospif, ...
0654 #    undef CELERITAS_SINCOSPI_PREFIX
0655 #    define CELERITAS_SINCOSPI_PREFIX
0656 #endif
0657
0658 #ifdef CELERITAS_SINCOSPI_PREFIX
0659 // Apple-supplied headers define __sinpi, __sinpif, __sincospi, ...
0660 #    define CELER_CONCAT_IMPL(PREFIX, FUNC) PREFIX##FUNC
0661 #    define CELER_CONCAT(PREFIX, FUNC) CELER_CONCAT_IMPL(PREFIX, FUNC)
0662 #    define CELER_SINCOS_MANGLED(FUNC) \
0663         CELER_CONCAT(CELERITAS_SINCOSPI_PREFIX, FUNC)
0664 #else
0665 // Use implementations from detail/MathImpl.hh
0666 #    define CELERITAS_SINCOSPI_PREFIX ::celeritas::detail::
0667 #    define CELER_SINCOS_MANGLED(FUNC) ::celeritas::detail::FUNC
0668 #endif
0669
0670 //!@{
0671 //! Get the sine or cosine of a value multiplied by pi for increased precision
0672 CELER_FORCEINLINE_FUNCTION float sinpi(float a)
0673 {
0674     return CELER_SINCOS_MANGLED(sinpif)(a);
0675 }
0676 CELER_FORCEINLINE_FUNCTION double sinpi(double a)
0677 {
0678     return CELER_SINCOS_MANGLED(sinpi)(a);
0679 }
0680 CELER_FORCEINLINE_FUNCTION float cospi(float a)
0681 {
0682     return CELER_SINCOS_MANGLED(cospif)(a);
0683 }
0684 CELER_FORCEINLINE_FUNCTION double cospi(double a)
0685 {
0686     return CELER_SINCOS_MANGLED(cospi)(a);
0687 }
0688 //!@}
0689
0690 //!@{
0691 //! Simultaneously evaluate the sine and cosine of a value
0692 CELER_FORCEINLINE_FUNCTION void sincos(float a, float* s, float* c)
0693 {
0694     return CELER_SINCOS_MANGLED(sincosf)(a, s, c);
0695 }
0696 CELER_FORCEINLINE_FUNCTION void sincos(double a, double* s, double* c)
0697 {
0698     return CELER_SINCOS_MANGLED(sincos)(a, s, c);
0699 }
0700 CELER_FORCEINLINE_FUNCTION void sincospi(float a, float* s, float* c)
0701 {
0702     return CELER_SINCOS_MANGLED(sincospif)(a, s, c);
0703 }
0704 CELER_FORCEINLINE_FUNCTION void sincospi(double a, double* s, double* c)
0705 {
0706     return CELER_SINCOS_MANGLED(sincospi)(a, s, c);
0707 }
0708 //!@}
0709 //!@}
0710
0711 //---------------------------------------------------------------------------//
0712 }  // namespace celeritas