File indexing completed on 2025-01-18 09:54:49
0001
0002
0003
0004
0005
0006
0007
0008 #pragma once
0009
0010 #include <cmath>
0011 #include <type_traits>
0012
0013 #include "corecel/Config.hh"
0014
0015 #include "corecel/Assert.hh"
0016 #include "corecel/Macros.hh"
0017
0018 #include "detail/AlgorithmsImpl.hh"
0019
0020 #if !defined(CELER_DEVICE_SOURCE) && !defined(CELERITAS_SINCOSPI_PREFIX)
0021 # include "detail/Sincospi.hh"
0022 #endif
0023
0024 namespace celeritas
0025 {
0026
0027
0028
0029
0030 template<class T>
0031 CELER_CONSTEXPR_FUNCTION T&&
0032 forward(typename std::remove_reference<T>::type& v) noexcept
0033 {
0034 return static_cast<T&&>(v);
0035 }
0036
0037
0038 template<class T>
0039 CELER_CONSTEXPR_FUNCTION T&&
0040 forward(typename std::remove_reference<T>::type&& v) noexcept
0041 {
0042 return static_cast<T&&>(v);
0043 }
0044
0045
0046
0047
0048
0049
0050 template<class T>
0051 CELER_CONSTEXPR_FUNCTION auto move(T&& v) noexcept ->
0052 typename std::remove_reference<T>::type&&
0053 {
0054 return static_cast<typename std::remove_reference<T>::type&&>(v);
0055 }
0056
0057
0058
0059
0060
0061 template<class T>
0062 CELER_FORCEINLINE_FUNCTION void trivial_swap(T& a, T& b) noexcept
0063 {
0064 static_assert(std::is_trivially_move_constructible<T>::value,
0065 "Value is not trivially copyable");
0066 static_assert(std::is_trivially_move_assignable<T>::value,
0067 "Value is not trivially movable");
0068 static_assert(std::is_trivially_destructible<T>::value,
0069 "Value is not trivially destructible");
0070 T temp{::celeritas::move(a)};
0071 a = ::celeritas::move(b);
0072 b = ::celeritas::move(temp);
0073 }
0074
0075
0076
0077
0078
0079 template<class T, class U = T>
0080 CELER_FORCEINLINE_FUNCTION T exchange(T& dst, U&& src)
0081 {
0082 T orig = std::move(dst);
0083 dst = std::forward<U>(src);
0084 return orig;
0085 }
0086
0087
0088
0089
0090
0091
0092
0093 template<class T = void>
0094 struct Less
0095 {
0096 CELER_CONSTEXPR_FUNCTION auto
0097 operator()(T const& lhs, T const& rhs) const noexcept -> decltype(auto)
0098 {
0099 return lhs < rhs;
0100 }
0101 };
0102
0103
0104 template<>
0105 struct Less<void>
0106 {
0107 template<class T, class U>
0108 CELER_CONSTEXPR_FUNCTION auto
0109 operator()(T&& lhs, U&& rhs) const -> decltype(auto)
0110 {
0111 return ::celeritas::forward<T>(lhs) < ::celeritas::forward<U>(rhs);
0112 }
0113 };
0114
0115
0116
0117
0118
0119
0120
0121 template<class InputIt, class Predicate>
0122 inline CELER_FUNCTION bool all_of(InputIt iter, InputIt last, Predicate p)
0123 {
0124 for (; iter != last; ++iter)
0125 {
0126 if (!p(*iter))
0127 return false;
0128 }
0129 return true;
0130 }
0131
0132
0133
0134
0135
0136 template<class InputIt, class Predicate>
0137 inline CELER_FUNCTION bool any_of(InputIt iter, InputIt last, Predicate p)
0138 {
0139 for (; iter != last; ++iter)
0140 {
0141 if (p(*iter))
0142 return true;
0143 }
0144 return false;
0145 }
0146
0147
0148
0149
0150
0151 template<class InputIt, class Predicate>
0152 inline CELER_FUNCTION bool all_adjacent(InputIt iter, InputIt last, Predicate p)
0153 {
0154 if (iter == last)
0155 return true;
0156
0157 auto prev = *iter++;
0158 while (iter != last)
0159 {
0160 if (!p(prev, *iter))
0161 return false;
0162 prev = *iter++;
0163 }
0164 return true;
0165 }
0166
0167
0168
0169
0170
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180
0181
0182
0183
0184 template<class T>
0185 inline CELER_FUNCTION T const& clamp(T const& v, T const& lo, T const& hi)
0186 {
0187 CELER_EXPECT(!(hi < lo));
0188 return v < lo ? lo : hi < v ? hi : v;
0189 }
0190
0191
0192
0193
0194
0195
0196
0197 template<class T>
0198 CELER_CONSTEXPR_FUNCTION T clamp_to_nonneg(T v) noexcept
0199 {
0200 return (v < 0) ? 0 : v;
0201 }
0202
0203
0204
0205
0206
0207 template<class ForwardIt, class T, class Compare>
0208 CELER_FORCEINLINE_FUNCTION ForwardIt
0209 lower_bound(ForwardIt first, ForwardIt last, T const& value, Compare comp)
0210 {
0211 using CompareRef = std::add_lvalue_reference_t<Compare>;
0212 return ::celeritas::detail::lower_bound_impl<CompareRef>(
0213 first, last, value, comp);
0214 }
0215
0216
0217
0218
0219
0220
0221 template<class ForwardIt, class T>
0222 CELER_FORCEINLINE_FUNCTION ForwardIt lower_bound(ForwardIt first,
0223 ForwardIt last,
0224 T const& value)
0225 {
0226 return ::celeritas::lower_bound(first, last, value, Less<>{});
0227 }
0228
0229
0230
0231
0232
0233
0234 template<class ForwardIt, class T, class Compare>
0235 CELER_FORCEINLINE_FUNCTION ForwardIt lower_bound_linear(ForwardIt first,
0236 ForwardIt last,
0237 T const& value,
0238 Compare comp)
0239 {
0240 using CompareRef = std::add_lvalue_reference_t<Compare>;
0241 return ::celeritas::detail::lower_bound_linear_impl<CompareRef>(
0242 first, last, value, comp);
0243 }
0244
0245
0246
0247
0248
0249 template<class ForwardIt, class T>
0250 CELER_FORCEINLINE_FUNCTION ForwardIt lower_bound_linear(ForwardIt first,
0251 ForwardIt last,
0252 T const& value)
0253 {
0254 return ::celeritas::lower_bound_linear(first, last, value, Less<>{});
0255 }
0256
0257
0258
0259
0260
0261 template<class ForwardIt, class T, class Compare>
0262 CELER_FORCEINLINE_FUNCTION ForwardIt
0263 upper_bound(ForwardIt first, ForwardIt last, T const& value, Compare comp)
0264 {
0265 using CompareRef = std::add_lvalue_reference_t<Compare>;
0266 return ::celeritas::detail::upper_bound_impl<CompareRef>(
0267 first, last, value, comp);
0268 }
0269
0270
0271
0272
0273
0274
0275 template<class ForwardIt, class T>
0276 CELER_FORCEINLINE_FUNCTION ForwardIt upper_bound(ForwardIt first,
0277 ForwardIt last,
0278 T const& value)
0279 {
0280 return ::celeritas::upper_bound(first, last, value, Less<>{});
0281 }
0282
0283
0284
0285
0286
0287
0288 template<class ForwardIt, class T, class Compare>
0289 inline CELER_FUNCTION ForwardIt
0290 find_sorted(ForwardIt first, ForwardIt last, T const& value, Compare comp)
0291 {
0292 auto iter = ::celeritas::lower_bound(first, last, value, comp);
0293 if (iter == last || comp(*iter, value) || comp(value, *iter))
0294 {
0295
0296 return last;
0297 }
0298 return iter;
0299 }
0300
0301
0302
0303
0304
0305
0306 template<class ForwardIt, class T>
0307 CELER_FORCEINLINE_FUNCTION ForwardIt find_sorted(ForwardIt first,
0308 ForwardIt last,
0309 T const& value)
0310 {
0311 return ::celeritas::find_sorted(first, last, value, Less<>{});
0312 }
0313
0314
0315
0316
0317
0318
0319
0320
0321 template<class ForwardIt, class Predicate>
0322 CELER_FORCEINLINE_FUNCTION ForwardIt partition(ForwardIt first,
0323 ForwardIt last,
0324 Predicate pred)
0325 {
0326 using PredicateRef = std::add_lvalue_reference_t<Predicate>;
0327 return ::celeritas::detail::partition_impl<PredicateRef>(first, last, pred);
0328 }
0329
0330
0331
0332
0333
0334
0335
0336
0337 template<class RandomAccessIt, class Compare>
0338 CELER_FORCEINLINE_FUNCTION void
0339 sort(RandomAccessIt first, RandomAccessIt last, Compare comp)
0340 {
0341 using CompareRef = std::add_lvalue_reference_t<Compare>;
0342 return ::celeritas::detail::heapsort_impl<CompareRef>(first, last, comp);
0343 }
0344
0345
0346
0347
0348
0349
0350 template<class RandomAccessIt>
0351 CELER_FORCEINLINE_FUNCTION void sort(RandomAccessIt first, RandomAccessIt last)
0352 {
0353 ::celeritas::sort(first, last, Less<>{});
0354 }
0355
0356
0357
0358
0359
0360
0361
0362
0363
0364 template<class T, std::enable_if_t<!std::is_floating_point<T>::value, bool> = true>
0365 CELER_CONSTEXPR_FUNCTION T const& max(T const& a, T const& b) noexcept
0366 {
0367 return (b > a) ? b : a;
0368 }
0369
0370
0371 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0372 CELER_CONSTEXPR_FUNCTION T max(T a, T b) noexcept
0373 {
0374 return std::fmax(a, b);
0375 }
0376
0377
0378
0379
0380
0381
0382
0383
0384
0385 template<class T, std::enable_if_t<!std::is_floating_point<T>::value, bool> = true>
0386 CELER_CONSTEXPR_FUNCTION T const& min(T const& a, T const& b) noexcept
0387 {
0388 return (b < a) ? b : a;
0389 }
0390
0391
0392 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0393 CELER_CONSTEXPR_FUNCTION T min(T a, T b) noexcept
0394 {
0395 return std::fmin(a, b);
0396 }
0397
0398
0399
0400
0401
0402
0403 template<class ForwardIt, class Compare>
0404 inline CELER_FUNCTION ForwardIt min_element(ForwardIt iter,
0405 ForwardIt last,
0406 Compare comp)
0407 {
0408
0409 if (iter == last)
0410 return last;
0411
0412 ForwardIt result = iter++;
0413 for (; iter != last; ++iter)
0414 {
0415 if (comp(*iter, *result))
0416 result = iter;
0417 }
0418 return result;
0419 }
0420
0421
0422
0423
0424
0425
0426 template<class ForwardIt>
0427 CELER_FORCEINLINE_FUNCTION ForwardIt min_element(ForwardIt first,
0428 ForwardIt last)
0429 {
0430 return ::celeritas::min_element(first, last, Less<decltype(*first)>{});
0431 }
0432
0433
0434
0435
0436
0437
0438
0439
0440
0441
0442
0443
0444
0445
0446 template<unsigned int N, class T>
0447 CELER_CONSTEXPR_FUNCTION T ipow(T v) noexcept
0448 {
0449 if constexpr (N == 0)
0450 {
0451 CELER_DISCARD(v)
0452 return 1;
0453 }
0454 else if constexpr (N % 2 == 0)
0455 {
0456 return ipow<N / 2>(v) * ipow<N / 2>(v);
0457 }
0458 else
0459 {
0460 return v * ipow<(N - 1) / 2>(v) * ipow<(N - 1) / 2>(v);
0461 }
0462 #if (__CUDACC_VER_MAJOR__ < 11) \
0463 || (__CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ < 5)
0464
0465 return T{};
0466 #endif
0467 }
0468
0469
0470
0471
0472
0473
0474
0475
0476
0477
0478
0479
0480 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0481 inline CELER_FUNCTION T fastpow(T a, T b)
0482 {
0483 CELER_EXPECT(a > 0 || (a == 0 && b != 0));
0484 return std::exp(b * std::log(a));
0485 }
0486
0487
0488
0489
0490
0491
0492
0493
0494
0495
0496
0497
0498 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0499 CELER_FORCEINLINE_FUNCTION T fma(T a, T b, T y)
0500 {
0501 return std::fma(a, b, y);
0502 }
0503
0504
0505
0506
0507
0508 template<class T, std::enable_if_t<!std::is_floating_point<T>::value, bool> = true>
0509 CELER_CONSTEXPR_FUNCTION T fma(T a, T b, T y)
0510 {
0511 return a * b + y;
0512 }
0513
0514
0515
0516
0517
0518 template<class T>
0519 CELER_CONSTEXPR_FUNCTION T ceil_div(T top, T bottom)
0520 {
0521 static_assert(std::is_unsigned<T>::value, "Value is not an unsigned int");
0522 return (top / bottom) + (top % bottom != 0);
0523 }
0524
0525
0526
0527
0528
0529
0530
0531
0532 template<class T>
0533 struct LocalWorkCalculator
0534 {
0535 static_assert(std::is_unsigned<T>::value, "Value is not an unsigned int");
0536
0537 T total_work;
0538 T num_workers;
0539
0540 CELER_CONSTEXPR_FUNCTION T operator()(T local_id)
0541 {
0542 CELER_EXPECT(local_id < num_workers);
0543 return total_work / num_workers + (local_id < total_work % num_workers);
0544 }
0545 };
0546
0547
0548
0549
0550
0551 template<class T>
0552 [[nodiscard]] CELER_CONSTEXPR_FUNCTION T negate(T value)
0553 {
0554 return T{0} - value;
0555 }
0556
0557
0558
0559
0560
0561
0562
0563
0564
0565
0566 template<class T>
0567 CELER_CONSTEXPR_FUNCTION T diffsq(T a, T b)
0568 {
0569 return (a - b) * (a + b);
0570 }
0571
0572
0573
0574
0575
0576
0577
0578
0579
0580
0581
0582 template<class T, std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
0583 CELER_CONSTEXPR_FUNCTION T eumod(T numer, T denom)
0584 {
0585 T r = std::fmod(numer, denom);
0586 if (r < 0)
0587 {
0588 if (denom >= 0)
0589 {
0590 r += denom;
0591 }
0592 else
0593 {
0594 r -= denom;
0595 }
0596 }
0597 return r;
0598 }
0599
0600
0601
0602
0603
0604
0605
0606 template<class T>
0607 CELER_CONSTEXPR_FUNCTION int signum(T x)
0608 {
0609 return (0 < x) - (x < 0);
0610 }
0611
0612
0613
0614
0615
0616
0617
0618
0619
0620 inline constexpr double m_pi{3.14159265358979323846};
0621
0622
0623
0624
0625
0626
0627
0628
0629 CELER_FORCEINLINE_FUNCTION float rsqrt(float value)
0630 {
0631 #ifdef __CUDACC__
0632 return ::rsqrtf(value);
0633 #else
0634
0635
0636 return 1.0f / sqrtf(value);
0637 #endif
0638 }
0639
0640
0641
0642
0643 CELER_FORCEINLINE_FUNCTION double rsqrt(double value)
0644 {
0645 #ifdef __CUDACC__
0646 return ::rsqrt(value);
0647 #else
0648 return 1.0 / std::sqrt(value);
0649 #endif
0650 }
0651
0652 #if CELER_DEVICE_SOURCE
0653
0654 # undef CELERITAS_SINCOSPI_PREFIX
0655 # define CELERITAS_SINCOSPI_PREFIX
0656 #endif
0657
0658 #ifdef CELERITAS_SINCOSPI_PREFIX
0659
0660 # define CELER_CONCAT_IMPL(PREFIX, FUNC) PREFIX##FUNC
0661 # define CELER_CONCAT(PREFIX, FUNC) CELER_CONCAT_IMPL(PREFIX, FUNC)
0662 # define CELER_SINCOS_MANGLED(FUNC) \
0663 CELER_CONCAT(CELERITAS_SINCOSPI_PREFIX, FUNC)
0664 #else
0665
0666 # define CELERITAS_SINCOSPI_PREFIX ::celeritas::detail::
0667 # define CELER_SINCOS_MANGLED(FUNC) ::celeritas::detail::FUNC
0668 #endif
0669
0670
0671
0672 CELER_FORCEINLINE_FUNCTION float sinpi(float a)
0673 {
0674 return CELER_SINCOS_MANGLED(sinpif)(a);
0675 }
0676 CELER_FORCEINLINE_FUNCTION double sinpi(double a)
0677 {
0678 return CELER_SINCOS_MANGLED(sinpi)(a);
0679 }
0680 CELER_FORCEINLINE_FUNCTION float cospi(float a)
0681 {
0682 return CELER_SINCOS_MANGLED(cospif)(a);
0683 }
0684 CELER_FORCEINLINE_FUNCTION double cospi(double a)
0685 {
0686 return CELER_SINCOS_MANGLED(cospi)(a);
0687 }
0688
0689
0690
0691
0692 CELER_FORCEINLINE_FUNCTION void sincos(float a, float* s, float* c)
0693 {
0694 return CELER_SINCOS_MANGLED(sincosf)(a, s, c);
0695 }
0696 CELER_FORCEINLINE_FUNCTION void sincos(double a, double* s, double* c)
0697 {
0698 return CELER_SINCOS_MANGLED(sincos)(a, s, c);
0699 }
0700 CELER_FORCEINLINE_FUNCTION void sincospi(float a, float* s, float* c)
0701 {
0702 return CELER_SINCOS_MANGLED(sincospif)(a, s, c);
0703 }
0704 CELER_FORCEINLINE_FUNCTION void sincospi(double a, double* s, double* c)
0705 {
0706 return CELER_SINCOS_MANGLED(sincospi)(a, s, c);
0707 }
0708
0709
0710
0711
0712 }