Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:54:48

0001 //----------------------------------*-C++-*----------------------------------//
0002 // See https://llvm.org/LICENSE.txt for license information.
0003 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0004 //---------------------------------------------------------------------------//
0005 /*!
0006  * \file corecel/math/detail/AlgorithmsImpl.hh
0007  * \brief Algorithm implementations directly derived from LLVM libc++
0008  */
0009 //---------------------------------------------------------------------------//
0010 #pragma once
0011 
0012 #include <iterator>
0013 #include <type_traits>
0014 
0015 #include "corecel/Macros.hh"
0016 
0017 namespace celeritas
0018 {
0019 //---------------------------------------------------------------------------//
0020 // Forward declare cuda-compatible swap/move
0021 template<class T>
0022 CELER_FORCEINLINE_FUNCTION void trivial_swap(T&, T&) noexcept;
0023 
0024 namespace detail
0025 {
0026 //---------------------------------------------------------------------------//
0027 template<class RandomAccessIt>
0028 using difference_type_t =
0029     typename std::iterator_traits<RandomAccessIt>::difference_type;
0030 
0031 //---------------------------------------------------------------------------//
0032 // LOWER and UPPER BOUNDS
0033 //---------------------------------------------------------------------------//
0034 //!@{
0035 /*!
0036  * Perform division-by-two quickly for positive integers.
0037  *
0038  * See llvm.org/PR39129.
0039  */
0040 template<typename Integral>
0041 CELER_CONSTEXPR_FUNCTION
0042     typename std::enable_if<std::is_integral<Integral>::value, Integral>::type
0043     half_positive(Integral value)
0044 {
0045     return static_cast<Integral>(
0046         static_cast<typename std::make_unsigned<Integral>::type>(value) / 2);
0047 }
0048 
0049 template<typename T>
0050 CELER_CONSTEXPR_FUNCTION
0051     typename std::enable_if<!std::is_integral<T>::value, T>::type
0052     half_positive(T value)
0053 {
0054     return value / 2;
0055 }
0056 //!@}
0057 
0058 //---------------------------------------------------------------------------//
0059 /*!
0060  * Implementation of binary search lower-bound assuming iterator arithmetic.
0061  */
0062 template<class Compare, class ForwardIterator, class T>
0063 CELER_FUNCTION ForwardIterator lower_bound_impl(ForwardIterator first,
0064                                                 ForwardIterator last,
0065                                                 T const& value_,
0066                                                 Compare comp)
0067 {
0068     using difference_type = difference_type_t<ForwardIterator>;
0069 
0070     difference_type len = last - first;
0071     while (len != 0)
0072     {
0073         difference_type half_len = ::celeritas::detail::half_positive(len);
0074         ForwardIterator m = first + half_len;
0075         if (comp(*m, value_))
0076         {
0077             first = ++m;
0078             len -= half_len + 1;
0079         }
0080         else
0081             len = half_len;
0082     }
0083     return first;
0084 }
0085 
0086 //---------------------------------------------------------------------------//
0087 /*!
0088  * Implementation of linear search lower-bound assuming iterator arithmetic.
0089  */
0090 template<class Compare, class ForwardIterator, class T>
0091 CELER_FUNCTION ForwardIterator lower_bound_linear_impl(ForwardIterator first,
0092                                                        ForwardIterator last,
0093                                                        T const& value_,
0094                                                        Compare comp)
0095 {
0096     for (ForwardIterator it = first; it != last; ++it)
0097     {
0098         if (!comp(*it, value_))
0099         {
0100             return it;
0101         }
0102     }
0103 
0104     return last;
0105 }
0106 
0107 //---------------------------------------------------------------------------//
0108 /*!
0109  * Implementation of upper-bound assuming iterator arithmetic.
0110  */
0111 template<class Compare, class ForwardIterator, class T>
0112 CELER_FUNCTION ForwardIterator upper_bound_impl(ForwardIterator first,
0113                                                 ForwardIterator last,
0114                                                 T const& value_,
0115                                                 Compare comp)
0116 {
0117     using difference_type = difference_type_t<ForwardIterator>;
0118 
0119     difference_type len = last - first;
0120     while (len != 0)
0121     {
0122         difference_type half_len = ::celeritas::detail::half_positive(len);
0123         ForwardIterator m = first + half_len;
0124         if (comp(value_, *m))
0125         {
0126             len = half_len;
0127         }
0128         else
0129         {
0130             first = ++m;
0131             len -= half_len + 1;
0132         }
0133     }
0134     return first;
0135 }
0136 
0137 //---------------------------------------------------------------------------//
0138 // PARTITION
0139 //---------------------------------------------------------------------------//
0140 /*!
0141  * Partition elements in the given range, "true" before "false".
0142  *
0143  * This implementation requires bidirectional iterators (typically true since
0144  * celeritas tends to use contiguous data).
0145  */
0146 template<class Predicate, class BidirectionalIterator>
0147 CELER_FUNCTION BidirectionalIterator partition_impl(BidirectionalIterator first,
0148                                                     BidirectionalIterator last,
0149                                                     Predicate pred)
0150 {
0151     while (true)
0152     {
0153         while (true)
0154         {
0155             if (first == last)
0156                 return first;
0157             if (!pred(*first))
0158                 break;
0159             ++first;
0160         }
0161         do
0162         {
0163             if (first == --last)
0164                 return first;
0165         } while (!pred(*last));
0166         trivial_swap(*first, *last);
0167         ++first;
0168     }
0169 }
0170 
0171 //---------------------------------------------------------------------------//
0172 // SORT
0173 //---------------------------------------------------------------------------//
0174 /*!
0175  * Cast a value to an rvalue reference.
0176  */
0177 template<class T>
0178 CELER_CONSTEXPR_FUNCTION auto trivial_move(T&& v) noexcept ->
0179     typename std::remove_reference<T>::type&&
0180 {
0181     return static_cast<typename std::remove_reference<T>::type&&>(v);
0182 }
0183 
0184 //---------------------------------------------------------------------------//
0185 /*!
0186  * Move the top element of the heap to its properly ordered place.
0187  *
0188  * The reverse ordering of the template parameters here is to allow the public
0189  * functions (sort, partial_sort) to explicitly pass references to the
0190  * comparator rather than passing by value every time.
0191  */
0192 template<class Compare, class RandomAccessIt>
0193 CELER_FUNCTION void sift_down(RandomAccessIt first,
0194                               RandomAccessIt,
0195                               Compare comp,
0196                               difference_type_t<RandomAccessIt> len,
0197                               RandomAccessIt start)
0198 {
0199     using difference_type = difference_type_t<RandomAccessIt>;
0200     using value_type =
0201         typename std::iterator_traits<RandomAccessIt>::value_type;
0202 
0203     // Left-child of start is at 2 * start + 1
0204     // Right-child of start is at 2 * start + 2
0205     difference_type child = start - first;
0206 
0207     if (len < 2 || (len - 2) / 2 < child)
0208     {
0209         return;
0210     }
0211 
0212     child = 2 * child + 1;
0213     RandomAccessIt child_i = first + child;
0214 
0215     if ((child + 1) < len && comp(*child_i, *(child_i + difference_type(1))))
0216     {
0217         // Right-child exists and is greater than left-child
0218         ++child_i;
0219         ++child;
0220     }
0221 
0222     if (comp(*child_i, *start))
0223     {
0224         // We are in heap order: start is larger than its largest child
0225         return;
0226     }
0227 
0228     value_type top(trivial_move(*start));
0229     do
0230     {
0231         // We are not in heap order: swap the parent with its largest child
0232         *start = trivial_move(*child_i);
0233         start = child_i;
0234 
0235         if ((len - 2) / 2 < child)
0236             break;
0237 
0238         // Recompute the child based off of the updated parent
0239         child = 2 * child + 1;
0240         child_i = first + child;
0241 
0242         if ((child + 1) < len
0243             && comp(*child_i, *(child_i + difference_type(1))))
0244         {
0245             // Right-child exists and is greater than left-child
0246             ++child_i;
0247             ++child;
0248         }
0249     } while (!comp(*child_i, top));
0250     *start = trivial_move(top);
0251 }
0252 
0253 //---------------------------------------------------------------------------//
0254 /*!
0255  * Move the largest (first) element in a heap to the last position.
0256  */
0257 template<class Compare, class RandomAccessIt>
0258 CELER_FORCEINLINE_FUNCTION void pop_heap(RandomAccessIt first,
0259                                          RandomAccessIt last,
0260                                          Compare comp,
0261                                          difference_type_t<RandomAccessIt> len)
0262 {
0263     if (len > 1)
0264     {
0265         trivial_swap(*first, *--last);
0266         ::celeritas::detail::sift_down<Compare>(
0267             first, last, comp, len - 1, first);
0268     }
0269 }
0270 
0271 //---------------------------------------------------------------------------//
0272 /*!
0273  * Convert the given range to a heap.
0274  */
0275 template<class Compare, class RandomAccessIt>
0276 CELER_FUNCTION void
0277 make_heap(RandomAccessIt first, RandomAccessIt last, Compare comp)
0278 {
0279     using difference_type = difference_type_t<RandomAccessIt>;
0280 
0281     difference_type n = last - first;
0282     if (n > 1)
0283     {
0284         for (difference_type start = (n - 2) / 2; start >= 0; --start)
0285         {
0286             ::celeritas::detail::sift_down<Compare>(
0287                 first, last, comp, n, first + start);
0288         }
0289     }
0290 }
0291 
0292 //---------------------------------------------------------------------------//
0293 /*!
0294  * Convert a heap to a sorted range.
0295  *
0296  * The \c void cast shouldn't ever be something we have to worry about, but
0297  * it's left in there from the LLVM implementation "to handle evil iterators
0298  * that overload operator comma" (bd7c7b55511a4b4b50b77559a44eff6d350224c4).
0299  */
0300 template<class Compare, class RandomAccessIt>
0301 CELER_FUNCTION void
0302 sort_heap(RandomAccessIt first, RandomAccessIt last, Compare comp)
0303 {
0304     using difference_type = difference_type_t<RandomAccessIt>;
0305 
0306     for (difference_type n = last - first; n > 1; --last, (void)--n)
0307     {
0308         ::celeritas::detail::pop_heap<Compare>(first, last, comp, n);
0309     }
0310 }
0311 
0312 //---------------------------------------------------------------------------//
0313 /*!
0314  * Sort the first elements of the range.
0315  *
0316  * This uses the remaining elements of the range [middle, last) as swap space.
0317  */
0318 template<class Compare, class RandomAccessIt>
0319 CELER_FUNCTION void partial_sort(RandomAccessIt first,
0320                                  RandomAccessIt middle,
0321                                  RandomAccessIt last,
0322                                  Compare comp)
0323 {
0324     using difference_type = difference_type_t<RandomAccessIt>;
0325 
0326     ::celeritas::detail::make_heap<Compare>(first, middle, comp);
0327 
0328     difference_type len = middle - first;
0329     for (RandomAccessIt i = middle; i != last; ++i)
0330     {
0331         if (comp(*i, *first))
0332         {
0333             trivial_swap(*i, *first);
0334             ::celeritas::detail::sift_down<Compare>(
0335                 first, middle, comp, len, first);
0336         }
0337     }
0338     ::celeritas::detail::sort_heap<Compare>(first, middle, comp);
0339 }
0340 
0341 //---------------------------------------------------------------------------//
0342 /*!
0343  * Perform a heap sort on the given range.
0344  *
0345  * The heap sort algorithm converts the unsorted array into an ordered
0346  * contiguous heap in-place, then converts the heap back to a sorted array. We
0347  * choose heapsort because it requires no dynamic allocation and has both best
0348  * and worst case time of O(N log N).
0349  *
0350  * The heap sort algorithm here is derived almost verbatim from the LLVM
0351  * libc++.
0352  */
0353 template<class Compare, class RandomAccessIt>
0354 CELER_FUNCTION void
0355 heapsort_impl(RandomAccessIt first, RandomAccessIt last, Compare comp)
0356 {
0357     ::celeritas::detail::partial_sort<Compare>(first, last, last, comp);
0358 }
0359 
0360 //---------------------------------------------------------------------------//
0361 }  // namespace detail
0362 }  // namespace celeritas