|
||||
File indexing completed on 2025-01-18 09:51:48
0001 //Templated hybrid string_sort 0002 0003 // Copyright Steven J. Ross 2001 - 2009. 0004 // Distributed under the Boost Software License, Version 1.0. 0005 // (See accompanying file LICENSE_1_0.txt or copy at 0006 // http://www.boost.org/LICENSE_1_0.txt) 0007 0008 // See http://www.boost.org/libs/sort/ for library home page. 0009 0010 /* 0011 Some improvements suggested by: 0012 Phil Endecott and Frank Gennari 0013 */ 0014 0015 #ifndef BOOST_STRING_SORT_HPP 0016 #define BOOST_STRING_SORT_HPP 0017 #include <algorithm> 0018 #include <vector> 0019 #include <cstring> 0020 #include <limits> 0021 #include <boost/static_assert.hpp> 0022 #include <boost/sort/spreadsort/detail/constants.hpp> 0023 #include <boost/sort/spreadsort/detail/string_sort.hpp> 0024 #include <boost/range/begin.hpp> 0025 #include <boost/range/end.hpp> 0026 0027 namespace boost { 0028 namespace sort { 0029 namespace spreadsort { 0030 0031 /*! \brief String sort algorithm using random access iterators, allowing character-type overloads.\n 0032 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0033 0034 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0035 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0036 \par 0037 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0038 so @c string_sort is asymptotically faster 0039 than pure comparison-based algorithms. \n\n 0040 Some performance plots of runtime vs. n and log(range) are provided:\n 0041 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0042 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0043 0044 \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a> 0045 \tparam Unsigned_char_type Unsigned character type used for string. 0046 \param[in] first Iterator pointer to first element. 0047 \param[in] last Iterator pointing to one beyond the end of data. 0048 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused. 0049 0050 \pre [@c first, @c last) is a valid range. 0051 \pre @c RandomAccessIter @c value_type is mutable. 0052 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a> 0053 \pre @c RandomAccessIter @c value_type supports the @c operator>>, 0054 which returns an integer-type right-shifted a specified number of bits. 0055 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0056 0057 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0058 the right shift, subtraction of right-shifted elements, functors, 0059 or any operations on iterators throw. 0060 0061 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0062 \warning Invalid arguments cause undefined behaviour. 0063 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0064 enabling faster generic-programming. 0065 0066 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0067 \remark * N is @c last - @c first, 0068 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0069 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0070 0071 */ 0072 0073 template <class RandomAccessIter, class Unsigned_char_type> 0074 inline void string_sort(RandomAccessIter first, RandomAccessIter last, 0075 Unsigned_char_type unused) 0076 { 0077 //Don't sort if it's too small to optimize 0078 if (last - first < detail::min_sort_size) 0079 boost::sort::pdqsort(first, last); 0080 else 0081 detail::string_sort(first, last, unused); 0082 } 0083 0084 /*! \brief String sort algorithm using range, allowing character-type overloads.\n 0085 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0086 0087 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0088 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0089 \par 0090 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0091 so @c string_sort is asymptotically faster 0092 than pure comparison-based algorithms. \n\n 0093 Some performance plots of runtime vs. n and log(range) are provided:\n 0094 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0095 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0096 0097 \tparam Unsigned_char_type Unsigned character type used for string. 0098 \param[in] range Range [first, last) for sorting. 0099 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused. 0100 0101 \pre [@c first, @c last) is a valid range. 0102 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0103 0104 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0105 the right shift, subtraction of right-shifted elements, functors, 0106 or any operations on iterators throw. 0107 0108 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0109 \warning Invalid arguments cause undefined behaviour. 0110 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0111 enabling faster generic-programming. 0112 0113 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0114 \remark * N is @c last - @c first, 0115 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0116 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0117 0118 */ 0119 0120 template <class Range, class Unsigned_char_type> 0121 inline void string_sort(Range& range, Unsigned_char_type unused) 0122 { 0123 string_sort(boost::begin(range), boost::end(range), unused); 0124 } 0125 0126 /*! \brief String sort algorithm using random access iterators, wraps using default of unsigned char. 0127 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0128 0129 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0130 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0131 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0132 so @c string_sort is asymptotically faster 0133 than pure comparison-based algorithms. \n\n 0134 Some performance plots of runtime vs. n and log(range) are provided:\n 0135 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a> 0136 \n 0137 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0138 0139 \param[in] first Iterator pointer to first element. 0140 \param[in] last Iterator pointing to one beyond the end of data. 0141 0142 \pre [@c first, @c last) is a valid range. 0143 \pre @c RandomAccessIter @c value_type is mutable. 0144 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a> 0145 \pre @c RandomAccessIter @c value_type supports the @c operator>>, 0146 which returns an integer-type right-shifted a specified number of bits. 0147 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0148 0149 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0150 the right shift, subtraction of right-shifted elements, functors, 0151 or any operations on iterators throw. 0152 0153 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0154 \warning Invalid arguments cause undefined behaviour. 0155 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0156 enabling faster generic-programming. 0157 0158 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0159 \remark * N is @c last - @c first, 0160 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0161 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0162 0163 */ 0164 template <class RandomAccessIter> 0165 inline void string_sort(RandomAccessIter first, RandomAccessIter last) 0166 { 0167 unsigned char unused = '\0'; 0168 string_sort(first, last, unused); 0169 } 0170 0171 /*! \brief String sort algorithm using range, wraps using default of unsigned char. 0172 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0173 0174 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0175 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0176 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0177 so @c string_sort is asymptotically faster 0178 than pure comparison-based algorithms. \n\n 0179 Some performance plots of runtime vs. n and log(range) are provided:\n 0180 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a> 0181 \n 0182 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0183 0184 \param[in] range Range [first, last) for sorting. 0185 0186 \pre [@c first, @c last) is a valid range. 0187 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0188 0189 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0190 the right shift, subtraction of right-shifted elements, functors, 0191 or any operations on iterators throw. 0192 0193 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0194 \warning Invalid arguments cause undefined behaviour. 0195 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0196 enabling faster generic-programming. 0197 0198 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0199 \remark * N is @c last - @c first, 0200 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0201 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0202 0203 */ 0204 template <class Range> 0205 inline void string_sort(Range& range) 0206 { 0207 string_sort(boost::begin(range), boost::end(range)); 0208 } 0209 0210 /*! \brief String sort algorithm using random access iterators, allowing character-type overloads. 0211 0212 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < detail::min_sort_size). 0213 0214 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0215 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0216 \par 0217 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0218 so @c string_sort is asymptotically faster 0219 than pure comparison-based algorithms. \n\n 0220 Some performance plots of runtime vs. n and log(range) are provided:\n 0221 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0222 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0223 0224 0225 \tparam RandomAccessIter <a href="http://www.cplusplus.com/reference/iterator/RandomAccessIterator/">Random access iterator</a> 0226 \tparam Comp Functor type to use for comparison. 0227 \tparam Unsigned_char_type Unsigned character type used for string. 0228 0229 \param[in] first Iterator pointer to first element. 0230 \param[in] last Iterator pointing to one beyond the end of data. 0231 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order. 0232 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused. 0233 0234 \pre [@c first, @c last) is a valid range. 0235 \pre @c RandomAccessIter @c value_type is mutable. 0236 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a> 0237 \pre @c RandomAccessIter @c value_type supports the @c operator>>, 0238 which returns an integer-type right-shifted a specified number of bits. 0239 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0240 0241 \return @c void. 0242 0243 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0244 the right shift, subtraction of right-shifted elements, functors, 0245 or any operations on iterators throw. 0246 0247 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0248 \warning Invalid arguments cause undefined behaviour. 0249 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0250 enabling faster generic-programming. 0251 0252 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0253 \remark * N is @c last - @c first, 0254 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0255 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0256 */ 0257 template <class RandomAccessIter, class Compare, class Unsigned_char_type> 0258 inline void reverse_string_sort(RandomAccessIter first, 0259 RandomAccessIter last, Compare comp, Unsigned_char_type unused) 0260 { 0261 //Don't sort if it's too small to optimize. 0262 if (last - first < detail::min_sort_size) 0263 boost::sort::pdqsort(first, last, comp); 0264 else 0265 detail::reverse_string_sort(first, last, unused); 0266 } 0267 0268 /*! \brief String sort algorithm using range, allowing character-type overloads. 0269 0270 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < detail::min_sort_size). 0271 0272 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0273 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0274 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0275 so @c string_sort is asymptotically faster 0276 than pure comparison-based algorithms. \n\n 0277 Some performance plots of runtime vs. n and log(range) are provided:\n 0278 <a href="../../doc/graph/windows_integer_sort.htm"> windows_integer_sort</a> 0279 \n 0280 <a href="../../doc/graph/osx_integer_sort.htm"> osx_integer_sort</a> 0281 0282 0283 \tparam Comp Functor type to use for comparison. 0284 \tparam Unsigned_char_type Unsigned character type used for string. 0285 0286 \param[in] range Range [first, last) for sorting. 0287 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order. 0288 \param[in] unused value with the same type as the result of the [] operator, defining the Unsigned_char_type. The actual value is unused. 0289 0290 \pre [@c first, @c last) is a valid range. 0291 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0292 0293 \return @c void. 0294 0295 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0296 the right shift, subtraction of right-shifted elements, functors, 0297 or any operations on iterators throw. 0298 0299 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0300 \warning Invalid arguments cause undefined behaviour. 0301 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0302 enabling faster generic-programming. 0303 0304 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0305 \remark * N is @c last - @c first, 0306 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0307 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0308 */ 0309 template <class Range, class Compare, class Unsigned_char_type> 0310 inline void reverse_string_sort(Range& range, Compare comp, Unsigned_char_type unused) 0311 { 0312 reverse_string_sort(boost::begin(range), boost::end(range), comp, unused); 0313 } 0314 0315 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char. 0316 0317 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0318 0319 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0320 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0321 \par 0322 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0323 so @c string_sort is asymptotically faster 0324 than pure comparison-based algorithms.\n\n 0325 Some performance plots of runtime vs. n and log(range) are provided:\n 0326 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0327 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0328 0329 \param[in] first Iterator pointer to first element. 0330 \param[in] last Iterator pointing to one beyond the end of data. 0331 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order. 0332 0333 \pre [@c first, @c last) is a valid range. 0334 \pre @c RandomAccessIter @c value_type is mutable. 0335 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a> 0336 \pre @c RandomAccessIter @c value_type supports the @c operator>>, 0337 which returns an integer-type right-shifted a specified number of bits. 0338 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0339 0340 \return @c void. 0341 0342 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0343 the right shift, subtraction of right-shifted elements, functors, 0344 or any operations on iterators throw. 0345 0346 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0347 \warning Invalid arguments cause undefined behaviour. 0348 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0349 enabling faster generic-programming. 0350 0351 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0352 \remark * N is @c last - @c first, 0353 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0354 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0355 */ 0356 template <class RandomAccessIter, class Compare> 0357 inline void reverse_string_sort(RandomAccessIter first, 0358 RandomAccessIter last, Compare comp) 0359 { 0360 unsigned char unused = '\0'; 0361 reverse_string_sort(first, last, comp, unused); 0362 } 0363 0364 /*! \brief String sort algorithm using range, wraps using default of @c unsigned char. 0365 0366 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0367 0368 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0369 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0370 \par 0371 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0372 so @c string_sort is asymptotically faster 0373 than pure comparison-based algorithms. \n\n 0374 Some performance plots of runtime vs. n and log(range) are provided:\n 0375 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0376 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0377 0378 \param[in] range Range [first, last) for sorting. 0379 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order. 0380 0381 \pre [@c first, @c last) is a valid range. 0382 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0383 0384 \return @c void. 0385 0386 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0387 the right shift, subtraction of right-shifted elements, functors, 0388 or any operations on iterators throw. 0389 0390 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0391 \warning Invalid arguments cause undefined behaviour. 0392 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0393 enabling faster generic-programming. 0394 0395 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0396 \remark * N is @c last - @c first, 0397 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0398 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0399 */ 0400 template <class Range, class Compare> 0401 inline void reverse_string_sort(Range& range, Compare comp) 0402 { 0403 reverse_string_sort(boost::begin(range), boost::end(range), comp); 0404 } 0405 0406 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char. 0407 0408 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0409 0410 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0411 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0412 \par 0413 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0414 so @c string_sort is asymptotically faster 0415 than pure comparison-based algorithms. \n\n 0416 Some performance plots of runtime vs. n and log(range) are provided:\n 0417 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0418 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0419 0420 \param[in] first Iterator pointer to first element. 0421 \param[in] last Iterator pointing to one beyond the end of data. 0422 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset. 0423 \param[in] length Functor to get the length of the string in characters. 0424 0425 \pre [@c first, @c last) is a valid range. 0426 \pre @c RandomAccessIter @c value_type is mutable. 0427 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a> 0428 \pre @c RandomAccessIter @c value_type supports the @c operator>>, 0429 which returns an integer-type right-shifted a specified number of bits. 0430 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0431 0432 \return @c void. 0433 0434 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0435 the right shift, subtraction of right-shifted elements, functors, 0436 or any operations on iterators throw. 0437 0438 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0439 \warning Invalid arguments cause undefined behaviour. 0440 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0441 enabling faster generic-programming. 0442 0443 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0444 \remark * N is @c last - @c first, 0445 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0446 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0447 0448 */ 0449 template <class RandomAccessIter, class Get_char, class Get_length> 0450 inline void string_sort(RandomAccessIter first, RandomAccessIter last, 0451 Get_char get_character, Get_length length) 0452 { 0453 //Don't sort if it's too small to optimize 0454 if (last - first < detail::min_sort_size) 0455 boost::sort::pdqsort(first, last); 0456 else { 0457 //skipping past empties, which allows us to get the character type 0458 //.empty() is not used so as not to require a user declaration of it 0459 while (!length(*first)) { 0460 if (++first == last) 0461 return; 0462 } 0463 detail::string_sort(first, last, get_character, length, get_character((*first), 0)); 0464 } 0465 } 0466 0467 /*! \brief String sort algorithm using range, wraps using default of @c unsigned char. 0468 0469 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0470 0471 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0472 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0473 \par 0474 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0475 so @c string_sort is asymptotically faster 0476 than pure comparison-based algorithms. \n\n 0477 Some performance plots of runtime vs. n and log(range) are provided:\n 0478 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0479 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0480 0481 \param[in] range Range [first, last) for sorting. 0482 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset. 0483 \param[in] length Functor to get the length of the string in characters. 0484 0485 \pre [@c first, @c last) is a valid range. 0486 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0487 0488 \return @c void. 0489 0490 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0491 the right shift, subtraction of right-shifted elements, functors, 0492 or any operations on iterators throw. 0493 0494 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0495 \warning Invalid arguments cause undefined behaviour. 0496 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0497 enabling faster generic-programming. 0498 0499 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0500 \remark * N is @c last - @c first, 0501 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0502 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0503 0504 */ 0505 template <class Range, class Get_char, class Get_length> 0506 inline void string_sort(Range& range, Get_char get_character, Get_length length) 0507 { 0508 string_sort(boost::begin(range), boost::end(range), get_character, length); 0509 } 0510 0511 0512 /*! \brief String sort algorithm using random access iterators, wraps using default of @c unsigned char. 0513 0514 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0515 0516 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0517 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0518 \par 0519 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0520 so @c string_sort is asymptotically faster 0521 than pure comparison-based algorithms. \n\n 0522 Some performance plots of runtime vs. n and log(range) are provided:\n 0523 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0524 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0525 0526 0527 \param[in] first Iterator pointer to first element. 0528 \param[in] last Iterator pointing to one beyond the end of data. 0529 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset. 0530 \param[in] length Functor to get the length of the string in characters. 0531 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order. 0532 0533 0534 \pre [@c first, @c last) is a valid range. 0535 \pre @c RandomAccessIter @c value_type is mutable. 0536 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a> 0537 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0538 0539 \return @c void. 0540 0541 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0542 the right shift, subtraction of right-shifted elements, functors, 0543 or any operations on iterators throw. 0544 0545 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0546 \warning Invalid arguments cause undefined behaviour. 0547 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0548 enabling faster generic-programming. 0549 0550 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0551 \remark * N is @c last - @c first, 0552 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0553 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0554 0555 */ 0556 template <class RandomAccessIter, class Get_char, class Get_length, 0557 class Compare> 0558 inline void string_sort(RandomAccessIter first, RandomAccessIter last, 0559 Get_char get_character, Get_length length, Compare comp) 0560 { 0561 //Don't sort if it's too small to optimize 0562 if (last - first < detail::min_sort_size) 0563 boost::sort::pdqsort(first, last, comp); 0564 else { 0565 //skipping past empties, which allows us to get the character type 0566 //.empty() is not used so as not to require a user declaration of it 0567 while (!length(*first)) { 0568 if (++first == last) 0569 return; 0570 } 0571 detail::string_sort(first, last, get_character, length, comp, 0572 get_character((*first), 0)); 0573 } 0574 } 0575 0576 /*! \brief String sort algorithm using range, wraps using default of @c unsigned char. 0577 0578 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0579 0580 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0581 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0582 \par 0583 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0584 so @c string_sort is asymptotically faster 0585 than pure comparison-based algorithms. \n\n 0586 Some performance plots of runtime vs. n and log(range) are provided:\n 0587 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0588 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0589 0590 0591 \param[in] range Range [first, last) for sorting. 0592 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset. 0593 \param[in] length Functor to get the length of the string in characters. 0594 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order. 0595 0596 0597 \pre [@c first, @c last) is a valid range. 0598 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0599 0600 \return @c void. 0601 0602 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0603 the right shift, subtraction of right-shifted elements, functors, 0604 or any operations on iterators throw. 0605 0606 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0607 \warning Invalid arguments cause undefined behaviour. 0608 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0609 enabling faster generic-programming. 0610 0611 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0612 \remark * N is @c last - @c first, 0613 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0614 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0615 0616 */ 0617 template <class Range, class Get_char, class Get_length, class Compare> 0618 inline void string_sort(Range& range, 0619 Get_char get_character, Get_length length, Compare comp) 0620 { 0621 string_sort(boost::begin(range), boost::end(range), get_character, length, comp); 0622 } 0623 0624 /*! \brief Reverse String sort algorithm using random access iterators. 0625 0626 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0627 0628 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0629 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0630 \par 0631 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0632 so @c string_sort is asymptotically faster 0633 than pure comparison-based algorithms. \n\n 0634 Some performance plots of runtime vs. n and log(range) are provided:\n 0635 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0636 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0637 0638 0639 \param[in] first Iterator pointer to first element. 0640 \param[in] last Iterator pointing to one beyond the end of data. 0641 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset. 0642 \param[in] length Functor to get the length of the string in characters. 0643 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order. 0644 0645 0646 \pre [@c first, @c last) is a valid range. 0647 \pre @c RandomAccessIter @c value_type is mutable. 0648 \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a> 0649 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0650 0651 \return @c void. 0652 0653 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0654 the right shift, subtraction of right-shifted elements, functors, 0655 or any operations on iterators throw. 0656 0657 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0658 \warning Invalid arguments cause undefined behaviour. 0659 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0660 enabling faster generic-programming. 0661 0662 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0663 \remark * N is @c last - @c first, 0664 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0665 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0666 0667 */ 0668 template <class RandomAccessIter, class Get_char, class Get_length, 0669 class Compare> 0670 inline void reverse_string_sort(RandomAccessIter first, 0671 RandomAccessIter last, Get_char get_character, Get_length length, Compare comp) 0672 { 0673 //Don't sort if it's too small to optimize 0674 if (last - first < detail::min_sort_size) 0675 boost::sort::pdqsort(first, last, comp); 0676 else { 0677 //skipping past empties, which allows us to get the character type 0678 //.empty() is not used so as not to require a user declaration of it 0679 while (!length(*(--last))) { 0680 //If there is just one non-empty at the beginning, this is sorted 0681 if (first == last) 0682 return; 0683 } 0684 //making last just after the end of the non-empty part of the array 0685 detail::reverse_string_sort(first, last + 1, get_character, length, comp, 0686 get_character((*last), 0)); 0687 } 0688 } 0689 0690 /*! \brief Reverse String sort algorithm using range. 0691 0692 (All variants fall back to @c boost::sort::pdqsort if the data size is too small, < @c detail::min_sort_size). 0693 0694 \details @c string_sort is a fast templated in-place hybrid radix/comparison algorithm, 0695 which in testing tends to be roughly 50% to 2X faster than @c std::sort for large tests (>=100kB).\n 0696 \par 0697 Worst-case performance is <em> O(N * (lg(range)/s + s)) </em>, 0698 so @c string_sort is asymptotically faster 0699 than pure comparison-based algorithms. \n\n 0700 Some performance plots of runtime vs. n and log(range) are provided:\n 0701 <a href="../../doc/graph/windows_string_sort.htm"> windows_string_sort</a>\n 0702 <a href="../../doc/graph/osx_string_sort.htm"> osx_string_sort</a> 0703 0704 0705 \param[in] range Range [first, last) for sorting. 0706 \param[in] get_character Bracket functor equivalent to @c operator[], taking a number corresponding to the character offset. 0707 \param[in] length Functor to get the length of the string in characters. 0708 \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order. 0709 0710 0711 \pre [@c first, @c last) is a valid range. 0712 \post The elements in the range [@c first, @c last) are sorted in ascending order. 0713 0714 \return @c void. 0715 0716 \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps (or moves), 0717 the right shift, subtraction of right-shifted elements, functors, 0718 or any operations on iterators throw. 0719 0720 \warning Throwing an exception may cause data loss. This will also throw if a small vector resize throws, in which case there will be no data loss. 0721 \warning Invalid arguments cause undefined behaviour. 0722 \note @c spreadsort function provides a wrapper that calls the fastest sorting algorithm available for a data type, 0723 enabling faster generic-programming. 0724 0725 \remark The lesser of <em> O(N*log(N)) </em> comparisons and <em> O(N*log(K/S + S)) </em>operations worst-case, where: 0726 \remark * N is @c last - @c first, 0727 \remark * K is the log of the range in bits (32 for 32-bit integers using their full range), 0728 \remark * S is a constant called max_splits, defaulting to 11 (except for strings where it is the log of the character size). 0729 0730 */ 0731 template <class Range, class Get_char, class Get_length, 0732 class Compare> 0733 inline void reverse_string_sort(Range& range, Get_char get_character, Get_length length, Compare comp) 0734 { 0735 reverse_string_sort(boost::begin(range), boost::end(range), get_character, length, comp); 0736 } 0737 } 0738 } 0739 } 0740 0741 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |