Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:51:48

0001 // Details for a templated general-case hybrid-radix string_sort.
0002 
0003 //          Copyright Steven J. Ross 2001 - 2014.
0004 // Distributed under the Boost Software License, Version 1.0.
0005 //    (See accompanying file LICENSE_1_0.txt or copy at
0006 //          http://www.boost.org/LICENSE_1_0.txt)
0007 
0008 // See http://www.boost.org/libs/sort for library home page.
0009 
0010 /*
0011 Some improvements suggested by:
0012 Phil Endecott and Frank Gennari
0013 */
0014 
0015 #ifndef BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
0016 #define BOOST_SORT_SPREADSORT_DETAIL_SPREAD_SORT_HPP
0017 #include <algorithm>
0018 #include <vector>
0019 #include <cstring>
0020 #include <limits>
0021 #include <functional>
0022 #include <boost/static_assert.hpp>
0023 #include <boost/utility/enable_if.hpp>
0024 #include <boost/sort/spreadsort/detail/constants.hpp>
0025 #include <boost/sort/spreadsort/detail/spreadsort_common.hpp>
0026 #include <boost/cstdint.hpp>
0027 
0028 namespace boost {
0029 namespace sort {
0030 namespace spreadsort {
0031   namespace detail {
0032     static const int max_step_size = 64;
0033 
0034     //Offsetting on identical characters.  This function works a chunk of
0035     //characters at a time for cache efficiency and optimal worst-case
0036     //performance.
0037     template<class RandomAccessIter, class Unsigned_char_type>
0038     inline void
0039     update_offset(RandomAccessIter first, RandomAccessIter finish,
0040                   size_t &char_offset)
0041     {
0042       const int char_size = sizeof(Unsigned_char_type);
0043       size_t nextOffset = char_offset;
0044       int step_size = max_step_size / char_size;
0045       while (true) {
0046         RandomAccessIter curr = first;
0047         do {
0048           //Ignore empties, but if the nextOffset would exceed the length or
0049           //not match, exit; we've found the last matching character
0050           //This will reduce the step_size if the current step doesn't match.
0051           if ((*curr).size() > char_offset) {
0052             if((*curr).size() <= (nextOffset + step_size)) {
0053               step_size = (*curr).size() - nextOffset - 1;
0054               if (step_size < 1) {
0055                 char_offset = nextOffset;
0056                 return;
0057               }
0058             }
0059             const int step_byte_size = step_size * char_size;
0060             if (memcmp(curr->data() + nextOffset, first->data() + nextOffset, 
0061                        step_byte_size) != 0) {
0062               if (step_size == 1) {
0063                 char_offset = nextOffset;
0064                 return;
0065               }
0066               step_size = (step_size > 4) ? 4 : 1;
0067               continue;
0068             }
0069           }
0070           ++curr;
0071         } while (curr != finish);
0072         nextOffset += step_size;
0073       }
0074     }
0075 
0076     //Offsetting on identical characters.  This function works a character
0077     //at a time for optimal worst-case performance.
0078     template<class RandomAccessIter, class Get_char, class Get_length>
0079     inline void
0080     update_offset(RandomAccessIter first, RandomAccessIter finish,
0081                   size_t &char_offset, Get_char get_character, Get_length length)
0082     {
0083       size_t nextOffset = char_offset;
0084       while (true) {
0085         RandomAccessIter curr = first;
0086         do {
0087           //ignore empties, but if the nextOffset would exceed the length or
0088           //not match, exit; we've found the last matching character
0089           if (length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1)
0090             || get_character((*curr), nextOffset) != get_character((*first), nextOffset))) {
0091             char_offset = nextOffset;
0092             return;
0093           }
0094         } while (++curr != finish);
0095         ++nextOffset;
0096       }
0097     }
0098 
0099     //This comparison functor assumes strings are identical up to char_offset
0100     template<class Data_type, class Unsigned_char_type>
0101     struct offset_less_than {
0102       offset_less_than(size_t char_offset) : fchar_offset(char_offset){}
0103       inline bool operator()(const Data_type &x, const Data_type &y) const
0104       {
0105         size_t minSize = (std::min)(x.size(), y.size());
0106         for (size_t u = fchar_offset; u < minSize; ++u) {
0107           BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
0108           if (static_cast<Unsigned_char_type>(x[u]) !=
0109               static_cast<Unsigned_char_type>(y[u])) {
0110             return static_cast<Unsigned_char_type>(x[u]) < 
0111               static_cast<Unsigned_char_type>(y[u]);
0112           }
0113         }
0114         return x.size() < y.size();
0115       }
0116       size_t fchar_offset;
0117     };
0118 
0119     //Compares strings assuming they are identical up to char_offset
0120     template<class Data_type, class Unsigned_char_type>
0121     struct offset_greater_than {
0122       offset_greater_than(size_t char_offset) : fchar_offset(char_offset){}
0123       inline bool operator()(const Data_type &x, const Data_type &y) const
0124       {
0125         size_t minSize = (std::min)(x.size(), y.size());
0126         for (size_t u = fchar_offset; u < minSize; ++u) {
0127           BOOST_STATIC_ASSERT(sizeof(x[u]) == sizeof(Unsigned_char_type));
0128           if (static_cast<Unsigned_char_type>(x[u]) !=
0129               static_cast<Unsigned_char_type>(y[u])) {
0130             return static_cast<Unsigned_char_type>(x[u]) > 
0131               static_cast<Unsigned_char_type>(y[u]);
0132           }
0133         }
0134         return x.size() > y.size();
0135       }
0136       size_t fchar_offset;
0137     };
0138 
0139     //This comparison functor assumes strings are identical up to char_offset
0140     template<class Data_type, class Get_char, class Get_length>
0141     struct offset_char_less_than {
0142       offset_char_less_than(size_t char_offset) : fchar_offset(char_offset){}
0143       inline bool operator()(const Data_type &x, const Data_type &y) const
0144       {
0145         size_t minSize = (std::min)(length(x), length(y));
0146         for (size_t u = fchar_offset; u < minSize; ++u) {
0147           if (get_character(x, u) != get_character(y, u)) {
0148             return get_character(x, u) < get_character(y, u);
0149           }
0150         }
0151         return length(x) < length(y);
0152       }
0153       size_t fchar_offset;
0154       Get_char get_character;
0155       Get_length length;
0156     };
0157 
0158     //String sorting recursive implementation
0159     template <class RandomAccessIter, class Unsigned_char_type>
0160     inline void
0161     string_sort_rec(RandomAccessIter first, RandomAccessIter last,
0162                     size_t char_offset,
0163                     std::vector<RandomAccessIter> &bin_cache,
0164                     unsigned cache_offset, size_t *bin_sizes)
0165     {
0166       typedef typename std::iterator_traits<RandomAccessIter>::value_type
0167         Data_type;
0168       //This section makes handling of long identical substrings much faster
0169       //with a mild average performance impact.
0170       //Iterate to the end of the empties.  If all empty, return
0171       while ((*first).size() <= char_offset) {
0172         if (++first == last)
0173           return;
0174       }
0175       RandomAccessIter finish = last - 1;
0176       //Getting the last non-empty
0177       for (;(*finish).size() <= char_offset; --finish);
0178       ++finish;
0179       //Offsetting on identical characters.  This section works
0180       //a few characters at a time for optimal worst-case performance.
0181       update_offset<RandomAccessIter, Unsigned_char_type>(first, finish,
0182                                                           char_offset);
0183       
0184       const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
0185       //Equal worst-case of radix and comparison is when bin_count = n*log(n).
0186       const unsigned max_size = bin_count;
0187       const unsigned membin_count = bin_count + 1;
0188       unsigned cache_end;
0189       RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
0190                                           cache_end, membin_count) + 1;
0191 
0192       //Calculating the size of each bin; this takes roughly 10% of runtime
0193       for (RandomAccessIter current = first; current != last; ++current) {
0194         if ((*current).size() <= char_offset) {
0195           bin_sizes[0]++;
0196         }
0197         else
0198           bin_sizes[static_cast<Unsigned_char_type>((*current)[char_offset])
0199                     + 1]++;
0200       }
0201       //Assign the bin positions
0202       bin_cache[cache_offset] = first;
0203       for (unsigned u = 0; u < membin_count - 1; u++)
0204         bin_cache[cache_offset + u + 1] =
0205           bin_cache[cache_offset + u] + bin_sizes[u];
0206 
0207       //Swap into place
0208       RandomAccessIter next_bin_start = first;
0209       //handling empty bins
0210       RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
0211       next_bin_start +=  bin_sizes[0];
0212       RandomAccessIter * target_bin;
0213       //Iterating over each element in the bin of empties
0214       for (RandomAccessIter current = *local_bin; current < next_bin_start;
0215           ++current) {
0216         //empties belong in this bin
0217         while ((*current).size() > char_offset) {
0218           target_bin =
0219             bins + static_cast<Unsigned_char_type>((*current)[char_offset]);
0220           iter_swap(current, (*target_bin)++);
0221         }
0222       }
0223       *local_bin = next_bin_start;
0224       //iterate backwards to find the last bin with elements in it
0225       //this saves iterations in multiple loops
0226       unsigned last_bin = bin_count - 1;
0227       for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
0228       //This dominates runtime, mostly in the swap and bin lookups
0229       for (unsigned u = 0; u < last_bin; ++u) {
0230         local_bin = bins + u;
0231         next_bin_start += bin_sizes[u + 1];
0232         //Iterating over each element in this bin
0233         for (RandomAccessIter current = *local_bin; current < next_bin_start;
0234             ++current) {
0235           //Swapping into place until the correct element has been swapped in
0236           for (target_bin = bins + static_cast<Unsigned_char_type>
0237               ((*current)[char_offset]);  target_bin != local_bin;
0238             target_bin = bins + static_cast<Unsigned_char_type>
0239               ((*current)[char_offset])) iter_swap(current, (*target_bin)++);
0240         }
0241         *local_bin = next_bin_start;
0242       }
0243       bins[last_bin] = last;
0244       //Recursing
0245       RandomAccessIter lastPos = bin_cache[cache_offset];
0246       //Skip this loop for empties
0247       for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
0248           lastPos = bin_cache[u], ++u) {
0249         size_t count = bin_cache[u] - lastPos;
0250         //don't sort unless there are at least two items to Compare
0251         if (count < 2)
0252           continue;
0253         //using boost::sort::pdqsort if its worst-case is better
0254         if (count < max_size)
0255           boost::sort::pdqsort(lastPos, bin_cache[u],
0256               offset_less_than<Data_type, Unsigned_char_type>(char_offset + 1));
0257         else
0258           string_sort_rec<RandomAccessIter, Unsigned_char_type>(lastPos,
0259               bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
0260       }
0261     }
0262 
0263     //Sorts strings in reverse order, with empties at the end
0264     template <class RandomAccessIter, class Unsigned_char_type>
0265     inline void
0266     reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
0267                             size_t char_offset,
0268                             std::vector<RandomAccessIter> &bin_cache,
0269                             unsigned cache_offset,
0270                             size_t *bin_sizes)
0271     {
0272       typedef typename std::iterator_traits<RandomAccessIter>::value_type
0273         Data_type;
0274       //This section makes handling of long identical substrings much faster
0275       //with a mild average performance impact.
0276       RandomAccessIter curr = first;
0277       //Iterate to the end of the empties.  If all empty, return
0278       while ((*curr).size() <= char_offset) {
0279         if (++curr == last)
0280           return;
0281       }
0282       //Getting the last non-empty
0283       while ((*(--last)).size() <= char_offset);
0284       ++last;
0285       //Offsetting on identical characters.  This section works
0286       //a few characters at a time for optimal worst-case performance.
0287       update_offset<RandomAccessIter, Unsigned_char_type>(curr, last,
0288                                                           char_offset);
0289       RandomAccessIter * target_bin;
0290 
0291       const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
0292       //Equal worst-case of radix and comparison when bin_count = n*log(n).
0293       const unsigned max_size = bin_count;
0294       const unsigned membin_count = bin_count + 1;
0295       const unsigned max_bin = bin_count - 1;
0296       unsigned cache_end;
0297       RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
0298                                           cache_end, membin_count);
0299       RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
0300 
0301       //Calculating the size of each bin; this takes roughly 10% of runtime
0302       for (RandomAccessIter current = first; current != last; ++current) {
0303         if ((*current).size() <= char_offset) {
0304           bin_sizes[bin_count]++;
0305         }
0306         else
0307           bin_sizes[max_bin - static_cast<Unsigned_char_type>
0308             ((*current)[char_offset])]++;
0309       }
0310       //Assign the bin positions
0311       bin_cache[cache_offset] = first;
0312       for (unsigned u = 0; u < membin_count - 1; u++)
0313         bin_cache[cache_offset + u + 1] =
0314           bin_cache[cache_offset + u] + bin_sizes[u];
0315 
0316       //Swap into place
0317       RandomAccessIter next_bin_start = last;
0318       //handling empty bins
0319       RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
0320       RandomAccessIter lastFull = *local_bin;
0321       //Iterating over each element in the bin of empties
0322       for (RandomAccessIter current = *local_bin; current < next_bin_start;
0323           ++current) {
0324         //empties belong in this bin
0325         while ((*current).size() > char_offset) {
0326           target_bin =
0327             end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
0328           iter_swap(current, (*target_bin)++);
0329         }
0330       }
0331       *local_bin = next_bin_start;
0332       next_bin_start = first;
0333       //iterate backwards to find the last non-empty bin
0334       //this saves iterations in multiple loops
0335       unsigned last_bin = max_bin;
0336       for (; last_bin && !bin_sizes[last_bin]; --last_bin);
0337       //This dominates runtime, mostly in the swap and bin lookups
0338       for (unsigned u = 0; u < last_bin; ++u) {
0339         local_bin = bins + u;
0340         next_bin_start += bin_sizes[u];
0341         //Iterating over each element in this bin
0342         for (RandomAccessIter current = *local_bin; current < next_bin_start;
0343             ++current) {
0344           //Swapping into place until the correct element has been swapped in
0345           for (target_bin =
0346             end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]);
0347             target_bin != local_bin;
0348             target_bin =
0349             end_bin - static_cast<Unsigned_char_type>((*current)[char_offset]))
0350               iter_swap(current, (*target_bin)++);
0351         }
0352         *local_bin = next_bin_start;
0353       }
0354       bins[last_bin] = lastFull;
0355       //Recursing
0356       RandomAccessIter lastPos = first;
0357       //Skip this loop for empties
0358       for (unsigned u = cache_offset; u <= cache_offset + last_bin;
0359           lastPos = bin_cache[u], ++u) {
0360         size_t count = bin_cache[u] - lastPos;
0361         //don't sort unless there are at least two items to Compare
0362         if (count < 2)
0363           continue;
0364         //using boost::sort::pdqsort if its worst-case is better
0365         if (count < max_size)
0366           boost::sort::pdqsort(lastPos, bin_cache[u], offset_greater_than<Data_type,
0367                     Unsigned_char_type>(char_offset + 1));
0368         else
0369           reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
0370     (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
0371       }
0372     }
0373 
0374     //String sorting recursive implementation
0375     template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
0376               class Get_length>
0377     inline void
0378     string_sort_rec(RandomAccessIter first, RandomAccessIter last,
0379               size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
0380               unsigned cache_offset, size_t *bin_sizes,
0381               Get_char get_character, Get_length length)
0382     {
0383       typedef typename std::iterator_traits<RandomAccessIter>::value_type
0384         Data_type;
0385       //This section makes handling of long identical substrings much faster
0386       //with a mild average performance impact.
0387       //Iterate to the end of the empties.  If all empty, return
0388       while (length(*first) <= char_offset) {
0389         if (++first == last)
0390           return;
0391       }
0392       RandomAccessIter finish = last - 1;
0393       //Getting the last non-empty
0394       for (;length(*finish) <= char_offset; --finish);
0395       ++finish;
0396       update_offset(first, finish, char_offset, get_character, length);
0397 
0398       const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
0399       //Equal worst-case of radix and comparison is when bin_count = n*log(n).
0400       const unsigned max_size = bin_count;
0401       const unsigned membin_count = bin_count + 1;
0402       unsigned cache_end;
0403       RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
0404                                           cache_end, membin_count) + 1;
0405 
0406       //Calculating the size of each bin; this takes roughly 10% of runtime
0407       for (RandomAccessIter current = first; current != last; ++current) {
0408         if (length(*current) <= char_offset) {
0409           bin_sizes[0]++;
0410         }
0411         else
0412           bin_sizes[get_character((*current), char_offset) + 1]++;
0413       }
0414       //Assign the bin positions
0415       bin_cache[cache_offset] = first;
0416       for (unsigned u = 0; u < membin_count - 1; u++)
0417         bin_cache[cache_offset + u + 1] =
0418           bin_cache[cache_offset + u] + bin_sizes[u];
0419 
0420       //Swap into place
0421       RandomAccessIter next_bin_start = first;
0422       //handling empty bins
0423       RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
0424       next_bin_start +=  bin_sizes[0];
0425       RandomAccessIter * target_bin;
0426       //Iterating over each element in the bin of empties
0427       for (RandomAccessIter current = *local_bin; current < next_bin_start;
0428           ++current) {
0429         //empties belong in this bin
0430         while (length(*current) > char_offset) {
0431           target_bin = bins + get_character((*current), char_offset);
0432           iter_swap(current, (*target_bin)++);
0433         }
0434       }
0435       *local_bin = next_bin_start;
0436       //iterate backwards to find the last bin with elements in it
0437       //this saves iterations in multiple loops
0438       unsigned last_bin = bin_count - 1;
0439       for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
0440       //This dominates runtime, mostly in the swap and bin lookups
0441       for (unsigned ii = 0; ii < last_bin; ++ii) {
0442         local_bin = bins + ii;
0443         next_bin_start += bin_sizes[ii + 1];
0444         //Iterating over each element in this bin
0445         for (RandomAccessIter current = *local_bin; current < next_bin_start;
0446             ++current) {
0447           //Swapping into place until the correct element has been swapped in
0448           for (target_bin = bins + get_character((*current), char_offset);
0449               target_bin != local_bin;
0450               target_bin = bins + get_character((*current), char_offset))
0451             iter_swap(current, (*target_bin)++);
0452         }
0453         *local_bin = next_bin_start;
0454       }
0455       bins[last_bin] = last;
0456 
0457       //Recursing
0458       RandomAccessIter lastPos = bin_cache[cache_offset];
0459       //Skip this loop for empties
0460       for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
0461           lastPos = bin_cache[u], ++u) {
0462         size_t count = bin_cache[u] - lastPos;
0463         //don't sort unless there are at least two items to Compare
0464         if (count < 2)
0465           continue;
0466         //using boost::sort::pdqsort if its worst-case is better
0467         if (count < max_size)
0468           boost::sort::pdqsort(lastPos, bin_cache[u], offset_char_less_than<Data_type,
0469                     Get_char, Get_length>(char_offset + 1));
0470         else
0471           string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
0472             Get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache,
0473                         cache_end, bin_sizes, get_character, length);
0474       }
0475     }
0476 
0477     //String sorting recursive implementation
0478     template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
0479               class Get_length, class Compare>
0480     inline void
0481     string_sort_rec(RandomAccessIter first, RandomAccessIter last,
0482               size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
0483               unsigned cache_offset, size_t *bin_sizes,
0484               Get_char get_character, Get_length length, Compare comp)
0485     {
0486       //This section makes handling of long identical substrings much faster
0487       //with a mild average performance impact.
0488       //Iterate to the end of the empties.  If all empty, return
0489       while (length(*first) <= char_offset) {
0490         if (++first == last)
0491           return;
0492       }
0493       RandomAccessIter finish = last - 1;
0494       //Getting the last non-empty
0495       for (;length(*finish) <= char_offset; --finish);
0496       ++finish;
0497       update_offset(first, finish, char_offset, get_character, length);
0498 
0499       const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
0500       //Equal worst-case of radix and comparison is when bin_count = n*log(n).
0501       const unsigned max_size = bin_count;
0502       const unsigned membin_count = bin_count + 1;
0503       unsigned cache_end;
0504       RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
0505                                           cache_end, membin_count) + 1;
0506 
0507       //Calculating the size of each bin; this takes roughly 10% of runtime
0508       for (RandomAccessIter current = first; current != last; ++current) {
0509         if (length(*current) <= char_offset) {
0510           bin_sizes[0]++;
0511         }
0512         else
0513           bin_sizes[get_character((*current), char_offset) + 1]++;
0514       }
0515       //Assign the bin positions
0516       bin_cache[cache_offset] = first;
0517       for (unsigned u = 0; u < membin_count - 1; u++)
0518         bin_cache[cache_offset + u + 1] =
0519           bin_cache[cache_offset + u] + bin_sizes[u];
0520 
0521       //Swap into place
0522       RandomAccessIter next_bin_start = first;
0523       //handling empty bins
0524       RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
0525       next_bin_start +=  bin_sizes[0];
0526       RandomAccessIter * target_bin;
0527       //Iterating over each element in the bin of empties
0528       for (RandomAccessIter current = *local_bin; current < next_bin_start;
0529           ++current) {
0530         //empties belong in this bin
0531         while (length(*current) > char_offset) {
0532           target_bin = bins + get_character((*current), char_offset);
0533           iter_swap(current, (*target_bin)++);
0534         }
0535       }
0536       *local_bin = next_bin_start;
0537       //iterate backwards to find the last bin with elements in it
0538       //this saves iterations in multiple loops
0539       unsigned last_bin = bin_count - 1;
0540       for (; last_bin && !bin_sizes[last_bin + 1]; --last_bin);
0541       //This dominates runtime, mostly in the swap and bin lookups
0542       for (unsigned u = 0; u < last_bin; ++u) {
0543         local_bin = bins + u;
0544         next_bin_start += bin_sizes[u + 1];
0545         //Iterating over each element in this bin
0546         for (RandomAccessIter current = *local_bin; current < next_bin_start;
0547             ++current) {
0548           //Swapping into place until the correct element has been swapped in
0549           for (target_bin = bins + get_character((*current), char_offset);
0550               target_bin != local_bin;
0551               target_bin = bins + get_character((*current), char_offset))
0552             iter_swap(current, (*target_bin)++);
0553         }
0554         *local_bin = next_bin_start;
0555       }
0556       bins[last_bin] = last;
0557 
0558       //Recursing
0559       RandomAccessIter lastPos = bin_cache[cache_offset];
0560       //Skip this loop for empties
0561       for (unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2;
0562           lastPos = bin_cache[u], ++u) {
0563         size_t count = bin_cache[u] - lastPos;
0564         //don't sort unless there are at least two items to Compare
0565         if (count < 2)
0566           continue;
0567         //using boost::sort::pdqsort if its worst-case is better
0568         if (count < max_size)
0569           boost::sort::pdqsort(lastPos, bin_cache[u], comp);
0570         else
0571           string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
0572                           Get_length, Compare>
0573             (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
0574              bin_sizes, get_character, length, comp);
0575       }
0576     }
0577 
0578     //Sorts strings in reverse order, with empties at the end
0579     template <class RandomAccessIter, class Unsigned_char_type, class Get_char,
0580               class Get_length, class Compare>
0581     inline void
0582     reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last,
0583               size_t char_offset, std::vector<RandomAccessIter> &bin_cache,
0584               unsigned cache_offset, size_t *bin_sizes,
0585               Get_char get_character, Get_length length, Compare comp)
0586     {
0587       //This section makes handling of long identical substrings much faster
0588       //with a mild average performance impact.
0589       RandomAccessIter curr = first;
0590       //Iterate to the end of the empties.  If all empty, return
0591       while (length(*curr) <= char_offset) {
0592         if (++curr == last)
0593           return;
0594       }
0595       //Getting the last non-empty
0596       while (length(*(--last)) <= char_offset);
0597       ++last;
0598       //Offsetting on identical characters.  This section works
0599       //a character at a time for optimal worst-case performance.
0600       update_offset(curr, last, char_offset, get_character, length);
0601 
0602       const unsigned bin_count = (1 << (sizeof(Unsigned_char_type)*8));
0603       //Equal worst-case of radix and comparison is when bin_count = n*log(n).
0604       const unsigned max_size = bin_count;
0605       const unsigned membin_count = bin_count + 1;
0606       const unsigned max_bin = bin_count - 1;
0607       unsigned cache_end;
0608       RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset,
0609                                           cache_end, membin_count);
0610       RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
0611 
0612       //Calculating the size of each bin; this takes roughly 10% of runtime
0613       for (RandomAccessIter current = first; current != last; ++current) {
0614         if (length(*current) <= char_offset) {
0615           bin_sizes[bin_count]++;
0616         }
0617         else
0618           bin_sizes[max_bin - get_character((*current), char_offset)]++;
0619       }
0620       //Assign the bin positions
0621       bin_cache[cache_offset] = first;
0622       for (unsigned u = 0; u < membin_count - 1; u++)
0623         bin_cache[cache_offset + u + 1] =
0624           bin_cache[cache_offset + u] + bin_sizes[u];
0625 
0626       //Swap into place
0627       RandomAccessIter next_bin_start = last;
0628       //handling empty bins
0629       RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
0630       RandomAccessIter lastFull = *local_bin;
0631       RandomAccessIter * target_bin;
0632       //Iterating over each element in the bin of empties
0633       for (RandomAccessIter current = *local_bin; current < next_bin_start;
0634           ++current) {
0635         //empties belong in this bin
0636         while (length(*current) > char_offset) {
0637           target_bin = end_bin - get_character((*current), char_offset);
0638           iter_swap(current, (*target_bin)++);
0639         }
0640       }
0641       *local_bin = next_bin_start;
0642       next_bin_start = first;
0643       //iterate backwards to find the last bin with elements in it
0644       //this saves iterations in multiple loops
0645       unsigned last_bin = max_bin;
0646       for (; last_bin && !bin_sizes[last_bin]; --last_bin);
0647       //This dominates runtime, mostly in the swap and bin lookups
0648       for (unsigned u = 0; u < last_bin; ++u) {
0649         local_bin = bins + u;
0650         next_bin_start += bin_sizes[u];
0651         //Iterating over each element in this bin
0652         for (RandomAccessIter current = *local_bin; current < next_bin_start;
0653             ++current) {
0654           //Swapping into place until the correct element has been swapped in
0655           for (target_bin = end_bin - get_character((*current), char_offset);
0656               target_bin != local_bin;
0657               target_bin = end_bin - get_character((*current), char_offset))
0658             iter_swap(current, (*target_bin)++);
0659         }
0660         *local_bin = next_bin_start;
0661       }
0662       bins[last_bin] = lastFull;
0663       //Recursing
0664       RandomAccessIter lastPos = first;
0665       //Skip this loop for empties
0666       for (unsigned u = cache_offset; u <= cache_offset + last_bin;
0667           lastPos = bin_cache[u], ++u) {
0668         size_t count = bin_cache[u] - lastPos;
0669         //don't sort unless there are at least two items to Compare
0670         if (count < 2)
0671           continue;
0672         //using boost::sort::pdqsort if its worst-case is better
0673         if (count < max_size)
0674           boost::sort::pdqsort(lastPos, bin_cache[u], comp);
0675         else
0676           reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type,
0677                                   Get_char, Get_length, Compare>
0678             (lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end,
0679              bin_sizes, get_character, length, comp);
0680       }
0681     }
0682 
0683     //Holds the bin vector and makes the initial recursive call
0684     template <class RandomAccessIter, class Unsigned_char_type>
0685     inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
0686                                                                       >::type
0687     string_sort(RandomAccessIter first, RandomAccessIter last,
0688                 Unsigned_char_type)
0689     {
0690       size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
0691       std::vector<RandomAccessIter> bin_cache;
0692       string_sort_rec<RandomAccessIter, Unsigned_char_type>
0693         (first, last, 0, bin_cache, 0, bin_sizes);
0694     }
0695 
0696     template <class RandomAccessIter, class Unsigned_char_type>
0697     inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
0698                                                                        >::type
0699     string_sort(RandomAccessIter first, RandomAccessIter last,
0700                 Unsigned_char_type)
0701     {
0702       // Use boost::sort::pdqsort if the char_type is too large for string_sort.
0703       boost::sort::pdqsort(first, last);
0704     }
0705 
0706     //Holds the bin vector and makes the initial recursive call
0707     template <class RandomAccessIter, class Unsigned_char_type>
0708     inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
0709                                                                       >::type
0710     reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
0711                         Unsigned_char_type)
0712     {
0713       size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
0714       std::vector<RandomAccessIter> bin_cache;
0715       reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type>
0716         (first, last, 0, bin_cache, 0, bin_sizes);
0717     }
0718 
0719     template <class RandomAccessIter, class Unsigned_char_type>
0720     inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
0721                                                                        >::type
0722     reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
0723                 Unsigned_char_type)
0724     {
0725       typedef typename std::iterator_traits<RandomAccessIter>::value_type
0726         Data_type;
0727       // Use boost::sort::pdqsort if the char_type is too large for string_sort.
0728       boost::sort::pdqsort(first, last, std::greater<Data_type>());
0729     }
0730 
0731     //Holds the bin vector and makes the initial recursive call
0732     template <class RandomAccessIter, class Get_char, class Get_length,
0733               class Unsigned_char_type>
0734     inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
0735                                                                       >::type
0736     string_sort(RandomAccessIter first, RandomAccessIter last,
0737                 Get_char get_character, Get_length length, Unsigned_char_type)
0738     {
0739       size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
0740       std::vector<RandomAccessIter> bin_cache;
0741       string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
0742         Get_length>(first, last, 0, bin_cache, 0, bin_sizes, get_character, length);
0743     }
0744 
0745     template <class RandomAccessIter, class Get_char, class Get_length,
0746               class Unsigned_char_type>
0747     inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
0748                                                                        >::type
0749     string_sort(RandomAccessIter first, RandomAccessIter last,
0750                 Get_char get_character, Get_length length, Unsigned_char_type)
0751     {
0752       // Use boost::sort::pdqsort if the char_type is too large for string_sort.
0753       boost::sort::pdqsort(first, last);
0754     }
0755 
0756     //Holds the bin vector and makes the initial recursive call
0757     template <class RandomAccessIter, class Get_char, class Get_length,
0758               class Compare, class Unsigned_char_type>
0759     inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
0760                                                                       >::type
0761     string_sort(RandomAccessIter first, RandomAccessIter last,
0762         Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
0763     {
0764       size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
0765       std::vector<RandomAccessIter> bin_cache;
0766       string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char
0767         , Get_length, Compare>
0768         (first, last, 0, bin_cache, 0, bin_sizes, get_character, length, comp);
0769     }
0770 
0771     //disable_if_c was refusing to compile, so rewrote to use enable_if_c
0772     template <class RandomAccessIter, class Get_char, class Get_length,
0773               class Compare, class Unsigned_char_type>
0774     inline typename boost::enable_if_c< (sizeof(Unsigned_char_type) > 2), void
0775                                         >::type
0776     string_sort(RandomAccessIter first, RandomAccessIter last,
0777         Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
0778     {
0779       // Use boost::sort::pdqsort if the char_type is too large for string_sort.
0780       boost::sort::pdqsort(first, last, comp);
0781     }
0782 
0783     //Holds the bin vector and makes the initial recursive call
0784     template <class RandomAccessIter, class Get_char, class Get_length,
0785               class Compare, class Unsigned_char_type>
0786     inline typename boost::enable_if_c< sizeof(Unsigned_char_type) <= 2, void
0787                                                                       >::type
0788     reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
0789         Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
0790     {
0791       size_t bin_sizes[(1 << (8 * sizeof(Unsigned_char_type))) + 1];
0792       std::vector<RandomAccessIter> bin_cache;
0793       reverse_string_sort_rec<RandomAccessIter, Unsigned_char_type, Get_char,
0794                               Get_length, Compare>
0795         (first, last, 0, bin_cache, 0, bin_sizes, get_character, length, comp);
0796     }
0797 
0798     template <class RandomAccessIter, class Get_char, class Get_length,
0799               class Compare, class Unsigned_char_type>
0800     inline typename boost::disable_if_c< sizeof(Unsigned_char_type) <= 2, void
0801                                                                        >::type
0802     reverse_string_sort(RandomAccessIter first, RandomAccessIter last,
0803         Get_char get_character, Get_length length, Compare comp, Unsigned_char_type)
0804     {
0805       // Use boost::sort::pdqsort if the char_type is too large for string_sort.
0806       boost::sort::pdqsort(first, last, comp);
0807     }
0808   }
0809 }
0810 }
0811 }
0812 
0813 #endif