Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 09:46:47

0001 //////////////////////////////////////////////////////////////////////////////
0002 //
0003 // (C) Copyright Ion Gaztanaga 2015-2016.
0004 // Distributed under the Boost Software License, Version 1.0.
0005 // (See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt)
0007 //
0008 // See http://www.boost.org/libs/move for documentation.
0009 //
0010 //////////////////////////////////////////////////////////////////////////////
0011 
0012 #ifndef BOOST_MOVE_ADAPTIVE_SORT_HPP
0013 #define BOOST_MOVE_ADAPTIVE_SORT_HPP
0014 
0015 #include <boost/move/detail/config_begin.hpp>
0016 
0017 #include <boost/move/algo/detail/adaptive_sort_merge.hpp>
0018 #include <cassert>
0019 
0020 #if defined(BOOST_CLANG) || (defined(BOOST_GCC) && (BOOST_GCC >= 40600))
0021 #pragma GCC diagnostic push
0022 #pragma GCC diagnostic ignored "-Wsign-conversion"
0023 #endif
0024 
0025 namespace boost {
0026 namespace movelib {
0027 
0028 ///@cond
0029 namespace detail_adaptive {
0030 
0031 template<class RandIt>
0032 void move_data_backward( RandIt cur_pos
0033               , typename iter_size<RandIt>::type const l_data
0034               , RandIt new_pos
0035               , bool const xbuf_used)
0036 {
0037    //Shift the l_data buffer elements backward so that they end at new_pos+l_data
0038    if(!xbuf_used){
0039       //Buffer contents are live: swap ranges so nothing is overwritten
0040       //(rotate_gcd would do fewer moves but seems slower due to cache effects)
0041       boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
0042    }
0043    else{
0044       //Buffer contents were saved to the external buffer: plain moves suffice
0045       boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
0046    }
0047 }
0047 
0048 template<class RandIt>
0049 void move_data_forward( RandIt cur_pos
0050               , typename iter_size<RandIt>::type const l_data
0051               , RandIt new_pos
0052               , bool const xbuf_used)
0053 {
0054    //Shift the l_data buffer elements forward so that they start at new_pos
0055    if(!xbuf_used){
0056       //Buffer contents are live: swap ranges so nothing is overwritten
0057       //(rotate_gcd would do fewer moves but seems slower due to cache effects)
0058       boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
0059    }
0060    else{
0061       //Buffer contents were saved to the external buffer: plain moves suffice
0062       boost::move(cur_pos, cur_pos+l_data, new_pos);
0063    }
0064 }
0064 
0065 // build blocks of length 2*l_build_buf. l_build_buf is power of two
0066 // input: [0, l_build_buf) elements are buffer, rest unsorted elements
0067 // output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted
0068 //
0069 // First elements are merged from right to left until elements start
0070 // at first. All old elements [first, first + l_build_buf) are placed at the end
0071 // [first+len-l_build_buf, first+len). To achieve this:
0072 // - If we have external memory to merge, we save elements from the buffer
0073 //   so that a non-swapping merge is used. Buffer elements are restored
0074 //   at the end of the buffer from the external memory.
0075 //
0076 // - When the external memory is not available or it is insufficient
0077 //   for a merge operation, left swap merging is used.
0078 //
0079 // Once elements are merged left to right in blocks of l_build_buf, then a single left
0080 // to right merge step is performed to achieve merged blocks of size 2K.
0081 // If external memory is available, usual merge is used, swap merging otherwise.
0082 //
0083 // As a last step, if auxiliary memory is available, in-place merge is performed
0084 // until all is merged or auxiliary memory is not large enough.
0085 template<class RandIt, class Compare, class XBuf>
0086 typename iter_size<RandIt>::type  
0087    adaptive_sort_build_blocks
0088       ( RandIt const first
0089       , typename iter_size<RandIt>::type const len
0090       , typename iter_size<RandIt>::type const l_base
0091       , typename iter_size<RandIt>::type const l_build_buf
0092       , XBuf & xbuf
0093       , Compare comp)
0094 {
0095    typedef typename iter_size<RandIt>::type       size_type;
0096    assert(l_build_buf <= len);
0097    assert(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));   //l_build_buf/l_base must be a power of two
0098 
0099    //Place the start pointer after the buffer
0100    RandIt first_block = first + l_build_buf;
0101    size_type const elements_in_blocks = size_type(len - l_build_buf);
0102 
0103    //////////////////////////////////
0104    // Start of merge to left step
0105    //////////////////////////////////
0106    size_type l_merged = 0u;   //length of the already merged leading run
0107 
0108    assert(l_build_buf);
0109    //If there is not enough buffer for the insertion sort step, just avoid the external buffer
0110    size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
0111    kbuf = kbuf < l_base ? 0 : kbuf;
0112 
0113    if(kbuf){
0114       //Backup internal buffer values in external buffer so they can be overwritten
0115       xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
0116       l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());
0117 
0118       //Now combine them using the buffer. Elements from buffer can be
0119       //overwritten since they've been saved to xbuf
0120       l_merged = op_merge_left_step_multiple
0121          ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, size_type(kbuf - l_merged), comp, move_op());
0122 
0123       //Restore internal buffer from external buffer unless kbuf was l_build_buf,
0124       //in that case restoration will happen later
0125       if(kbuf != l_build_buf){
0126          boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
0127       }
0128    }
0129    else{
0130       l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
0131       rotate_gcd(first_block-l_merged, first_block, first_block+elements_in_blocks);
0132    }
0133 
0134    //Now combine elements using the buffer. Elements from buffer can't be
0135    //overwritten since xbuf was not big enough, so merge swapping elements.
0136    l_merged = op_merge_left_step_multiple
0137       (first_block-l_merged, elements_in_blocks, l_merged, l_build_buf, size_type(l_build_buf - l_merged), comp, swap_op());
0138 
0139    assert(l_merged == l_build_buf);
0140 
0141    //////////////////////////////////
0142    // Start of merge to right step
0143    //////////////////////////////////
0144 
0145    //If kbuf is l_build_buf then we can merge right without swapping
0146    //Saved data is still in xbuf
0147    if(kbuf && kbuf == l_build_buf){
0148       op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
0149       //Restore internal buffer from external buffer if kbuf was l_build_buf,
0150       //as this operation was previously delayed.
0151       boost::move(xbuf.data(), xbuf.data() + kbuf, first);
0152    }
0153    else{
0154       op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
0155    }
0156    xbuf.clear();
0157    //2*l_build_buf or total already merged
0158    return min_value<size_type>(elements_in_blocks, size_type(2u*l_build_buf));
0159 }
0160 
0161 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
0162 void adaptive_sort_combine_blocks
0163    ( RandItKeys const keys
0164    , KeyCompare key_comp
0165    , RandIt const first
0166    , typename iter_size<RandIt>::type const len
0167    , typename iter_size<RandIt>::type const l_prev_merged
0168    , typename iter_size<RandIt>::type const l_block
0169    , bool const use_buf
0170    , bool const xbuf_used
0171    , XBuf & xbuf
0172    , Compare comp
0173    , bool merge_left)
0174 {
0175    boost::movelib::ignore(xbuf);   //referenced only to avoid unused-parameter warnings
0176    typedef typename iter_size<RandIt>::type         size_type;
0177 
0178    size_type const l_reg_combined   = size_type(2u*l_prev_merged);   //regular combined block length
0179    size_type l_irreg_combined = 0;   //length of the irregular (trailing) combined block
0180    size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
0181    size_type const n_reg_combined = len/l_reg_combined;
0182    RandIt combined_first = first;
0183 
0184    boost::movelib::ignore(l_total_combined);
0185    assert(l_total_combined <= len);
0186 
0187    size_type const max_i = size_type(n_reg_combined + (l_irreg_combined != 0));   //the irregular tail counts as one more block
0188 
0189    if(merge_left || !use_buf) {   //process pairs left to right
0190       for( size_type combined_i = 0; combined_i != max_i; ) {
0191          //Now merge blocks
0192          bool const is_last = combined_i==n_reg_combined;
0193          size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
0194 
0195          range_xbuf<RandIt, size_type, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
0196          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
0197          combine_params( keys, key_comp, l_cur_combined
0198                         , l_prev_merged, l_block, rbuf
0199                         , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
0200          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combpar:            ", len + l_block);
0201          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
0202             BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
0203          if(!use_buf){
0204             merge_blocks_bufferless
0205                (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
0206          }
0207          else{
0208             merge_blocks_left
0209                (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
0210          }
0211          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After merge_blocks_L: ", len + l_block);
0212          ++combined_i;
0213          if(combined_i != max_i)
0214             combined_first += l_reg_combined;
0215       }
0216    }
0217    else{   //process pairs right to left, merging each one to the right
0218       combined_first += size_type(l_reg_combined*(max_i-1u));
0219       for( size_type combined_i = max_i; combined_i; ) {
0220          --combined_i;
0221          bool const is_last = combined_i==n_reg_combined;
0222          size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
0223 
0224          RandIt const combined_last(combined_first+l_cur_combined);
0225          range_xbuf<RandIt, size_type, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
0226          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
0227          combine_params( keys, key_comp, l_cur_combined
0228                         , l_prev_merged, l_block, rbuf
0229                         , n_block_a, n_block_b, l_irreg1, l_irreg2);  //Outputs
0230          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combpar:            ", len + l_block);
0231          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
0232          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(boost::movelib::is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
0233          merge_blocks_right
0234             (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
0235          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After merge_blocks_R: ", len + l_block);
0236          if(combined_i)
0237             combined_first -= l_reg_combined;
0238       }
0239    }
0240 }
0241 
0242 //Returns true if buffer is placed in 
0243 //[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is
0244 //[buffer,buffer+l_intbuf)
0245 template<class RandIt, class Compare, class XBuf>
0246 bool adaptive_sort_combine_all_blocks
0247    ( RandIt keys
0248    , typename iter_size<RandIt>::type &n_keys
0249    , RandIt const buffer
0250    , typename iter_size<RandIt>::type const l_buf_plus_data
0251    , typename iter_size<RandIt>::type l_merged
0252    , typename iter_size<RandIt>::type &l_intbuf
0253    , XBuf & xbuf
0254    , Compare comp)
0255 {
0256    typedef typename iter_size<RandIt>::type       size_type;
0257 
0258    RandIt const first = buffer + l_intbuf;
0259    size_type const l_data = size_type(l_buf_plus_data - l_intbuf);
0260    size_type const l_unique = size_type(l_intbuf + n_keys);
0261    //Backup data to external buffer once if possible
0262    bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
0263    if(common_xbuf){
0264       xbuf.move_assign(buffer, l_intbuf);
0265    }
0266 
0267    bool prev_merge_left = true;
0268    size_type l_prev_total_combined = l_merged, l_prev_block = 0;
0269    bool prev_use_internal_buf = true;
0270 
0271    for( size_type n = 0; l_data > l_merged
0272       ; l_merged = size_type(2u*l_merged)
0273       , ++n){
0274       //If l_intbuf is non-zero, use that internal buffer.
0275       //    Implies l_block == l_intbuf && use_internal_buf == true
0276       //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer,
0277       //    Implies l_block == n_keys/2 && use_internal_buf == true
0278       //Otherwise, just give up and use all keys to merge using rotations (use_internal_buf = false)
0279       bool use_internal_buf = false;
0280       size_type const l_block = lblock_for_combine(l_intbuf, n_keys, size_type(2*l_merged), use_internal_buf);
0281       assert(!l_intbuf || (l_block == l_intbuf));
0282       assert(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
0283       assert(n == 0 || (!use_internal_buf || l_prev_block == l_block) );
0284       
0285       bool const is_merge_left = (n&1) == 0;   //alternate the merge direction on each pass
0286       size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
0287       if(n && prev_use_internal_buf && prev_merge_left){
0288          if(is_merge_left || !use_internal_buf){
0289             move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
0290          }
0291          else{
0292             //Put the buffer just after l_total_combined
0293             RandIt const buf_end = first+l_prev_total_combined;
0294             RandIt const buf_beg = buf_end-l_block;
0295             if(l_prev_total_combined > l_total_combined){
0296                size_type const l_diff = size_type(l_prev_total_combined - l_total_combined);
0297                move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
0298             }
0299             else if(l_prev_total_combined < l_total_combined){
0300                size_type const l_diff = size_type(l_total_combined - l_prev_total_combined);
0301                move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
0302             }
0303          }
0304          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After move_data     : ", l_data + l_intbuf);
0305       }
0306 
0307       //Combine to form l_merged*2 segments
0308       if(n_keys){
0309          size_type upper_n_keys_this_iter = size_type(2u*l_merged/l_block);
0310          if(upper_n_keys_this_iter > 256){
0311             adaptive_sort_combine_blocks
0312                ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
0313                , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
0314          }
0315          else{
0316             unsigned char uint_keys[256];   //few keys needed: a small stack array of byte keys is enough
0317             adaptive_sort_combine_blocks
0318                ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
0319                , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
0320             }
0321       }
0322       else{
0323          size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();   //integral keys live in the trailing part of xbuf
0324          adaptive_sort_combine_blocks
0325             ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
0326             , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
0327       }
0328 
0329       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? "   After comb blocks L:  " : "   After comb blocks R:  ", l_data + l_intbuf);
0330       prev_merge_left = is_merge_left;
0331       l_prev_total_combined = l_total_combined;
0332       l_prev_block = l_block;
0333       prev_use_internal_buf = use_internal_buf;
0334    }
0335    assert(l_prev_total_combined == l_data);
0336    bool const buffer_right = prev_use_internal_buf && prev_merge_left;
0337 
0338    l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
0339    n_keys = size_type(l_unique - l_intbuf);
0340    //Restore data from the external common buffer if it was used
0341    if(common_xbuf){
0342       if(buffer_right){
0343          boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
0344       }
0345       else{
0346          boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
0347       }
0348    }
0349    return buffer_right;
0350 }
0351 
0352 
0353 template<class RandIt, class Compare, class XBuf>
0354 void adaptive_sort_final_merge( bool buffer_right
0355                               , RandIt const first
0356                               , typename iter_size<RandIt>::type const l_intbuf
0357                               , typename iter_size<RandIt>::type const n_keys
0358                               , typename iter_size<RandIt>::type const len
0359                               , XBuf & xbuf
0360                               , Compare comp)
0361 {
0362    //assert(n_keys || xbuf.size() == l_intbuf);
0363    xbuf.clear();
0364 
0365    typedef typename iter_size<RandIt>::type         size_type;
0366 
0367    size_type const n_key_plus_buf = size_type(l_intbuf+n_keys);
0368    if(buffer_right){   //internal buffer lies at [first+len-l_intbuf, first+len)
0369       //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
0370       stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
0371       stable_merge( first+n_keys, first+len-l_intbuf, first+len,    antistable<Compare>(comp), xbuf);
0372       unstable_sort(first, first+n_keys, comp, xbuf);
0373       stable_merge(first, first+n_keys, first+len, comp, xbuf);
0374    }
0375    else{   //keys and internal buffer are the leading [first, first+n_key_plus_buf) elements
0376       //Use stable sort as some buffer elements might not be unique (see non_unique_buf)
0377       stable_sort(first, first+n_key_plus_buf, comp, xbuf);
0378       if(xbuf.capacity() >= n_key_plus_buf){
0379          buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
0380       }
0381       else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
0382          stable_merge( first+n_keys, first+n_key_plus_buf
0383                      , first+len, comp, xbuf);
0384          stable_merge(first, first+n_keys, first+len, comp, xbuf);
0385       }
0386       else{
0387          stable_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
0388       }
0389    }
0390    BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   After final_merge   : ", len);
0391 }
0392 
0393 template<class RandIt, class Compare, class Unsigned, class XBuf>
0394 bool adaptive_sort_build_params
0395    (RandIt first, Unsigned const len, Compare comp
0396    , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
0397    , XBuf & xbuf
0398    )
0399 {
0400    typedef typename iter_size<RandIt>::type         size_type;
0401 
0402    //Calculate ideal parameters and try to collect needed unique keys
0403    l_base = 0u;
0404 
0405    //Try to find a value near sqrt(len) that is 2^N*l_base where
0406    //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
0407    //as build_blocks merges to the left iteratively duplicating the
0408    //merged size and all the buffer must be used just before the final
0409    //merge to right step. This guarantees "build_blocks" produces 
0410    //segments of size l_build_buf*2, maximizing the classic merge phase.
0411    l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));
0412 
0413    //The internal buffer can be expanded if there is enough external memory
0414    while(xbuf.capacity() >= l_intbuf*2){
0415       l_intbuf = size_type(2u*l_intbuf);
0416    }
0417 
0418    //This is the minimum number of keys to implement the ideal algorithm
0419    //
0420    //l_intbuf is used as buffer plus the key count
0421    size_type n_min_ideal_keys = size_type(l_intbuf-1u);
0422    while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
0423       --n_min_ideal_keys;
0424    }
0425    ++n_min_ideal_keys;
0426    assert(n_min_ideal_keys <= l_intbuf);
0427 
0428    if(xbuf.template supports_aligned_trailing<size_type>
0429          (l_intbuf, size_type((size_type(len-l_intbuf)-1u)/l_intbuf+1u))){
0430       n_keys = 0u;   //keys will be stored as integers in the trailing part of xbuf
0431       l_build_buf = l_intbuf;
0432    }
0433    else{
0434       //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
0435       //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
0436       //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
0437       //
0438       //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
0439       //(to be used for keys in combine_all_blocks) as the whole l_build_buf
0440       //will be backed up in the buffer during build_blocks.
0441       bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
0442       size_type const to_collect = non_unique_buf ? n_min_ideal_keys : size_type(l_intbuf*2u);
0443       size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
0444 
0445       //If available memory is 2*sqrt(l), then for "build_params" 
0446       //the situation is the same as if 2*l_intbuf were collected.
0447       if(non_unique_buf && collected == n_min_ideal_keys){
0448          l_build_buf = l_intbuf;
0449          n_keys = n_min_ideal_keys;
0450       }
0451       else if(collected == 2*l_intbuf){
0452          //l_intbuf*2 elements found. Use all of them in the build phase 
0453          l_build_buf = size_type(l_intbuf*2);
0454          n_keys = l_intbuf;
0455       }
0456       else if(collected >= (n_min_ideal_keys+l_intbuf)){ 
0457          l_build_buf = l_intbuf;
0458          n_keys = size_type(collected - l_intbuf);
0459       }
0460       //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
0461       //is possible (due to very low unique keys), then go to a slow sort based on rotations.
0462       else{
0463          assert(collected < (n_min_ideal_keys+l_intbuf));
0464          if(collected < 4){  //No combination possible with less than 4 keys
0465             return false;
0466          }
0467          n_keys = l_intbuf;
0468          while(n_keys & (n_keys-1u)){
0469             n_keys &= size_type(n_keys-1u);  // make it a power of 2
0470          }
0471          while(n_keys > collected){
0472             n_keys/=2;
0473          }
0474          //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
0475          l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
0476          l_intbuf = 0;
0477          l_build_buf = n_keys;
0478       }
0479       assert((n_keys+l_intbuf) >= l_build_buf);
0480    }
0481 
0482    return true;
0483 }
0484 
0485 // Main explanation of the sort algorithm.
0486 //
0487 // csqrtlen = ceil(sqrt(len));
0488 //
0489 // * First, 2*csqrtlen unique elements are extracted from elements to be
0490 //   sorted and placed in the beginning of the range.
0491 //
0492 // * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
0493 //   will be used as auxiliary memory, so trailing len-2*csqrtlen elements
0494 //   are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
0495 //   2*csqrtlen unique elements are again the leading elements of the whole range.
0496 //
0497 // * Step "combine_blocks": pairs of previously formed blocks are merged with a different
0498 //   ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
0499 //   "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
0500 //   elements, etc) until all trailing (len-2*csqrtlen) elements are merged.
0501 //
0502 //   In "combine_blocks", len/csqrtlen elements are used as "keys" (markers) to
0503 //   know if elements belong to the first or second block to be merged and another 
0504 //   leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
0505 //
0506 //   Iteratively until all trailing (len-2*csqrtlen) elements are merged:
0507 //      Iteratively for each pair of previously merged block:
0508 //         * Blocks are divided groups of csqrtlen elements and
0509 //           2*merged_block/csqrtlen keys are sorted to be used as markers
0510 //         * Groups are selection-sorted by first or last element (depending whether they are going
0511 //           to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
0512 //         * Elements of each block pair are merged using the csqrtlen buffer taking into account
0513 //           if they belong to the first half or second half (marked by the key).
0514 //
0515 // * In the final merge step leading elements (2*csqrtlen) are sorted and merged with
0516 //   rotations with the rest of sorted elements in the "combine_blocks" step.
0517 //
0518 // Corner cases:
0519 //
0520 // * If no 2*csqrtlen elements can be extracted:
0521 //
0522 //    * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
0523 //      as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
0524 //      means that an additional "combine_blocks" step will be needed to merge all elements.
0525 //    
0526 //    * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
0527 //      then reduces the number of elements used as buffer and keys in the "build_blocks"
0528 //   and "combine_blocks" steps. If "combine_blocks" does not have enough keys due to this reduction
0529 //      then uses a rotation based smart merge.
0530 //
0531 //    * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
0532 //
0533 // * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used.
0534 //
0535 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
0536 //   then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
0537 //   keys to combine blocks.
0538 //
0539 // * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks
0540 //   using classic merge and "combine_blocks" will use bigger blocks when merging.
0541 template<class RandIt, class Compare, class XBuf>
0542 void adaptive_sort_impl
0543    ( RandIt first
0544    , typename iter_size<RandIt>::type const len
0545    , Compare comp
0546    , XBuf & xbuf
0547    )
0548 {
0549    typedef typename iter_size<RandIt>::type         size_type;
0550 
0551    //Small sorts go directly to insertion sort
0552    if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
0553       insertion_sort(first, first + len, comp);
0554    }
0555    else if((len-len/2) <= xbuf.capacity()){   //enough auxiliary memory for half-copying merge sort
0556       merge_sort(first, first+len, comp, xbuf.data());
0557    }
0558    else{
0559       //Make sure it is at least four
0560       BOOST_MOVE_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
0561 
0562       size_type l_base = 0;
0563       size_type l_intbuf = 0;
0564       size_type n_keys = 0;
0565       size_type l_build_buf = 0;
0566 
0567       //Calculate and extract needed unique elements. If a minimum is not achieved
0568       //fallback to a slow stable sort
0569       if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
0570          stable_sort(first, first+len, comp, xbuf);
0571       }
0572       else{
0573          assert(l_build_buf);
0574          //Otherwise, continue the adaptive_sort
0575          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n   After collect_unique: ", len);
0576          size_type const n_key_plus_buf = size_type(l_intbuf+n_keys);
0577          //l_build_buf is always power of two if l_intbuf is zero
0578          assert(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));
0579 
0580          //Classic merge sort until internal buffer and xbuf are exhausted
0581          size_type const l_merged = adaptive_sort_build_blocks
0582             ( first + n_key_plus_buf-l_build_buf
0583             , size_type(len-n_key_plus_buf+l_build_buf)
0584             , l_base, l_build_buf, xbuf, comp);
0585          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   After build_blocks:   ", len);
0586 
0587          //Non-trivial merge
0588          bool const buffer_right = adaptive_sort_combine_all_blocks
0589             (first, n_keys, first+n_keys, size_type(len-n_keys), l_merged, l_intbuf, xbuf, comp);
0590 
0591          //Sort keys and buffer and merge the whole sequence
0592          adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
0593       }
0594    }
0595 }
0596 
0597 }  //namespace detail_adaptive {
0598 
0599 ///@endcond
0600 
0601 //! <b>Effects</b>: Sorts the elements in the range [first, last) in ascending order according
0602 //!   to comparison functor "comp". The sort is stable (order of equal elements
0603 //!   is guaranteed to be preserved). Performance is improved if additional raw storage is
0604 //!   provided.
0605 //!
0606 //! <b>Requires</b>:
0607 //!   - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
0608 //!   - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
0609 //!
0610 //! <b>Parameters</b>:
0611 //!   - first, last: the range of elements to sort
0612 //!   - comp: comparison function object which returns true if the first argument is is ordered before the second.
0613 //!   - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len"
0614 //!      elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
0615 //!      is ceil(std::distance(first, last)/2).
0616 //!
0617 //! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
0618 //!   of dereferenced RandIt throws.
0619 //!
0620 //! <b>Complexity</b>: Always K x O(Nxlog(N)) comparisons and move assignments/constructors/swaps.
0621 //!   Comparisons are close to minimum even with no additional memory. Constant factor for data movement is minimized
0622 //!   when uninitialized_len is ceil(std::distance(first, last)/2). Pretty good enough performance is achieved when
0623 //!   ceil(sqrt(std::distance(first, last)))*2.
0624 //!
0625 //! <b>Caution</b>: Experimental implementation, not production-ready.
0626 template<class RandIt, class RandRawIt, class Compare>
0627 void adaptive_sort( RandIt first, RandIt last, Compare comp
0628                , RandRawIt uninitialized
0629                , typename iter_size<RandIt>::type uninitialized_len)
0630 {
0631    //Wrap the caller-supplied raw storage in an adaptive_xbuf and run the implementation
0632    typedef typename iterator_traits<RandIt>::value_type value_type;
0633    typedef typename iter_size<RandIt>::type  size_type;
0634    ::boost::movelib::adaptive_xbuf<value_type, RandRawIt, size_type> xbuf(uninitialized, uninitialized_len);
0635    ::boost::movelib::detail_adaptive::adaptive_sort_impl(first, size_type(last - first), comp, xbuf);
0636 }
0637 
0638 template<class RandIt, class Compare>
0639 void adaptive_sort( RandIt first, RandIt last, Compare comp)
0640 {
0641    typedef typename iterator_traits<RandIt>::value_type value_type;
0642    adaptive_sort(first, last, comp, static_cast<value_type*>(0), 0u);  //no auxiliary storage provided
0643 }
0644 
0645 }  //namespace movelib {
0646 }  //namespace boost {
0647 
0648 #include <boost/move/detail/config_end.hpp>
0649 
0650 #if defined(BOOST_CLANG) || (defined(BOOST_GCC) && (BOOST_GCC >= 40600))
0651 #pragma GCC diagnostic pop
0652 #endif
0653 
0654 #endif   //#define BOOST_MOVE_ADAPTIVE_SORT_HPP