Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:40:51

0001 //////////////////////////////////////////////////////////////////////////////
0002 //
0003 // (C) Copyright Ion Gaztanaga 2015-2016.
0004 // Distributed under the Boost Software License, Version 1.0.
0005 // (See accompanying file LICENSE_1_0.txt or copy at
0006 // http://www.boost.org/LICENSE_1_0.txt)
0007 //
0008 // See http://www.boost.org/libs/move for documentation.
0009 //
0010 //////////////////////////////////////////////////////////////////////////////
0011 
0012 #ifndef BOOST_MOVE_ADAPTIVE_MERGE_HPP
0013 #define BOOST_MOVE_ADAPTIVE_MERGE_HPP
0014 
0015 #include <boost/move/detail/config_begin.hpp>
0016 #include <boost/move/algo/detail/adaptive_sort_merge.hpp>
0017 #include <cassert>
0018 
0019 #if defined(BOOST_CLANG) || (defined(BOOST_GCC) && (BOOST_GCC >= 40600))
0020 #pragma GCC diagnostic push
0021 #pragma GCC diagnostic ignored "-Wsign-conversion"
0022 #endif
0023 
0024 namespace boost {
0025 namespace movelib {
0026 
0027 ///@cond
0028 namespace detail_adaptive {
0029 
0030 template<class RandIt, class Compare, class XBuf>
0031 inline void adaptive_merge_combine_blocks( RandIt first
0032                                       , typename iter_size<RandIt>::type len1
0033                                       , typename iter_size<RandIt>::type len2
0034                                       , typename iter_size<RandIt>::type collected
0035                                       , typename iter_size<RandIt>::type n_keys
0036                                       , typename iter_size<RandIt>::type l_block
0037                                       , bool use_internal_buf
0038                                       , bool xbuf_used
0039                                       , Compare comp
0040                                       , XBuf & xbuf
0041                                       )
0042 {
0043    typedef typename iter_size<RandIt>::type       size_type;
0044 
0045    size_type const len = size_type(len1+len2);
0046    size_type const l_combine  = size_type(len-collected);
0047    size_type const l_combine1 = size_type(len1-collected);
0048 
0049     if(n_keys){
0050       RandIt const first_data = first+collected;
0051       RandIt const keys = first;
0052       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combine: ", len);
0053       if(xbuf_used){
0054          if(xbuf.size() < l_block){
0055             xbuf.initialize_until(l_block, *first);
0056          }
0057          assert(xbuf.size() >= l_block);
0058          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
0059          combine_params( keys, comp, l_combine
0060                            , l_combine1, l_block, xbuf
0061                            , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
0062          op_merge_blocks_with_buf
0063             (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
0064          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg xbf: ", len);
0065       }
0066       else{
0067          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
0068          combine_params( keys, comp, l_combine
0069                            , l_combine1, l_block, xbuf
0070                            , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
0071          if(use_internal_buf){
0072             op_merge_blocks_with_buf
0073                ( keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b
0074                , l_irreg2, comp, swap_op(), first_data-l_block);
0075             BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A mrg buf: ", len);
0076          }
0077          else{
0078             merge_blocks_bufferless
0079                (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
0080             BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg nbf: ", len);
0081          }
0082       }
0083    }
0084    else{
0085       xbuf.shrink_to_fit(l_block);
0086       if(xbuf.size() < l_block){
0087          xbuf.initialize_until(l_block, *first);
0088       }
0089       size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
0090       size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
0091       combine_params( uint_keys, less(), l_combine
0092                      , l_combine1, l_block, xbuf
0093                      , n_block_a, n_block_b, l_irreg1, l_irreg2, true);   //Outputs
0094       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combine: ", len);
0095       assert(xbuf.size() >= l_block);
0096       op_merge_blocks_with_buf
0097          (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), xbuf.data());
0098       xbuf.clear();
0099       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg buf: ", len);
0100    }
0101 }
0102 
0103 template<class RandIt, class Compare, class XBuf>
0104 inline void adaptive_merge_final_merge( RandIt first
0105                                       , typename iter_size<RandIt>::type len1
0106                                       , typename iter_size<RandIt>::type len2
0107                                       , typename iter_size<RandIt>::type collected
0108                                       , typename iter_size<RandIt>::type l_intbuf
0109                                       , typename iter_size<RandIt>::type //l_block
0110                                       , bool //use_internal_buf
0111                                       , bool xbuf_used
0112                                       , Compare comp
0113                                       , XBuf & xbuf
0114                                       )
0115 {
0116    typedef typename iter_size<RandIt>::type       size_type;
0117 
0118    size_type n_keys = size_type(collected-l_intbuf);
0119    size_type len = size_type(len1+len2);
0120    if (!xbuf_used || n_keys) {
0121       xbuf.clear();
0122       const size_type middle = xbuf_used && n_keys ? n_keys: collected;
0123       unstable_sort(first, first + middle, comp, xbuf);
0124       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A k/b srt: ", len);
0125       stable_merge(first, first + middle, first + len, comp, xbuf);
0126    }
0127    BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A fin mrg: ", len);
0128 }
0129 
0130 template<class SizeType>
0131 inline static SizeType adaptive_merge_n_keys_without_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
0132 {
0133    typedef SizeType size_type;
0134    //This is the minimum number of keys to implement the ideal algorithm
0135    size_type n_keys = size_type(len1/l_block + len2/l_block);
0136    const size_type second_half_blocks = size_type(len2/l_block);
0137    const size_type first_half_aux = size_type(len1 - l_intbuf);
0138    while(n_keys >= ((first_half_aux-n_keys)/l_block + second_half_blocks)){
0139       --n_keys;
0140    }
0141    ++n_keys;
0142    return n_keys;
0143 }
0144 
0145 template<class SizeType>
0146 inline static SizeType adaptive_merge_n_keys_with_external_keys(SizeType l_block, SizeType len1, SizeType len2, SizeType l_intbuf)
0147 {
0148    typedef SizeType size_type;
0149    //This is the minimum number of keys to implement the ideal algorithm
0150    size_type n_keys = size_type((len1-l_intbuf)/l_block + len2/l_block);
0151    return n_keys;
0152 }
0153 
0154 template<class SizeType, class Xbuf>
0155 inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
0156 {
0157    typedef SizeType size_type;
0158    size_type l_block = rl_block;
0159    size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;
0160 
0161    if (xbuf.capacity() > l_block){
0162       l_block = xbuf.capacity();
0163    }
0164 
0165    //This is the minimum number of keys to implement the ideal algorithm
0166    size_type n_keys = adaptive_merge_n_keys_without_external_keys(l_block, len1, len2, l_intbuf);
0167    assert(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));
0168 
0169    if(xbuf.template supports_aligned_trailing<size_type>
0170       ( l_block
0171       , adaptive_merge_n_keys_with_external_keys(l_block, len1, len2, l_intbuf)))
0172    {
0173       n_keys = 0u;
0174    }
0175    l_intbuf_inout = l_intbuf;
0176    rl_block = l_block;
0177    return n_keys;
0178 }
0179 
0180 // Main explanation of the merge algorithm.
0181 //
0182 // csqrtlen = ceil(sqrt(len));
0183 //
0184 // * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
0185 //   unique elements are extracted from elements to be sorted and placed in the beginning of the range.
0186 //
0187 // * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
0188 //   are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements.
0189 //
0190 //   Explanation of the "combine_blocks" step:
0191 //
0192 //         * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements.
0193 //           Remaining elements that can't form a group are grouped in front of those elements.
0194 //         * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements.
0195 //           Remaining elements that can't form a group are grouped in the back of those elements.
0196 //         * In parallel the following two steps are performed:
0197 //             *  Groups are selection-sorted by first or last element (depending whether they are going
0198 //                to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
0199 //             * Elements of each block pair are merged using the csqrtlen buffer taking into account
0200 //                if they belong to the first half or second half (marked by the key).
0201 //
0202 // * In the final merge step leading "to_collect" elements are merged with rotations
0203 //   with the rest of merged elements in the "combine_blocks" step.
0204 //
0205 // Corner cases:
0206 //
0207 // * If no "to_collect" elements can be extracted:
0208 //
0209 //    * If more than a minimum number of elements is extracted
0210 //      then reduces the number of elements used as buffer and keys in the
0211 //      and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
0212 //      then uses a rotation based smart merge.
0213 //
0214 //    * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
0215 //
0216 // * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed.
0217 //
0218 // * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed.
0219 //
0220 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
0221 //   then no csqrtlen need to be extracted and "combine_blocks" will use integral
0222 //   keys to combine blocks.
0223 template<class RandIt, class Compare, class XBuf>
0224 void adaptive_merge_impl
0225    ( RandIt first
0226    , typename iter_size<RandIt>::type len1
0227    , typename iter_size<RandIt>::type len2
0228    , Compare comp
0229    , XBuf & xbuf
0230    )
0231 {
0232    typedef typename iter_size<RandIt>::type size_type;
0233 
0234    if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
0235       buffered_merge( first, first+len1
0236                     , first + len1+len2, comp, xbuf);
0237    }
0238    else{
0239       const size_type len = size_type(len1+len2);
0240       //Calculate ideal parameters and try to collect needed unique keys
0241       size_type l_block = size_type(ceil_sqrt(len));
0242 
0243       //One range is not big enough to extract keys and the internal buffer so a
0244       //rotation-based based merge will do just fine
0245       if(len1 <= l_block*2 || len2 <= l_block*2){
0246          merge_bufferless(first, first+len1, first+len1+len2, comp);
0247          return;
0248       }
0249 
0250       //Detail the number of keys and internal buffer. If xbuf has enough memory, no
0251       //internal buffer is needed so l_intbuf will remain 0.
0252       size_type l_intbuf = 0;
0253       size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);
0254       size_type const to_collect = size_type(l_intbuf+n_keys);
0255       //Try to extract needed unique values from the first range
0256       size_type const collected  = collect_unique(first, first+len1, to_collect, comp, xbuf);
0257       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n   A collect: ", len);
0258 
0259       //Not the minimum number of keys is not available on the first range, so fallback to rotations
0260       if(collected != to_collect && collected < 4){
0261          merge_bufferless(first, first+collected, first+len1, comp);
0262          merge_bufferless(first, first + len1, first + len1 + len2, comp);
0263          return;
0264       }
0265 
0266       //If not enough keys but more than minimum, adjust the internal buffer and key count
0267       bool use_internal_buf = collected == to_collect;
0268       if (!use_internal_buf){
0269          l_intbuf = 0u;
0270          n_keys = collected;
0271          l_block  = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
0272          //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used
0273          l_intbuf = use_internal_buf ? l_block : 0u;
0274       }
0275 
0276       bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
0277       //Merge trailing elements using smart merges
0278       adaptive_merge_combine_blocks(first, len1, len2, collected,   n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
0279       //Merge buffer and keys with the rest of the values
0280       adaptive_merge_final_merge   (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
0281    }
0282 }
0283 
0284 }  //namespace detail_adaptive {
0285 
0286 ///@endcond
0287 
0288 //! <b>Effects</b>: Merges two consecutive sorted ranges [first, middle) and [middle, last)
0289 //!   into one sorted range [first, last) according to the given comparison function comp.
0290 //!   The algorithm is stable (if there are equivalent elements in the original two ranges,
0291 //!   the elements from the first range (preserving their original order) precede the elements
0292 //!   from the second range (preserving their original order).
0293 //!
0294 //! <b>Requires</b>:
0295 //!   - RandIt must meet the requirements of ValueSwappable and RandomAccessIterator.
0296 //!   - The type of dereferenced RandIt must meet the requirements of MoveAssignable and MoveConstructible.
0297 //!
0298 //! <b>Parameters</b>:
0299 //!   - first: the beginning of the first sorted range. 
0300 //!   - middle: the end of the first sorted range and the beginning of the second
0301 //!   - last: the end of the second sorted range
0302 //!   - comp: comparison function object which returns true if the first argument is is ordered before the second.
0303 //!   - uninitialized, uninitialized_len: raw storage starting on "uninitialized", able to hold "uninitialized_len"
0304 //!      elements of type iterator_traits<RandIt>::value_type. Maximum performance is achieved when uninitialized_len
0305 //!      is min(std::distance(first, middle), std::distance(middle, last)).
0306 //!
0307 //! <b>Throws</b>: If comp throws or the move constructor, move assignment or swap of the type
0308 //!   of dereferenced RandIt throws.
0309 //!
0310 //! <b>Complexity</b>: Always K x O(N) comparisons and move assignments/constructors/swaps.
0311 //!   Constant factor for comparisons and data movement is minimized when uninitialized_len
0312 //!   is min(std::distance(first, middle), std::distance(middle, last)).
0313 //!   Pretty good enough performance is achieved when uninitialized_len is
0314 //!   ceil(sqrt(std::distance(first, last)))*2.
0315 //!
0316 //! <b>Caution</b>: Experimental implementation, not production-ready.
0317 template<class RandIt, class Compare>
0318 void adaptive_merge( RandIt first, RandIt middle, RandIt last, Compare comp
0319                 , typename iterator_traits<RandIt>::value_type* uninitialized = 0
0320                 , typename iter_size<RandIt>::type uninitialized_len = 0)
0321 {
0322    typedef typename iter_size<RandIt>::type  size_type;
0323    typedef typename iterator_traits<RandIt>::value_type value_type;
0324 
0325    if (first == middle || middle == last){
0326       return;
0327    }
0328 
0329    //Reduce ranges to merge if possible
0330    do {
0331       if (comp(*middle, *first)){
0332          break;
0333       }
0334       ++first;
0335       if (first == middle)
0336          return;
0337    } while(1);
0338 
0339    RandIt first_high(middle);
0340    --first_high;
0341    do {
0342       --last;
0343       if (comp(*last, *first_high)){
0344          ++last;
0345          break;
0346       }
0347       if (last == middle)
0348          return;
0349    } while(1);
0350 
0351    ::boost::movelib::adaptive_xbuf<value_type, value_type*, size_type> xbuf(uninitialized, size_type(uninitialized_len));
0352    ::boost::movelib::detail_adaptive::adaptive_merge_impl(first, size_type(middle - first), size_type(last - middle), comp, xbuf);
0353 }
0354 
0355 }  //namespace movelib {
0356 }  //namespace boost {
0357 
0358 #if defined(BOOST_CLANG) || (defined(BOOST_GCC) && (BOOST_GCC >= 40600))
0359 #pragma GCC diagnostic pop
0360 #endif
0361 
0362 #include <boost/move/detail/config_end.hpp>
0363 
0364 #endif   //#define BOOST_MOVE_ADAPTIVE_MERGE_HPP