Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:51:47

0001 //----------------------------------------------------------------------------
0002 /// @file sample_sort.hpp
0003 /// @brief contains the class sample_sort
0004 ///
0005 /// @author Copyright (c) 2016 Francisco Jose Tapia (fjtapia@gmail.com )\n
0006 ///         Distributed under the Boost Software License, Version 1.0.\n
0007 ///         ( See accompanying file LICENSE_1_0.txt or copy at
0008 ///           http://www.boost.org/LICENSE_1_0.txt  )
0009 /// @version 0.1
0010 ///
0011 /// @remarks
0012 //-----------------------------------------------------------------------------
0013 #ifndef __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
0014 #define __BOOST_SORT_PARALLEL_DETAIL_SAMPLE_SORT_HPP
0015 
0016 #include <ciso646>
0017 #include <functional>
0018 #include <future>
0019 #include <iterator>
0020 #include <memory>
0021 #include <type_traits>
0022 #include <vector>
0023 
0024 #include <algorithm>
0025 #include <boost/sort/spinsort/spinsort.hpp>
0026 #include <boost/sort/common/indirect.hpp>
0027 #include <boost/sort/common/util/atomic.hpp>
0028 #include <boost/sort/common/merge_four.hpp>
0029 #include <boost/sort/common/merge_vector.hpp>
0030 #include <boost/sort/common/range.hpp>
0031 
0032 namespace boost
0033 {
0034 namespace sort
0035 {
0036 namespace sample_detail
0037 {
0038 //---------------------------------------------------------------------------
0039 //                    USING SENTENCES
0040 //---------------------------------------------------------------------------
0041 namespace bsc = boost::sort::common;
0042 namespace bss = boost::sort::spin_detail;
0043 namespace bscu = boost::sort::common::util;
0044 using bsc::range;
0045 using bscu::atomic_add;
0046 using bsc::merge_vector4;
0047 using bsc::uninit_merge_level4;
0048 using bsc::less_ptr_no_null;
0049 
0050 //
0051 ///---------------------------------------------------------------------------
0052 /// @struct sample_sort
0053 /// @brief This a structure for to implement a sample sort, exception
0054 ///        safe
0055 /// @tparam
0056 /// @remarks
0057 //----------------------------------------------------------------------------
0058 template<class Iter_t, class Compare>
0059 struct sample_sort
0060 {
0061     //------------------------------------------------------------------------
0062     //                     DEFINITIONS
0063     //------------------------------------------------------------------------
0064     typedef value_iter<Iter_t> value_t;
0065     typedef range<Iter_t> range_it;
0066     typedef range<value_t *> range_buf;
0067     typedef sample_sort<Iter_t, Compare> this_t;
0068 
0069     //------------------------------------------------------------------------
0070     //                VARIABLES AND CONSTANTS
0071     //------------------------------------------------------------------------
0072     // minimun numbers of elements for to be sortd in parallel mode
0073     static const uint32_t thread_min = (1 << 16);
0074 
0075     // Number of threads to use in the algorithm
0076     // Number of intervals for to do the internal division of the data
0077     uint32_t nthread, ninterval;
0078 
0079     // Bool variables indicating if the auxiliary memory is constructed
0080     // and indicating in the auxiliary memory had been obtained inside the
0081     /// algorithm or had been received as a parameter
0082     bool construct = false, owner = false;
0083 
0084     // Comparison object for to compare two elements
0085     Compare comp;
0086 
0087     // Range with all the elements to sort
0088     range_it global_range;
0089 
0090     // range with the auxiliary memory
0091     range_buf global_buf;
0092 
0093     // vector of futures
0094     std::vector<std::future<void>> vfuture;
0095 
0096     // vector of vectors which contains the ranges to merge obtained in the
0097     // subdivision
0098     std::vector<std::vector<range_it>> vv_range_it;
0099 
0100     // each vector of ranges of the vv_range_it, need their corresponding buffer
0101     // for to do the merge
0102     std::vector<std::vector<range_buf>> vv_range_buf;
0103 
0104     // Initial vector of ranges
0105     std::vector<range_it> vrange_it_ini;
0106 
0107     // Initial vector of buffers
0108     std::vector<range_buf> vrange_buf_ini;
0109 
0110     // atomic counter for to know when are finished the function_t created
0111     // inside a function
0112     std::atomic<uint32_t> njob;
0113 
0114     // Indicate if an error in the algorithm for to undo all
0115     bool error;
0116 
0117     //------------------------------------------------------------------------
0118     //                       FUNCTIONS OF THE STRUCT
0119     //------------------------------------------------------------------------
0120     void initial_configuration(void);
0121 
0122     sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
0123                  value_t *paux, size_t naux);
0124 
0125     sample_sort(Iter_t first, Iter_t last)
0126     : sample_sort (first, last, Compare(), std::thread::hardware_concurrency(),
0127                    nullptr, 0) { };
0128 
0129     sample_sort(Iter_t first, Iter_t last, Compare cmp)
0130     : sample_sort(first, last, cmp, std::thread::hardware_concurrency(),
0131                   nullptr, 0) { };
0132 
0133     sample_sort(Iter_t first, Iter_t last, uint32_t num_thread)
0134     : sample_sort(first, last, Compare(), num_thread, nullptr, 0) { };
0135 
0136     sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread)
0137     : sample_sort(first, last, cmp, num_thread, nullptr, 0) { };
0138 
0139     sample_sort(Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
0140                 range_buf range_buf_initial)
0141     : sample_sort(first, last, cmp, num_thread,
0142                   range_buf_initial.first, range_buf_initial.size()) { };
0143 
0144     void destroy_all(void);
0145     //
0146     //-----------------------------------------------------------------------------
0147     //  function :~sample_sort
0148     /// @brief destructor of the class. The utility is to destroy the temporary
0149     ///        buffer used in the sorting process
0150     //-----------------------------------------------------------------------------
0151     ~sample_sort(void) { destroy_all(); };
0152     //
0153     //-----------------------------------------------------------------------
0154     //  function : execute first
0155     /// @brief this a function to assign to each thread in the first merge
0156     //-----------------------------------------------------------------------
0157     void execute_first(void)
0158     {
0159         uint32_t job = 0;
0160         while ((job = atomic_add(njob, 1)) < ninterval)
0161         {
0162             uninit_merge_level4(vrange_buf_ini[job], vv_range_it[job],
0163                             vv_range_buf[job], comp);
0164         };
0165     };
0166     //
0167     //-----------------------------------------------------------------------
0168     //  function : execute
0169     /// @brief this is a function to assignt each thread the final merge
0170     //-----------------------------------------------------------------------
0171     void execute(void)
0172     {
0173         uint32_t job = 0;
0174         while ((job = atomic_add(njob, 1)) < ninterval)
0175         {
0176             merge_vector4(vrange_buf_ini[job], vrange_it_ini[job],
0177                             vv_range_buf[job], vv_range_it[job], comp);
0178         };
0179     };
0180     //
0181     //-----------------------------------------------------------------------
0182     //  function : first merge
0183     /// @brief Implement the merge of the initially sparse ranges
0184     //-----------------------------------------------------------------------
0185     void first_merge(void)
0186     { //---------------------------------- begin --------------------------
0187         njob = 0;
0188 
0189         for (uint32_t i = 0; i < nthread; ++i)
0190         {
0191             vfuture[i] = std::async(std::launch::async, &this_t::execute_first,
0192                             this);
0193         };
0194         for (uint32_t i = 0; i < nthread; ++i)
0195             vfuture[i].get();
0196     };
0197     //
0198     //-----------------------------------------------------------------------
0199     //  function : final merge
0200     /// @brief Implement the final merge of the ranges
0201     //-----------------------------------------------------------------------
0202     void final_merge(void)
0203     { //---------------------------------- begin --------------------------
0204         njob = 0;
0205 
0206         for (uint32_t i = 0; i < nthread; ++i)
0207         {
0208             vfuture[i] = std::async(std::launch::async, &this_t::execute, this);
0209         };
0210         for (uint32_t i = 0; i < nthread; ++i)
0211             vfuture[i].get();
0212     };
0213     //----------------------------------------------------------------------------
0214 };
0215 //                    End class sample_sort
0216 //----------------------------------------------------------------------------
0217 //
0218 //############################################################################
0219 //                                                                          ##
0220 //              N O N    I N L I N E      F U N C T I O N S                 ##
0221 //                                                                          ##
0222 //                                                                          ##
0223 //############################################################################
0224 //
0225 //-----------------------------------------------------------------------------
0226 //  function : sample_sort
0227 /// @brief constructor of the class
0228 ///
0229 /// @param first : iterator to the first element of the range to sort
0230 /// @param last : iterator after the last element to the range to sort
0231 /// @param cmp : object for to compare two elements pointed by Iter_t iterators
0232 /// @param num_thread : Number of threads to use in the process. When this value
0233 ///                     is lower than 2, the sorting is done with 1 thread
0234 /// @param paux : pointer to the auxiliary memory. If nullptr, the memory is
0235 ///               created inside the class
0236 /// @param naux : number of elements of the memory pointed by paux
0237 //-----------------------------------------------------------------------------
0238 template<class Iter_t, typename Compare>
0239 sample_sort<Iter_t, Compare>
0240 ::sample_sort (Iter_t first, Iter_t last, Compare cmp, uint32_t num_thread,
0241                value_t *paux, size_t naux)
0242 : nthread(num_thread), owner(false), comp(cmp), global_range(first, last),
0243   global_buf(nullptr, nullptr), error(false)
0244 {
0245     assert((last - first) >= 0);
0246     size_t nelem = size_t(last - first);
0247     construct = false;
0248     njob = 0;
0249     vfuture.resize(nthread);
0250 
0251     // Adjust when have many threads and only a few elements
0252     while (nelem > thread_min and (nthread * nthread) > (nelem >> 3))
0253     {
0254         nthread /= 2;
0255     };
0256     ninterval = (nthread << 3);
0257 
0258     if (nthread < 2 or nelem <= (thread_min))
0259     {
0260         bss::spinsort<Iter_t, Compare>(first, last, comp);
0261         return;
0262     };
0263 
0264     //------------------- check if sort --------------------------------------
0265     bool sw = true;
0266     for (Iter_t it1 = first, it2 = first + 1;
0267                     it2 != last and (sw = not comp(*it2, *it1)); it1 = it2++);
0268     if (sw) return;
0269 
0270     //------------------- check if reverse sort ---------------------------
0271     sw = true;
0272     for (Iter_t it1 = first, it2 = first + 1;
0273                     it2 != last and (sw = comp(*it2, *it1)); it1 = it2++);
0274     if (sw)
0275     {
0276     using std::swap;
0277         size_t nelem2 = nelem >> 1;
0278         Iter_t it1 = first, it2 = last - 1;
0279         for (size_t i = 0; i < nelem2; ++i)
0280             swap(*(it1++), *(it2--));
0281         return;
0282     };
0283 
0284     if (paux != nullptr)
0285     {
0286         assert(naux != 0);
0287         global_buf.first = paux;
0288         global_buf.last = paux + naux;
0289         owner = false;
0290     }
0291     else
0292     {
0293         value_t * ptr = reinterpret_cast <value_t*>
0294                         (std::malloc (nelem * sizeof(value_t)));
0295 
0296         if (ptr == nullptr) throw std::bad_alloc();
0297         owner = true;
0298         global_buf = range_buf(ptr, ptr + nelem);
0299     };
0300     //------------------------------------------------------------------------
0301     //                    PROCESS
0302     //------------------------------------------------------------------------
0303     try
0304     {
0305         initial_configuration();
0306     } catch (std::bad_alloc &)
0307     {
0308         error = true;
0309     };
0310     if (not error)
0311     {
0312         first_merge();
0313         construct = true;
0314         final_merge();
0315     };
0316     if (error)
0317     {
0318         destroy_all();
0319         throw std::bad_alloc();
0320     };
0321 }
0322 ;
0323 //
0324 //-----------------------------------------------------------------------------
0325 //  function : destroy_all
0326 /// @brief destructor of the class. The utility is to destroy the temporary
0327 ///        buffer used in the sorting process
0328 //-----------------------------------------------------------------------------
0329 template<class Iter_t, typename Compare>
0330 void sample_sort<Iter_t, Compare>::destroy_all(void)
0331 {
0332     if (construct)
0333     {
0334         destroy(global_buf);
0335         construct = false;
0336     }
0337     if (global_buf.first != nullptr and owner)
0338         std::free(global_buf.first);
0339 }
0340 //
0341 //-----------------------------------------------------------------------------
0342 //  function : initial_configuration
0343 /// @brief Create the internal data structures, and obtain the inital set of
0344 ///        ranges to merge
0345 //-----------------------------------------------------------------------------
0346 template<class Iter_t, typename Compare>
0347 void sample_sort<Iter_t, Compare>::initial_configuration(void)
0348 {
0349     std::vector<range_it> vmem_thread;
0350     std::vector<range_buf> vbuf_thread;
0351     size_t nelem = global_range.size();
0352 
0353     //------------------------------------------------------------------------
0354     size_t cupo = nelem / nthread;
0355     Iter_t it_first = global_range.first;
0356     value_t *buf_first = global_buf.first;
0357     vmem_thread.reserve(nthread + 1);
0358     vbuf_thread.reserve(nthread + 1);
0359 
0360     for (uint32_t i = 0; i < (nthread - 1); ++i, it_first += cupo, buf_first +=
0361                     cupo)
0362     {
0363         vmem_thread.emplace_back(it_first, it_first + cupo);
0364         vbuf_thread.emplace_back(buf_first, buf_first + cupo);
0365     };
0366 
0367     vmem_thread.emplace_back(it_first, global_range.last);
0368     vbuf_thread.emplace_back(buf_first, global_buf.last);
0369 
0370     //------------------------------------------------------------------------
0371     // Sorting of the ranges
0372     //------------------------------------------------------------------------
0373     std::vector<std::future<void>> vfuture(nthread);
0374 
0375     for (uint32_t i = 0; i < nthread; ++i)
0376     {
0377         auto func = [this, &vmem_thread, i,  &vbuf_thread]()
0378         {
0379             bss::spinsort<Iter_t, Compare> (vmem_thread[i].first,
0380                             vmem_thread[i].last, this->comp,
0381                             vbuf_thread[i]);
0382         };
0383         vfuture[i] = std::async(std::launch::async, func);
0384     };
0385 
0386     for (uint32_t i = 0; i < nthread; ++i)
0387         vfuture[i].get();
0388 
0389     //------------------------------------------------------------------------
0390     // Obtain the vector of milestones
0391     //------------------------------------------------------------------------
0392     std::vector<Iter_t> vsample;
0393     vsample.reserve(nthread * (ninterval - 1));
0394 
0395     for (uint32_t i = 0; i < nthread; ++i)
0396     {
0397         size_t distance = vmem_thread[i].size() / ninterval;
0398         for (size_t j = 1, pos = distance; j < ninterval; ++j, pos += distance)
0399         {
0400             vsample.push_back(vmem_thread[i].first + pos);
0401         };
0402     };
0403     typedef less_ptr_no_null<Iter_t, Compare> compare_ptr;
0404     typedef typename std::vector<Iter_t>::iterator it_to_it;
0405 
0406     bss::spinsort<it_to_it, compare_ptr>(vsample.begin(), vsample.end(),
0407                     compare_ptr(comp));
0408 
0409     //------------------------------------------------------------------------
0410     // Create the final milestone vector
0411     //------------------------------------------------------------------------
0412     std::vector<Iter_t> vmilestone;
0413     vmilestone.reserve(ninterval);
0414 
0415     for (uint32_t pos = nthread >> 1; pos < vsample.size(); pos += nthread)
0416     {
0417         vmilestone.push_back(vsample[pos]);
0418     };
0419 
0420     //------------------------------------------------------------------------
0421     // Creation of the first vector of ranges
0422     //------------------------------------------------------------------------
0423     std::vector<std::vector<range<Iter_t>>>vv_range_first (nthread);
0424 
0425     for (uint32_t i = 0; i < nthread; ++i)
0426     {
0427         Iter_t itaux = vmem_thread[i].first;
0428 
0429         for (uint32_t k = 0; k < (ninterval - 1); ++k)
0430         {
0431             Iter_t it2 = std::upper_bound(itaux, vmem_thread[i].last,
0432                             *vmilestone[k], comp);
0433 
0434             vv_range_first[i].emplace_back(itaux, it2);
0435             itaux = it2;
0436         };
0437         vv_range_first[i].emplace_back(itaux, vmem_thread[i].last);
0438     };
0439 
0440     //------------------------------------------------------------------------
0441     // Copy in buffer and  creation of the final matrix of ranges
0442     //------------------------------------------------------------------------
0443     vv_range_it.resize(ninterval);
0444     vv_range_buf.resize(ninterval);
0445     vrange_it_ini.reserve(ninterval);
0446     vrange_buf_ini.reserve(ninterval);
0447 
0448     for (uint32_t i = 0; i < ninterval; ++i)
0449     {
0450         vv_range_it[i].reserve(nthread);
0451         vv_range_buf[i].reserve(nthread);
0452     };
0453 
0454     Iter_t it = global_range.first;
0455     value_t *it_buf = global_buf.first;
0456 
0457     for (uint32_t k = 0; k < ninterval; ++k)
0458     {
0459         size_t nelem_interval = 0;
0460 
0461         for (uint32_t i = 0; i < nthread; ++i)
0462         {
0463             size_t nelem_range = vv_range_first[i][k].size();
0464             if (nelem_range != 0)
0465             {
0466                 vv_range_it[k].push_back(vv_range_first[i][k]);
0467             };
0468             nelem_interval += nelem_range;
0469         };
0470 
0471         vrange_it_ini.emplace_back(it, it + nelem_interval);
0472         vrange_buf_ini.emplace_back(it_buf, it_buf + nelem_interval);
0473 
0474         it += nelem_interval;
0475         it_buf += nelem_interval;
0476     };
0477 }
0478 ;
0479 //
0480 //****************************************************************************
0481 }
0482 ;
0483 //    End namespace sample_detail
0484 //****************************************************************************
0485 //
0486 namespace bscu = boost::sort::common::util;
0487 //
0488 //############################################################################
0489 //                                                                          ##
0490 //                                                                          ##
0491 //                       S A M P L E _ S O R T                              ##
0492 //                                                                          ##
0493 //                                                                          ##
0494 //############################################################################
0495 //
0496 //-----------------------------------------------------------------------------
0497 //  function : sample_sort
0498 /// @brief parallel sample sort  algorithm (stable sort)
0499 ///
0500 /// @param first : iterator to the first element of the range to sort
0501 /// @param last : iterator after the last element to the range to sort
0502 //-----------------------------------------------------------------------------
0503 template<class Iter_t>
0504 void sample_sort(Iter_t first, Iter_t last)
0505 {
0506     typedef compare_iter<Iter_t> Compare;
0507     sample_detail::sample_sort<Iter_t, Compare>(first, last);
0508 };
0509 //
0510 //-----------------------------------------------------------------------------
0511 //  function : sample_sort
0512 /// @brief parallel sample sort  algorithm (stable sort)
0513 ///
0514 /// @param first : iterator to the first element of the range to sort
0515 /// @param last : iterator after the last element to the range to sort
0516 /// @param nthread : Number of threads to use in the process. When this value
0517 ///                  is lower than 2, the sorting is done with 1 thread
0518 //-----------------------------------------------------------------------------
0519 template<class Iter_t>
0520 void sample_sort(Iter_t first, Iter_t last, uint32_t nthread)
0521 {
0522     typedef compare_iter<Iter_t> Compare;
0523     sample_detail::sample_sort<Iter_t, Compare>(first, last, nthread);
0524 };
0525 //
0526 //-----------------------------------------------------------------------------
0527 //  function : sample_sort
0528 /// @brief parallel sample sort  algorithm (stable sort)
0529 ///
0530 /// @param first : iterator to the first element of the range to sort
0531 /// @param last : iterator after the last element to the range to sort
0532 /// @param comp : object for to compare two elements pointed by Iter_t
0533 ///               iterators
0534 //-----------------------------------------------------------------------------
0535 template<class Iter_t, class Compare, bscu::enable_if_not_integral<Compare> * =
0536                 nullptr>
0537 void sample_sort(Iter_t first, Iter_t last, Compare comp)
0538 {
0539     sample_detail::sample_sort<Iter_t, Compare>(first, last, comp);
0540 };
0541 //
0542 //-----------------------------------------------------------------------------
0543 //  function : sample_sort
0544 /// @brief parallel sample sort  algorithm (stable sort)
0545 ///
0546 /// @param first : iterator to the first element of the range to sort
0547 /// @param last : iterator after the last element to the range to sort
0548 /// @param comp : object for to compare two elements pointed by Iter_t
0549 ///               iterators
0550 /// @param nthread : Number of threads to use in the process. When this value
0551 ///                  is lower than 2, the sorting is done with 1 thread
0552 //-----------------------------------------------------------------------------
0553 template<class Iter_t, class Compare>
0554 void sample_sort(Iter_t first, Iter_t last, Compare comp, uint32_t nthread)
0555 {
0556     sample_detail::sample_sort<Iter_t, Compare>(first, last, comp, nthread);
0557 };
0558 //
0559 //****************************************************************************
0560 };//    End namespace sort
0561 };//    End namespace boost
0562 //****************************************************************************
0563 //
0564 #endif