Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:30

0001 #pragma once
0002 /**
0003 s_unique.h : similar to np.unique
0004 ===================================
0005 
0006 **/
0007 
0008 #include <vector>
0009 #include <unordered_map>
0010 #include <algorithm>
0011 #include <numeric>
0012 
0013 #include <string>
0014 #include <sstream>
0015 #include <iomanip>
0016 #include <cassert>
0017 
0018 
0019 /**
0020 s_unique
0021 ----------
0022 
0023 Loosely following the NumPy np.unique API prepare unique value table.
0024 
0025 uvals
0026     unique values
0027 first, last
0028     iterators for input values
0029 count
0030     number of times each of the unique values appears
0031 order
0032     indices into uvals giving descending count order
0033 index
0034     indices into original values of first occurence, same length as uvals
0035 inverse
0036     indices into the uvals that reconstruct the original values, same length as original
0037 original
0038      original values provided from the iterator
0039 
0040 
0041 Started from:
0042 
0043 * https://stackoverflow.com/questions/70868307/c-equivalent-of-numpy-unique-on-stdvector-with-return-index-and-return-inver
0044 
0045 **/
0046 
0047 
0048 template<class T, class Iterator>
0049 inline void s_unique(
0050      std::vector<T>& uvals,
0051      Iterator first,
0052      Iterator last,
0053      std::vector<std::size_t>* count=nullptr,
0054      std::vector<std::size_t>* order=nullptr,
0055      std::vector<std::size_t>* index=nullptr,
0056      std::vector<std::size_t>* inverse=nullptr,
0057      std::vector<T>* original=nullptr
0058 )
0059 {
0060     using index_map = std::unordered_map<T, std::size_t>;  // T value and count
0061     using map_iter = typename index_map::iterator;
0062     using map_value = typename index_map::value_type;
0063 
0064     // clear optional output vectors
0065     for(std::vector<std::size_t>* arg: {count, order, index, inverse}) if(arg) arg->clear();
0066 
0067     index_map map;
0068 
0069     std::size_t cur_idx = 0;
0070     for(Iterator i = first; i != last; ++cur_idx, ++i)
0071     {
0072         const T value = *i ;
0073         const std::pair<map_iter, bool> inserted = map.try_emplace(value, uvals.size());
0074 
0075         const bool& is_first_such_value = inserted.second ;
0076         map_value& ival = *inserted.first;
0077 
0078         if(is_first_such_value)
0079         {
0080             uvals.push_back(ival.first);
0081             if(index) index->push_back(cur_idx);
0082             if(count) count->push_back(1);
0083         }
0084         else
0085         {
0086             if(count) (*count)[ival.second] += 1;
0087         }
0088         if(inverse)  inverse->push_back(ival.second);
0089         if(original) original->push_back(value);
0090     }
0091 
0092     if(count && order)
0093     {
0094         // prep indices and sort them into descending count order
0095         order->resize(count->size()) ;
0096         std::iota(order->begin(), order->end(), 0);
0097         const std::vector<std::size_t>& cn = *count ;
0098         auto descending = [&cn](const std::size_t& a, const std::size_t &b) { return cn[a] > cn[b];}  ;
0099         std::sort(order->begin(), order->end(), descending );
0100     }
0101 }
0102 
0103 
0104 
0105 /**
0106 s_unique_desc
0107 ---------------
0108 
0109 Presents unique value table with labels provided by unams.
0110 The table is ordered in descending count order.
0111 
0112 **/
0113 
0114 template<class T>
0115 inline std::string s_unique_desc(
0116      const std::vector<T>& uvals,
0117      const std::vector<std::string>* unams=nullptr,
0118      std::vector<std::size_t>* count=nullptr,
0119      std::vector<std::size_t>* order=nullptr,
0120      std::vector<std::size_t>* index=nullptr,
0121      std::vector<std::size_t>* inverse=nullptr,
0122      std::vector<T>* original=nullptr,
0123      int w = 6,
0124      bool check=false
0125 )
0126 {
0127     std::size_t num_uvals = uvals.size() ;
0128     std::size_t num_unams = unams ? unams->size() : 0 ;
0129     std::size_t num_count = count ? count->size() : 0 ;
0130     std::size_t num_order = order ? order->size() : 0 ;
0131     std::size_t num_index = index ? index->size() : 0 ;
0132     std::size_t num_inverse = inverse ? inverse->size() : 0  ;
0133     std::size_t num_original = original ? original->size() : 0  ;
0134 
0135     if(num_count > 0) assert( num_uvals == num_count ) ;
0136     if(num_order > 0) assert( num_uvals == num_order ) ;
0137     if(num_index > 0) assert( num_uvals == num_index ) ;
0138 
0139     std::stringstream ss ;
0140     ss << "[s_unique_desc\n" ;
0141     ss << std::setw(15) << " num_uvals "    << std::setw(8) << num_uvals << "\n" ;
0142     ss << std::setw(15) << " num_unams "    << std::setw(8) << num_unams << "\n" ;
0143     ss << std::setw(15) << " num_count "    << std::setw(8) << num_count << "\n" ;
0144     ss << std::setw(15) << " num_order "    << std::setw(8) << num_order << "\n" ;
0145     ss << std::setw(15) << " num_index "    << std::setw(8) << num_index << "\n" ;
0146     ss << std::setw(15) << " num_inverse "  << std::setw(8) << num_inverse << "\n" ;
0147     ss << std::setw(15) << " num_original " << std::setw(8) << num_original << "\n" ;
0148 
0149     std::size_t cn_total = 0 ;
0150     for(std::size_t u=0 ; u < num_uvals ; u++ )
0151     {
0152         std::size_t i = order == nullptr ? u : (*order)[u] ;
0153 
0154         T uval = uvals[i] ;
0155         std::string un = unams ? (*unams)[uval] : "-" ;
0156 
0157         std::size_t cn = count ? (*count)[i] : 0 ;
0158         std::size_t ix = index ? (*index)[i] : 0 ;
0159         if(cn > 0)  cn_total += cn ;
0160 
0161         ss             << " uv " << std::setw(w) << uval << " : " ;
0162         if(count)   ss << " cn " << std::setw(w) << cn << " " ;
0163         if(index)   ss << " ix " << std::setw(w) << ix << " " ;
0164         ss << " un " << un << "\n" ;
0165     }
0166     ss << "    " << std::setw(w) << " " << "   " ;
0167     if(count)     ss << " TOT" << std::setw(w) << cn_total << "\n" ;
0168 
0169     ss << "\n" ;
0170     if( check && num_inverse > 0 && num_original > 0)
0171     {
0172         assert( num_inverse == num_original ) ;
0173         for(std::size_t i=0 ; i < num_original ; i++ )
0174         {
0175             std::size_t uidx = (*inverse)[i] ;  // index into uvals to reproduce original
0176 
0177             const T value = (*original)[i]  ;
0178             const T lookup = uvals[uidx] ;
0179             bool match = value == lookup ;
0180 
0181             ss << " value " << std::setw(8) << value << " : " ;
0182             ss << " uidx  " << std::setw(8) << uidx  << " " ;
0183             ss << " lookup " << std::setw(8) << lookup  << " " ;
0184             ss << " match " << ( match ? "YES" : "NO " ) ;
0185             ss << "\n" ;
0186         }
0187     }
0188 
0189     ss << "]s_unique_desc\n" ;
0190     std::string str = ss.str() ;
0191     return str ;
0192 }
0193