Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 09:32:54

0001 /* 
0002    Copyright (c) Marshall Clow 2010-2012.
0003 
0004    Distributed under the Boost Software License, Version 1.0. (See accompanying
0005    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 
0007     For more information, see http://www.boost.org
0008 */
0009 
0010 #ifndef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
0011 #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
0012 
0013 #include <vector>
0014 #include <iterator>     // for std::iterator_traits
0015 
0016 #include <boost/config.hpp>
0017 #include <boost/assert.hpp>
0018 #include <boost/static_assert.hpp>
0019 
0020 #include <boost/range/begin.hpp>
0021 #include <boost/range/end.hpp>
0022 
0023 #include <boost/core/enable_if.hpp>
0024 #include <boost/type_traits/is_same.hpp>
0025 
0026 #include <boost/algorithm/searching/detail/debugging.hpp>
0027 
0028 // #define  BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
0029 
0030 namespace boost { namespace algorithm {
0031 
0032 // #define  NEW_KMP
0033 
0034 /*
0035     A templated version of the Knuth-Morris-Pratt searching algorithm.
0036     
0037     Requirements:
0038         * Random-access iterators
0039         * The two iterator types (I1 and I2) must "point to" the same underlying type.
0040 
0041     http://en.wikipedia.org/wiki/Knuth-Morris-Pratt_algorithm
0042     http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm
0043 */
0044 
0045     template <typename patIter>
0046     class knuth_morris_pratt {
0047         typedef typename std::iterator_traits<patIter>::difference_type difference_type;
0048     public:
0049         knuth_morris_pratt ( patIter first, patIter last ) 
0050                 : pat_first ( first ), pat_last ( last ), 
0051                   k_pattern_length ( std::distance ( pat_first, pat_last )),
0052                   skip_ ( k_pattern_length + 1 ) {
0053 #ifdef NEW_KMP
0054             preKmp ( pat_first, pat_last );
0055 #else
0056             init_skip_table ( pat_first, pat_last );
0057 #endif
0058 #ifdef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
0059             detail::PrintTable ( skip_.begin (), skip_.end ());
0060 #endif
0061             }
0062             
0063         ~knuth_morris_pratt () {}
0064         
0065         /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
0066         /// \brief Searches the corpus for the pattern that was passed into the constructor
0067         /// 
0068         /// \param corpus_first The start of the data to search (Random Access Iterator)
0069         /// \param corpus_last  One past the end of the data to search
0070         /// \param p            A predicate used for the search comparisons.
0071         ///
0072         template <typename corpusIter>
0073         std::pair<corpusIter, corpusIter>
0074         operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
0075             BOOST_STATIC_ASSERT (( boost::is_same<
0076                 typename std::iterator_traits<patIter>::value_type, 
0077                 typename std::iterator_traits<corpusIter>::value_type>::value ));
0078 
0079             if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last);   // if nothing to search, we didn't find it!
0080             if (    pat_first ==    pat_last ) return std::make_pair(corpus_first, corpus_first); // empty pattern matches at start
0081 
0082             const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
0083         //  If the pattern is larger than the corpus, we can't find it!
0084             if ( k_corpus_length < k_pattern_length ) 
0085                 return std::make_pair(corpus_last, corpus_last);
0086 
0087             return do_search ( corpus_first, corpus_last, k_corpus_length );
0088             }
0089     
0090         template <typename Range>
0091         std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type>
0092         operator () ( Range &r ) const {
0093             return (*this) (boost::begin(r), boost::end(r));
0094             }
0095 
0096     private:
0097 /// \cond DOXYGEN_HIDE
0098         patIter pat_first, pat_last;
0099         const difference_type k_pattern_length;
0100         std::vector <difference_type> skip_;
0101 
0102         /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last, Pred p )
0103         /// \brief Searches the corpus for the pattern that was passed into the constructor
0104         /// 
0105         /// \param corpus_first The start of the data to search (Random Access Iterator)
0106         /// \param corpus_last  One past the end of the data to search
0107         /// \param p            A predicate used for the search comparisons.
0108         ///
0109         template <typename corpusIter>
0110         std::pair<corpusIter, corpusIter>
0111         do_search ( corpusIter corpus_first, corpusIter corpus_last, 
0112                                                 difference_type k_corpus_length ) const {
0113             difference_type match_start = 0;  // position in the corpus that we're matching
0114             
0115 #ifdef NEW_KMP
0116             int patternIdx = 0;
0117             while ( match_start < k_corpus_length ) {
0118                 while ( patternIdx > -1 && pat_first[patternIdx] != corpus_first [match_start] )
0119                     patternIdx = skip_ [patternIdx]; //<--- Shifting the pattern on mismatch
0120 
0121                 patternIdx++;
0122                 match_start++; //<--- corpus is always increased by 1
0123 
0124                 if ( patternIdx >= (int) k_pattern_length )
0125                     return corpus_first + match_start - patternIdx;
0126                 }
0127             
0128 #else
0129 //  At this point, we know:
0130 //          k_pattern_length <= k_corpus_length
0131 //          for all elements of skip, it holds -1 .. k_pattern_length
0132 //      
0133 //          In the loop, we have the following invariants
0134 //              idx is in the range 0 .. k_pattern_length
0135 //              match_start is in the range 0 .. k_corpus_length - k_pattern_length + 1
0136 
0137             const difference_type last_match = k_corpus_length - k_pattern_length;
0138             difference_type idx = 0;          // position in the pattern we're comparing
0139 
0140             while ( match_start <= last_match ) {
0141                 while ( pat_first [ idx ] == corpus_first [ match_start + idx ] ) {
0142                     if ( ++idx == k_pattern_length )
0143                         return std::make_pair(corpus_first + match_start, corpus_first + match_start + k_pattern_length);
0144                     }
0145             //  Figure out where to start searching again
0146            //   assert ( idx - skip_ [ idx ] > 0 ); // we're always moving forward
0147                 match_start += idx - skip_ [ idx ];
0148                 idx = skip_ [ idx ] >= 0 ? skip_ [ idx ] : 0;
0149            //   assert ( idx >= 0 && idx < k_pattern_length );
0150                 }
0151 #endif
0152                 
0153         //  We didn't find anything
0154             return std::make_pair(corpus_last, corpus_last);
0155             }
0156     
0157 
0158         void preKmp ( patIter first, patIter last ) {
0159            const difference_type count = std::distance ( first, last );
0160         
0161            difference_type i, j;
0162         
0163            i = 0;
0164            j = skip_[0] = -1;
0165            while (i < count) {
0166               while (j > -1 && first[i] != first[j])
0167                  j = skip_[j];
0168               i++;
0169               j++;
0170               if (first[i] == first[j])
0171                  skip_[i] = skip_[j];
0172               else
0173                  skip_[i] = j;
0174            }
0175         }
0176 
0177 
0178         void init_skip_table ( patIter first, patIter last ) {
0179             const difference_type count = std::distance ( first, last );
0180     
0181             difference_type j;
0182             skip_ [ 0 ] = -1;
0183             for ( int i = 1; i <= count; ++i ) {
0184                 j = skip_ [ i - 1 ];
0185                 while ( j >= 0 ) {
0186                     if ( first [ j ] == first [ i - 1 ] )
0187                         break;
0188                     j = skip_ [ j ];
0189                     }
0190                 skip_ [ i ] = j + 1;
0191                 }
0192             }
0193 // \endcond
0194         };
0195 
0196 
0197 /*  Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
0198     Use a bit of TMP to disambiguate the 3-argument templates */
0199 
0200 /// \fn knuth_morris_pratt_search ( corpusIter corpus_first, corpusIter corpus_last, 
0201 ///       patIter pat_first, patIter pat_last )
0202 /// \brief Searches the corpus for the pattern.
0203 /// 
0204 /// \param corpus_first The start of the data to search (Random Access Iterator)
0205 /// \param corpus_last  One past the end of the data to search
0206 /// \param pat_first    The start of the pattern to search for (Random Access Iterator)
0207 /// \param pat_last     One past the end of the data to search for
0208 ///
0209     template <typename patIter, typename corpusIter>
0210     std::pair<corpusIter, corpusIter> knuth_morris_pratt_search ( 
0211                   corpusIter corpus_first, corpusIter corpus_last, 
0212                   patIter pat_first, patIter pat_last )
0213     {
0214         knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
0215         return kmp ( corpus_first, corpus_last );
0216     }
0217 
0218     template <typename PatternRange, typename corpusIter>
0219     std::pair<corpusIter, corpusIter> knuth_morris_pratt_search ( 
0220         corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
0221     {
0222         typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
0223         knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
0224         return kmp ( corpus_first, corpus_last );
0225     }
0226     
0227     template <typename patIter, typename CorpusRange>
0228     typename boost::disable_if_c<
0229         boost::is_same<CorpusRange, patIter>::value, 
0230         std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> >
0231     ::type
0232     knuth_morris_pratt_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
0233     {
0234         knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
0235         return kmp (boost::begin (corpus), boost::end (corpus));
0236     }
0237     
0238     template <typename PatternRange, typename CorpusRange>
0239     std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type>
0240     knuth_morris_pratt_search ( CorpusRange &corpus, const PatternRange &pattern )
0241     {
0242         typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
0243         knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
0244         return kmp (boost::begin (corpus), boost::end (corpus));
0245     }
0246 
0247 
0248     //  Creator functions -- take a pattern range, return an object
0249     template <typename Range>
0250     boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<const Range>::type>
0251     make_knuth_morris_pratt ( const Range &r ) {
0252         return boost::algorithm::knuth_morris_pratt
0253             <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
0254         }
0255     
0256     template <typename Range>
0257     boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<Range>::type>
0258     make_knuth_morris_pratt ( Range &r ) {
0259         return boost::algorithm::knuth_morris_pratt
0260             <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
0261         }
0262 }}
0263 
0264 #endif  // BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP