File indexing completed on 2025-01-30 09:32:54
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #ifndef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
0011 #define BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_SEARCH_HPP
0012
0013 #include <vector>
0014 #include <iterator> // for std::iterator_traits
0015
0016 #include <boost/config.hpp>
0017 #include <boost/assert.hpp>
0018 #include <boost/static_assert.hpp>
0019
0020 #include <boost/range/begin.hpp>
0021 #include <boost/range/end.hpp>
0022
0023 #include <boost/core/enable_if.hpp>
0024 #include <boost/type_traits/is_same.hpp>
0025
0026 #include <boost/algorithm/searching/detail/debugging.hpp>
0027
0028
0029
0030 namespace boost { namespace algorithm {
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
0044
0045 template <typename patIter>
0046 class knuth_morris_pratt {
0047 typedef typename std::iterator_traits<patIter>::difference_type difference_type;
0048 public:
0049 knuth_morris_pratt ( patIter first, patIter last )
0050 : pat_first ( first ), pat_last ( last ),
0051 k_pattern_length ( std::distance ( pat_first, pat_last )),
0052 skip_ ( k_pattern_length + 1 ) {
0053 #ifdef NEW_KMP
0054 preKmp ( pat_first, pat_last );
0055 #else
0056 init_skip_table ( pat_first, pat_last );
0057 #endif
0058 #ifdef BOOST_ALGORITHM_KNUTH_MORRIS_PRATT_DEBUG
0059 detail::PrintTable ( skip_.begin (), skip_.end ());
0060 #endif
0061 }
0062
0063 ~knuth_morris_pratt () {}
0064
0065
0066
0067
0068
0069
0070
0071
0072 template <typename corpusIter>
0073 std::pair<corpusIter, corpusIter>
0074 operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
0075 BOOST_STATIC_ASSERT (( boost::is_same<
0076 typename std::iterator_traits<patIter>::value_type,
0077 typename std::iterator_traits<corpusIter>::value_type>::value ));
0078
0079 if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last);
0080 if ( pat_first == pat_last ) return std::make_pair(corpus_first, corpus_first);
0081
0082 const difference_type k_corpus_length = std::distance ( corpus_first, corpus_last );
0083
0084 if ( k_corpus_length < k_pattern_length )
0085 return std::make_pair(corpus_last, corpus_last);
0086
0087 return do_search ( corpus_first, corpus_last, k_corpus_length );
0088 }
0089
0090 template <typename Range>
0091 std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type>
0092 operator () ( Range &r ) const {
0093 return (*this) (boost::begin(r), boost::end(r));
0094 }
0095
0096 private:
0097
0098 patIter pat_first, pat_last;
0099 const difference_type k_pattern_length;
0100 std::vector <difference_type> skip_;
0101
0102
0103
0104
0105
0106
0107
0108
0109 template <typename corpusIter>
0110 std::pair<corpusIter, corpusIter>
0111 do_search ( corpusIter corpus_first, corpusIter corpus_last,
0112 difference_type k_corpus_length ) const {
0113 difference_type match_start = 0;
0114
0115 #ifdef NEW_KMP
0116 int patternIdx = 0;
0117 while ( match_start < k_corpus_length ) {
0118 while ( patternIdx > -1 && pat_first[patternIdx] != corpus_first [match_start] )
0119 patternIdx = skip_ [patternIdx];
0120
0121 patternIdx++;
0122 match_start++;
0123
0124 if ( patternIdx >= (int) k_pattern_length )
0125 return corpus_first + match_start - patternIdx;
0126 }
0127
0128 #else
0129
0130
0131
0132
0133
0134
0135
0136
0137 const difference_type last_match = k_corpus_length - k_pattern_length;
0138 difference_type idx = 0;
0139
0140 while ( match_start <= last_match ) {
0141 while ( pat_first [ idx ] == corpus_first [ match_start + idx ] ) {
0142 if ( ++idx == k_pattern_length )
0143 return std::make_pair(corpus_first + match_start, corpus_first + match_start + k_pattern_length);
0144 }
0145
0146
0147 match_start += idx - skip_ [ idx ];
0148 idx = skip_ [ idx ] >= 0 ? skip_ [ idx ] : 0;
0149
0150 }
0151 #endif
0152
0153
0154 return std::make_pair(corpus_last, corpus_last);
0155 }
0156
0157
0158 void preKmp ( patIter first, patIter last ) {
0159 const difference_type count = std::distance ( first, last );
0160
0161 difference_type i, j;
0162
0163 i = 0;
0164 j = skip_[0] = -1;
0165 while (i < count) {
0166 while (j > -1 && first[i] != first[j])
0167 j = skip_[j];
0168 i++;
0169 j++;
0170 if (first[i] == first[j])
0171 skip_[i] = skip_[j];
0172 else
0173 skip_[i] = j;
0174 }
0175 }
0176
0177
0178 void init_skip_table ( patIter first, patIter last ) {
0179 const difference_type count = std::distance ( first, last );
0180
0181 difference_type j;
0182 skip_ [ 0 ] = -1;
0183 for ( int i = 1; i <= count; ++i ) {
0184 j = skip_ [ i - 1 ];
0185 while ( j >= 0 ) {
0186 if ( first [ j ] == first [ i - 1 ] )
0187 break;
0188 j = skip_ [ j ];
0189 }
0190 skip_ [ i ] = j + 1;
0191 }
0192 }
0193
0194 };
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209 template <typename patIter, typename corpusIter>
0210 std::pair<corpusIter, corpusIter> knuth_morris_pratt_search (
0211 corpusIter corpus_first, corpusIter corpus_last,
0212 patIter pat_first, patIter pat_last )
0213 {
0214 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
0215 return kmp ( corpus_first, corpus_last );
0216 }
0217
0218 template <typename PatternRange, typename corpusIter>
0219 std::pair<corpusIter, corpusIter> knuth_morris_pratt_search (
0220 corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
0221 {
0222 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
0223 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
0224 return kmp ( corpus_first, corpus_last );
0225 }
0226
0227 template <typename patIter, typename CorpusRange>
0228 typename boost::disable_if_c<
0229 boost::is_same<CorpusRange, patIter>::value,
0230 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> >
0231 ::type
0232 knuth_morris_pratt_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
0233 {
0234 knuth_morris_pratt<patIter> kmp ( pat_first, pat_last );
0235 return kmp (boost::begin (corpus), boost::end (corpus));
0236 }
0237
0238 template <typename PatternRange, typename CorpusRange>
0239 std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type>
0240 knuth_morris_pratt_search ( CorpusRange &corpus, const PatternRange &pattern )
0241 {
0242 typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
0243 knuth_morris_pratt<pattern_iterator> kmp ( boost::begin(pattern), boost::end (pattern));
0244 return kmp (boost::begin (corpus), boost::end (corpus));
0245 }
0246
0247
0248
0249 template <typename Range>
0250 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<const Range>::type>
0251 make_knuth_morris_pratt ( const Range &r ) {
0252 return boost::algorithm::knuth_morris_pratt
0253 <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
0254 }
0255
0256 template <typename Range>
0257 boost::algorithm::knuth_morris_pratt<typename boost::range_iterator<Range>::type>
0258 make_knuth_morris_pratt ( Range &r ) {
0259 return boost::algorithm::knuth_morris_pratt
0260 <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
0261 }
0262 }}
0263
0264 #endif