Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-30 09:32:54

0001 /* 
0002    Copyright (c) Marshall Clow 2010-2012.
0003 
0004    Distributed under the Boost Software License, Version 1.0. (See accompanying
0005    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 
0007     For more information, see http://www.boost.org
0008 */
0009 
0010 #ifndef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
0011 #define BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP
0012 
0013 #include <iterator>     // for std::iterator_traits
0014 
0015 #include <boost/config.hpp>
0016 #include <boost/assert.hpp>
0017 #include <boost/static_assert.hpp>
0018 
0019 #include <boost/range/begin.hpp>
0020 #include <boost/range/end.hpp>
0021 
0022 #include <boost/core/enable_if.hpp>
0023 #include <boost/type_traits/is_same.hpp>
0024 
0025 #include <boost/algorithm/searching/detail/bm_traits.hpp>
0026 #include <boost/algorithm/searching/detail/debugging.hpp>
0027 
0028 // #define  BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
0029 
0030 namespace boost { namespace algorithm {
0031 
0032 /*
0033     A templated version of the boyer-moore-horspool searching algorithm.
0034     
0035     Requirements:
0036         * Random access iterators
0037         * The two iterator types (patIter and corpusIter) must 
0038             "point to" the same underlying type.
0039         * Additional requirements may be imposed buy the skip table, such as:
0040         ** Numeric type (array-based skip table)
0041         ** Hashable type (map-based skip table)
0042 
0043 http://www-igm.univ-mlv.fr/%7Elecroq/string/node18.html
0044 
0045 */
0046 
0047     template <typename patIter, typename traits = detail::BM_traits<patIter> >
0048     class boyer_moore_horspool {
0049         typedef typename std::iterator_traits<patIter>::difference_type difference_type;
0050     public:
0051         boyer_moore_horspool ( patIter first, patIter last ) 
0052                 : pat_first ( first ), pat_last ( last ),
0053                   k_pattern_length ( std::distance ( pat_first, pat_last )),
0054                   skip_ ( k_pattern_length, k_pattern_length ) {
0055                   
0056         //  Build the skip table
0057             std::size_t i = 0;
0058             if ( first != last )    // empty pattern?
0059                 for ( patIter iter = first; iter != last-1; ++iter, ++i )
0060                     skip_.insert ( *iter, k_pattern_length - 1 - i );
0061 #ifdef BOOST_ALGORITHM_BOYER_MOORE_HORSPOOL_DEBUG_HPP
0062             skip_.PrintSkipTable ();
0063 #endif
0064             }
0065             
0066         ~boyer_moore_horspool () {}
0067         
0068         /// \fn operator ( corpusIter corpus_first, corpusIter corpus_last)
0069         /// \brief Searches the corpus for the pattern that was passed into the constructor
0070         /// 
0071         /// \param corpus_first The start of the data to search (Random Access Iterator)
0072         /// \param corpus_last  One past the end of the data to search
0073         ///
0074         template <typename corpusIter>
0075         std::pair<corpusIter, corpusIter>
0076         operator () ( corpusIter corpus_first, corpusIter corpus_last ) const {
0077             BOOST_STATIC_ASSERT (( boost::is_same<
0078                 typename std::iterator_traits<patIter>::value_type, 
0079                 typename std::iterator_traits<corpusIter>::value_type>::value ));
0080 
0081             if ( corpus_first == corpus_last ) return std::make_pair(corpus_last, corpus_last);   // if nothing to search, we didn't find it!
0082             if (    pat_first ==    pat_last ) return std::make_pair(corpus_first, corpus_first); // empty pattern matches at start
0083 
0084             const difference_type k_corpus_length  = std::distance ( corpus_first, corpus_last );
0085         //  If the pattern is larger than the corpus, we can't find it!
0086             if ( k_corpus_length < k_pattern_length )
0087                 return std::make_pair(corpus_last, corpus_last);
0088     
0089         //  Do the search 
0090             return this->do_search ( corpus_first, corpus_last );
0091             }
0092             
0093         template <typename Range>
0094         std::pair<typename boost::range_iterator<Range>::type, typename boost::range_iterator<Range>::type>
0095         operator () ( Range &r ) const {
0096             return (*this) (boost::begin(r), boost::end(r));
0097             }
0098 
0099     private:
0100 /// \cond DOXYGEN_HIDE
0101         patIter pat_first, pat_last;
0102         const difference_type k_pattern_length;
0103         typename traits::skip_table_t skip_;
0104 
0105         /// \fn do_search ( corpusIter corpus_first, corpusIter corpus_last )
0106         /// \brief Searches the corpus for the pattern that was passed into the constructor
0107         /// 
0108         /// \param corpus_first The start of the data to search (Random Access Iterator)
0109         /// \param corpus_last  One past the end of the data to search
0110         /// \param k_corpus_length The length of the corpus to search
0111         ///
0112         template <typename corpusIter>
0113         std::pair<corpusIter, corpusIter>
0114         do_search ( corpusIter corpus_first, corpusIter corpus_last ) const {
0115             corpusIter curPos = corpus_first;
0116             const corpusIter lastPos = corpus_last - k_pattern_length;
0117             while ( curPos <= lastPos ) {
0118             //  Do we match right where we are?
0119                 std::size_t j = k_pattern_length - 1;
0120                 while ( pat_first [j] == curPos [j] ) {
0121                 //  We matched - we're done!
0122                     if ( j == 0 )
0123                         return std::make_pair(curPos, curPos + k_pattern_length);
0124                     j--;
0125                     }
0126         
0127                 curPos += skip_ [ curPos [ k_pattern_length - 1 ]];
0128                 }
0129             
0130             return std::make_pair(corpus_last, corpus_last);
0131             }
0132 // \endcond
0133         };
0134 
0135 /*  Two ranges as inputs gives us four possibilities; with 2,3,3,4 parameters
0136     Use a bit of TMP to disambiguate the 3-argument templates */
0137 
0138 /// \fn boyer_moore_horspool_search ( corpusIter corpus_first, corpusIter corpus_last, 
0139 ///       patIter pat_first, patIter pat_last )
0140 /// \brief Searches the corpus for the pattern.
0141 /// 
0142 /// \param corpus_first The start of the data to search (Random Access Iterator)
0143 /// \param corpus_last  One past the end of the data to search
0144 /// \param pat_first    The start of the pattern to search for (Random Access Iterator)
0145 /// \param pat_last     One past the end of the data to search for
0146 ///
0147     template <typename patIter, typename corpusIter>
0148     std::pair<corpusIter, corpusIter> boyer_moore_horspool_search ( 
0149                   corpusIter corpus_first, corpusIter corpus_last, 
0150                   patIter pat_first, patIter pat_last )
0151     {
0152         boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
0153         return bmh ( corpus_first, corpus_last );
0154     }
0155 
0156     template <typename PatternRange, typename corpusIter>
0157     std::pair<corpusIter, corpusIter> boyer_moore_horspool_search ( 
0158         corpusIter corpus_first, corpusIter corpus_last, const PatternRange &pattern )
0159     {
0160         typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
0161         boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern));
0162         return bmh ( corpus_first, corpus_last );
0163     }
0164     
0165     template <typename patIter, typename CorpusRange>
0166     typename boost::disable_if_c<
0167         boost::is_same<CorpusRange, patIter>::value, 
0168         std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type> >
0169     ::type
0170     boyer_moore_horspool_search ( CorpusRange &corpus, patIter pat_first, patIter pat_last )
0171     {
0172         boyer_moore_horspool<patIter> bmh ( pat_first, pat_last );
0173         return bm (boost::begin (corpus), boost::end (corpus));
0174     }
0175     
0176     template <typename PatternRange, typename CorpusRange>
0177     std::pair<typename boost::range_iterator<CorpusRange>::type, typename boost::range_iterator<CorpusRange>::type>
0178     boyer_moore_horspool_search ( CorpusRange &corpus, const PatternRange &pattern )
0179     {
0180         typedef typename boost::range_iterator<const PatternRange>::type pattern_iterator;
0181         boyer_moore_horspool<pattern_iterator> bmh ( boost::begin(pattern), boost::end (pattern));
0182         return bmh (boost::begin (corpus), boost::end (corpus));
0183     }
0184 
0185 
0186     //  Creator functions -- take a pattern range, return an object
0187     template <typename Range>
0188     boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<const Range>::type>
0189     make_boyer_moore_horspool ( const Range &r ) {
0190         return boost::algorithm::boyer_moore_horspool
0191             <typename boost::range_iterator<const Range>::type> (boost::begin(r), boost::end(r));
0192         }
0193     
0194     template <typename Range>
0195     boost::algorithm::boyer_moore_horspool<typename boost::range_iterator<Range>::type>
0196     make_boyer_moore_horspool ( Range &r ) {
0197         return boost::algorithm::boyer_moore_horspool
0198             <typename boost::range_iterator<Range>::type> (boost::begin(r), boost::end(r));
0199         }
0200 
0201 }}
0202 
0203 #endif  //  BOOST_ALGORITHM_BOYER_MOORE_HORSPOOOL_SEARCH_HPP