Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:30:39

0001 #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
0002 #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
0003 
0004 /* Copyright (c) 2004-2005 CrystalClear Software, Inc.
0005  * Use, modification and distribution is subject to the
0006  * Boost Software License, Version 1.0. (See accompanying
0007  * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
0008  * Author: Jeff Garland, Bart Garst
0009  * $Date$
0010  */
0011 
0012 
0013 #include <boost/algorithm/string/case_conv.hpp>
0014 #include <cctype>
0015 #include <map>
0016 #include <string>
0017 #include <vector>
0018 #include <ostream>
0019 #include <iterator>
0020 #include <algorithm>
0021 
0022 namespace boost { namespace date_time {
0023 
0024 
0025 template<typename charT>
0026 struct parse_match_result
0027 {
0028   parse_match_result() :
0029     match_depth(0),
0030     current_match(PARSE_ERROR)
0031   {}
0032   typedef std::basic_string<charT> string_type;
0033   string_type remaining() const
0034   {
0035     if (match_depth == cache.size()) {
0036       return string_type();
0037     }
0038     if (current_match == PARSE_ERROR) {
0039       return cache;
0040     }
0041     //some of the cache was used return the rest
0042     return string_type(cache, match_depth);
0043   }
0044   charT last_char() const
0045   {
0046     return cache[cache.size()-1];
0047   }
0048   //! Returns true if more characters were parsed than was necessary
0049   /*! Should be used in conjunction with last_char()
0050    *  to get the remaining character.
0051    */
0052   bool has_remaining() const
0053   {
0054     return (cache.size() > match_depth);
0055   }
0056 
0057   // cache will hold characters that have been read from the stream
0058   string_type cache;
0059   unsigned short match_depth;
0060   short current_match;
0061   enum PARSE_STATE { PARSE_ERROR = -1 };
0062 };
0063 
0064   //for debug -- really only char streams...
0065 template<typename charT>
0066 std::basic_ostream<charT>&
0067 operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr)
0068 {
0069   os << "cm: " << mr.current_match
0070      << " C: '" << mr.cache
0071      << "' md: " << mr.match_depth
0072      << " R: " << mr.remaining();
0073   return os;
0074 }
0075 
0076 
0077 
0078 //! Recursive data structure to allow efficient parsing of various strings
0079 /*! This class provides a quick lookup by building what amounts to a
0080  *  tree data structure.  It also features a match function which can
0081  *  can handle nasty input interators by caching values as it recurses
0082  *  the tree so that it can backtrack as needed.
0083  */
0084 template<typename charT>
0085 struct string_parse_tree
0086 {
0087 #if BOOST_WORKAROUND( BOOST_BORLANDC, BOOST_TESTED_AT(0x581) )
0088   typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll;
0089 #else
0090   typedef std::multimap<charT, string_parse_tree > ptree_coll;
0091 #endif
0092   typedef typename ptree_coll::value_type value_type;
0093   typedef typename ptree_coll::iterator iterator;
0094   typedef typename ptree_coll::const_iterator const_iterator;
0095   typedef std::basic_string<charT> string_type;
0096   typedef std::vector<std::basic_string<charT> > collection_type;
0097   typedef parse_match_result<charT> parse_match_result_type;
0098 
0099   /*! Parameter "starting_point" designates where the numbering begins.
0100    * A starting_point of zero will start the numbering at zero
0101    * (Sun=0, Mon=1, ...) were a starting_point of one starts the
0102    * numbering at one (Jan=1, Feb=2, ...). The default is zero,
0103    * negative vaules are not allowed */
0104   string_parse_tree(collection_type names, unsigned int starting_point=0) :
0105     m_value(parse_match_result_type::PARSE_ERROR)
0106   {
0107     // iterate thru all the elements and build the tree
0108     unsigned short index = 0;
0109     while (index != names.size() ) {
0110       string_type s = boost::algorithm::to_lower_copy(names[index]);
0111       insert(s, static_cast<unsigned short>(index + starting_point));
0112       index++;
0113     }
0114     //set the last tree node = index+1  indicating a value
0115     index++;
0116   }
0117 
0118 
0119   string_parse_tree(short value = parse_match_result_type::PARSE_ERROR) :
0120     m_value(value)
0121   {}
0122   ptree_coll m_next_chars;
0123   short m_value;
0124 
0125   void insert(const string_type& s, unsigned short value)
0126   {
0127     unsigned int i = 0;
0128     iterator ti;
0129     while(i < s.size()) {
0130       if (i==0) {
0131         if (i == (s.size()-1)) {
0132           ti = m_next_chars.insert(value_type(s[i],
0133                                               string_parse_tree<charT>(value)));
0134         }
0135         else {
0136           ti = m_next_chars.insert(value_type(s[i],
0137                                               string_parse_tree<charT>()));
0138         }
0139       }
0140       else {
0141         if (i == (s.size()-1)) {
0142           ti = ti->second.m_next_chars.insert(value_type(s[i],
0143                                                          string_parse_tree<charT>(value)));
0144         }
0145 
0146         else {
0147           ti = ti->second.m_next_chars.insert(value_type(s[i],
0148                                                          string_parse_tree<charT>()));
0149         }
0150 
0151       }
0152       i++;
0153     }
0154   }
0155 
0156 
0157   //! Recursive function that finds a matching string in the tree.
0158   /*! Must check match_results::has_remaining() after match() is
0159    * called. This is required so the user can determine if
0160    * stream iterator is already pointing to the expected
0161    * character or not (match() might advance sitr to next char in stream).
0162    *
0163    * A parse_match_result that has been returned from a failed match
0164    * attempt can be sent in to the match function of a different
0165    * string_parse_tree to attempt a match there. Use the iterators
0166    * for the partially consumed stream, the parse_match_result object,
0167    * and '0' for the level parameter. */
0168   short
0169   match(std::istreambuf_iterator<charT>& sitr,
0170         std::istreambuf_iterator<charT>& stream_end,
0171         parse_match_result_type& result,
0172         unsigned int& level)  const
0173   {
0174 
0175     level++;
0176     charT c;
0177     // if we conditionally advance sitr, we won't have
0178     // to consume the next character past the input
0179     bool adv_itr = true;
0180     if (level > result.cache.size()) {
0181       if (sitr == stream_end) return 0; //bail - input exhausted
0182       c = static_cast<charT>(std::tolower(*sitr));
0183       //result.cache += c;
0184       //sitr++;
0185     }
0186     else {
0187       // if we're looking for characters from the cache,
0188       // we don't want to increment sitr
0189       adv_itr = false;
0190       c = static_cast<charT>(std::tolower(result.cache[level-1]));
0191     }
0192     const_iterator litr = m_next_chars.lower_bound(c);
0193     const_iterator uitr = m_next_chars.upper_bound(c);
0194     while (litr != uitr) { // equal if not found
0195       if(adv_itr) {
0196         sitr++;
0197         result.cache += c;
0198       }
0199       if (litr->second.m_value != -1) { // -1 is default value
0200         if (result.match_depth < level) {
0201           result.current_match = litr->second.m_value;
0202           result.match_depth = static_cast<unsigned short>(level);
0203         }
0204         litr->second.match(sitr, stream_end,
0205                            result, level);
0206         level--;
0207       }
0208       else {
0209         litr->second.match(sitr, stream_end,
0210                            result, level);
0211         level--;
0212       }
0213 
0214       if(level <= result.cache.size()) {
0215         adv_itr = false;
0216       }
0217 
0218       litr++;
0219     }
0220     return result.current_match;
0221 
0222   }
0223 
0224   /*! Must check match_results::has_remaining() after match() is
0225    * called. This is required so the user can determine if
0226    * stream iterator is already pointing to the expected
0227    * character or not (match() might advance sitr to next char in stream).
0228    */
0229   parse_match_result_type
0230   match(std::istreambuf_iterator<charT>& sitr,
0231         std::istreambuf_iterator<charT>& stream_end) const
0232   {
0233     // lookup to_lower of char in tree.
0234     unsigned int level = 0;
0235     //    string_type cache;
0236     parse_match_result_type result;
0237     match(sitr, stream_end, result, level);
0238     return result;
0239   }
0240 
0241   void printme(std::ostream& os, int& level)
0242   {
0243     level++;
0244     iterator itr = m_next_chars.begin();
0245     iterator end = m_next_chars.end();
0246     //    os << "starting level: " << level << std::endl;
0247     while (itr != end) {
0248       os << "level:  " << level
0249          << " node:  " << itr->first
0250          << " value: " << itr->second.m_value
0251          << std::endl;
0252       itr->second.printme(os, level);
0253       itr++;
0254     }
0255     level--;
0256   }
0257 
0258   void print(std::ostream& os)
0259   {
0260     int level = 0;
0261     printme(os, level);
0262   }
0263 
0264   void printmatch(std::ostream& os, charT c)
0265   {
0266     iterator litr = m_next_chars.lower_bound(c);
0267     iterator uitr = m_next_chars.upper_bound(c);
0268     os << "matches for: " << c << std::endl;
0269     while (litr != uitr) {
0270       os << " node:  " << litr->first
0271          << " value: " << litr->second.m_value
0272          << std::endl;
0273       litr++;
0274     }
0275   }
0276 
0277 };
0278 
0279 
0280 } } //namespace
0281 #endif