|
||||
File indexing completed on 2025-01-17 09:55:54
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
//
// Regular-expression based scanner for parsing an input stream.
0033 // 0034 // Example 1: parse a sequence of "var = number" entries from input: 0035 // 0036 // Scanner scanner(input); 0037 // string var; 0038 // int number; 0039 // scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter 0040 // while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) { 0041 // ...; 0042 // } 0043 0044 #ifndef _PCRE_SCANNER_H 0045 #define _PCRE_SCANNER_H 0046 0047 #include <assert.h> 0048 #include <string> 0049 #include <vector> 0050 0051 #include <pcrecpp.h> 0052 #include <pcre_stringpiece.h> 0053 0054 namespace pcrecpp { 0055 0056 class PCRECPP_EXP_DEFN Scanner { 0057 public: 0058 Scanner(); 0059 explicit Scanner(const std::string& input); 0060 ~Scanner(); 0061 0062 // Return current line number. The returned line-number is 0063 // one-based. I.e. it returns 1 + the number of consumed newlines. 0064 // 0065 // Note: this method may be slow. It may take time proportional to 0066 // the size of the input. 0067 int LineNumber() const; 0068 0069 // Return the byte-offset that the scanner is looking in the 0070 // input data; 0071 int Offset() const; 0072 0073 // Return true iff the start of the remaining input matches "re" 0074 bool LookingAt(const RE& re) const; 0075 0076 // Return true iff all of the following are true 0077 // a. the start of the remaining input matches "re", 0078 // b. if any arguments are supplied, matched sub-patterns can be 0079 // parsed and stored into the arguments. 0080 // If it returns true, it skips over the matched input and any 0081 // following input that matches the "skip" regular expression. 0082 bool Consume(const RE& re, 0083 const Arg& arg0 = RE::no_arg, 0084 const Arg& arg1 = RE::no_arg, 0085 const Arg& arg2 = RE::no_arg 0086 // TODO: Allow more arguments? 0087 ); 0088 0089 // Set the "skip" regular expression. If after consuming some data, 0090 // a prefix of the input matches this RE, it is automatically 0091 // skipped. 
For example, a programming language scanner would use 0092 // a skip RE that matches white space and comments. 0093 // 0094 // scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/"); 0095 // 0096 // Skipping repeats as long as it succeeds. We used to let people do 0097 // this by writing "(...)*" in the regular expression, but that added 0098 // up to lots of recursive calls within the pcre library, so now we 0099 // control repetition explicitly via the function call API. 0100 // 0101 // You can pass NULL for "re" if you do not want any data to be skipped. 0102 void Skip(const char* re); // DEPRECATED; does *not* repeat 0103 void SetSkipExpression(const char* re); 0104 0105 // Temporarily pause "skip"ing. This 0106 // Skip("Foo"); code ; DisableSkip(); code; EnableSkip() 0107 // is similar to 0108 // Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo"); 0109 // but avoids creating/deleting new RE objects. 0110 void DisableSkip(); 0111 0112 // Reenable previously paused skipping. Any prefix of the input 0113 // that matches the skip pattern is immediately dropped. 0114 void EnableSkip(); 0115 0116 /***** Special wrappers around SetSkip() for some common idioms *****/ 0117 0118 // Arranges to skip whitespace, C comments, C++ comments. 0119 // The overall RE is a disjunction of the following REs: 0120 // \\s whitespace 0121 // //.*\n C++ comment 0122 // /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x) 0123 // We get repetition via the semantics of SetSkipExpression, not by using * 0124 void SkipCXXComments() { 0125 SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/"); 0126 } 0127 0128 void set_save_comments(bool comments) { 0129 save_comments_ = comments; 0130 } 0131 0132 bool save_comments() { 0133 return save_comments_; 0134 } 0135 0136 // Append to vector ranges the comments found in the 0137 // byte range [start,end] (inclusive) of the input data. 
0138 // Only comments that were extracted entirely within that 0139 // range are returned: no range splitting of atomically-extracted 0140 // comments is performed. 0141 void GetComments(int start, int end, std::vector<StringPiece> *ranges); 0142 0143 // Append to vector ranges the comments added 0144 // since the last time this was called. This 0145 // functionality is provided for efficiency when 0146 // interleaving scanning with parsing. 0147 void GetNextComments(std::vector<StringPiece> *ranges); 0148 0149 private: 0150 std::string data_; // All the input data 0151 StringPiece input_; // Unprocessed input 0152 RE* skip_; // If non-NULL, RE for skipping input 0153 bool should_skip_; // If true, use skip_ 0154 bool skip_repeat_; // If true, repeat skip_ as long as it works 0155 bool save_comments_; // If true, aggregate the skip expression 0156 0157 // the skipped comments 0158 // TODO: later consider requiring that the StringPieces be added 0159 // in order by their start position 0160 std::vector<StringPiece> *comments_; 0161 0162 // the offset into comments_ that has been returned by GetNextComments 0163 int comments_offset_; 0164 0165 // helper function to consume *skip_ and honour 0166 // save_comments_ 0167 void ConsumeSkip(); 0168 }; 0169 0170 } // namespace pcrecpp 0171 0172 #endif /* _PCRE_SCANNER_H */
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |