Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2024-05-18 08:30:10

0001 // Copyright (c) 2005, Google Inc.
0002 // All rights reserved.
0003 //
0004 // Redistribution and use in source and binary forms, with or without
0005 // modification, are permitted provided that the following conditions are
0006 // met:
0007 //
0008 //     * Redistributions of source code must retain the above copyright
0009 // notice, this list of conditions and the following disclaimer.
0010 //     * Redistributions in binary form must reproduce the above
0011 // copyright notice, this list of conditions and the following disclaimer
0012 // in the documentation and/or other materials provided with the
0013 // distribution.
0014 //     * Neither the name of Google Inc. nor the names of its
0015 // contributors may be used to endorse or promote products derived from
0016 // this software without specific prior written permission.
0017 //
0018 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
0019 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
0020 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
0021 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
0022 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
0023 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0024 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
0025 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
0026 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0027 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
0028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0029 //
0030 // Author: Sanjay Ghemawat
0031 //
0032 // Regular-expression based scanner for parsing an input stream.
0033 //
0034 // Example 1: parse a sequence of "var = number" entries from input:
0035 //
0036 //      Scanner scanner(input);
0037 //      string var;
0038 //      int number;
0039 //      scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
0040 //      while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
0041 //        ...;
0042 //      }
0043 
0044 #ifndef _PCRE_SCANNER_H
0045 #define _PCRE_SCANNER_H
0046 
0047 #include <assert.h>
0048 #include <string>
0049 #include <vector>
0050 
0051 #include <pcrecpp.h>
0052 #include <pcre_stringpiece.h>
0053 
0054 namespace pcrecpp {
0055 
0056 class PCRECPP_EXP_DEFN Scanner {  // Regular-expression based scanner over input data.
0057  public:
0058   Scanner();
0059   explicit Scanner(const std::string& input);
0060   ~Scanner();
0061 
0062   // Return the current line number.  The returned line number is
0063   // one-based, i.e. it is 1 + the number of consumed newlines.
0064   //
0065   // Note: this method may be slow.  It may take time proportional to
0066   // the size of the input.
0067   int LineNumber() const;
0068 
0069   // Return the byte offset into the input data at which the scanner
0070   // is currently looking.
0071   int Offset() const;
0072 
0073   // Return true iff the start of the remaining input matches "re".
0074   bool LookingAt(const RE& re) const;
0075 
0076   // Return true iff all of the following are true:
0077   //    a. the start of the remaining input matches "re",
0078   //    b. if any arguments are supplied, matched sub-patterns can be
0079   //       parsed and stored into the arguments.
0080   // If it returns true, it skips over the matched input and any
0081   // following input that matches the "skip" regular expression.
0082   bool Consume(const RE& re,
0083                const Arg& arg0 = RE::no_arg,
0084                const Arg& arg1 = RE::no_arg,
0085                const Arg& arg2 = RE::no_arg
0086                // TODO: Allow more arguments?
0087                );
0088 
0089   // Set the "skip" regular expression.  If after consuming some data,
0090   // a prefix of the input matches this RE, it is automatically
0091   // skipped.  For example, a programming language scanner would use
0092   // a skip RE that matches white space and comments.
0093   //
0094   //    scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
0095   //
0096   // Skipping repeats as long as it succeeds.  We used to let people do
0097   // this by writing "(...)*" in the regular expression, but that added
0098   // up to lots of recursive calls within the pcre library, so now we
0099   // control repetition explicitly via the function call API.
0100   //
0101   // You can pass NULL for "re" if you do not want any data to be skipped.
0102   void Skip(const char* re);   // DEPRECATED; does *not* repeat
0103   void SetSkipExpression(const char* re);
0104 
0105   // Temporarily pause "skip"ing.  This
0106   //   Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
0107   // is similar to
0108   //   Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
0109   // but avoids creating/deleting new RE objects.
0110   void DisableSkip();
0111 
0112   // Reenable previously paused skipping.  Any prefix of the input
0113   // that matches the skip pattern is immediately dropped.
0114   void EnableSkip();
0115 
0116   /***** Special wrappers around SetSkipExpression() for some common idioms *****/
0117 
0118   // Arranges to skip whitespace, C comments, C++ comments.
0119   // The overall RE is a disjunction of the following REs:
0120   //    \\s                     whitespace
0121   //    //.*\n                  C++ comment
0122   //    /[*](?:\n|.)*?[*]/      C comment (x*? means minimal repetitions of x)
0123   // We get repetition via the semantics of SetSkipExpression, not by using *
0124   void SkipCXXComments() {
0125     SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
0126   }
0127 
0128   void set_save_comments(bool comments) {  // Set the save_comments_ flag.
0129     save_comments_ = comments;
0130   }
0131 
0132   bool save_comments() {  // Return the save_comments_ flag.  NOTE(review): could be const.
0133     return save_comments_;
0134   }
0135 
0136   // Append to the vector "ranges" the comments found in the
0137   // byte range [start,end] (inclusive) of the input data.
0138   // Only comments that were extracted entirely within that
0139   // range are returned: no range splitting of atomically-extracted
0140   // comments is performed.
0141   void GetComments(int start, int end, std::vector<StringPiece> *ranges);
0142 
0143   // Append to the vector "ranges" the comments added
0144   // since the last time this was called.  This
0145   // functionality is provided for efficiency when
0146   // interleaving scanning with parsing.
0147   void GetNextComments(std::vector<StringPiece> *ranges);
0148 
0149  private:
0150   std::string   data_;          // All the input data
0151   StringPiece   input_;         // Unprocessed input
0152   RE*           skip_;          // If non-NULL, RE for skipping input
0153   bool          should_skip_;   // If true, use skip_
0154   bool          skip_repeat_;   // If true, repeat skip_ as long as it works
0155   bool          save_comments_; // If true, aggregate the skip expression
0156 
0157   // The skipped comments.
0158   // TODO: later consider requiring that the StringPieces be added
0159   // in order by their start position
0160   std::vector<StringPiece> *comments_;
0161 
0162   // The offset into comments_ that has been returned by GetNextComments.
0163   int           comments_offset_;
0164 
0165   // Helper function to consume *skip_ and honour
0166   // save_comments_.
0167   void ConsumeSkip();
0168 };
0169 
0170 }   // namespace pcrecpp
0171 
0172 #endif /* _PCRE_SCANNER_H */