|
||||
Warning, file /include/re2/filtered_re2.h was not indexed or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001 // Copyright 2009 The RE2 Authors. All Rights Reserved. 0002 // Use of this source code is governed by a BSD-style 0003 // license that can be found in the LICENSE file. 0004 0005 #ifndef RE2_FILTERED_RE2_H_ 0006 #define RE2_FILTERED_RE2_H_ 0007 0008 // The class FilteredRE2 is used as a wrapper to multiple RE2 regexps. 0009 // It provides a prefilter mechanism that helps in cutting down the 0010 // number of regexps that need to be actually searched. 0011 // 0012 // By design, it does not include a string matching engine. This is to 0013 // allow the user of the class to use their favorite string matching 0014 // engine. The overall flow is: Add all the regexps using Add, then 0015 // Compile the FilteredRE2. Compile returns strings that need to be 0016 // matched. Note that the returned strings are lowercased and distinct. 0017 // For applying regexps to a search text, the caller does the string 0018 // matching using the returned strings. When doing the string match, 0019 // note that the caller has to do that in a case-insensitive way or 0020 // on a lowercased version of the search text. Then call FirstMatch 0021 // or AllMatches with a vector of indices of strings that were found 0022 // in the text to get the actual regexp matches. 0023 0024 #include <memory> 0025 #include <string> 0026 #include <vector> 0027 0028 #include "absl/strings/string_view.h" 0029 #include "re2/re2.h" 0030 0031 namespace re2 { 0032 0033 class PrefilterTree; 0034 0035 class FilteredRE2 { 0036 public: 0037 FilteredRE2(); 0038 explicit FilteredRE2(int min_atom_len); 0039 ~FilteredRE2(); 0040 0041 // Not copyable. 0042 FilteredRE2(const FilteredRE2&) = delete; 0043 FilteredRE2& operator=(const FilteredRE2&) = delete; 0044 // Movable. 0045 FilteredRE2(FilteredRE2&& other); 0046 FilteredRE2& operator=(FilteredRE2&& other); 0047 0048 // Uses RE2 constructor to create a RE2 object (re). Returns 0049 // re->error_code(). If error_code is other than NoError, then re is 0050 // deleted and not added to re2_vec_. 0051 RE2::ErrorCode Add(absl::string_view pattern, 0052 const RE2::Options& options, 0053 int* id); 0054 0055 // Prepares the regexps added by Add for filtering. Returns a set 0056 // of strings that the caller should check for in candidate texts. 0057 // The returned strings are lowercased and distinct. When doing 0058 // string matching, it should be performed in a case-insensitive 0059 // way or the search text should be lowercased first. Call after 0060 // all Add calls are done. 0061 void Compile(std::vector<std::string>* strings_to_match); 0062 0063 // Returns the index of the first matching regexp. 0064 // Returns -1 on no match. Can be called prior to Compile. 0065 // Does not do any filtering: simply tries to Match the 0066 // regexps in a loop. 0067 int SlowFirstMatch(absl::string_view text) const; 0068 0069 // Returns the index of the first matching regexp. 0070 // Returns -1 on no match. Compile has to be called before 0071 // calling this. 0072 int FirstMatch(absl::string_view text, 0073 const std::vector<int>& atoms) const; 0074 0075 // Returns the indices of all matching regexps, after first clearing 0076 // matched_regexps. 0077 bool AllMatches(absl::string_view text, 0078 const std::vector<int>& atoms, 0079 std::vector<int>* matching_regexps) const; 0080 0081 // Returns the indices of all potentially matching regexps after first 0082 // clearing potential_regexps. 0083 // A regexp is potentially matching if it passes the filter. 0084 // If a regexp passes the filter it may still not match. 0085 // A regexp that does not pass the filter is guaranteed to not match. 0086 void AllPotentials(const std::vector<int>& atoms, 0087 std::vector<int>* potential_regexps) const; 0088 0089 // The number of regexps added. 0090 int NumRegexps() const { return static_cast<int>(re2_vec_.size()); } 0091 0092 // Get the individual RE2 objects. 0093 const RE2& GetRE2(int regexpid) const { return *re2_vec_[regexpid]; } 0094 0095 private: 0096 // Print prefilter. 0097 void PrintPrefilter(int regexpid); 0098 0099 // Useful for testing and debugging. 0100 void RegexpsGivenStrings(const std::vector<int>& matched_atoms, 0101 std::vector<int>* passed_regexps); 0102 0103 // All the regexps in the FilteredRE2. 0104 std::vector<RE2*> re2_vec_; 0105 0106 // Has the FilteredRE2 been compiled using Compile() 0107 bool compiled_; 0108 0109 // An AND-OR tree of string atoms used for filtering regexps. 0110 std::unique_ptr<PrefilterTree> prefilter_tree_; 0111 }; 0112 0113 } // namespace re2 0114 0115 #endif // RE2_FILTERED_RE2_H_
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |