/*=============================================================================
    Boost.Wave: A Standard compliant C++ preprocessor library

    Re2C based C++ lexer

    http://www.boost.org/

    Distributed under the Boost Software License, Version 1.0. (See
    accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#if !defined(BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
#define BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED

#include <string>
#include <cstdio>
#include <cstdarg>
#if defined(BOOST_SPIRIT_DEBUG)
#include <iostream>
#endif

#include <boost/concept_check.hpp>
#include <boost/assert.hpp>

#include <boost/wave/wave_config.hpp>
#include <boost/wave/language_support.hpp>
#include <boost/wave/token_ids.hpp>
#include <boost/wave/util/file_position.hpp>
#include <boost/wave/cpplexer/validate_universal_char.hpp>
#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
#include <boost/wave/cpplexer/token_cache.hpp>
#include <boost/wave/cpplexer/convert_trigraphs.hpp>

#include <boost/wave/cpplexer/cpp_lex_interface.hpp>
#include <boost/wave/cpplexer/re2clex/scanner.hpp>
#include <boost/wave/cpplexer/re2clex/cpp_re.hpp>
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
#include <boost/wave/cpplexer/detect_include_guards.hpp>
#endif

#include <boost/wave/cpplexer/cpp_lex_interface_generator.hpp>


// this must occur after all of the includes and before any code appears
#ifdef BOOST_HAS_ABI_HEADERS
#include BOOST_ABI_PREFIX
#endif

///////////////////////////////////////////////////////////////////////////////
namespace boost {
namespace wave {
namespace cpplexer {
namespace re2clex {

///////////////////////////////////////////////////////////////////////////////
//
//  encapsulation of the re2c based C++ lexer
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT,
    typename PositionT = boost::wave::util::file_position_type,
    typename TokenT = lex_token<PositionT> >
class lexer
{
public:
    typedef TokenT token_type;
    typedef typename token_type::string_type string_type;

    lexer(IteratorT const &first, IteratorT const &last,
        PositionT const &pos, boost::wave::language_support language_);
    ~lexer();

    token_type& get(token_type&);
    void set_position(PositionT const &pos)
    {
        // set_position has to change the file name and line number only
        filename = pos.get_file();
        scanner.line = pos.get_line();
        scanner.file_name = filename.c_str();
    }
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    bool has_include_guards(std::string& guard_name) const
    {
        return guards.detected(guard_name);
    }
#endif

    // error reporting from the re2c generated lexer
    static int report_error(Scanner<IteratorT> const* s, int code, char const *, ...);

private:
    static char const *tok_names[];

    Scanner<IteratorT> scanner;
    string_type filename;
    string_type value;
    bool at_eof;
    boost::wave::language_support language;
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    include_guards<token_type> guards;
#endif

#if BOOST_WAVE_SUPPORT_THREADING == 0
    static token_cache<string_type> const cache;
#else
    token_cache<string_type> const cache;
#endif
};
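
///////////////////////////////////////////////////////////////////////////////
//  Usage note (illustrative sketch, not part of the original header): the
//  lexer<> template can be driven directly by constructing it over an
//  iterator range and pulling tokens until end of input. The read_file()
//  helper and the "input.cpp" name below are hypothetical; lex_token<> comes
//  from <boost/wave/cpplexer/cpp_lex_token.hpp>, and the input buffer should
//  ideally end in a newline.
//
//      using position_type = boost::wave::util::file_position_type;
//      std::string src = read_file("input.cpp");   // hypothetical helper
//      lexer<char const*, position_type> lex(
//          src.c_str(), src.c_str() + src.size(),
//          position_type("input.cpp"), boost::wave::support_cpp);
//
//      lex_token<position_type> tok;
//      for (;;) {
//          boost::wave::token_id id = boost::wave::token_id(lex.get(tok));
//          if (T_EOF == id || T_EOI == id)
//              break;
//          // ... process tok.get_value() here ...
//      }
///////////////////////////////////////////////////////////////////////////////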

///////////////////////////////////////////////////////////////////////////////
//  initialize the cpp lexer
template <typename IteratorT, typename PositionT, typename TokenT>
inline
lexer<IteratorT, PositionT, TokenT>::lexer(IteratorT const &first,
        IteratorT const &last, PositionT const &pos,
        boost::wave::language_support language_)
  : scanner(first, last),
    filename(pos.get_file()), at_eof(false), language(language_)
#if BOOST_WAVE_SUPPORT_THREADING != 0
  , cache()
#endif
{
    using namespace std;    // some systems put the C library names in std only
    scanner.line = pos.get_line();
    scanner.column = scanner.curr_column = pos.get_column();
    scanner.error_proc = report_error;
    scanner.file_name = filename.c_str();

#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    scanner.enable_ms_extensions = true;
#else
    scanner.enable_ms_extensions = false;
#endif

#if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0
    scanner.act_in_c99_mode = boost::wave::need_c99(language_);
#endif

#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
    scanner.enable_import_keyword = !boost::wave::need_c99(language_);
#else
    scanner.enable_import_keyword = false;
#endif

    scanner.detect_pp_numbers = boost::wave::need_prefer_pp_numbers(language_);
    scanner.single_line_only = boost::wave::need_single_line(language_);

#if BOOST_WAVE_SUPPORT_CPP0X != 0
    scanner.act_in_cpp0x_mode = boost::wave::need_cpp0x(language_);
#else
    scanner.act_in_cpp0x_mode = false;
#endif

#if BOOST_WAVE_SUPPORT_CPP2A != 0
    scanner.act_in_cpp2a_mode = boost::wave::need_cpp2a(language_);
    scanner.act_in_cpp0x_mode = boost::wave::need_cpp2a(language_)
        || boost::wave::need_cpp0x(language_);
#else
    scanner.act_in_cpp2a_mode = false;
#endif
}

template <typename IteratorT, typename PositionT, typename TokenT>
inline
lexer<IteratorT, PositionT, TokenT>::~lexer()
{
    using namespace std;    // some systems have free in namespace std
    free(scanner.bot);
}

///////////////////////////////////////////////////////////////////////////////
//  get the next token from the input stream
template <typename IteratorT, typename PositionT, typename TokenT>
inline TokenT&
lexer<IteratorT, PositionT, TokenT>::get(TokenT& result)
{
    if (at_eof)
        return result = token_type();   // return a default (end-of-input) token

    std::size_t actline = scanner.line;
    token_id id = token_id(scan(&scanner));

    switch (id) {
    case T_IDENTIFIER:
        // test identifier characters for validity (throws if invalid chars found)
        value = string_type((char const *)scanner.tok,
            scanner.cur-scanner.tok);
        if (!boost::wave::need_no_character_validation(language))
            impl::validate_identifier_name(value, actline, scanner.column, filename);
        break;

    case T_STRINGLIT:
    case T_CHARLIT:
    case T_RAWSTRINGLIT:
        // test literal characters for validity (throws if invalid chars found)
        value = string_type((char const *)scanner.tok,
            scanner.cur-scanner.tok);
        if (boost::wave::need_convert_trigraphs(language))
            value = impl::convert_trigraphs(value);
        if (!boost::wave::need_no_character_validation(language))
            impl::validate_literal(value, actline, scanner.column, filename);
        break;

    case T_PP_HHEADER:
    case T_PP_QHEADER:
    case T_PP_INCLUDE:
        // convert to the corresponding ..._next token, if appropriate
        {
            value = string_type((char const *)scanner.tok,
                scanner.cur-scanner.tok);

#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
            // skip '#' and whitespace and check whether this is an
            // 'include_next' directive
            typename string_type::size_type start = value.find("include");
            if (value.compare(start, 12, "include_next", 12) == 0)
                id = token_id(id | AltTokenType);
#endif
            break;
        }

    case T_LONGINTLIT:
        value = string_type((char const *)scanner.tok,
            scanner.cur-scanner.tok);
        if (!boost::wave::need_long_long(language)) {
            // 'long long' literals are not allowed without long_long support
            BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal,
                value.c_str(), actline, scanner.column, filename.c_str());
        }
        break;

    case T_OCTALINT:
    case T_DECIMALINT:
    case T_HEXAINT:
    case T_INTLIT:
    case T_FLOATLIT:
    case T_FIXEDPOINTLIT:
    case T_CCOMMENT:
    case T_CPPCOMMENT:
    case T_SPACE:
    case T_SPACE2:
    case T_ANY:
    case T_PP_NUMBER:
        value = string_type((char const *)scanner.tok,
            scanner.cur-scanner.tok);
        break;

    case T_EOF:
        // T_EOF is returned as a valid token; the next call to get() will
        // return the actual end-of-input token
        at_eof = true;
        value.clear();
        break;

    case T_OR_TRIGRAPH:
    case T_XOR_TRIGRAPH:
    case T_LEFTBRACE_TRIGRAPH:
    case T_RIGHTBRACE_TRIGRAPH:
    case T_LEFTBRACKET_TRIGRAPH:
    case T_RIGHTBRACKET_TRIGRAPH:
    case T_COMPL_TRIGRAPH:
    case T_POUND_TRIGRAPH:
        if (boost::wave::need_convert_trigraphs(language)) {
            value = cache.get_token_value(BASEID_FROM_TOKEN(id));
        }
        else {
            value = string_type((char const *)scanner.tok,
                scanner.cur-scanner.tok);
        }
        break;

    case T_ANY_TRIGRAPH:
        if (boost::wave::need_convert_trigraphs(language)) {
            value = impl::convert_trigraph(
                string_type((char const *)scanner.tok,
                    scanner.cur-scanner.tok));
        }
        else {
            value = string_type((char const *)scanner.tok,
                scanner.cur-scanner.tok);
        }
        break;

    default:
        if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) ||
            IS_CATEGORY(id, UnknownTokenType))
        {
            value = string_type((char const *)scanner.tok,
                scanner.cur-scanner.tok);
        }
        else {
            value = cache.get_token_value(id);
        }
        break;
    }

    // 'actline' was captured before the scan, because the re2c lexer already
    // reports the new line number for newline tokens
    result = token_type(id, value, PositionT(filename, actline, scanner.column));

#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    return guards.detect_guard(result);
#else
    return result;
#endif
}

template <typename IteratorT, typename PositionT, typename TokenT>
inline int
lexer<IteratorT, PositionT, TokenT>::report_error(Scanner<IteratorT> const *s, int errcode,
    char const *msg, ...)
{
    BOOST_ASSERT(0 != s);
    BOOST_ASSERT(0 != msg);

    using namespace std;    // some systems put vsnprintf in namespace std only

    constexpr std::size_t bufsize = 200;
    char buffer[bufsize];
    va_list params;
    va_start(params, msg);
    vsnprintf(buffer, bufsize, msg, params);
    va_end(params);

    BOOST_WAVE_LEXER_THROW_VAR(lexing_exception, errcode, buffer, s->line,
        s->column, s->file_name);

    return 0;
}

///////////////////////////////////////////////////////////////////////////////
//
//  lex_functor
//
///////////////////////////////////////////////////////////////////////////////

template <typename IteratorT,
    typename PositionT = boost::wave::util::file_position_type,
    typename TokenT = typename lexer<IteratorT, PositionT>::token_type>
class lex_functor
  : public lex_input_interface_generator<TokenT>
{
public:
    typedef TokenT token_type;

    lex_functor(IteratorT const &first, IteratorT const &last,
            PositionT const &pos, boost::wave::language_support language)
      : re2c_lexer(first, last, pos, language)
    {}
    virtual ~lex_functor() {}

    // get the next token from the input stream
    token_type& get(token_type& result) BOOST_OVERRIDE { return re2c_lexer.get(result); }
    void set_position(PositionT const &pos) BOOST_OVERRIDE { re2c_lexer.set_position(pos); }
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    bool has_include_guards(std::string& guard_name) const BOOST_OVERRIDE
        { return re2c_lexer.has_include_guards(guard_name); }
#endif

private:
    lexer<IteratorT, PositionT, TokenT> re2c_lexer;
};
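
///////////////////////////////////////////////////////////////////////////////
//  Note (added sketch, not part of the original header): lex_functor adapts
//  the concrete lexer<> above to the abstract lex_input_interface<TokenT>, so
//  callers can hold the lexer behind an interface pointer and dispatch
//  virtually. The names first/last/pos below are placeholders for an iterator
//  range and a start position, as in the constructors above.
//
//      lex_input_interface<token_type>* f =
//          new lex_functor<char const*>(first, last, pos,
//              boost::wave::support_cpp);
//      token_type tok;
//      f->get(tok);        // virtual dispatch into lexer<>::get()
//      delete f;           // lex_input_interface has a virtual destructor
///////////////////////////////////////////////////////////////////////////////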

#if BOOST_WAVE_SUPPORT_THREADING == 0
// the static token cache is shared by all lexer instances (single threaded builds only)
template <typename IteratorT, typename PositionT, typename TokenT>
token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type> const
    lexer<IteratorT, PositionT, TokenT>::cache =
        token_cache<typename lexer<IteratorT, PositionT, TokenT>::string_type>();
#endif

}   // namespace re2clex

///////////////////////////////////////////////////////////////////////////////
//
//  If the lex_functor is instantiated separately from the lex_iterator
//  (i.e. BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION is set), the
//  new_lexer_gen<>::new_lexer function below must not be defined inline;
//  otherwise it is. Separate (explicit) instantiation helps to reduce
//  compilation time.
//
///////////////////////////////////////////////////////////////////////////////

#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE
#else
#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline
#endif

///////////////////////////////////////////////////////////////////////////////
//
//  The 'new_lexer' function allows the opaque generation of a new lexer
//  object: it creates a re2clex::lex_functor and returns it through the
//  abstract lex_input_interface<TokenT>*, so callers never need to see the
//  lexer implementation defined in this header.
//
///////////////////////////////////////////////////////////////////////////////
template <typename IteratorT, typename PositionT, typename TokenT>
BOOST_WAVE_RE2C_NEW_LEXER_INLINE
lex_input_interface<TokenT> *
new_lexer_gen<IteratorT, PositionT, TokenT>::new_lexer(IteratorT const &first,
    IteratorT const &last, PositionT const &pos,
    boost::wave::language_support language)
{
    using re2clex::lex_functor;
    return new lex_functor<IteratorT, PositionT, TokenT>(first, last, pos, language);
}
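
///////////////////////////////////////////////////////////////////////////////
//  Note (added, not part of the original header): client code normally does
//  not call new_lexer() directly; Wave's lex_iterator machinery ends up
//  calling this factory to obtain a lex_input_interface<TokenT>* for the
//  underlying input range. A direct call would look roughly like the sketch
//  below, where first/last/pos are placeholders as before:
//
//      lex_input_interface<token_type>* p =
//          new_lexer_gen<char const*, position_type, token_type>::new_lexer(
//              first, last, pos, boost::wave::support_cpp);
///////////////////////////////////////////////////////////////////////////////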

#undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE

///////////////////////////////////////////////////////////////////////////////
}   // namespace cpplexer
}   // namespace wave
}   // namespace boost

// the suffix header occurs after all of the code
#ifdef BOOST_HAS_ABI_HEADERS
#include BOOST_ABI_SUFFIX
#endif

#endif // !defined(BOOST_CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)