![]() |
|
|||
File indexing completed on 2025-09-15 08:30:17
0001 // 0002 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) 0003 // 0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying 0005 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 0006 // 0007 // Official repository: https://github.com/boostorg/beast 0008 // 0009 0010 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP 0011 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP 0012 0013 #include <boost/beast/core/detail/config.hpp> 0014 #include <boost/beast/core/error.hpp> 0015 #include <boost/beast/core/string.hpp> 0016 #include <boost/beast/http/field.hpp> 0017 #include <boost/beast/http/verb.hpp> 0018 #include <boost/beast/http/detail/basic_parser.hpp> 0019 #include <boost/asio/buffer.hpp> 0020 #include <boost/optional.hpp> 0021 #include <boost/assert.hpp> 0022 #include <cstdint> 0023 #include <limits> 0024 #include <memory> 0025 #include <type_traits> 0026 #include <utility> 0027 0028 namespace boost { 0029 namespace beast { 0030 namespace http { 0031 0032 /** A parser for decoding HTTP/1 wire format messages. 0033 0034 This parser is designed to efficiently parse messages in the 0035 HTTP/1 wire format. It allocates no memory when input is 0036 presented as a single contiguous buffer, and uses minimal 0037 state. It will handle chunked encoding and it understands 0038 the semantics of the Connection, Content-Length, and Upgrade 0039 fields. 0040 The parser is optimized for the case where the input buffer 0041 sequence consists of a single contiguous buffer. The 0042 @ref beast::basic_flat_buffer class is provided, which guarantees 0043 that the input sequence of the stream buffer will be represented 0044 by exactly one contiguous buffer. To ensure the optimum performance 0045 of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms 0046 such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some. 0047 Alternatively, the caller may use custom techniques to ensure that 0048 the structured portion of the HTTP message (header or chunk header) 0049 is contained in a linear buffer. 0050 0051 The interface to the parser uses virtual member functions. 0052 To use this class, derive your type from @ref basic_parser. When 0053 bytes are presented, the implementation will make a series of zero 0054 or more calls to virtual functions, which the derived class must 0055 implement. 0056 0057 Every virtual function must be provided by the derived class, 0058 or else a compilation error will be generated. The implementation 0059 will make sure that `ec` is clear before each virtual function 0060 is invoked. If a virtual function sets an error, it is propagated 0061 out of the parser to the caller. 0062 0063 @tparam isRequest A `bool` indicating whether the parser will be 0064 presented with request or response message. 0065 0066 @note If the parser encounters a field value with obs-fold 0067 longer than 4 kilobytes in length, an error is generated. 0068 */ 0069 template<bool isRequest> 0070 class basic_parser 0071 : private detail::basic_parser_base 0072 { 0073 boost::optional<std::uint64_t> 0074 body_limit_ = 0075 boost::optional<std::uint64_t>( 0076 default_body_limit(is_request{})); // max payload body 0077 std::uint64_t len_ = 0; // size of chunk or body 0078 std::uint64_t len0_ = 0; // content length if known 0079 std::unique_ptr<char[]> buf_; // temp storage 0080 std::size_t buf_len_ = 0; // size of buf_ 0081 std::uint32_t header_limit_ = 8192; // max header size 0082 unsigned short status_ = 0; // response status 0083 state state_ = state::nothing_yet; // initial state 0084 unsigned f_ = 0; // flags 0085 0086 // limit on the size of the stack flat buffer 0087 static std::size_t constexpr max_stack_buffer = 8192; 0088 0089 // Message will be complete after reading header 0090 static unsigned constexpr flagSkipBody = 1<< 0; 0091 0092 // Consume input buffers across semantic boundaries 0093 static unsigned constexpr flagEager = 1<< 1; 0094 0095 // The parser has read at least one byte 0096 static unsigned constexpr flagGotSome = 1<< 2; 0097 0098 // Message semantics indicate a body is expected. 0099 // cleared if flagSkipBody set 0100 // 0101 static unsigned constexpr flagHasBody = 1<< 3; 0102 0103 static unsigned constexpr flagHTTP11 = 1<< 4; 0104 static unsigned constexpr flagNeedEOF = 1<< 5; 0105 static unsigned constexpr flagExpectCRLF = 1<< 6; 0106 static unsigned constexpr flagConnectionClose = 1<< 7; 0107 static unsigned constexpr flagConnectionUpgrade = 1<< 8; 0108 static unsigned constexpr flagConnectionKeepAlive = 1<< 9; 0109 static unsigned constexpr flagContentLength = 1<< 10; 0110 static unsigned constexpr flagChunked = 1<< 11; 0111 static unsigned constexpr flagUpgrade = 1<< 12; 0112 0113 static constexpr 0114 std::uint64_t 0115 default_body_limit(std::true_type) 0116 { 0117 // limit for requests 0118 return 1 * 1024 * 1024; // 1MB 0119 } 0120 0121 static constexpr 0122 std::uint64_t 0123 default_body_limit(std::false_type) 0124 { 0125 // limit for responses 0126 return 8 * 1024 * 1024; // 8MB 0127 } 0128 0129 template<bool OtherIsRequest> 0130 friend class basic_parser; 0131 0132 #ifndef BOOST_BEAST_DOXYGEN 0133 friend class basic_parser_test; 0134 #endif 0135 0136 protected: 0137 /// Default constructor 0138 basic_parser() = default; 0139 0140 /** Move constructor 0141 0142 @note 0143 0144 After the move, the only valid operation on the 0145 moved-from object is destruction. 0146 */ 0147 basic_parser(basic_parser &&) = default; 0148 0149 /// Move assignment 0150 basic_parser& operator=(basic_parser &&) = default; 0151 0152 public: 0153 /// `true` if this parser parses requests, `false` for responses. 0154 using is_request = 0155 std::integral_constant<bool, isRequest>; 0156 0157 /// Destructor 0158 virtual ~basic_parser() = default; 0159 0160 /// Copy constructor 0161 basic_parser(basic_parser const&) = delete; 0162 0163 /// Copy assignment 0164 basic_parser& operator=(basic_parser const&) = delete; 0165 0166 /// Returns `true` if the parser has received at least one byte of input. 0167 bool 0168 got_some() const 0169 { 0170 return state_ != state::nothing_yet; 0171 } 0172 0173 /** Returns `true` if the message is complete. 0174 0175 The message is complete after the full header is prduced 0176 and one of the following is true: 0177 0178 @li The skip body option was set. 0179 0180 @li The semantics of the message indicate there is no body. 0181 0182 @li The semantics of the message indicate a body is expected, 0183 and the entire body was parsed. 0184 */ 0185 bool 0186 is_done() const 0187 { 0188 return state_ == state::complete; 0189 } 0190 0191 /** Returns `true` if a the parser has produced the full header. 0192 */ 0193 bool 0194 is_header_done() const 0195 { 0196 return state_ > state::fields; 0197 } 0198 0199 /** Returns `true` if the message is an upgrade message. 0200 0201 @note The return value is undefined unless 0202 @ref is_header_done would return `true`. 0203 */ 0204 bool 0205 upgrade() const 0206 { 0207 return (f_ & flagConnectionUpgrade) != 0; 0208 } 0209 0210 /** Returns `true` if the last value for Transfer-Encoding is "chunked". 0211 0212 @note The return value is undefined unless 0213 @ref is_header_done would return `true`. 0214 */ 0215 bool 0216 chunked() const 0217 { 0218 return (f_ & flagChunked) != 0; 0219 } 0220 0221 /** Returns `true` if the message has keep-alive connection semantics. 0222 0223 This function always returns `false` if @ref need_eof would return 0224 `false`. 0225 0226 @note The return value is undefined unless 0227 @ref is_header_done would return `true`. 0228 */ 0229 bool 0230 keep_alive() const; 0231 0232 /** Returns the optional value of Content-Length if known. 0233 0234 @note The return value is undefined unless 0235 @ref is_header_done would return `true`. 0236 */ 0237 boost::optional<std::uint64_t> 0238 content_length() const; 0239 0240 /** Returns the remaining content length if known 0241 0242 If the message header specifies a Content-Length, 0243 the return value will be the number of bytes remaining 0244 in the payload body have not yet been parsed. 0245 0246 @note The return value is undefined unless 0247 @ref is_header_done would return `true`. 0248 */ 0249 boost::optional<std::uint64_t> 0250 content_length_remaining() const; 0251 0252 /** Returns `true` if the message semantics require an end of file. 0253 0254 Depending on the contents of the header, the parser may 0255 require and end of file notification to know where the end 0256 of the body lies. If this function returns `true` it will be 0257 necessary to call @ref put_eof when there will never be additional 0258 data from the input. 0259 */ 0260 bool 0261 need_eof() const 0262 { 0263 return (f_ & flagNeedEOF) != 0; 0264 } 0265 0266 /** Set the limit on the payload body. 0267 0268 This function sets the maximum allowed size of the payload body, 0269 before any encodings except chunked have been removed. Depending 0270 on the message semantics, one of these cases will apply: 0271 0272 @li The Content-Length is specified and exceeds the limit. In 0273 this case the result @ref error::body_limit is returned 0274 immediately after the header is parsed. 0275 0276 @li The Content-Length is unspecified and the chunked encoding 0277 is not specified as the last encoding. In this case the end of 0278 message is determined by the end of file indicator on the 0279 associated stream or input source. If a sufficient number of 0280 body payload octets are presented to the parser to exceed the 0281 configured limit, the parse fails with the result 0282 @ref error::body_limit 0283 0284 @li The Transfer-Encoding specifies the chunked encoding as the 0285 last encoding. In this case, when the number of payload body 0286 octets produced by removing the chunked encoding exceeds 0287 the configured limit, the parse fails with the result 0288 @ref error::body_limit. 0289 0290 Setting the limit after any body octets have been parsed 0291 results in undefined behavior. 0292 0293 The default limit is 1MB for requests and 8MB for responses. 0294 0295 @param v An optional integral value representing the body limit. 0296 If this is equal to `boost::none`, then the body limit is disabled. 0297 */ 0298 void 0299 body_limit(boost::optional<std::uint64_t> v) 0300 { 0301 body_limit_ = v; 0302 } 0303 0304 /** Set a limit on the total size of the header. 0305 0306 This function sets the maximum allowed size of the header 0307 including all field name, value, and delimiter characters 0308 and also including the CRLF sequences in the serialized 0309 input. If the end of the header is not found within the 0310 limit of the header size, the error @ref error::header_limit 0311 is returned by @ref put. 0312 0313 Setting the limit after any header octets have been parsed 0314 results in undefined behavior. 0315 */ 0316 void 0317 header_limit(std::uint32_t v) 0318 { 0319 header_limit_ = v; 0320 } 0321 0322 /// Returns `true` if the eager parse option is set. 0323 bool 0324 eager() const 0325 { 0326 return (f_ & flagEager) != 0; 0327 } 0328 0329 /** Set the eager parse option. 0330 0331 Normally the parser returns after successfully parsing a structured 0332 element (header, chunk header, or chunk body) even if there are octets 0333 remaining in the input. This is necessary when attempting to parse the 0334 header first, or when the caller wants to inspect information which may 0335 be invalidated by subsequent parsing, such as a chunk extension. The 0336 `eager` option controls whether the parser keeps going after parsing 0337 structured element if there are octets remaining in the buffer and no 0338 error occurs. This option is automatically set or cleared during certain 0339 stream operations to improve performance with no change in functionality. 0340 0341 The default setting is `false`. 0342 0343 @param v `true` to set the eager parse option or `false` to disable it. 0344 */ 0345 void 0346 eager(bool v) 0347 { 0348 if(v) 0349 f_ |= flagEager; 0350 else 0351 f_ &= ~flagEager; 0352 } 0353 0354 /// Returns `true` if the skip parse option is set. 0355 bool 0356 skip() const 0357 { 0358 return (f_ & flagSkipBody) != 0; 0359 } 0360 0361 /** Set the skip parse option. 0362 0363 This option controls whether or not the parser expects to see an HTTP 0364 body, regardless of the presence or absence of certain fields such as 0365 Content-Length or a chunked Transfer-Encoding. Depending on the request, 0366 some responses do not carry a body. For example, a 200 response to a 0367 CONNECT request from a tunneling proxy, or a response to a HEAD request. 0368 In these cases, callers may use this function inform the parser that 0369 no body is expected. The parser will consider the message complete 0370 after the header has been received. 0371 0372 @param v `true` to set the skip body option or `false` to disable it. 0373 0374 @note This function must called before any bytes are processed. 0375 */ 0376 void 0377 skip(bool v); 0378 0379 /** Write a buffer sequence to the parser. 0380 0381 This function attempts to incrementally parse the HTTP 0382 message data stored in the caller provided buffers. Upon 0383 success, a positive return value indicates that the parser 0384 made forward progress, consuming that number of 0385 bytes. 0386 0387 In some cases there may be an insufficient number of octets 0388 in the input buffer in order to make forward progress. This 0389 is indicated by the code @ref error::need_more. When 0390 this happens, the caller should place additional bytes into 0391 the buffer sequence and call @ref put again. 0392 0393 The error code @ref error::need_more is special. When this 0394 error is returned, a subsequent call to @ref put may succeed 0395 if the buffers have been updated. Otherwise, upon error 0396 the parser may not be restarted. 0397 0398 @param buffers An object meeting the requirements of 0399 <em>ConstBufferSequence</em> that represents the next chunk of 0400 message data. If the length of this buffer sequence is 0401 one, the implementation will not allocate additional memory. 0402 The class @ref beast::basic_flat_buffer is provided as one way to 0403 meet this requirement 0404 0405 @param ec Set to the error, if any occurred. 0406 0407 @return The number of octets consumed in the buffer 0408 sequence. The caller should remove these octets even if the 0409 error is set. 0410 */ 0411 template<class ConstBufferSequence> 0412 std::size_t 0413 put(ConstBufferSequence const& buffers, error_code& ec); 0414 0415 #if ! BOOST_BEAST_DOXYGEN 0416 std::size_t 0417 put(net::const_buffer buffer, 0418 error_code& ec); 0419 #endif 0420 0421 /** Inform the parser that the end of stream was reached. 0422 0423 In certain cases, HTTP needs to know where the end of 0424 the stream is. For example, sometimes servers send 0425 responses without Content-Length and expect the client 0426 to consume input (for the body) until EOF. Callbacks 0427 and errors will still be processed as usual. 0428 0429 This is typically called when a read from the 0430 underlying stream object sets the error code to 0431 `net::error::eof`. 0432 0433 @note Only valid after parsing a complete header. 0434 0435 @param ec Set to the error, if any occurred. 0436 */ 0437 void 0438 put_eof(error_code& ec); 0439 0440 protected: 0441 /** Called after receiving the request-line. 0442 0443 This virtual function is invoked after receiving a request-line 0444 when parsing HTTP requests. 0445 It can only be called when `isRequest == true`. 0446 0447 @param method The verb enumeration. If the method string is not 0448 one of the predefined strings, this value will be @ref verb::unknown. 0449 0450 @param method_str The unmodified string representing the verb. 0451 0452 @param target The request-target. 0453 0454 @param version The HTTP-version. This will be 10 for HTTP/1.0, 0455 and 11 for HTTP/1.1. 0456 0457 @param ec An output parameter which the function may set to indicate 0458 an error. The error will be clear before this function is invoked. 0459 */ 0460 virtual 0461 void 0462 on_request_impl( 0463 verb method, 0464 string_view method_str, 0465 string_view target, 0466 int version, 0467 error_code& ec) = 0; 0468 0469 /** Called after receiving the status-line. 0470 0471 This virtual function is invoked after receiving a status-line 0472 when parsing HTTP responses. 0473 It can only be called when `isRequest == false`. 0474 0475 @param code The numeric status code. 0476 0477 @param reason The reason-phrase. Note that this value is 0478 now obsolete, and only provided for historical or diagnostic 0479 purposes. 0480 0481 @param version The HTTP-version. This will be 10 for HTTP/1.0, 0482 and 11 for HTTP/1.1. 0483 0484 @param ec An output parameter which the function may set to indicate 0485 an error. The error will be clear before this function is invoked. 0486 */ 0487 virtual 0488 void 0489 on_response_impl( 0490 int code, 0491 string_view reason, 0492 int version, 0493 error_code& ec) = 0; 0494 0495 /** Called once for each complete field in the HTTP header. 0496 0497 This virtual function is invoked for each field that is received 0498 while parsing an HTTP message. 0499 0500 @param name The known field enum value. If the name of the field 0501 is not recognized, this value will be @ref field::unknown. 0502 0503 @param name_string The exact name of the field as received from 0504 the input, represented as a string. 0505 0506 @param value A string holding the value of the field. 0507 0508 @param ec An output parameter which the function may set to indicate 0509 an error. The error will be clear before this function is invoked. 0510 */ 0511 virtual 0512 void 0513 on_field_impl( 0514 field name, 0515 string_view name_string, 0516 string_view value, 0517 error_code& ec) = 0; 0518 0519 /** Called once after the complete HTTP header is received. 0520 0521 This virtual function is invoked once, after the complete HTTP 0522 header is received while parsing a message. 0523 0524 @param ec An output parameter which the function may set to indicate 0525 an error. The error will be clear before this function is invoked. 0526 */ 0527 virtual 0528 void 0529 on_header_impl(error_code& ec) = 0; 0530 0531 /** Called once before the body is processed. 0532 0533 This virtual function is invoked once, before the content body is 0534 processed (but after the complete header is received). 0535 0536 @param content_length A value representing the content length in 0537 bytes if the length is known (this can include a zero length). 0538 Otherwise, the value will be `boost::none`. 0539 0540 @param ec An output parameter which the function may set to indicate 0541 an error. The error will be clear before this function is invoked. 0542 */ 0543 virtual 0544 void 0545 on_body_init_impl( 0546 boost::optional<std::uint64_t> const& content_length, 0547 error_code& ec) = 0; 0548 0549 /** Called each time additional data is received representing the content body. 0550 0551 This virtual function is invoked for each piece of the body which is 0552 received while parsing of a message. This function is only used when 0553 no chunked transfer encoding is present. 0554 0555 @param body A string holding the additional body contents. This may 0556 contain nulls or unprintable characters. 0557 0558 @param ec An output parameter which the function may set to indicate 0559 an error. The error will be clear before this function is invoked. 0560 0561 @see on_chunk_body_impl 0562 */ 0563 virtual 0564 std::size_t 0565 on_body_impl( 0566 string_view body, 0567 error_code& ec) = 0; 0568 0569 /** Called each time a new chunk header of a chunk encoded body is received. 0570 0571 This function is invoked each time a new chunk header is received. 0572 The function is only used when the chunked transfer encoding is present. 0573 0574 @param size The size of this chunk, in bytes. 0575 0576 @param extensions A string containing the entire chunk extensions. 0577 This may be empty, indicating no extensions are present. 0578 0579 @param ec An output parameter which the function may set to indicate 0580 an error. The error will be clear before this function is invoked. 0581 */ 0582 virtual 0583 void 0584 on_chunk_header_impl( 0585 std::uint64_t size, 0586 string_view extensions, 0587 error_code& ec) = 0; 0588 0589 /** Called each time additional data is received representing part of a body chunk. 0590 0591 This virtual function is invoked for each piece of the body which is 0592 received while parsing of a message. This function is only used when 0593 no chunked transfer encoding is present. 0594 0595 @param remain The number of bytes remaining in this chunk. This includes 0596 the contents of passed `body`. If this value is zero, then this represents 0597 the final chunk. 0598 0599 @param body A string holding the additional body contents. This may 0600 contain nulls or unprintable characters. 0601 0602 @param ec An output parameter which the function may set to indicate 0603 an error. The error will be clear before this function is invoked. 0604 0605 @return This function should return the number of bytes actually consumed 0606 from the `body` value. Any bytes that are not consumed on this call 0607 will be presented in a subsequent call. 0608 0609 @see on_body_impl 0610 */ 0611 virtual 0612 std::size_t 0613 on_chunk_body_impl( 0614 std::uint64_t remain, 0615 string_view body, 0616 error_code& ec) = 0; 0617 0618 /** Called once when the complete message is received. 0619 0620 This virtual function is invoked once, after successfully parsing 0621 a complete HTTP message. 0622 0623 @param ec An output parameter which the function may set to indicate 0624 an error. The error will be clear before this function is invoked. 0625 */ 0626 virtual 0627 void 0628 on_finish_impl(error_code& ec) = 0; 0629 0630 private: 0631 0632 boost::optional<std::uint64_t> 0633 content_length_unchecked() const; 0634 0635 template<class ConstBufferSequence> 0636 std::size_t 0637 put_from_stack( 0638 std::size_t size, 0639 ConstBufferSequence const& buffers, 0640 error_code& ec); 0641 0642 void 0643 inner_parse_start_line( 0644 char const*& p, char const* last, 0645 error_code& ec, std::true_type); 0646 0647 void 0648 inner_parse_start_line( 0649 char const*& p, char const* last, 0650 error_code& ec, std::false_type); 0651 0652 void 0653 parse_start_line( 0654 char const*& p, std::size_t n, 0655 error_code& ec); 0656 0657 void 0658 inner_parse_fields( 0659 char const*& p, char const* last, 0660 error_code& ec); 0661 0662 void 0663 parse_fields( 0664 char const*& p, std::size_t n, 0665 error_code& ec); 0666 0667 void 0668 finish_header( 0669 error_code& ec, std::true_type); 0670 0671 void 0672 finish_header( 0673 error_code& ec, std::false_type); 0674 0675 void 0676 parse_body(char const*& p, 0677 std::size_t n, error_code& ec); 0678 0679 void 0680 parse_body_to_eof(char const*& p, 0681 std::size_t n, error_code& ec); 0682 0683 void 0684 parse_chunk_header(char const*& p, 0685 std::size_t n, error_code& ec); 0686 0687 void 0688 parse_chunk_body(char const*& p, 0689 std::size_t n, error_code& ec); 0690 0691 void 0692 do_field(field f, 0693 string_view value, error_code& ec); 0694 }; 0695 0696 } // http 0697 } // beast 0698 } // boost 0699 0700 #include <boost/beast/http/impl/basic_parser.hpp> 0701 #ifdef BOOST_BEAST_HEADER_ONLY 0702 #include <boost/beast/http/impl/basic_parser.ipp> 0703 #endif 0704 0705 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |