|
||||
File indexing completed on 2025-01-18 09:29:31
0001 // 0002 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com) 0003 // 0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying 0005 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) 0006 // 0007 // Official repository: https://github.com/boostorg/beast 0008 // 0009 0010 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP 0011 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP 0012 0013 #include <boost/beast/core/detail/config.hpp> 0014 #include <boost/beast/core/error.hpp> 0015 #include <boost/beast/core/string.hpp> 0016 #include <boost/beast/http/field.hpp> 0017 #include <boost/beast/http/verb.hpp> 0018 #include <boost/beast/http/detail/basic_parser.hpp> 0019 #include <boost/asio/buffer.hpp> 0020 #include <boost/optional.hpp> 0021 #include <boost/assert.hpp> 0022 #include <cstdint> 0023 #include <limits> 0024 #include <memory> 0025 #include <type_traits> 0026 #include <utility> 0027 0028 namespace boost { 0029 namespace beast { 0030 namespace http { 0031 0032 /** A parser for decoding HTTP/1 wire format messages. 0033 0034 This parser is designed to efficiently parse messages in the 0035 HTTP/1 wire format. It allocates no memory when input is 0036 presented as a single contiguous buffer, and uses minimal 0037 state. It will handle chunked encoding and it understands 0038 the semantics of the Connection, Content-Length, and Upgrade 0039 fields. 0040 The parser is optimized for the case where the input buffer 0041 sequence consists of a single contiguous buffer. The 0042 @ref beast::basic_flat_buffer class is provided, which guarantees 0043 that the input sequence of the stream buffer will be represented 0044 by exactly one contiguous buffer. To ensure the optimum performance 0045 of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms 0046 such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some. 0047 Alternatively, the caller may use custom techniques to ensure that 0048 the structured portion of the HTTP message (header or chunk header) 0049 is contained in a linear buffer. 0050 0051 The interface to the parser uses virtual member functions. 0052 To use this class, derive your type from @ref basic_parser. When 0053 bytes are presented, the implementation will make a series of zero 0054 or more calls to virtual functions, which the derived class must 0055 implement. 0056 0057 Every virtual function must be provided by the derived class, 0058 or else a compilation error will be generated. The implementation 0059 will make sure that `ec` is clear before each virtual function 0060 is invoked. If a virtual function sets an error, it is propagated 0061 out of the parser to the caller. 0062 0063 @tparam isRequest A `bool` indicating whether the parser will be 0064 presented with request or response message. 0065 0066 @note If the parser encounters a field value with obs-fold 0067 longer than 4 kilobytes in length, an error is generated. 0068 */ 0069 template<bool isRequest> 0070 class basic_parser 0071 : private detail::basic_parser_base 0072 { 0073 boost::optional<std::uint64_t> 0074 body_limit_ = 0075 boost::optional<std::uint64_t>( 0076 default_body_limit(is_request{})); // max payload body 0077 std::uint64_t len_ = 0; // size of chunk or body 0078 std::uint64_t len0_ = 0; // content length if known 0079 std::unique_ptr<char[]> buf_; // temp storage 0080 std::size_t buf_len_ = 0; // size of buf_ 0081 std::size_t skip_ = 0; // resume search here 0082 std::uint32_t header_limit_ = 8192; // max header size 0083 unsigned short status_ = 0; // response status 0084 state state_ = state::nothing_yet; // initial state 0085 unsigned f_ = 0; // flags 0086 0087 // limit on the size of the stack flat buffer 0088 static std::size_t constexpr max_stack_buffer = 8192; 0089 0090 // Message will be complete after reading header 0091 static unsigned constexpr flagSkipBody = 1<< 0; 0092 0093 // Consume input buffers across semantic boundaries 0094 static unsigned constexpr flagEager = 1<< 1; 0095 0096 // The parser has read at least one byte 0097 static unsigned constexpr flagGotSome = 1<< 2; 0098 0099 // Message semantics indicate a body is expected. 0100 // cleared if flagSkipBody set 0101 // 0102 static unsigned constexpr flagHasBody = 1<< 3; 0103 0104 static unsigned constexpr flagHTTP11 = 1<< 4; 0105 static unsigned constexpr flagNeedEOF = 1<< 5; 0106 static unsigned constexpr flagExpectCRLF = 1<< 6; 0107 static unsigned constexpr flagConnectionClose = 1<< 7; 0108 static unsigned constexpr flagConnectionUpgrade = 1<< 8; 0109 static unsigned constexpr flagConnectionKeepAlive = 1<< 9; 0110 static unsigned constexpr flagContentLength = 1<< 10; 0111 static unsigned constexpr flagChunked = 1<< 11; 0112 static unsigned constexpr flagUpgrade = 1<< 12; 0113 static unsigned constexpr flagFinalChunk = 1<< 13; 0114 0115 static constexpr 0116 std::uint64_t 0117 default_body_limit(std::true_type) 0118 { 0119 // limit for requests 0120 return 1 * 1024 * 1024; // 1MB 0121 } 0122 0123 static constexpr 0124 std::uint64_t 0125 default_body_limit(std::false_type) 0126 { 0127 // limit for responses 0128 return 8 * 1024 * 1024; // 8MB 0129 } 0130 0131 template<bool OtherIsRequest> 0132 friend class basic_parser; 0133 0134 #ifndef BOOST_BEAST_DOXYGEN 0135 friend class basic_parser_test; 0136 #endif 0137 0138 protected: 0139 /// Default constructor 0140 basic_parser() = default; 0141 0142 /** Move constructor 0143 0144 @note 0145 0146 After the move, the only valid operation on the 0147 moved-from object is destruction. 0148 */ 0149 basic_parser(basic_parser &&) = default; 0150 0151 /// Move assignment 0152 basic_parser& operator=(basic_parser &&) = default; 0153 0154 public: 0155 /// `true` if this parser parses requests, `false` for responses. 0156 using is_request = 0157 std::integral_constant<bool, isRequest>; 0158 0159 /// Destructor 0160 virtual ~basic_parser() = default; 0161 0162 /// Copy constructor 0163 basic_parser(basic_parser const&) = delete; 0164 0165 /// Copy assignment 0166 basic_parser& operator=(basic_parser const&) = delete; 0167 0168 /// Returns `true` if the parser has received at least one byte of input. 0169 bool 0170 got_some() const 0171 { 0172 return state_ != state::nothing_yet; 0173 } 0174 0175 /** Returns `true` if the message is complete. 0176 0177 The message is complete after the full header is prduced 0178 and one of the following is true: 0179 0180 @li The skip body option was set. 0181 0182 @li The semantics of the message indicate there is no body. 0183 0184 @li The semantics of the message indicate a body is expected, 0185 and the entire body was parsed. 0186 */ 0187 bool 0188 is_done() const 0189 { 0190 return state_ == state::complete; 0191 } 0192 0193 /** Returns `true` if a the parser has produced the full header. 0194 */ 0195 bool 0196 is_header_done() const 0197 { 0198 return state_ > state::fields; 0199 } 0200 0201 /** Returns `true` if the message is an upgrade message. 0202 0203 @note The return value is undefined unless 0204 @ref is_header_done would return `true`. 0205 */ 0206 bool 0207 upgrade() const 0208 { 0209 return (f_ & flagConnectionUpgrade) != 0; 0210 } 0211 0212 /** Returns `true` if the last value for Transfer-Encoding is "chunked". 0213 0214 @note The return value is undefined unless 0215 @ref is_header_done would return `true`. 0216 */ 0217 bool 0218 chunked() const 0219 { 0220 return (f_ & flagChunked) != 0; 0221 } 0222 0223 /** Returns `true` if the message has keep-alive connection semantics. 0224 0225 This function always returns `false` if @ref need_eof would return 0226 `false`. 0227 0228 @note The return value is undefined unless 0229 @ref is_header_done would return `true`. 0230 */ 0231 bool 0232 keep_alive() const; 0233 0234 /** Returns the optional value of Content-Length if known. 0235 0236 @note The return value is undefined unless 0237 @ref is_header_done would return `true`. 0238 */ 0239 boost::optional<std::uint64_t> 0240 content_length() const; 0241 0242 /** Returns the remaining content length if known 0243 0244 If the message header specifies a Content-Length, 0245 the return value will be the number of bytes remaining 0246 in the payload body have not yet been parsed. 0247 0248 @note The return value is undefined unless 0249 @ref is_header_done would return `true`. 0250 */ 0251 boost::optional<std::uint64_t> 0252 content_length_remaining() const; 0253 0254 /** Returns `true` if the message semantics require an end of file. 0255 0256 Depending on the contents of the header, the parser may 0257 require and end of file notification to know where the end 0258 of the body lies. If this function returns `true` it will be 0259 necessary to call @ref put_eof when there will never be additional 0260 data from the input. 0261 */ 0262 bool 0263 need_eof() const 0264 { 0265 return (f_ & flagNeedEOF) != 0; 0266 } 0267 0268 /** Set the limit on the payload body. 0269 0270 This function sets the maximum allowed size of the payload body, 0271 before any encodings except chunked have been removed. Depending 0272 on the message semantics, one of these cases will apply: 0273 0274 @li The Content-Length is specified and exceeds the limit. In 0275 this case the result @ref error::body_limit is returned 0276 immediately after the header is parsed. 0277 0278 @li The Content-Length is unspecified and the chunked encoding 0279 is not specified as the last encoding. In this case the end of 0280 message is determined by the end of file indicator on the 0281 associated stream or input source. If a sufficient number of 0282 body payload octets are presented to the parser to exceed the 0283 configured limit, the parse fails with the result 0284 @ref error::body_limit 0285 0286 @li The Transfer-Encoding specifies the chunked encoding as the 0287 last encoding. In this case, when the number of payload body 0288 octets produced by removing the chunked encoding exceeds 0289 the configured limit, the parse fails with the result 0290 @ref error::body_limit. 0291 0292 Setting the limit after any body octets have been parsed 0293 results in undefined behavior. 0294 0295 The default limit is 1MB for requests and 8MB for responses. 0296 0297 @param v An optional integral value representing the body limit. 0298 If this is equal to `boost::none`, then the body limit is disabled. 0299 */ 0300 void 0301 body_limit(boost::optional<std::uint64_t> v) 0302 { 0303 body_limit_ = v; 0304 } 0305 0306 /** Set a limit on the total size of the header. 0307 0308 This function sets the maximum allowed size of the header 0309 including all field name, value, and delimiter characters 0310 and also including the CRLF sequences in the serialized 0311 input. If the end of the header is not found within the 0312 limit of the header size, the error @ref error::header_limit 0313 is returned by @ref put. 0314 0315 Setting the limit after any header octets have been parsed 0316 results in undefined behavior. 0317 */ 0318 void 0319 header_limit(std::uint32_t v) 0320 { 0321 header_limit_ = v; 0322 } 0323 0324 /// Returns `true` if the eager parse option is set. 0325 bool 0326 eager() const 0327 { 0328 return (f_ & flagEager) != 0; 0329 } 0330 0331 /** Set the eager parse option. 0332 0333 Normally the parser returns after successfully parsing a structured 0334 element (header, chunk header, or chunk body) even if there are octets 0335 remaining in the input. This is necessary when attempting to parse the 0336 header first, or when the caller wants to inspect information which may 0337 be invalidated by subsequent parsing, such as a chunk extension. The 0338 `eager` option controls whether the parser keeps going after parsing 0339 structured element if there are octets remaining in the buffer and no 0340 error occurs. This option is automatically set or cleared during certain 0341 stream operations to improve performance with no change in functionality. 0342 0343 The default setting is `false`. 0344 0345 @param v `true` to set the eager parse option or `false` to disable it. 0346 */ 0347 void 0348 eager(bool v) 0349 { 0350 if(v) 0351 f_ |= flagEager; 0352 else 0353 f_ &= ~flagEager; 0354 } 0355 0356 /// Returns `true` if the skip parse option is set. 0357 bool 0358 skip() const 0359 { 0360 return (f_ & flagSkipBody) != 0; 0361 } 0362 0363 /** Set the skip parse option. 0364 0365 This option controls whether or not the parser expects to see an HTTP 0366 body, regardless of the presence or absence of certain fields such as 0367 Content-Length or a chunked Transfer-Encoding. Depending on the request, 0368 some responses do not carry a body. For example, a 200 response to a 0369 CONNECT request from a tunneling proxy, or a response to a HEAD request. 0370 In these cases, callers may use this function inform the parser that 0371 no body is expected. The parser will consider the message complete 0372 after the header has been received. 0373 0374 @param v `true` to set the skip body option or `false` to disable it. 0375 0376 @note This function must called before any bytes are processed. 0377 */ 0378 void 0379 skip(bool v); 0380 0381 /** Write a buffer sequence to the parser. 0382 0383 This function attempts to incrementally parse the HTTP 0384 message data stored in the caller provided buffers. Upon 0385 success, a positive return value indicates that the parser 0386 made forward progress, consuming that number of 0387 bytes. 0388 0389 In some cases there may be an insufficient number of octets 0390 in the input buffer in order to make forward progress. This 0391 is indicated by the code @ref error::need_more. When 0392 this happens, the caller should place additional bytes into 0393 the buffer sequence and call @ref put again. 0394 0395 The error code @ref error::need_more is special. When this 0396 error is returned, a subsequent call to @ref put may succeed 0397 if the buffers have been updated. Otherwise, upon error 0398 the parser may not be restarted. 0399 0400 @param buffers An object meeting the requirements of 0401 <em>ConstBufferSequence</em> that represents the next chunk of 0402 message data. If the length of this buffer sequence is 0403 one, the implementation will not allocate additional memory. 0404 The class @ref beast::basic_flat_buffer is provided as one way to 0405 meet this requirement 0406 0407 @param ec Set to the error, if any occurred. 0408 0409 @return The number of octets consumed in the buffer 0410 sequence. The caller should remove these octets even if the 0411 error is set. 0412 */ 0413 template<class ConstBufferSequence> 0414 std::size_t 0415 put(ConstBufferSequence const& buffers, error_code& ec); 0416 0417 #if ! BOOST_BEAST_DOXYGEN 0418 std::size_t 0419 put(net::const_buffer buffer, 0420 error_code& ec); 0421 #endif 0422 0423 /** Inform the parser that the end of stream was reached. 0424 0425 In certain cases, HTTP needs to know where the end of 0426 the stream is. For example, sometimes servers send 0427 responses without Content-Length and expect the client 0428 to consume input (for the body) until EOF. Callbacks 0429 and errors will still be processed as usual. 0430 0431 This is typically called when a read from the 0432 underlying stream object sets the error code to 0433 `net::error::eof`. 0434 0435 @note Only valid after parsing a complete header. 0436 0437 @param ec Set to the error, if any occurred. 0438 */ 0439 void 0440 put_eof(error_code& ec); 0441 0442 protected: 0443 /** Called after receiving the request-line. 0444 0445 This virtual function is invoked after receiving a request-line 0446 when parsing HTTP requests. 0447 It can only be called when `isRequest == true`. 0448 0449 @param method The verb enumeration. If the method string is not 0450 one of the predefined strings, this value will be @ref verb::unknown. 0451 0452 @param method_str The unmodified string representing the verb. 0453 0454 @param target The request-target. 0455 0456 @param version The HTTP-version. This will be 10 for HTTP/1.0, 0457 and 11 for HTTP/1.1. 0458 0459 @param ec An output parameter which the function may set to indicate 0460 an error. The error will be clear before this function is invoked. 0461 */ 0462 virtual 0463 void 0464 on_request_impl( 0465 verb method, 0466 string_view method_str, 0467 string_view target, 0468 int version, 0469 error_code& ec) = 0; 0470 0471 /** Called after receiving the status-line. 0472 0473 This virtual function is invoked after receiving a status-line 0474 when parsing HTTP responses. 0475 It can only be called when `isRequest == false`. 0476 0477 @param code The numeric status code. 0478 0479 @param reason The reason-phrase. Note that this value is 0480 now obsolete, and only provided for historical or diagnostic 0481 purposes. 0482 0483 @param version The HTTP-version. This will be 10 for HTTP/1.0, 0484 and 11 for HTTP/1.1. 0485 0486 @param ec An output parameter which the function may set to indicate 0487 an error. The error will be clear before this function is invoked. 0488 */ 0489 virtual 0490 void 0491 on_response_impl( 0492 int code, 0493 string_view reason, 0494 int version, 0495 error_code& ec) = 0; 0496 0497 /** Called once for each complete field in the HTTP header. 0498 0499 This virtual function is invoked for each field that is received 0500 while parsing an HTTP message. 0501 0502 @param name The known field enum value. If the name of the field 0503 is not recognized, this value will be @ref field::unknown. 0504 0505 @param name_string The exact name of the field as received from 0506 the input, represented as a string. 0507 0508 @param value A string holding the value of the field. 0509 0510 @param ec An output parameter which the function may set to indicate 0511 an error. The error will be clear before this function is invoked. 0512 */ 0513 virtual 0514 void 0515 on_field_impl( 0516 field name, 0517 string_view name_string, 0518 string_view value, 0519 error_code& ec) = 0; 0520 0521 /** Called once after the complete HTTP header is received. 0522 0523 This virtual function is invoked once, after the complete HTTP 0524 header is received while parsing a message. 0525 0526 @param ec An output parameter which the function may set to indicate 0527 an error. The error will be clear before this function is invoked. 0528 */ 0529 virtual 0530 void 0531 on_header_impl(error_code& ec) = 0; 0532 0533 /** Called once before the body is processed. 0534 0535 This virtual function is invoked once, before the content body is 0536 processed (but after the complete header is received). 0537 0538 @param content_length A value representing the content length in 0539 bytes if the length is known (this can include a zero length). 0540 Otherwise, the value will be `boost::none`. 0541 0542 @param ec An output parameter which the function may set to indicate 0543 an error. The error will be clear before this function is invoked. 0544 */ 0545 virtual 0546 void 0547 on_body_init_impl( 0548 boost::optional<std::uint64_t> const& content_length, 0549 error_code& ec) = 0; 0550 0551 /** Called each time additional data is received representing the content body. 0552 0553 This virtual function is invoked for each piece of the body which is 0554 received while parsing of a message. This function is only used when 0555 no chunked transfer encoding is present. 0556 0557 @param body A string holding the additional body contents. This may 0558 contain nulls or unprintable characters. 0559 0560 @param ec An output parameter which the function may set to indicate 0561 an error. The error will be clear before this function is invoked. 0562 0563 @see on_chunk_body_impl 0564 */ 0565 virtual 0566 std::size_t 0567 on_body_impl( 0568 string_view body, 0569 error_code& ec) = 0; 0570 0571 /** Called each time a new chunk header of a chunk encoded body is received. 0572 0573 This function is invoked each time a new chunk header is received. 0574 The function is only used when the chunked transfer encoding is present. 0575 0576 @param size The size of this chunk, in bytes. 0577 0578 @param extensions A string containing the entire chunk extensions. 0579 This may be empty, indicating no extensions are present. 0580 0581 @param ec An output parameter which the function may set to indicate 0582 an error. The error will be clear before this function is invoked. 0583 */ 0584 virtual 0585 void 0586 on_chunk_header_impl( 0587 std::uint64_t size, 0588 string_view extensions, 0589 error_code& ec) = 0; 0590 0591 /** Called each time additional data is received representing part of a body chunk. 0592 0593 This virtual function is invoked for each piece of the body which is 0594 received while parsing of a message. This function is only used when 0595 no chunked transfer encoding is present. 0596 0597 @param remain The number of bytes remaining in this chunk. This includes 0598 the contents of passed `body`. If this value is zero, then this represents 0599 the final chunk. 0600 0601 @param body A string holding the additional body contents. This may 0602 contain nulls or unprintable characters. 0603 0604 @param ec An output parameter which the function may set to indicate 0605 an error. The error will be clear before this function is invoked. 0606 0607 @return This function should return the number of bytes actually consumed 0608 from the `body` value. Any bytes that are not consumed on this call 0609 will be presented in a subsequent call. 0610 0611 @see on_body_impl 0612 */ 0613 virtual 0614 std::size_t 0615 on_chunk_body_impl( 0616 std::uint64_t remain, 0617 string_view body, 0618 error_code& ec) = 0; 0619 0620 /** Called once when the complete message is received. 0621 0622 This virtual function is invoked once, after successfully parsing 0623 a complete HTTP message. 0624 0625 @param ec An output parameter which the function may set to indicate 0626 an error. The error will be clear before this function is invoked. 0627 */ 0628 virtual 0629 void 0630 on_finish_impl(error_code& ec) = 0; 0631 0632 private: 0633 0634 boost::optional<std::uint64_t> 0635 content_length_unchecked() const; 0636 0637 template<class ConstBufferSequence> 0638 std::size_t 0639 put_from_stack( 0640 std::size_t size, 0641 ConstBufferSequence const& buffers, 0642 error_code& ec); 0643 0644 void 0645 maybe_need_more( 0646 char const* p, std::size_t n, 0647 error_code& ec); 0648 0649 void 0650 parse_start_line( 0651 char const*& p, char const* last, 0652 error_code& ec, std::true_type); 0653 0654 void 0655 parse_start_line( 0656 char const*& p, char const* last, 0657 error_code& ec, std::false_type); 0658 0659 void 0660 parse_fields( 0661 char const*& p, char const* last, 0662 error_code& ec); 0663 0664 void 0665 finish_header( 0666 error_code& ec, std::true_type); 0667 0668 void 0669 finish_header( 0670 error_code& ec, std::false_type); 0671 0672 void 0673 parse_body(char const*& p, 0674 std::size_t n, error_code& ec); 0675 0676 void 0677 parse_body_to_eof(char const*& p, 0678 std::size_t n, error_code& ec); 0679 0680 void 0681 parse_chunk_header(char const*& p, 0682 std::size_t n, error_code& ec); 0683 0684 void 0685 parse_chunk_body(char const*& p, 0686 std::size_t n, error_code& ec); 0687 0688 void 0689 do_field(field f, 0690 string_view value, error_code& ec); 0691 }; 0692 0693 } // http 0694 } // beast 0695 } // boost 0696 0697 #include <boost/beast/http/impl/basic_parser.hpp> 0698 #ifdef BOOST_BEAST_HEADER_ONLY 0699 #include <boost/beast/http/impl/basic_parser.ipp> 0700 #endif 0701 0702 #endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |