Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-09-15 08:30:17

0001 //
0002 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 //
0007 // Official repository: https://github.com/boostorg/beast
0008 //
0009 
0010 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
0011 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
0012 
0013 #include <boost/beast/core/detail/config.hpp>
0014 #include <boost/beast/core/error.hpp>
0015 #include <boost/beast/core/string.hpp>
0016 #include <boost/beast/http/field.hpp>
0017 #include <boost/beast/http/verb.hpp>
0018 #include <boost/beast/http/detail/basic_parser.hpp>
0019 #include <boost/asio/buffer.hpp>
0020 #include <boost/optional.hpp>
0021 #include <boost/assert.hpp>
0022 #include <cstdint>
0023 #include <limits>
0024 #include <memory>
0025 #include <type_traits>
0026 #include <utility>
0027 
0028 namespace boost {
0029 namespace beast {
0030 namespace http {
0031 
0032 /** A parser for decoding HTTP/1 wire format messages.
0033 
0034     This parser is designed to efficiently parse messages in the
0035     HTTP/1 wire format. It allocates no memory when input is
0036     presented as a single contiguous buffer, and uses minimal
0037     state. It will handle chunked encoding and it understands
0038     the semantics of the Connection, Content-Length, and Upgrade
0039     fields.
0040     The parser is optimized for the case where the input buffer
0041     sequence consists of a single contiguous buffer. The
0042     @ref beast::basic_flat_buffer class is provided, which guarantees
0043     that the input sequence of the stream buffer will be represented
0044     by exactly one contiguous buffer. To ensure the optimum performance
0045     of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms
0046     such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
0047     Alternatively, the caller may use custom techniques to ensure that
0048     the structured portion of the HTTP message (header or chunk header)
0049     is contained in a linear buffer.
0050 
0051     The interface to the parser uses virtual member functions.
0052     To use this class, derive your type from @ref basic_parser. When
0053     bytes are presented, the implementation will make a series of zero
0054     or more calls to virtual functions, which the derived class must
0055     implement.
0056 
0057     Every virtual function must be provided by the derived class,
0058     or else a compilation error will be generated. The implementation
0059     will make sure that `ec` is clear before each virtual function
0060     is invoked. If a virtual function sets an error, it is propagated
0061     out of the parser to the caller.
0062 
0063     @tparam isRequest A `bool` indicating whether the parser will be
0064     presented with a request or a response message.
0065 
0066     @note If the parser encounters a field value with obs-fold
0067     longer than 4 kilobytes in length, an error is generated.
0068 */
0069 template<bool isRequest>
0070 class basic_parser
0071     : private detail::basic_parser_base
0072 {
0073     boost::optional<std::uint64_t>
0074         body_limit_ =
0075             boost::optional<std::uint64_t>(
0076                 default_body_limit(is_request{}));   // max payload body
0077     std::uint64_t len_ = 0;                 // size of chunk or body
0078     std::uint64_t len0_ = 0;                // content length if known
0079     std::unique_ptr<char[]> buf_;           // temp storage
0080     std::size_t buf_len_ = 0;               // size of buf_
0081     std::uint32_t header_limit_ = 8192;     // max header size
0082     unsigned short status_ = 0;             // response status
0083     state state_ = state::nothing_yet;      // initial state
0084     unsigned f_ = 0;                        // flags
0085 
0086     // limit on the size of the stack flat buffer
0087     static std::size_t constexpr max_stack_buffer = 8192;
0088 
0089     // Message will be complete after reading header
0090     static unsigned constexpr flagSkipBody              = 1<<  0;
0091 
0092     // Consume input buffers across semantic boundaries
0093     static unsigned constexpr flagEager                 = 1<<  1;
0094 
0095     // The parser has read at least one byte
0096     static unsigned constexpr flagGotSome               = 1<<  2;
0097 
0098     // Message semantics indicate a body is expected.
0099     // cleared if flagSkipBody set
0100     //
0101     static unsigned constexpr flagHasBody               = 1<<  3;
0102 
0103     static unsigned constexpr flagHTTP11                = 1<<  4;
0104     static unsigned constexpr flagNeedEOF               = 1<<  5; // reported by need_eof()
0105     static unsigned constexpr flagExpectCRLF            = 1<<  6;
0106     static unsigned constexpr flagConnectionClose       = 1<<  7;
0107     static unsigned constexpr flagConnectionUpgrade     = 1<<  8; // reported by upgrade()
0108     static unsigned constexpr flagConnectionKeepAlive   = 1<<  9;
0109     static unsigned constexpr flagContentLength         = 1<< 10;
0110     static unsigned constexpr flagChunked               = 1<< 11; // reported by chunked()
0111     static unsigned constexpr flagUpgrade               = 1<< 12;
0112 
0113     static constexpr
0114     std::uint64_t
0115     default_body_limit(std::true_type)
0116     {
0117         // limit for requests
0118         return 1 * 1024 * 1024; // 1MB
0119     }
0120 
0121     static constexpr
0122     std::uint64_t
0123     default_body_limit(std::false_type)
0124     {
0125         // limit for responses
0126         return 8 * 1024 * 1024; // 8MB
0127     }
0128 
0129     template<bool OtherIsRequest>
0130     friend class basic_parser;
0131 
0132 #ifndef BOOST_BEAST_DOXYGEN
0133     friend class basic_parser_test;
0134 #endif
0135 
0136 protected:
0137     /// Default constructor
0138     basic_parser() = default;
0139 
0140     /** Move constructor
0141 
0142         @note
0143 
0144         After the move, the only valid operation on the
0145         moved-from object is destruction.
0146     */
0147     basic_parser(basic_parser &&) = default;
0148 
0149     /// Move assignment
0150     basic_parser& operator=(basic_parser &&) = default;
0151 
0152 public:
0153     /// `true` if this parser parses requests, `false` for responses.
0154     using is_request =
0155         std::integral_constant<bool, isRequest>;
0156 
0157     /// Destructor
0158     virtual ~basic_parser() = default;
0159 
0160     /// Copy constructor
0161     basic_parser(basic_parser const&) = delete;
0162 
0163     /// Copy assignment
0164     basic_parser& operator=(basic_parser const&) = delete;
0165 
0166     /// Returns `true` if the parser has received at least one byte of input.
0167     bool
0168     got_some() const
0169     {
0170         return state_ != state::nothing_yet;
0171     }
0172 
0173     /** Returns `true` if the message is complete.
0174 
0175         The message is complete after the full header is produced
0176         and one of the following is true:
0177 
0178         @li The skip body option was set.
0179 
0180         @li The semantics of the message indicate there is no body.
0181 
0182         @li The semantics of the message indicate a body is expected,
0183         and the entire body was parsed.
0184     */
0185     bool
0186     is_done() const
0187     {
0188         return state_ == state::complete;
0189     }
0190 
0191     /** Returns `true` if the parser has produced the full header.
0192     */
0193     bool
0194     is_header_done() const
0195     {
0196         return state_ > state::fields;
0197     }
0198 
0199     /** Returns `true` if the message is an upgrade message.
0200 
0201         @note The return value is undefined unless
0202         @ref is_header_done would return `true`.
0203     */
0204     bool
0205     upgrade() const
0206     {
0207         return (f_ & flagConnectionUpgrade) != 0;
0208     }
0209 
0210     /** Returns `true` if the last value for Transfer-Encoding is "chunked".
0211 
0212         @note The return value is undefined unless
0213         @ref is_header_done would return `true`.
0214     */
0215     bool
0216     chunked() const
0217     {
0218         return (f_ & flagChunked) != 0;
0219     }
0220 
0221     /** Returns `true` if the message has keep-alive connection semantics.
0222 
0223         This function always returns `false` if @ref need_eof would return
0224         `false`. NOTE(review): probably meant `true` here; confirm against the implementation.
0225 
0226         @note The return value is undefined unless
0227         @ref is_header_done would return `true`.
0228     */
0229     bool
0230     keep_alive() const;
0231 
0232     /** Returns the optional value of Content-Length if known.
0233 
0234         @note The return value is undefined unless
0235         @ref is_header_done would return `true`.
0236     */
0237     boost::optional<std::uint64_t>
0238     content_length() const;
0239 
0240     /** Returns the remaining content length if known
0241 
0242         If the message header specifies a Content-Length,
0243         the return value will be the number of bytes remaining
0244         in the payload body that have not yet been parsed.
0245 
0246         @note The return value is undefined unless
0247               @ref is_header_done would return `true`.
0248     */
0249     boost::optional<std::uint64_t>
0250     content_length_remaining() const;
0251 
0252     /** Returns `true` if the message semantics require an end of file.
0253 
0254         Depending on the contents of the header, the parser may
0255         require an end of file notification to know where the end
0256         of the body lies. If this function returns `true` it will be
0257         necessary to call @ref put_eof when there will never be additional
0258         data from the input.
0259     */
0260     bool
0261     need_eof() const
0262     {
0263         return (f_ & flagNeedEOF) != 0;
0264     }
0265 
0266     /** Set the limit on the payload body.
0267 
0268         This function sets the maximum allowed size of the payload body,
0269         before any encodings except chunked have been removed. Depending
0270         on the message semantics, one of these cases will apply:
0271 
0272         @li The Content-Length is specified and exceeds the limit. In
0273         this case the result @ref error::body_limit is returned
0274         immediately after the header is parsed.
0275 
0276         @li The Content-Length is unspecified and the chunked encoding
0277         is not specified as the last encoding. In this case the end of
0278         message is determined by the end of file indicator on the
0279         associated stream or input source. If a sufficient number of
0280         body payload octets are presented to the parser to exceed the
0281         configured limit, the parse fails with the result
0282         @ref error::body_limit
0283 
0284         @li The Transfer-Encoding specifies the chunked encoding as the
0285         last encoding. In this case, when the number of payload body
0286         octets produced by removing the chunked encoding exceeds
0287         the configured limit, the parse fails with the result
0288         @ref error::body_limit.
0289
0290         Setting the limit after any body octets have been parsed
0291         results in undefined behavior.
0292 
0293         The default limit is 1MB for requests and 8MB for responses.
0294 
0295         @param v An optional integral value representing the body limit.
0296         If this is equal to `boost::none`, then the body limit is disabled.
0297     */
0298     void
0299     body_limit(boost::optional<std::uint64_t> v)
0300     {
0301         body_limit_ = v;
0302     }
0303 
0304     /** Set a limit on the total size of the header.
0305 
0306         This function sets the maximum allowed size of the header
0307         including all field name, value, and delimiter characters
0308         and also including the CRLF sequences in the serialized
0309         input. If the end of the header is not found within the
0310         limit of the header size, the error @ref error::header_limit
0311         is returned by @ref put.
0312 
0313         Setting the limit after any header octets have been parsed
0314         results in undefined behavior.
0315     */
0316     void
0317     header_limit(std::uint32_t v)
0318     {
0319         header_limit_ = v;
0320     }
0321 
0322     /// Returns `true` if the eager parse option is set.
0323     bool
0324     eager() const
0325     {
0326         return (f_ & flagEager) != 0;
0327     }
0328 
0329     /** Set the eager parse option.
0330 
0331         Normally the parser returns after successfully parsing a structured
0332         element (header, chunk header, or chunk body) even if there are octets
0333         remaining in the input. This is necessary when attempting to parse the
0334         header first, or when the caller wants to inspect information which may
0335         be invalidated by subsequent parsing, such as a chunk extension. The
0336         `eager` option controls whether the parser keeps going after parsing
0337         structured element if there are octets remaining in the buffer and no
0338         error occurs. This option is automatically set or cleared during certain
0339         stream operations to improve performance with no change in functionality.
0340 
0341         The default setting is `false`.
0342 
0343         @param v `true` to set the eager parse option or `false` to disable it.
0344     */
0345     void
0346     eager(bool v)
0347     {
0348         if(v)
0349             f_ |= flagEager;
0350         else
0351             f_ &= ~flagEager;
0352     }
0353 
0354     /// Returns `true` if the skip parse option is set.
0355     bool
0356     skip() const
0357     {
0358         return (f_ & flagSkipBody) != 0;
0359     }
0360 
0361     /** Set the skip parse option.
0362 
0363         This option controls whether or not the parser expects to see an HTTP
0364         body, regardless of the presence or absence of certain fields such as
0365         Content-Length or a chunked Transfer-Encoding. Depending on the request,
0366         some responses do not carry a body. For example, a 200 response to a
0367         CONNECT request from a tunneling proxy, or a response to a HEAD request.
0368         In these cases, callers may use this function to inform the parser that
0369         no body is expected. The parser will consider the message complete
0370         after the header has been received.
0371 
0372         @param v `true` to set the skip body option or `false` to disable it.
0373 
0374         @note This function must be called before any bytes are processed.
0375     */
0376     void
0377     skip(bool v);
0378 
0379     /** Write a buffer sequence to the parser.
0380 
0381         This function attempts to incrementally parse the HTTP
0382         message data stored in the caller provided buffers. Upon
0383         success, a positive return value indicates that the parser
0384         made forward progress, consuming that number of
0385         bytes.
0386 
0387         In some cases there may be an insufficient number of octets
0388         in the input buffer in order to make forward progress. This
0389         is indicated by the code @ref error::need_more. When
0390         this happens, the caller should place additional bytes into
0391         the buffer sequence and call @ref put again.
0392 
0393         The error code @ref error::need_more is special. When this
0394         error is returned, a subsequent call to @ref put may succeed
0395         if the buffers have been updated. Otherwise, upon error
0396         the parser may not be restarted.
0397 
0398         @param buffers An object meeting the requirements of
0399         <em>ConstBufferSequence</em> that represents the next chunk of
0400         message data. If the length of this buffer sequence is
0401         one, the implementation will not allocate additional memory.
0402         The class @ref beast::basic_flat_buffer is provided as one way to
0403         meet this requirement.
0404 
0405         @param ec Set to the error, if any occurred.
0406 
0407         @return The number of octets consumed in the buffer
0408         sequence. The caller should remove these octets even if the
0409         error is set.
0410     */
0411     template<class ConstBufferSequence>
0412     std::size_t
0413     put(ConstBufferSequence const& buffers, error_code& ec);
0414 
0415 #if ! BOOST_BEAST_DOXYGEN
0416     std::size_t
0417     put(net::const_buffer buffer,
0418         error_code& ec);
0419 #endif
0420 
0421     /** Inform the parser that the end of stream was reached.
0422 
0423         In certain cases, HTTP needs to know where the end of
0424         the stream is. For example, sometimes servers send
0425         responses without Content-Length and expect the client
0426         to consume input (for the body) until EOF. Callbacks
0427         and errors will still be processed as usual.
0428 
0429         This is typically called when a read from the
0430         underlying stream object sets the error code to
0431         `net::error::eof`.
0432 
0433         @note Only valid after parsing a complete header.
0434 
0435         @param ec Set to the error, if any occurred.
0436     */
0437     void
0438     put_eof(error_code& ec);
0439 
0440 protected:
0441     /** Called after receiving the request-line.
0442 
0443         This virtual function is invoked after receiving a request-line
0444         when parsing HTTP requests.
0445         It can only be called when `isRequest == true`.
0446 
0447         @param method The verb enumeration. If the method string is not
0448         one of the predefined strings, this value will be @ref verb::unknown.
0449 
0450         @param method_str The unmodified string representing the verb.
0451 
0452         @param target The request-target.
0453 
0454         @param version The HTTP-version. This will be 10 for HTTP/1.0,
0455         and 11 for HTTP/1.1.
0456 
0457         @param ec An output parameter which the function may set to indicate
0458         an error. The error will be clear before this function is invoked.
0459     */
0460     virtual
0461     void
0462     on_request_impl(
0463         verb method,
0464         string_view method_str,
0465         string_view target,
0466         int version,
0467         error_code& ec) = 0;
0468 
0469     /** Called after receiving the status-line.
0470 
0471         This virtual function is invoked after receiving a status-line
0472         when parsing HTTP responses.
0473         It can only be called when `isRequest == false`.
0474 
0475         @param code The numeric status code.
0476 
0477         @param reason The reason-phrase. Note that this value is
0478         now obsolete, and only provided for historical or diagnostic
0479         purposes.
0480 
0481         @param version The HTTP-version. This will be 10 for HTTP/1.0,
0482         and 11 for HTTP/1.1.
0483 
0484         @param ec An output parameter which the function may set to indicate
0485         an error. The error will be clear before this function is invoked.
0486     */
0487     virtual
0488     void
0489     on_response_impl(
0490         int code,
0491         string_view reason,
0492         int version,
0493         error_code& ec) = 0;
0494 
0495     /** Called once for each complete field in the HTTP header.
0496 
0497         This virtual function is invoked for each field that is received
0498         while parsing an HTTP message.
0499 
0500         @param name The known field enum value. If the name of the field
0501         is not recognized, this value will be @ref field::unknown.
0502 
0503         @param name_string The exact name of the field as received from
0504         the input, represented as a string.
0505 
0506         @param value A string holding the value of the field.
0507 
0508         @param ec An output parameter which the function may set to indicate
0509         an error. The error will be clear before this function is invoked.
0510     */
0511     virtual
0512     void
0513     on_field_impl(
0514         field name,
0515         string_view name_string,
0516         string_view value,
0517         error_code& ec) = 0;
0518 
0519     /** Called once after the complete HTTP header is received.
0520 
0521         This virtual function is invoked once, after the complete HTTP
0522         header is received while parsing a message.
0523 
0524         @param ec An output parameter which the function may set to indicate
0525         an error. The error will be clear before this function is invoked.
0526     */
0527     virtual
0528     void
0529     on_header_impl(error_code& ec) = 0;
0530 
0531     /** Called once before the body is processed.
0532 
0533         This virtual function is invoked once, before the content body is
0534         processed (but after the complete header is received).
0535 
0536         @param content_length A value representing the content length in
0537         bytes if the length is known (this can include a zero length).
0538         Otherwise, the value will be `boost::none`.
0539 
0540         @param ec An output parameter which the function may set to indicate
0541         an error. The error will be clear before this function is invoked.
0542     */
0543     virtual
0544     void
0545     on_body_init_impl(
0546         boost::optional<std::uint64_t> const& content_length,
0547         error_code& ec) = 0;
0548 
0549     /** Called each time additional data is received representing the content body.
0550 
0551         This virtual function is invoked for each piece of the body which is
0552         received while parsing of a message. This function is only used when
0553         no chunked transfer encoding is present.
0554 
0555         @param body A string holding the additional body contents. This may
0556         contain nulls or unprintable characters.
0557 
0558         @param ec An output parameter which the function may set to indicate
0559         an error. The error will be clear before this function is invoked.
0560 
0561         @see on_chunk_body_impl
0562     */
0563     virtual
0564     std::size_t
0565     on_body_impl(
0566         string_view body,
0567         error_code& ec) = 0;
0568 
0569     /** Called each time a new chunk header of a chunk encoded body is received.
0570 
0571         This function is invoked each time a new chunk header is received.
0572         The function is only used when the chunked transfer encoding is present.
0573 
0574         @param size The size of this chunk, in bytes.
0575 
0576         @param extensions A string containing the entire chunk extensions.
0577         This may be empty, indicating no extensions are present.
0578 
0579         @param ec An output parameter which the function may set to indicate
0580         an error. The error will be clear before this function is invoked.
0581     */
0582     virtual
0583     void
0584     on_chunk_header_impl(
0585         std::uint64_t size,
0586         string_view extensions,
0587         error_code& ec) = 0;
0588 
0589     /** Called each time additional data is received representing part of a body chunk.
0590 
0591         This virtual function is invoked for each piece of the body which is
0592         received while parsing of a message. This function is only used when
0593         the chunked transfer encoding is present.
0594 
0595         @param remain The number of bytes remaining in this chunk. This includes
0596         the contents of passed `body`. If this value is zero, then this represents
0597         the final chunk.
0598 
0599         @param body A string holding the additional body contents. This may
0600         contain nulls or unprintable characters.
0601 
0602         @param ec An output parameter which the function may set to indicate
0603         an error. The error will be clear before this function is invoked.
0604 
0605         @return This function should return the number of bytes actually consumed
0606         from the `body` value. Any bytes that are not consumed on this call
0607         will be presented in a subsequent call.
0608 
0609         @see on_body_impl
0610     */
0611     virtual
0612     std::size_t
0613     on_chunk_body_impl(
0614         std::uint64_t remain,
0615         string_view body,
0616         error_code& ec) = 0;
0617 
0618     /** Called once when the complete message is received.
0619 
0620         This virtual function is invoked once, after successfully parsing
0621         a complete HTTP message.
0622 
0623         @param ec An output parameter which the function may set to indicate
0624         an error. The error will be clear before this function is invoked.
0625     */
0626     virtual
0627     void
0628     on_finish_impl(error_code& ec) = 0;
0629 
0630 private:
0631 
0632     boost::optional<std::uint64_t>
0633     content_length_unchecked() const;
0634 
0635     template<class ConstBufferSequence>
0636     std::size_t
0637     put_from_stack(
0638         std::size_t size,
0639         ConstBufferSequence const& buffers,
0640         error_code& ec);
0641 
0642     void
0643     inner_parse_start_line(
0644         char const*& p, char const* last,
0645             error_code& ec, std::true_type);
0646 
0647     void
0648     inner_parse_start_line(
0649         char const*& p, char const* last,
0650             error_code& ec, std::false_type);
0651 
0652     void
0653     parse_start_line(
0654         char const*& p, std::size_t n,
0655             error_code& ec);
0656 
0657     void
0658     inner_parse_fields(
0659         char const*& p, char const* last,
0660             error_code& ec);
0661 
0662     void
0663     parse_fields(
0664         char const*& p, std::size_t n,
0665             error_code& ec);
0666 
0667     void
0668     finish_header(
0669         error_code& ec, std::true_type);
0670 
0671     void
0672     finish_header(
0673         error_code& ec, std::false_type);
0674 
0675     void
0676     parse_body(char const*& p,
0677         std::size_t n, error_code& ec);
0678 
0679     void
0680     parse_body_to_eof(char const*& p,
0681         std::size_t n, error_code& ec);
0682 
0683     void
0684     parse_chunk_header(char const*& p,
0685         std::size_t n, error_code& ec);
0686 
0687     void
0688     parse_chunk_body(char const*& p,
0689         std::size_t n, error_code& ec);
0690 
0691     void
0692     do_field(field f,
0693         string_view value, error_code& ec);
0694 };
0695 
0696 } // http
0697 } // beast
0698 } // boost
0699 
0700 #include <boost/beast/http/impl/basic_parser.hpp>
0701 #ifdef BOOST_BEAST_HEADER_ONLY
0702 #include <boost/beast/http/impl/basic_parser.ipp>
0703 #endif
0704 
0705 #endif