Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:29:31

0001 //
0002 // Copyright (c) 2016-2019 Vinnie Falco (vinnie dot falco at gmail dot com)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 //
0007 // Official repository: https://github.com/boostorg/beast
0008 //
0009 
0010 #ifndef BOOST_BEAST_HTTP_BASIC_PARSER_HPP
0011 #define BOOST_BEAST_HTTP_BASIC_PARSER_HPP
0012 
0013 #include <boost/beast/core/detail/config.hpp>
0014 #include <boost/beast/core/error.hpp>
0015 #include <boost/beast/core/string.hpp>
0016 #include <boost/beast/http/field.hpp>
0017 #include <boost/beast/http/verb.hpp>
0018 #include <boost/beast/http/detail/basic_parser.hpp>
0019 #include <boost/asio/buffer.hpp>
0020 #include <boost/optional.hpp>
0021 #include <boost/assert.hpp>
0022 #include <cstdint>
0023 #include <limits>
0024 #include <memory>
0025 #include <type_traits>
0026 #include <utility>
0027 
0028 namespace boost {
0029 namespace beast {
0030 namespace http {
0031 
0032 /** A parser for decoding HTTP/1 wire format messages.
0033 
0034     This parser is designed to efficiently parse messages in the
0035     HTTP/1 wire format. It allocates no memory when input is
0036     presented as a single contiguous buffer, and uses minimal
0037     state. It will handle chunked encoding and it understands
0038     the semantics of the Connection, Content-Length, and Upgrade
0039     fields.
0040     The parser is optimized for the case where the input buffer
0041     sequence consists of a single contiguous buffer. The
0042     @ref beast::basic_flat_buffer class is provided, which guarantees
0043     that the input sequence of the stream buffer will be represented
0044     by exactly one contiguous buffer. To ensure the optimum performance
0045     of the parser, use @ref beast::basic_flat_buffer with HTTP algorithms
0046     such as @ref read, @ref read_some, @ref async_read, and @ref async_read_some.
0047     Alternatively, the caller may use custom techniques to ensure that
0048     the structured portion of the HTTP message (header or chunk header)
0049     is contained in a linear buffer.
0050 
0051     The interface to the parser uses virtual member functions.
0052     To use this class, derive your type from @ref basic_parser. When
0053     bytes are presented, the implementation will make a series of zero
0054     or more calls to virtual functions, which the derived class must
0055     implement.
0056 
0057     Every virtual function must be provided by the derived class,
0058     or else a compilation error will be generated. The implementation
0059     will make sure that `ec` is clear before each virtual function
0060     is invoked. If a virtual function sets an error, it is propagated
0061     out of the parser to the caller.
0062 
0063     @tparam isRequest A `bool` indicating whether the parser will be
0064     presented with a request or response message.
0065 
0066     @note If the parser encounters a field value with obs-fold
0067     longer than 4 kilobytes in length, an error is generated.
0068 */
0069 template<bool isRequest>
0070 class basic_parser
0071     : private detail::basic_parser_base
0072 {
0073     boost::optional<std::uint64_t>
0074         body_limit_ =
0075             boost::optional<std::uint64_t>(
0076                 default_body_limit(is_request{}));   // max payload body
0077     std::uint64_t len_ = 0;                 // size of chunk or body
0078     std::uint64_t len0_ = 0;                // content length if known
0079     std::unique_ptr<char[]> buf_;           // temp storage
0080     std::size_t buf_len_ = 0;               // size of buf_
0081     std::size_t skip_ = 0;                  // resume search here
0082     std::uint32_t header_limit_ = 8192;     // max header size
0083     unsigned short status_ = 0;             // response status
0084     state state_ = state::nothing_yet;      // initial state
0085     unsigned f_ = 0;                        // flags
0086 
0087     // limit on the size of the stack flat buffer
0088     static std::size_t constexpr max_stack_buffer = 8192;
0089 
0090     // Message will be complete after reading header
0091     static unsigned constexpr flagSkipBody              = 1<<  0;
0092 
0093     // Consume input buffers across semantic boundaries
0094     static unsigned constexpr flagEager                 = 1<<  1;
0095 
0096     // The parser has read at least one byte
0097     static unsigned constexpr flagGotSome               = 1<<  2;
0098 
0099     // Message semantics indicate a body is expected.
0100     // cleared if flagSkipBody set
0101     //
0102     static unsigned constexpr flagHasBody               = 1<<  3;
0103 
0104     static unsigned constexpr flagHTTP11                = 1<<  4;
0105     static unsigned constexpr flagNeedEOF               = 1<<  5;
0106     static unsigned constexpr flagExpectCRLF            = 1<<  6;
0107     static unsigned constexpr flagConnectionClose       = 1<<  7;
0108     static unsigned constexpr flagConnectionUpgrade     = 1<<  8;
0109     static unsigned constexpr flagConnectionKeepAlive   = 1<<  9;
0110     static unsigned constexpr flagContentLength         = 1<< 10;
0111     static unsigned constexpr flagChunked               = 1<< 11;
0112     static unsigned constexpr flagUpgrade               = 1<< 12;
0113     static unsigned constexpr flagFinalChunk            = 1<< 13;
0114 
0115     static constexpr
0116     std::uint64_t
0117     default_body_limit(std::true_type)
0118     {
0119         // default cap applied to request bodies
0120         return std::uint64_t{1} << 20; // 1MB
0121     }
0122 
0123     static constexpr
0124     std::uint64_t
0125     default_body_limit(std::false_type)
0126     {
0127         // default cap applied to response bodies
0128         return std::uint64_t{8} << 20; // 8MB
0129     }
0130 
0131     template<bool OtherIsRequest>
0132     friend class basic_parser;
0133 
0134 #ifndef BOOST_BEAST_DOXYGEN
0135     friend class basic_parser_test;
0136 #endif
0137 
0138 protected:
0139     /// Default constructor
0140     basic_parser() = default;
0141 
0142     /** Move constructor
0143 
0144         @note
0145 
0146         After the move, the only valid operation on the
0147         moved-from object is destruction.
0148     */
0149     basic_parser(basic_parser &&) = default;
0150 
0151     /// Move assignment
0152     basic_parser& operator=(basic_parser &&) = default;
0153 
0154 public:
0155     /// `true` if this parser parses requests, `false` for responses.
0156     using is_request =
0157         std::integral_constant<bool, isRequest>;
0158 
0159     /// Destructor
0160     virtual ~basic_parser() = default;
0161 
0162     /// Copy constructor
0163     basic_parser(basic_parser const&) = delete;
0164 
0165     /// Copy assignment
0166     basic_parser& operator=(basic_parser const&) = delete;
0167 
0168     /// Returns `true` if the parser has received at least one byte of input.
0169     bool
0170     got_some() const
0171     {
0172         return !(state_ == state::nothing_yet); // any state other than the initial one
0173     }
0174 
0175     /** Returns `true` if the message is complete.
0176 
0177         The message is complete after the full header is produced
0178         and one of the following is true:
0179 
0180         @li The skip body option was set.
0181 
0182         @li The semantics of the message indicate there is no body.
0183 
0184         @li The semantics of the message indicate a body is expected,
0185         and the entire body was parsed.
0186     */
0187     bool
0188     is_done() const
0189     {
0190         return state::complete == state_; // parser reached its terminal state
0191     }
0192 
0193     /** Returns `true` if the parser has produced the full header.
0194     */
0195     bool
0196     is_header_done() const
0197     {
0198         return state::fields < state_; // strictly past the fields state
0199     }
0200 
0201     /** Returns `true` if the message is an upgrade message.
0202 
0203         @note The return value is undefined unless
0204         @ref is_header_done would return `true`.
0205     */
0206     bool
0207     upgrade() const
0208     {
0209         return 0 != (f_ & flagConnectionUpgrade); // test the upgrade bit in the flag word
0210     }
0211 
0212     /** Returns `true` if the last value for Transfer-Encoding is "chunked".
0213 
0214         @note The return value is undefined unless
0215         @ref is_header_done would return `true`.
0216     */
0217     bool
0218     chunked() const
0219     {
0220         return 0 != (f_ & flagChunked); // test the chunked bit in the flag word
0221     }
0222 
0223     /** Returns `true` if the message has keep-alive connection semantics.
0224 
0225         This function always returns `false` if @ref need_eof would return
0226         `true`.
0227 
0228         @note The return value is undefined unless
0229         @ref is_header_done would return `true`.
0230     */
0231     bool
0232     keep_alive() const;
0233 
0234     /** Returns the optional value of Content-Length if known.
0235 
0236         @note The return value is undefined unless
0237         @ref is_header_done would return `true`.
0238     */
0239     boost::optional<std::uint64_t>
0240     content_length() const;
0241 
0242     /** Returns the remaining content length if known
0243 
0244         If the message header specifies a Content-Length,
0245         the return value will be the number of bytes remaining
0246         in the payload body that have not yet been parsed.
0247 
0248         @note The return value is undefined unless
0249               @ref is_header_done would return `true`.
0250     */
0251     boost::optional<std::uint64_t>
0252     content_length_remaining() const;
0253 
0254     /** Returns `true` if the message semantics require an end of file.
0255 
0256         Depending on the contents of the header, the parser may
0257         require an end of file notification to know where the end
0258         of the body lies. If this function returns `true` it will be
0259         necessary to call @ref put_eof when there will never be additional
0260         data from the input.
0261     */
0262     bool
0263     need_eof() const
0264     {
0265         return 0 != (f_ & flagNeedEOF); // test the need-EOF bit in the flag word
0266     }
0267 
0268     /** Set the limit on the payload body.
0269 
0270         This function sets the maximum allowed size of the payload body,
0271         before any encodings except chunked have been removed. Depending
0272         on the message semantics, one of these cases will apply:
0273 
0274         @li The Content-Length is specified and exceeds the limit. In
0275         this case the result @ref error::body_limit is returned
0276         immediately after the header is parsed.
0277 
0278         @li The Content-Length is unspecified and the chunked encoding
0279         is not specified as the last encoding. In this case the end of
0280         message is determined by the end of file indicator on the
0281         associated stream or input source. If a sufficient number of
0282         body payload octets are presented to the parser to exceed the
0283         configured limit, the parse fails with the result
0284         @ref error::body_limit.
0285 
0286         @li The Transfer-Encoding specifies the chunked encoding as the
0287         last encoding. In this case, when the number of payload body
0288         octets produced by removing the chunked encoding exceeds
0289         the configured limit, the parse fails with the result
0290         @ref error::body_limit.
0291         
0292         Setting the limit after any body octets have been parsed
0293         results in undefined behavior.
0294 
0295         The default limit is 1MB for requests and 8MB for responses.
0296 
0297         @param v An optional integral value representing the body limit.
0298         If this is equal to `boost::none`, then the body limit is disabled.
0299     */
0300     void
0301     body_limit(boost::optional<std::uint64_t> v)
0302     {
0303         this->body_limit_ = v; // an empty optional stores "no limit"
0304     }
0305 
0306     /** Set a limit on the total size of the header.
0307 
0308         This function sets the maximum allowed size of the header
0309         including all field name, value, and delimiter characters
0310         and also including the CRLF sequences in the serialized
0311         input. If the end of the header is not found within the
0312         limit of the header size, the error @ref error::header_limit
0313         is returned by @ref put.
0314 
0315         Setting the limit after any header octets have been parsed
0316         results in undefined behavior.
0317     */
0318     void
0319     header_limit(std::uint32_t v)
0320     {
0321         this->header_limit_ = v; // replaces the stored maximum header size
0322     }
0323 
0324     /// Returns `true` if the eager parse option is set.
0325     bool
0326     eager() const
0327     {
0328         return 0 != (f_ & flagEager); // test the eager bit in the flag word
0329     }
0330 
0331     /** Set the eager parse option.
0332 
0333         Normally the parser returns after successfully parsing a structured
0334         element (header, chunk header, or chunk body) even if there are octets
0335         remaining in the input. This is necessary when attempting to parse the
0336         header first, or when the caller wants to inspect information which may
0337         be invalidated by subsequent parsing, such as a chunk extension. The
0338         `eager` option controls whether the parser keeps going after parsing
0339         a structured element if there are octets remaining in the buffer and no
0340         error occurs. This option is automatically set or cleared during certain
0341         stream operations to improve performance with no change in functionality.
0342 
0343         The default setting is `false`.
0344 
0345         @param v `true` to set the eager parse option or `false` to disable it.
0346     */
0347     void
0348     eager(bool v)
0349     {
0350         if(! v)
0351             f_ &= ~flagEager;   // clear the eager bit
0352         else
0353             f_ |= flagEager;    // set the eager bit
0354     }
0355 
0356     /// Returns `true` if the skip parse option is set.
0357     bool
0358     skip() const
0359     {
0360         return 0 != (f_ & flagSkipBody); // test the skip-body bit in the flag word
0361     }
0362 
0363     /** Set the skip parse option.
0364 
0365         This option controls whether or not the parser expects to see an HTTP
0366         body, regardless of the presence or absence of certain fields such as
0367         Content-Length or a chunked Transfer-Encoding. Depending on the request,
0368         some responses do not carry a body. For example, a 200 response to a
0369         CONNECT request from a tunneling proxy, or a response to a HEAD request.
0370         In these cases, callers may use this function to inform the parser that
0371         no body is expected. The parser will consider the message complete
0372         after the header has been received.
0373 
0374         @param v `true` to set the skip body option or `false` to disable it.
0375 
0376         @note This function must be called before any bytes are processed.
0377     */
0378     void
0379     skip(bool v);
0380 
0381     /** Write a buffer sequence to the parser.
0382 
0383         This function attempts to incrementally parse the HTTP
0384         message data stored in the caller provided buffers. Upon
0385         success, a positive return value indicates that the parser
0386         made forward progress, consuming that number of
0387         bytes.
0388 
0389         In some cases there may be an insufficient number of octets
0390         in the input buffer in order to make forward progress. This
0391         is indicated by the code @ref error::need_more. When
0392         this happens, the caller should place additional bytes into
0393         the buffer sequence and call @ref put again.
0394 
0395         The error code @ref error::need_more is special. When this
0396         error is returned, a subsequent call to @ref put may succeed
0397         if the buffers have been updated. Otherwise, upon error
0398         the parser may not be restarted.
0399 
0400         @param buffers An object meeting the requirements of
0401         <em>ConstBufferSequence</em> that represents the next chunk of
0402         message data. If the length of this buffer sequence is
0403         one, the implementation will not allocate additional memory.
0404         The class @ref beast::basic_flat_buffer is provided as one way to
0405         meet this requirement.
0406 
0407         @param ec Set to the error, if any occurred.
0408 
0409         @return The number of octets consumed in the buffer
0410         sequence. The caller should remove these octets even if the
0411         error is set.
0412     */
0413     template<class ConstBufferSequence>
0414     std::size_t
0415     put(ConstBufferSequence const& buffers, error_code& ec);
0416 
0417 #if ! BOOST_BEAST_DOXYGEN
0418     std::size_t
0419     put(net::const_buffer buffer,
0420         error_code& ec);
0421 #endif
0422 
0423     /** Inform the parser that the end of stream was reached.
0424 
0425         In certain cases, HTTP needs to know where the end of
0426         the stream is. For example, sometimes servers send
0427         responses without Content-Length and expect the client
0428         to consume input (for the body) until EOF. Callbacks
0429         and errors will still be processed as usual.
0430 
0431         This is typically called when a read from the
0432         underlying stream object sets the error code to
0433         `net::error::eof`.
0434 
0435         @note Only valid after parsing a complete header.
0436 
0437         @param ec Set to the error, if any occurred. 
0438     */
0439     void
0440     put_eof(error_code& ec);
0441 
0442 protected:
0443     /** Called after receiving the request-line.
0444 
0445         This virtual function is invoked after receiving a request-line
0446         when parsing HTTP requests.
0447         It can only be called when `isRequest == true`.
0448 
0449         @param method The verb enumeration. If the method string is not
0450         one of the predefined strings, this value will be @ref verb::unknown.
0451 
0452         @param method_str The unmodified string representing the verb.
0453 
0454         @param target The request-target.
0455 
0456         @param version The HTTP-version. This will be 10 for HTTP/1.0,
0457         and 11 for HTTP/1.1.
0458 
0459         @param ec An output parameter which the function may set to indicate
0460         an error. The error will be clear before this function is invoked.
0461     */
0462     virtual
0463     void
0464     on_request_impl(
0465         verb method,
0466         string_view method_str,
0467         string_view target,
0468         int version,
0469         error_code& ec) = 0;
0470 
0471     /** Called after receiving the status-line.
0472 
0473         This virtual function is invoked after receiving a status-line
0474         when parsing HTTP responses.
0475         It can only be called when `isRequest == false`.
0476 
0477         @param code The numeric status code.
0478 
0479         @param reason The reason-phrase. Note that this value is
0480         now obsolete, and only provided for historical or diagnostic
0481         purposes.
0482 
0483         @param version The HTTP-version. This will be 10 for HTTP/1.0,
0484         and 11 for HTTP/1.1.
0485 
0486         @param ec An output parameter which the function may set to indicate
0487         an error. The error will be clear before this function is invoked.
0488     */
0489     virtual
0490     void
0491     on_response_impl(
0492         int code,
0493         string_view reason,
0494         int version,
0495         error_code& ec) = 0;
0496 
0497     /** Called once for each complete field in the HTTP header.
0498 
0499         This virtual function is invoked for each field that is received
0500         while parsing an HTTP message.
0501 
0502         @param name The known field enum value. If the name of the field
0503         is not recognized, this value will be @ref field::unknown.
0504 
0505         @param name_string The exact name of the field as received from
0506         the input, represented as a string.
0507 
0508         @param value A string holding the value of the field.
0509 
0510         @param ec An output parameter which the function may set to indicate
0511         an error. The error will be clear before this function is invoked.
0512     */
0513     virtual
0514     void
0515     on_field_impl(
0516         field name,
0517         string_view name_string,
0518         string_view value,
0519         error_code& ec) = 0;
0520 
0521     /** Called once after the complete HTTP header is received.
0522 
0523         This virtual function is invoked once, after the complete HTTP
0524         header is received while parsing a message.
0525 
0526         @param ec An output parameter which the function may set to indicate
0527         an error. The error will be clear before this function is invoked.
0528     */
0529     virtual
0530     void
0531     on_header_impl(error_code& ec) = 0;
0532 
0533     /** Called once before the body is processed.
0534 
0535         This virtual function is invoked once, before the content body is
0536         processed (but after the complete header is received).
0537 
0538         @param content_length A value representing the content length in
0539         bytes if the length is known (this can include a zero length).
0540         Otherwise, the value will be `boost::none`.
0541 
0542         @param ec An output parameter which the function may set to indicate
0543         an error. The error will be clear before this function is invoked.
0544     */
0545     virtual
0546     void
0547     on_body_init_impl(
0548         boost::optional<std::uint64_t> const& content_length,
0549         error_code& ec) = 0;
0550 
0551     /** Called each time additional data is received representing the content body.
0552 
0553         This virtual function is invoked for each piece of the body which is
0554         received while parsing a message. This function is only used when
0555         no chunked transfer encoding is present.
0556 
0557         @param body A string holding the additional body contents. This may
0558         contain nulls or unprintable characters.
0559 
0560         @param ec An output parameter which the function may set to indicate
0561         an error. The error will be clear before this function is invoked.
0562 
0563         @see on_chunk_body_impl
0564     */
0565     virtual
0566     std::size_t
0567     on_body_impl(
0568         string_view body,
0569         error_code& ec) = 0;
0570 
0571     /** Called each time a new chunk header of a chunk encoded body is received.
0572 
0573         This function is invoked each time a new chunk header is received.
0574         The function is only used when the chunked transfer encoding is present.
0575 
0576         @param size The size of this chunk, in bytes.
0577 
0578         @param extensions A string containing the entire chunk extensions.
0579         This may be empty, indicating no extensions are present.
0580 
0581         @param ec An output parameter which the function may set to indicate
0582         an error. The error will be clear before this function is invoked.
0583     */
0584     virtual
0585     void
0586     on_chunk_header_impl(
0587         std::uint64_t size,
0588         string_view extensions,
0589         error_code& ec) = 0;
0590 
0591     /** Called each time additional data is received representing part of a body chunk.
0592 
0593         This virtual function is invoked for each piece of the body which is
0594         received while parsing a message. This function is only used when
0595         the chunked transfer encoding is present.
0596 
0597         @param remain The number of bytes remaining in this chunk. This includes
0598         the contents of the passed `body`. If this value is zero, then this represents
0599         the final chunk.
0600 
0601         @param body A string holding the additional body contents. This may
0602         contain nulls or unprintable characters.
0603 
0604         @param ec An output parameter which the function may set to indicate
0605         an error. The error will be clear before this function is invoked.
0606 
0607         @return This function should return the number of bytes actually consumed
0608         from the `body` value. Any bytes that are not consumed on this call
0609         will be presented in a subsequent call.
0610 
0611         @see on_body_impl
0612     */
0613     virtual
0614     std::size_t
0615     on_chunk_body_impl(
0616         std::uint64_t remain,
0617         string_view body,
0618         error_code& ec) = 0;
0619 
0620     /** Called once when the complete message is received.
0621 
0622         This virtual function is invoked once, after successfully parsing
0623         a complete HTTP message.
0624 
0625         @param ec An output parameter which the function may set to indicate
0626         an error. The error will be clear before this function is invoked.
0627     */
0628     virtual
0629     void
0630     on_finish_impl(error_code& ec) = 0;
0631 
0632 private:
0633 
0634     boost::optional<std::uint64_t>
0635     content_length_unchecked() const;
0636 
0637     template<class ConstBufferSequence>
0638     std::size_t
0639     put_from_stack(
0640         std::size_t size,
0641         ConstBufferSequence const& buffers,
0642         error_code& ec);
0643 
0644     void
0645     maybe_need_more(
0646         char const* p, std::size_t n,
0647             error_code& ec);
0648 
0649     void
0650     parse_start_line(
0651         char const*& p, char const* last,
0652             error_code& ec, std::true_type);
0653 
0654     void
0655     parse_start_line(
0656         char const*& p, char const* last,
0657             error_code& ec, std::false_type);
0658 
0659     void
0660     parse_fields(
0661         char const*& p, char const* last,
0662             error_code& ec);
0663 
0664     void
0665     finish_header(
0666         error_code& ec, std::true_type);
0667 
0668     void
0669     finish_header(
0670         error_code& ec, std::false_type);
0671 
0672     void
0673     parse_body(char const*& p,
0674         std::size_t n, error_code& ec);
0675 
0676     void
0677     parse_body_to_eof(char const*& p,
0678         std::size_t n, error_code& ec);
0679 
0680     void
0681     parse_chunk_header(char const*& p,
0682         std::size_t n, error_code& ec);
0683 
0684     void
0685     parse_chunk_body(char const*& p,
0686         std::size_t n, error_code& ec);
0687 
0688     void
0689     do_field(field f,
0690         string_view value, error_code& ec);
0691 };
0692 
0693 } // http
0694 } // beast
0695 } // boost
0696 
0697 #include <boost/beast/http/impl/basic_parser.hpp>
0698 #ifdef BOOST_BEAST_HEADER_ONLY
0699 #include <boost/beast/http/impl/basic_parser.ipp>
0700 #endif
0701 
0702 #endif