Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-04-09 08:28:04

0001 //
0002 // Copyright (c) 2019-2024 Ruben Perez Hidalgo (rubenperez038 at gmail dot com)
0003 //
0004 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0005 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0006 //
0007 
0008 #ifndef BOOST_MYSQL_IMPL_CHARACTER_SET_IPP
0009 #define BOOST_MYSQL_IMPL_CHARACTER_SET_IPP
0010 
0011 #pragma once
0012 
0013 #include <boost/mysql/character_set.hpp>
0014 
0015 #include <boost/assert.hpp>
0016 
0017 namespace boost {
0018 namespace mysql {
0019 namespace detail {
0020 
// Tells whether `byte` falls inside the closed interval [lower, upper].
// Both bounds are inclusive.
inline bool in_range(unsigned char byte, unsigned char lower, unsigned char upper)
{
    return !(byte < lower || upper < byte);
}
0025 
0026 }  // namespace detail
0027 }  // namespace mysql
0028 }  // namespace boost
0029 
0030 std::size_t boost::mysql::detail::next_char_utf8mb4(span<const unsigned char> input)
0031 {
0032     // s[0]    s[1]    s[2]    s[3]    comment
0033     // 00-7F                           ascii
0034     // 80-c1                           invalid
0035     // c2-df   80-bf                   2byte
0036     // e0      a0-bf   80-bf           3byte, case 1
0037     // e1-ec   80-bf   80-bf           3byte, case 2
0038     // ed      80-9f   80-bf           3byte, case 3 (surrogates)
0039     // ee-ef   80-bf   80-bf           3byte, case 2
0040     // f0      90-bf   80-bf   80-bf   4byte, case 1
0041     // f1-f3   80-bf   80-bf   80-bf   4byte, case 2
0042     // f4      80-8f   80-bf   80-bf   4byte, case 3
0043 
0044     BOOST_ASSERT(!input.empty());
0045 
0046     auto first_char = input.front();
0047     BOOST_ASSERT(first_char >= 0x80);  // ascii range covered by call_next_char
0048 
0049     if (first_char < 0xc2)
0050     {
0051         return 0;
0052     }
0053     else if (first_char < 0xe0)
0054     {
0055         return (input.size() < 2u || !in_range(input[1], 0x80, 0xbf)) ? 0 : 2;
0056     }
0057     else if (first_char == 0xe0)
0058     {
0059         return (input.size() < 3u || !in_range(input[1], 0xa0, 0xbf) || !in_range(input[2], 0x80, 0xbf)) ? 0
0060                                                                                                          : 3;
0061     }
0062     else if (first_char == 0xed)
0063     {
0064         return (input.size() < 3u || !in_range(input[1], 0x80, 0x9f) || !in_range(input[2], 0x80, 0xbf)) ? 0
0065                                                                                                          : 3;
0066     }
0067     else if (first_char <= 0xef)
0068     {
0069         // Includes e1-ec and ee-ef
0070         return (input.size() < 3u || !in_range(input[1], 0x80, 0xbf) || !in_range(input[2], 0x80, 0xbf)) ? 0
0071                                                                                                          : 3;
0072     }
0073     else if (first_char == 0xf0)
0074     {
0075         return (input.size() < 4u || !in_range(input[1], 0x90, 0xbf) || !in_range(input[2], 0x80, 0xbf) ||
0076                 !in_range(input[3], 0x80, 0xbf))
0077                    ? 0
0078                    : 4;
0079     }
0080     else if (first_char <= 0xf3)
0081     {
0082         return (input.size() < 4u || !in_range(input[1], 0x80, 0xbf) || !in_range(input[2], 0x80, 0xbf) ||
0083                 !in_range(input[3], 0x80, 0xbf))
0084                    ? 0
0085                    : 4;
0086     }
0087     else if (first_char == 0xf4)
0088     {
0089         return (input.size() < 4u || !in_range(input[1], 0x80, 0x8f) || !in_range(input[2], 0x80, 0xbf) ||
0090                 !in_range(input[3], 0x80, 0xbf))
0091                    ? 0
0092                    : 4;
0093     }
0094     else
0095     {
0096         return 0;
0097     }
0098 }
0099 
0100 #endif