File indexing completed on 2025-10-30 08:12:07
0001 
0002 
0003 
0004 
0005 
0006 
0007 
0008 
0009 
0010 #ifndef BOOST_BEAST_WEBSOCKET_DETAIL_UTF8_CHECKER_IPP
0011 #define BOOST_BEAST_WEBSOCKET_DETAIL_UTF8_CHECKER_IPP
0012 
#include <boost/beast/websocket/detail/utf8_checker.hpp>

#include <boost/assert.hpp>

#include <cstring>
0016 
0017 namespace boost {
0018 namespace beast {
0019 namespace websocket {
0020 namespace detail {
0021 
0022 void
0023 utf8_checker::
0024 reset()
0025 {
0026     need_ = 0;
0027     p_ = cp_;
0028 }
0029 
0030 bool
0031 utf8_checker::
0032 finish()
0033 {
0034     auto const success = need_ == 0;
0035     reset();
0036     return success;
0037 }
0038 
0039 bool
0040 utf8_checker::
0041 write(std::uint8_t const* in, std::size_t size)
0042 {
0043     auto const valid =
0044         [](std::uint8_t const*& p)
0045         {
0046             if(p[0] < 128)
0047             {
0048                 ++p;
0049                 return true;
0050             }
0051             if((p[0] & 0xe0) == 0xc0)
0052             {
0053                 if( (p[1] & 0xc0) != 0x80 ||
0054                     (p[0] & 0x1e) == 0)  
0055                     return false;
0056                 p += 2;
0057                 return true;
0058             }
0059             if((p[0] & 0xf0) == 0xe0)
0060             {
0061                 if(    (p[1] & 0xc0) != 0x80
0062                     || (p[2] & 0xc0) != 0x80
0063                     || (p[0] == 0xe0 && (p[1] & 0x20) == 0) 
0064                     || (p[0] == 0xed && (p[1] & 0x20) == 0x20) 
0065                     
0066                     )
0067                     return false;
0068                 p += 3;
0069                 return true;
0070             }
0071             if((p[0] & 0xf8) == 0xf0)
0072             {
0073                 if(    (p[0] & 0x07) >= 0x05 
0074                     || (p[1] & 0xc0) != 0x80
0075                     || (p[2] & 0xc0) != 0x80
0076                     || (p[3] & 0xc0) != 0x80
0077                     || (p[0] == 0xf0 && (p[1] & 0x30) == 0) 
0078                     || (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4 
0079                     )
0080                     return false;
0081                 p += 4;
0082                 return true;
0083             }
0084             return false;
0085         };
0086     auto const fail_fast =
0087         [&]()
0088         {
0089             if(cp_[0] < 128)
0090             {
0091                 return false;
0092             }
0093 
0094             const auto& p = cp_; 
0095             const auto known_only = p_ - cp_;
0096             if (known_only == 1)
0097             {
0098                 if((p[0] & 0xe0) == 0xc0)
0099                 {
0100                     return ((p[0] & 0x1e) == 0);  
0101                 }
0102                 if((p[0] & 0xf0) == 0xe0)
0103                 {
0104                     return false;
0105                 }
0106                 if((p[0] & 0xf8) == 0xf0)
0107                 {
0108                     return ((p[0] & 0x07) >= 0x05);  
0109                 }
0110             }
0111             else if (known_only == 2)
0112             {
0113                 if((p[0] & 0xe0) == 0xc0)
0114                 {
0115                     return ((p[1] & 0xc0) != 0x80 ||
0116                             (p[0] & 0x1e) == 0);  
0117                 }
0118                 if((p[0] & 0xf0) == 0xe0)
0119                 {
0120                     return (  (p[1] & 0xc0) != 0x80
0121                            || (p[0] == 0xe0 && (p[1] & 0x20) == 0) 
0122                            || (p[0] == 0xed && (p[1] & 0x20) == 0x20)); 
0123                 }
0124                 if((p[0] & 0xf8) == 0xf0)
0125                 {
0126                     return (  (p[0] & 0x07) >= 0x05 
0127                            || (p[1] & 0xc0) != 0x80
0128                            || (p[0] == 0xf0 && (p[1] & 0x30) == 0) 
0129                            || (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4); 
0130                 }
0131             }
0132             else if (known_only == 3)
0133             {
0134                 if((p[0] & 0xe0) == 0xc0)
0135                 {
0136                     return (  (p[1] & 0xc0) != 0x80
0137                            || (p[0] & 0x1e) == 0);  
0138                 }
0139                 if((p[0] & 0xf0) == 0xe0)
0140                 {
0141                     return (  (p[1] & 0xc0) != 0x80
0142                            || (p[2] & 0xc0) != 0x80
0143                            || (p[0] == 0xe0 && (p[1] & 0x20) == 0) 
0144                            || (p[0] == 0xed && (p[1] & 0x20) == 0x20)); 
0145                            
0146                 }
0147                 if((p[0] & 0xf8) == 0xf0)
0148                 {
0149                     return (  (p[0] & 0x07) >= 0x05 
0150                            || (p[1] & 0xc0) != 0x80
0151                            || (p[2] & 0xc0) != 0x80
0152                            || (p[0] == 0xf0 && (p[1] & 0x30) == 0) 
0153                            || (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4); 
0154                 }
0155             }
0156             return true;
0157         };
0158     auto const needed =
0159         [](std::uint8_t const v)
0160         {
0161             if(v < 128)
0162                 return 1;
0163             if(v < 192)
0164                 return 0;
0165             if(v < 224)
0166                 return 2;
0167             if(v < 240)
0168                 return 3;
0169             if(v < 248)
0170                 return 4;
0171             return 0;
0172         };
0173 
0174     auto const end = in + size;
0175 
0176     
0177     if(need_ > 0)
0178     {
0179         
0180         auto n = (std::min)(size, need_);
0181         size -= n;
0182         need_ -= n;
0183 
0184         
0185         while(n--)
0186             *p_++ = *in++;
0187         BOOST_ASSERT(p_ <= cp_ + 4);
0188 
0189         
0190         if(need_ > 0)
0191         {
0192             
0193             BOOST_ASSERT(in == end);
0194 
0195             
0196             
0197             
0198             return ! fail_fast();
0199         }
0200 
0201         
0202         std::uint8_t const* p = &cp_[0];
0203         if(! valid(p))
0204             return false;
0205         p_ = cp_;
0206     }
0207 
0208     if(size <= sizeof(std::size_t))
0209         goto slow;
0210 
0211     
0212     {
0213         auto const in0 = in;
0214         auto last = reinterpret_cast<std::uint8_t const*>(
0215             ((reinterpret_cast<std::uintptr_t>(in) + sizeof(std::size_t) - 1) /
0216                 sizeof(std::size_t)) * sizeof(std::size_t));
0217 
0218         
0219         while(in < last)
0220         {
0221             if(*in & 0x80)
0222             {
0223                 
0224                 size = size - (in - in0);
0225                 goto slow;
0226             }
0227             ++in;
0228         }
0229         size = size - (in - in0);
0230     }
0231 
0232     
0233     {
0234         auto const in0 = in;
0235         auto last = in + size - 7;
0236         auto constexpr mask = static_cast<
0237             std::size_t>(0x8080808080808080 & ~std::size_t{0});
0238         while(in < last)
0239         {
0240 #if 0
0241             std::size_t temp;
0242             std::memcpy(&temp, in, sizeof(temp));
0243             if((temp & mask) != 0)
0244 #else
0245             
0246             if((*reinterpret_cast<std::size_t const*>(in) & mask) != 0)
0247 #endif
0248             {
0249                 size = size - (in - in0);
0250                 goto slow;
0251             }
0252             in += sizeof(std::size_t);
0253         }
0254         
0255         last += 4;
0256         while(in < last)
0257             if(! valid(in))
0258                 return false;
0259         goto tail;
0260     }
0261 
0262 slow:
0263     
0264     {
0265         auto last = in + size - 3;
0266         while(in < last)
0267             if(! valid(in))
0268                 return false;
0269     }
0270 
0271 tail:
0272     
0273     
0274     
0275     
0276     
0277     
0278     
0279     for(;;)
0280     {
0281         
0282         auto n = end - in;
0283         if(! n)
0284             break;
0285 
0286         
0287         auto const need = needed(*in);
0288         if(need == 0)
0289             return false;
0290         if(need <= n)
0291         {
0292             
0293             if(! valid(in))
0294                 return false;
0295         }
0296         else
0297         {
0298             
0299             
0300             need_ = need - n;
0301 
0302             
0303             while(n--)
0304                 *p_++ = *in++;
0305             BOOST_ASSERT(in == end);
0306             BOOST_ASSERT(p_ <= cp_ + 4);
0307 
0308             
0309             
0310             
0311             return ! fail_fast();
0312         }
0313     }
0314     return true;
0315 }
0316 
0317 bool
0318 check_utf8(char const* p, std::size_t n)
0319 {
0320     utf8_checker c;
0321     if(! c.write(reinterpret_cast<const uint8_t*>(p), n))
0322         return false;
0323     return c.finish();
0324 }
0325 
0326 } 
0327 } 
0328 } 
0329 } 
0330 
0331 #endif