File indexing completed on 2025-01-18 09:29:33
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #ifndef BOOST_BEAST_WEBSOCKET_DETAIL_UTF8_CHECKER_IPP
0011 #define BOOST_BEAST_WEBSOCKET_DETAIL_UTF8_CHECKER_IPP
0012
0013 #include <boost/beast/websocket/detail/utf8_checker.hpp>
0014
0015 #include <boost/assert.hpp>
0016
0017 namespace boost {
0018 namespace beast {
0019 namespace websocket {
0020 namespace detail {
0021
0022 void
0023 utf8_checker::
0024 reset()
0025 {
0026 need_ = 0;
0027 p_ = cp_;
0028 }
0029
0030 bool
0031 utf8_checker::
0032 finish()
0033 {
0034 auto const success = need_ == 0;
0035 reset();
0036 return success;
0037 }
0038
0039 bool
0040 utf8_checker::
0041 write(std::uint8_t const* in, std::size_t size)
0042 {
0043 auto const valid =
0044 [](std::uint8_t const*& p)
0045 {
0046 if(p[0] < 128)
0047 {
0048 ++p;
0049 return true;
0050 }
0051 if((p[0] & 0xe0) == 0xc0)
0052 {
0053 if( (p[1] & 0xc0) != 0x80 ||
0054 (p[0] & 0x1e) == 0)
0055 return false;
0056 p += 2;
0057 return true;
0058 }
0059 if((p[0] & 0xf0) == 0xe0)
0060 {
0061 if( (p[1] & 0xc0) != 0x80
0062 || (p[2] & 0xc0) != 0x80
0063 || (p[0] == 0xe0 && (p[1] & 0x20) == 0)
0064 || (p[0] == 0xed && (p[1] & 0x20) == 0x20)
0065
0066 )
0067 return false;
0068 p += 3;
0069 return true;
0070 }
0071 if((p[0] & 0xf8) == 0xf0)
0072 {
0073 if( (p[0] & 0x07) >= 0x05
0074 || (p[1] & 0xc0) != 0x80
0075 || (p[2] & 0xc0) != 0x80
0076 || (p[3] & 0xc0) != 0x80
0077 || (p[0] == 0xf0 && (p[1] & 0x30) == 0)
0078 || (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4
0079 )
0080 return false;
0081 p += 4;
0082 return true;
0083 }
0084 return false;
0085 };
0086 auto const fail_fast =
0087 [&]()
0088 {
0089 if(cp_[0] < 128)
0090 {
0091 return false;
0092 }
0093
0094 const auto& p = cp_;
0095 const auto known_only = p_ - cp_;
0096 if (known_only == 1)
0097 {
0098 if((p[0] & 0xe0) == 0xc0)
0099 {
0100 return ((p[0] & 0x1e) == 0);
0101 }
0102 if((p[0] & 0xf0) == 0xe0)
0103 {
0104 return false;
0105 }
0106 if((p[0] & 0xf8) == 0xf0)
0107 {
0108 return ((p[0] & 0x07) >= 0x05);
0109 }
0110 }
0111 else if (known_only == 2)
0112 {
0113 if((p[0] & 0xe0) == 0xc0)
0114 {
0115 return ((p[1] & 0xc0) != 0x80 ||
0116 (p[0] & 0x1e) == 0);
0117 }
0118 if((p[0] & 0xf0) == 0xe0)
0119 {
0120 return ( (p[1] & 0xc0) != 0x80
0121 || (p[0] == 0xe0 && (p[1] & 0x20) == 0)
0122 || (p[0] == 0xed && (p[1] & 0x20) == 0x20));
0123 }
0124 if((p[0] & 0xf8) == 0xf0)
0125 {
0126 return ( (p[0] & 0x07) >= 0x05
0127 || (p[1] & 0xc0) != 0x80
0128 || (p[0] == 0xf0 && (p[1] & 0x30) == 0)
0129 || (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4);
0130 }
0131 }
0132 else if (known_only == 3)
0133 {
0134 if((p[0] & 0xe0) == 0xc0)
0135 {
0136 return ( (p[1] & 0xc0) != 0x80
0137 || (p[0] & 0x1e) == 0);
0138 }
0139 if((p[0] & 0xf0) == 0xe0)
0140 {
0141 return ( (p[1] & 0xc0) != 0x80
0142 || (p[2] & 0xc0) != 0x80
0143 || (p[0] == 0xe0 && (p[1] & 0x20) == 0)
0144 || (p[0] == 0xed && (p[1] & 0x20) == 0x20));
0145
0146 }
0147 if((p[0] & 0xf8) == 0xf0)
0148 {
0149 return ( (p[0] & 0x07) >= 0x05
0150 || (p[1] & 0xc0) != 0x80
0151 || (p[2] & 0xc0) != 0x80
0152 || (p[0] == 0xf0 && (p[1] & 0x30) == 0)
0153 || (p[0] == 0xf4 && p[1] > 0x8f) || p[0] > 0xf4);
0154 }
0155 }
0156 return true;
0157 };
0158 auto const needed =
0159 [](std::uint8_t const v)
0160 {
0161 if(v < 128)
0162 return 1;
0163 if(v < 192)
0164 return 0;
0165 if(v < 224)
0166 return 2;
0167 if(v < 240)
0168 return 3;
0169 if(v < 248)
0170 return 4;
0171 return 0;
0172 };
0173
0174 auto const end = in + size;
0175
0176
0177 if(need_ > 0)
0178 {
0179
0180 auto n = (std::min)(size, need_);
0181 size -= n;
0182 need_ -= n;
0183
0184
0185 while(n--)
0186 *p_++ = *in++;
0187 BOOST_ASSERT(p_ <= cp_ + 4);
0188
0189
0190 if(need_ > 0)
0191 {
0192
0193 BOOST_ASSERT(in == end);
0194
0195
0196
0197
0198 return ! fail_fast();
0199 }
0200
0201
0202 std::uint8_t const* p = &cp_[0];
0203 if(! valid(p))
0204 return false;
0205 p_ = cp_;
0206 }
0207
0208 if(size <= sizeof(std::size_t))
0209 goto slow;
0210
0211
0212 {
0213 auto const in0 = in;
0214 auto last = reinterpret_cast<std::uint8_t const*>(
0215 ((reinterpret_cast<std::uintptr_t>(in) + sizeof(std::size_t) - 1) /
0216 sizeof(std::size_t)) * sizeof(std::size_t));
0217
0218
0219 while(in < last)
0220 {
0221 if(*in & 0x80)
0222 {
0223
0224 size = size - (in - in0);
0225 goto slow;
0226 }
0227 ++in;
0228 }
0229 size = size - (in - in0);
0230 }
0231
0232
0233 {
0234 auto const in0 = in;
0235 auto last = in + size - 7;
0236 auto constexpr mask = static_cast<
0237 std::size_t>(0x8080808080808080 & ~std::size_t{0});
0238 while(in < last)
0239 {
0240 #if 0
0241 std::size_t temp;
0242 std::memcpy(&temp, in, sizeof(temp));
0243 if((temp & mask) != 0)
0244 #else
0245
0246 if((*reinterpret_cast<std::size_t const*>(in) & mask) != 0)
0247 #endif
0248 {
0249 size = size - (in - in0);
0250 goto slow;
0251 }
0252 in += sizeof(std::size_t);
0253 }
0254
0255 last += 4;
0256 while(in < last)
0257 if(! valid(in))
0258 return false;
0259 goto tail;
0260 }
0261
0262 slow:
0263
0264 {
0265 auto last = in + size - 3;
0266 while(in < last)
0267 if(! valid(in))
0268 return false;
0269 }
0270
0271 tail:
0272
0273
0274
0275
0276
0277
0278
0279 for(;;)
0280 {
0281
0282 auto n = end - in;
0283 if(! n)
0284 break;
0285
0286
0287 auto const need = needed(*in);
0288 if(need == 0)
0289 return false;
0290 if(need <= n)
0291 {
0292
0293 if(! valid(in))
0294 return false;
0295 }
0296 else
0297 {
0298
0299
0300 need_ = need - n;
0301
0302
0303 while(n--)
0304 *p_++ = *in++;
0305 BOOST_ASSERT(in == end);
0306 BOOST_ASSERT(p_ <= cp_ + 4);
0307
0308
0309
0310
0311 return ! fail_fast();
0312 }
0313 }
0314 return true;
0315 }
0316
0317 bool
0318 check_utf8(char const* p, std::size_t n)
0319 {
0320 utf8_checker c;
0321 if(! c.write(reinterpret_cast<const uint8_t*>(p), n))
0322 return false;
0323 return c.finish();
0324 }
0325
0326 }
0327 }
0328 }
0329 }
0330
0331 #endif