Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:38:59

0001 //
0002 // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
0003 //                    Vinnie Falco (vinnie.falco@gmail.com)
0004 // Copyright (c) 2020 Krystian Stasiowski (sdkrystian@gmail.com)
0005 //
0006 // Distributed under the Boost Software License, Version 1.0. (See accompanying
0007 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
0008 //
0009 // Official repository: https://github.com/boostorg/json
0010 //
0011 
0012 #ifndef BOOST_JSON_DETAIL_SSE2_HPP
0013 #define BOOST_JSON_DETAIL_SSE2_HPP
0014 
0015 #include <boost/json/detail/config.hpp>
0016 #include <boost/json/detail/utf8.hpp>
0017 #include <cstddef>
0018 #include <cstring>
0019 #ifdef BOOST_JSON_USE_SSE2
0020 # include <emmintrin.h>
0021 # include <xmmintrin.h>
0022 # ifdef _MSC_VER
0023 #  include <intrin.h>
0024 # endif
0025 #endif
0026 
0027 namespace boost {
0028 namespace json {
0029 namespace detail {
0030 
0031 #ifdef BOOST_JSON_USE_SSE2
0032 
// Scans [p, end) and returns a pointer to the first character that ends a
// run of plain string content: a double quote, a backslash, or a control
// character (unsigned value below 0x20). Returns `end` if no such character
// is present. This primary template does not validate UTF-8; bytes >= 0x80
// are passed over like any other ordinary character.
template<bool AllowBadUTF8>
inline
const char*
count_valid(
    char const* p,
    const char* end) noexcept
{
    __m128i const quote_bytes = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const slash_bytes = _mm_set1_epi8( '\\' );
    __m128i const ctrl_limit = _mm_set1_epi8( 0x1F );

    // vector part: examine 16 bytes per iteration
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        __m128i const is_special = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, quote_bytes ),
            _mm_cmpeq_epi8( chunk, slash_bytes ) );

        // unsigned min(x, 0x1F) equals x exactly when x <= 0x1F
        __m128i const is_control = _mm_cmpeq_epi8(
            _mm_min_epu8( chunk, ctrl_limit ), chunk );

        int const hits = _mm_movemask_epi8(
            _mm_or_si128( is_special, is_control ) );

        if( hits == 0 )
            continue;

        // lowest set bit == offset of the first stopping character
#if defined(__GNUC__) || defined(__clang__)
        int const offset = __builtin_ffs( hits ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, hits );
        int const offset = static_cast<int>(index);
#endif
        return p + offset;
    }

    // scalar part: fewer than 16 bytes remain
    for( ; p != end; ++p )
    {
        unsigned char const c = *p;
        if( c == '\x22' || c == '\\' || c < 0x20 )
            break;
    }

    return p;
}
0082 
0083 template<>
0084 inline
0085 const char*
0086 count_valid<false>(
0087     char const* p,
0088     const char* end) noexcept
0089 {
0090     __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
0091     __m128i const q2 = _mm_set1_epi8( '\\' );
0092     __m128i const q3 = _mm_set1_epi8( 0x20 );
0093 
0094     while(end - p >= 16)
0095     {
0096         __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
0097 
0098         __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
0099         __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
0100         __m128i v4 = _mm_cmplt_epi8( v1, q3 );
0101 
0102         __m128i v5 = _mm_or_si128( v2, v3 );
0103         __m128i v6 = _mm_or_si128( v5, v4 );
0104 
0105         int w = _mm_movemask_epi8( v6 );
0106 
0107         if( w != 0 )
0108         {
0109             int m;
0110 #if defined(__GNUC__) || defined(__clang__)
0111             m = __builtin_ffs( w ) - 1;
0112 #else
0113             unsigned long index;
0114             _BitScanForward( &index, w );
0115             m = index;
0116 #endif
0117             p += m;
0118             break;
0119         }
0120 
0121         p += 16;
0122     }
0123 
0124     while(p != end)
0125     {
0126         const unsigned char c = *p;
0127         if(c == '\x22' || c == '\\' || c < 0x20)
0128             break;
0129         if(c < 0x80)
0130         {
0131             ++p;
0132             continue;
0133         }
0134         // validate utf-8
0135         uint16_t first = classify_utf8(c);
0136         uint8_t len = first & 0xFF;
0137         if(BOOST_JSON_UNLIKELY(end - p < len))
0138             break;
0139         if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
0140             break;
0141         p += len;
0142     }
0143 
0144     return p;
0145 }
0146 
0147 #else
0148 
// Portable version of count_valid: returns a pointer to the first double
// quote, backslash or control character (< 0x20) in [p, end), or `end` if
// there is none. No UTF-8 validation is performed by this primary template.
template<bool AllowBadUTF8>
char const*
count_valid(
    char const* p,
    char const* end) noexcept
{
    for( ; p != end; ++p )
    {
        unsigned char const c = *p;
        bool const stops_run =
            c == '\x22' || c == '\\' || c < 0x20;
        if( stops_run )
            break;
    }

    return p;
}
0165 
0166 template<>
0167 inline
0168 char const*
0169 count_valid<false>(
0170     char const* p,
0171     char const* end) noexcept
0172 {
0173     while(p != end)
0174     {
0175         const unsigned char c = *p;
0176         if(c == '\x22' || c == '\\' || c < 0x20)
0177             break;
0178         if(c < 0x80)
0179         {
0180             ++p;
0181             continue;
0182         }
0183         // validate utf-8
0184         uint16_t first = classify_utf8(c);
0185         uint8_t len = first & 0xFF;
0186         if(BOOST_JSON_UNLIKELY(end - p < len))
0187             break;
0188         if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
0189             break;
0190         p += len;
0191     }
0192 
0193     return p;
0194 }
0195 
0196 #endif
0197 
0198 // KRYSTIAN NOTE: does not stop to validate
0199 // count_unescaped
0200 
0201 #ifdef BOOST_JSON_USE_SSE2
0202 
// Counts leading characters of [s, s + n) that are neither a double quote,
// a backslash, nor a control character (unsigned value <= 0x1F).
//
// Only whole 16-byte chunks are examined. Once fewer than 16 bytes remain
// the scan stops without looking at them, so the result is a lower bound:
// it is always a multiple of 16 unless a stopping character was found, and
// it is 0 whenever n < 16. No UTF-8 validation is performed.
//
// The signature uses std::size_t, matching the non-SSE2 overload; <cstddef>
// only guarantees the std:: qualified name.
inline
std::size_t
count_unescaped(
    char const* s,
    std::size_t n) noexcept
{
    __m128i const quote_bytes = _mm_set1_epi8( '\x22' ); // '"'
    __m128i const slash_bytes = _mm_set1_epi8( '\\' );
    __m128i const ctrl_limit = _mm_set1_epi8( 0x1F );

    char const* const begin = s;

    for( ; n >= 16; s += 16, n -= 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(s) );

        __m128i const is_special = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, quote_bytes ),
            _mm_cmpeq_epi8( chunk, slash_bytes ) );

        // unsigned min(x, 0x1F) equals x exactly when x <= 0x1F
        __m128i const is_control = _mm_cmpeq_epi8(
            _mm_min_epu8( chunk, ctrl_limit ), chunk );

        int const hits = _mm_movemask_epi8(
            _mm_or_si128( is_special, is_control ) );

        if( hits == 0 )
            continue;

        // lowest set bit == offset of the first stopping character
#if defined(__GNUC__) || defined(__clang__)
        int const offset = __builtin_ffs( hits ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, hits );
        int const offset = static_cast<int>(index);
#endif
        s += offset;
        break;
    }

    // s never moves backwards, so the difference is non-negative
    return static_cast<std::size_t>( s - begin );
}
0249 
0250 #else
0251 
// Fallback for builds without SSE2: no characters are examined and the
// function always reports a count of zero.
inline
std::size_t
count_unescaped(
    char const* /*s*/,
    std::size_t /*n*/) noexcept
{
    std::size_t const none = 0;
    return none;
}
0260 
0261 #endif
0262 
0263 // count_digits
0264 
0265 #ifdef BOOST_JSON_USE_SSE2
0266 
// Returns how many consecutive ASCII digits ('0'..'9') start the 16 bytes
// at p, in the range 0..16. All 16 bytes are loaded unconditionally, so
// p[0]..p[15] must be readable.
inline int count_digits( char const* p ) noexcept
{
    // Adding 70 moves '0'..'9' (48..57) onto 118..127, the top of the signed
    // byte range; every other byte value wraps to something below 118. The
    // signed "less than 118" test therefore marks exactly the non-digits.
    __m128i const shifted = _mm_add_epi8(
        _mm_loadu_si128( reinterpret_cast<__m128i const*>(p) ),
        _mm_set1_epi8( 70 ) );
    __m128i const not_digit = _mm_cmplt_epi8(
        shifted, _mm_set1_epi8( 118 ) );

    int const mask = _mm_movemask_epi8( not_digit );

    // no non-digit among the 16 bytes
    if( mask == 0 )
        return 16;

    // position of the first non-digit
#if defined(__GNUC__) || defined(__clang__)
    return __builtin_ffs( mask ) - 1;
#else
    unsigned long index;
    _BitScanForward( &index, mask );
    return static_cast<int>(index);
#endif
}
0295 
0296 #else
0297 
// Returns how many consecutive ASCII digits ('0'..'9') start the buffer at
// p, examining at most 16 bytes (result 0..16). The caller is expected to
// guarantee that p[0]..p[15] are readable.
inline int count_digits( char const* p ) noexcept
{
    int n = 0;

    while( n < 16 )
    {
        // anything outside '0'..'9' wraps to a value above 9
        unsigned char const d = p[n] - '0';
        if( d > 9 )
            break;
        ++n;
    }

    return n;
}
0311 
0312 #endif
0313 
0314 // parse_unsigned
0315 
// Appends the n decimal digits at p to the running value r and returns the
// result, i.e. r * 10^n + value(p[0..n)). The characters are not checked;
// each one is simply offset by '0'. Arithmetic is unsigned 64-bit.
inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
{
    // main loop: four digits per iteration
    for( ; n >= 4; p += 4, n -= 4 )
    {
        // the straightforward unrolled form is faster on clang for x86,
        // but slower on gcc
#ifdef __clang__
        r = r * 10 + p[0] - '0';
        r = r * 10 + p[1] - '0';
        r = r * 10 + p[2] - '0';
        r = r * 10 + p[3] - '0';
#else
        // load four characters as one word and strip the '0' offset
        // from all of them with a single subtraction
        uint32_t quad;
        std::memcpy( &quad, p, 4 );
        quad -= 0x30303030;

        unsigned const b0 = quad & 0xFF;
        unsigned const b1 = (quad >> 8) & 0xFF;
        unsigned const b2 = (quad >> 16) & 0xFF;
        unsigned const b3 = (quad >> 24);

        // the byte holding the first digit depends on the byte order
#ifdef BOOST_JSON_BIG_ENDIAN
        r = (((r * 10 + b3) * 10 + b2) * 10 + b1) * 10 + b0;
#else
        r = (((r * 10 + b0) * 10 + b1) * 10 + b2) * 10 + b3;
#endif
#endif
    }

    // tail: the remaining zero to three digits
    for( std::size_t i = 0; i < n; ++i )
    {
        r = r * 10 + p[i] - '0';
    }

    return r;
}
0367 
0368 // KRYSTIAN: this function is unused
0369 // count_leading
0370 
0371 /*
0372 #ifdef BOOST_JSON_USE_SSE2
0373 
0374 // assumes p..p+15
0375 inline std::size_t count_leading( char const * p, char ch ) noexcept
0376 {
0377     __m128i const q1 = _mm_set1_epi8( ch );
0378 
0379     __m128i v = _mm_loadu_si128( (__m128i const*)p );
0380 
0381     __m128i w = _mm_cmpeq_epi8( v, q1 );
0382 
0383     int m = _mm_movemask_epi8( w ) ^ 0xFFFF;
0384 
0385     std::size_t n;
0386 
0387     if( m == 0 )
0388     {
0389         n = 16;
0390     }
0391     else
0392     {
0393 #if defined(__GNUC__) || defined(__clang__)
0394         n = __builtin_ffs( m ) - 1;
0395 #else
0396         unsigned long index;
0397         _BitScanForward( &index, m );
0398         n = index;
0399 #endif
0400     }
0401 
0402     return n;
0403 }
0404 
0405 #else
0406 
0407 // assumes p..p+15
0408 inline std::size_t count_leading( char const * p, char ch ) noexcept
0409 {
0410     std::size_t n = 0;
0411 
0412     for( ; n < 16 && *p == ch; ++p, ++n );
0413 
0414     return n;
0415 }
0416 
0417 #endif
0418 */
0419 
0420 // count_whitespace
0421 
0422 #ifdef BOOST_JSON_USE_SSE2
0423 
// Skips JSON whitespace (space, '\t', '\r', '\n') at the front of [p, end)
// and returns a pointer to the first character that is not whitespace, or
// `end` if the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    // fast exit: nothing to scan, or the first character is above ' '
    // and so cannot be whitespace
    if( p == end || static_cast<unsigned char>( *p ) > 0x20 )
        return p;

    __m128i const space_bytes = _mm_set1_epi8( ' ' );
    __m128i const newline_bytes = _mm_set1_epi8( '\n' );
    __m128i const bit2_bytes = _mm_set1_epi8( 4 );
    __m128i const cr_bytes = _mm_set1_epi8( '\r' );

    // vector part: examine 16 bytes per iteration
    for( ; end - p >= 16; p += 16 )
    {
        __m128i const chunk = _mm_loadu_si128(
            reinterpret_cast<__m128i const*>(p) );

        __m128i const is_space_or_nl = _mm_or_si128(
            _mm_cmpeq_epi8( chunk, space_bytes ),
            _mm_cmpeq_epi8( chunk, newline_bytes ));

        // setting bit 2 turns '\t' (0x09) into '\r' (0x0D) and leaves '\r'
        // unchanged, so one comparison recognizes both
        __m128i const is_tab_or_cr = _mm_cmpeq_epi8(
            _mm_or_si128( chunk, bit2_bytes ), cr_bytes );

        // invert the 16-bit mask: set bits now mark non-whitespace
        int const non_ws = _mm_movemask_epi8(
            _mm_or_si128( is_space_or_nl, is_tab_or_cr ) ) ^ 0xFFFF;

        if( non_ws == 0 )
            continue;

#if defined(__GNUC__) || defined(__clang__)
        std::size_t const skip = __builtin_ffs( non_ws ) - 1;
#else
        unsigned long index;
        _BitScanForward( &index, non_ws );
        std::size_t const skip = index;
#endif
        return p + skip;
    }

    // scalar part: fewer than 16 bytes remain
    for( ; p != end; ++p )
    {
        char const c = *p;
        if( c != ' ' && c != '\t' && c != '\r' && c != '\n' )
            break;
    }

    return p;
}
0483 
0484 /*
0485 
0486 // slightly faster on msvc-14.2, slightly slower on clang-win
0487 
0488 inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
0489 {
0490     char const * p0 = p;
0491 
0492     while( n > 0 )
0493     {
0494         char ch = *p;
0495 
0496         if( ch == '\n' || ch == '\r' )
0497         {
0498             ++p;
0499             --n;
0500             continue;
0501         }
0502 
0503         if( ch != ' ' && ch != '\t' )
0504         {
0505             break;
0506         }
0507 
0508         ++p;
0509         --n;
0510 
0511         while( n >= 16 )
0512         {
0513             std::size_t n2 = count_leading( p, ch );
0514 
0515             p += n2;
0516             n -= n2;
0517 
0518             if( n2 < 16 )
0519             {
0520                 break;
0521             }
0522         }
0523     }
0524 
0525     return p - p0;
0526 }
0527 */
0528 
0529 #else
0530 
// Skips JSON whitespace (space, '\n', '\r', '\t') at the front of [p, end)
// and returns a pointer to the first character that is not whitespace, or
// `end` if the whole range is whitespace.
inline const char* count_whitespace( char const* p, const char* end ) noexcept
{
    while( p != end )
    {
        switch( *p )
        {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            ++p;
            continue; // next iteration of the enclosing while
        default:
            return p;
        }
    }

    return p;
}
0542 
0543 #endif
0544 
0545 } // detail
0546 } // namespace json
0547 } // namespace boost
0548 
0549 #endif