Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-06-30 08:32:19

0001 /*
0002  *        Copyright Andrey Semashev 2013, 2022, 2024.
0003  * Distributed under the Boost Software License, Version 1.0.
0004  *    (See accompanying file LICENSE_1_0.txt or copy at
0005  *          https://www.boost.org/LICENSE_1_0.txt)
0006  */
0007 /*!
0008  * \file   uuid/detail/uuid_x86.ipp
0009  *
0010  * \brief  This header contains optimized SSE implementation of \c boost::uuid operations.
0011  */
0012 
0013 #ifndef BOOST_UUID_DETAIL_UUID_X86_IPP_INCLUDED_
0014 #define BOOST_UUID_DETAIL_UUID_X86_IPP_INCLUDED_
0015 
0016 #include <boost/uuid/detail/endian.hpp>
0017 #include <cstdint>
0018 
0019 #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
0020 #include <boost/config/pragma_message.hpp>
0021 
0022 #if defined(BOOST_UUID_USE_AVX10_1)
0023 BOOST_PRAGMA_MESSAGE( "Using uuid_x86.ipp, AVX10.1" )
0024 
0025 #elif defined(BOOST_UUID_USE_SSE41)
0026 BOOST_PRAGMA_MESSAGE( "Using uuid_x86.ipp, SSE4.1" )
0027 
0028 #elif defined(BOOST_UUID_USE_SSE3)
0029 BOOST_PRAGMA_MESSAGE( "Using uuid_x86.ipp, SSE3" )
0030 
0031 #else
0032 BOOST_PRAGMA_MESSAGE( "Using uuid_x86.ipp, SSE2" )
0033 
0034 #endif
0035 #endif // #if defined(BOOST_UUID_REPORT_IMPLEMENTATION)
0036 
0037 // MSVC does not always have immintrin.h (at least, not up to MSVC 10), so include the appropriate header for each instruction set
0038 #if defined(BOOST_UUID_USE_AVX10_1)
0039 #include <immintrin.h>
0040 #elif defined(BOOST_UUID_USE_SSE41)
0041 #include <smmintrin.h>
0042 #elif defined(BOOST_UUID_USE_SSE3)
0043 #include <pmmintrin.h>
0044 #else
0045 #include <emmintrin.h>
0046 #endif
0047 
0048 namespace boost {
0049 namespace uuids {
0050 namespace detail {
0051 
0052 BOOST_FORCEINLINE __m128i load_unaligned_si128(const std::uint8_t* p) noexcept
0053 {
0054     return _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
0055 }
0056 
0057 BOOST_FORCEINLINE void compare(uuid const& lhs, uuid const& rhs, std::uint32_t& cmp, std::uint32_t& rcmp) noexcept
0058 {
0059     __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
0060     __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
0061 
0062     // To emulate lexicographical_compare behavior we have to perform two comparisons - the forward and reverse one.
0063     // Then we know which bytes are equivalent and which ones are different, and for those different the comparison results
0064     // will be opposite. Then we'll be able to find the first differing comparison result (for both forward and reverse ways),
0065     // and depending on which way it is for, this will be the result of the operation. There are a few notes to consider:
0066     //
0067     // 1. Due to little endian byte order the first bytes go into the lower part of the xmm registers,
0068     //    so the comparison results in the least significant bits will actually be the most signigicant for the final operation result.
0069     //    This means we have to determine which of the comparison results have the least significant bit on, and this is achieved with
0070     //    the "(x - 1) ^ x" trick. With BMI, this will produce a single blsmsk instruction.
0071     // 2. Because there is only signed byte comparison until AVX-512, we have to invert byte comparison results whenever signs of the
0072     //    corresponding bytes are different. I.e. in signed comparison it's -1 < 1, but in unsigned it is the opposite (255 > 1). To do
0073     //    that we XOR left and right, making the most significant bit of each byte 1 if the signs are different, and later apply this mask
0074     //    with another XOR to the comparison results.
0075     // 3. Until AVX-512, there is only pcmpgtb instruction that compares for "greater" relation, so we swap the arguments to get what we need.
0076 
0077 #if defined(BOOST_UUID_USE_AVX10_1)
0078 
0079     __mmask16 k_cmp = _mm_cmplt_epu8_mask(mm_left, mm_right);
0080     __mmask16 k_rcmp = _mm_cmplt_epu8_mask(mm_right, mm_left);
0081 
0082     cmp = static_cast< std::uint32_t >(_cvtmask16_u32(k_cmp));
0083     rcmp = static_cast< std::uint32_t >(_cvtmask16_u32(k_rcmp));
0084 
0085 #else // defined(BOOST_UUID_USE_AVX10_1)
0086 
0087     const __m128i mm_signs_mask = _mm_xor_si128(mm_left, mm_right);
0088 
0089     __m128i mm_cmp = _mm_cmpgt_epi8(mm_right, mm_left), mm_rcmp = _mm_cmpgt_epi8(mm_left, mm_right);
0090 
0091     mm_cmp = _mm_xor_si128(mm_signs_mask, mm_cmp);
0092     mm_rcmp = _mm_xor_si128(mm_signs_mask, mm_rcmp);
0093 
0094     cmp = static_cast< std::uint32_t >(_mm_movemask_epi8(mm_cmp));
0095     rcmp = static_cast< std::uint32_t >(_mm_movemask_epi8(mm_rcmp));
0096 
0097 #endif // defined(BOOST_UUID_USE_AVX10_1)
0098 
0099     cmp = (cmp - 1u) ^ cmp;
0100     rcmp = (rcmp - 1u) ^ rcmp;
0101 }
0102 
0103 } // namespace detail
0104 
0105 inline bool uuid::is_nil() const noexcept
0106 {
0107     __m128i mm = uuids::detail::load_unaligned_si128(data);
0108 #if defined(BOOST_UUID_USE_SSE41)
0109     return _mm_test_all_zeros(mm, mm) != 0;
0110 #else
0111     mm = _mm_cmpeq_epi32(mm, _mm_setzero_si128());
0112     return _mm_movemask_epi8(mm) == 0xFFFF;
0113 #endif
0114 }
0115 
0116 inline void uuid::swap(uuid& rhs) noexcept
0117 {
0118     __m128i mm_this = uuids::detail::load_unaligned_si128(data);
0119     __m128i mm_rhs = uuids::detail::load_unaligned_si128(rhs.data);
0120     _mm_storeu_si128(reinterpret_cast< __m128i* >(rhs.data+0), mm_this);
0121     _mm_storeu_si128(reinterpret_cast< __m128i* >(data+0), mm_rhs);
0122 }
0123 
0124 inline bool operator== (uuid const& lhs, uuid const& rhs) noexcept
0125 {
0126     __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
0127     __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
0128 
0129 #if defined(BOOST_UUID_USE_SSE41)
0130     __m128i mm = _mm_xor_si128(mm_left, mm_right);
0131     return _mm_test_all_zeros(mm, mm) != 0;
0132 #else
0133     __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right);
0134     return _mm_movemask_epi8(mm_cmp) == 0xFFFF;
0135 #endif
0136 }
0137 
0138 inline bool operator< (uuid const& lhs, uuid const& rhs) noexcept
0139 {
0140     std::uint32_t cmp, rcmp;
0141     uuids::detail::compare(lhs, rhs, cmp, rcmp);
0142     return cmp < rcmp;
0143 }
0144 
#if defined(BOOST_UUID_HAS_THREE_WAY_COMPARISON)

//! Three-way comparison of \a lhs and \a rhs, derived from the masks produced by \c detail::compare.
inline std::strong_ordering operator<=> (uuid const& lhs, uuid const& rhs) noexcept
{
    std::uint32_t less_mask;
    std::uint32_t greater_mask;
    uuids::detail::compare(lhs, rhs, less_mask, greater_mask);

    // Unsigned comparison of the two masks yields the ordering of the operands
    const std::strong_ordering order = less_mask <=> greater_mask;
    return order;
}

#endif
0155 
0156 } // namespace uuids
0157 } // namespace boost
0158 
0159 #endif // BOOST_UUID_DETAIL_UUID_X86_IPP_INCLUDED_