Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-02-28 10:25:31

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
0003 
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014 
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025 
0026 }}}*/
0027 
0028 #ifndef VC_SSE_CASTS_H_
0029 #define VC_SSE_CASTS_H_
0030 
0031 #include "intrinsics.h"
0032 #include "types.h"
0033 #include "macros.h"
0034 
0035 namespace Vc_VERSIONED_NAMESPACE
0036 {
0037 namespace SSE
0038 {
// Short spellings for the integer element types that appear as ConvertTag
// arguments in the conversion overloads of this header.
using uint = unsigned int;      // unsigned int elements
using ushort = unsigned short;  // unsigned short elements
using uchar = unsigned char;    // unsigned char elements
using schar = signed char;      // signed char elements
0043 
0044 // sse_cast {{{1
0045 template <typename To, typename From> Vc_ALWAYS_INLINE Vc_CONST To sse_cast(From v)
0046 {
0047     return v;
0048 }
0049 template<> Vc_ALWAYS_INLINE Vc_CONST __m128i sse_cast<__m128i, __m128 >(__m128  v) { return _mm_castps_si128(v); }
0050 template<> Vc_ALWAYS_INLINE Vc_CONST __m128i sse_cast<__m128i, __m128d>(__m128d v) { return _mm_castpd_si128(v); }
0051 template<> Vc_ALWAYS_INLINE Vc_CONST __m128  sse_cast<__m128 , __m128d>(__m128d v) { return _mm_castpd_ps(v);    }
0052 template<> Vc_ALWAYS_INLINE Vc_CONST __m128  sse_cast<__m128 , __m128i>(__m128i v) { return _mm_castsi128_ps(v); }
0053 template<> Vc_ALWAYS_INLINE Vc_CONST __m128d sse_cast<__m128d, __m128i>(__m128i v) { return _mm_castsi128_pd(v); }
0054 template<> Vc_ALWAYS_INLINE Vc_CONST __m128d sse_cast<__m128d, __m128 >(__m128  v) { return _mm_castps_pd(v);    }
0055 
0056 // convert {{{1
// Empty tag type that encodes a (source element type, destination element
// type) pair. It carries no data; it exists only so that the convert()
// overloads can be selected by overload resolution.
template <typename From, typename To>
struct ConvertTag {};
0060 template <typename From, typename To>
0061 Vc_INTRINSIC typename VectorTraits<To>::VectorType convert(
0062     typename VectorTraits<From>::VectorType v)
0063 {
0064     return convert(v, ConvertTag<From, To>());
0065 }
0066 
0067 Vc_INTRINSIC __m128i convert(__m128  v, ConvertTag<float , int   >) { return _mm_cvttps_epi32(v); }
0068 Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, int   >) { return _mm_cvttpd_epi32(v); }
0069 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int   , int   >) { return v; }
0070 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint  , int   >) { return v; }
0071 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , int   >) {
0072 #ifdef Vc_IMPL_SSE4_1
0073     return _mm_cvtepi16_epi32(v);
0074 #else
0075     return _mm_srai_epi32(_mm_unpacklo_epi16(v, v), 16);
0076 #endif
0077 }
0078 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, int   >) {
0079 #ifdef Vc_IMPL_SSE4_1
0080     return _mm_cvtepu16_epi32(v);
0081 #else
0082     return _mm_srli_epi32(_mm_unpacklo_epi16(v, v), 16);
0083 #endif
0084 }
0085 Vc_INTRINSIC __m128i convert(__m128  v, ConvertTag<float , uint  >) {
0086     return _mm_castps_si128(
0087         blendv_ps(_mm_castsi128_ps(_mm_cvttps_epi32(v)),
0088                   _mm_castsi128_ps(_mm_xor_si128(
0089                       _mm_cvttps_epi32(_mm_sub_ps(v, _mm_set1_ps(1u << 31))),
0090                       _mm_set1_epi32(1 << 31))),
0091                   _mm_cmpge_ps(v, _mm_set1_ps(1u << 31))));
0092 }
0093 Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, uint  >) {
0094 #ifdef Vc_IMPL_SSE4_1
0095     return _mm_xor_si128(_mm_cvttpd_epi32(_mm_sub_pd(_mm_floor_pd(v), _mm_set1_pd(0x80000000u))),
0096                          _mm_cvtsi64_si128(0x8000000080000000ull));
0097 #else
0098     return blendv_epi8(_mm_cvttpd_epi32(v),
0099                        _mm_xor_si128(_mm_cvttpd_epi32(_mm_sub_pd(v, _mm_set1_pd(0x80000000u))),
0100                                      _mm_cvtsi64_si128(0x8000000080000000ull)),
0101                        _mm_castpd_si128(_mm_cmpge_pd(v, _mm_set1_pd(0x80000000u))));
0102 #endif
0103 }
0104 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int   , uint  >) { return v; }
0105 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint  , uint  >) { return v; }
0106 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , uint  >) { return convert(v, ConvertTag<short, int>()); }
0107 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, uint  >) { return convert(v, ConvertTag<ushort, int>()); }
0108 Vc_INTRINSIC __m128  convert(__m128  v, ConvertTag<float , float >) { return v; }
0109 Vc_INTRINSIC __m128  convert(__m128d v, ConvertTag<double, float >) { return _mm_cvtpd_ps(v); }
0110 Vc_INTRINSIC __m128  convert(__m128i v, ConvertTag<int   , float >) { return _mm_cvtepi32_ps(v); }
0111 Vc_INTRINSIC __m128  convert(__m128i v, ConvertTag<uint  , float >) {
0112     // see AVX::convert<uint, float> for an explanation of the math behind the
0113     // implementation
0114     using namespace SSE;
0115     return blendv_ps(_mm_cvtepi32_ps(v),
0116         _mm_add_ps(_mm_cvtepi32_ps(_mm_and_si128(v, _mm_set1_epi32(0x7ffffe00))),
0117                       _mm_add_ps(_mm_set1_ps(1u << 31), _mm_cvtepi32_ps(_mm_and_si128(
0118                                                           v, _mm_set1_epi32(0x000001ff))))),
0119         _mm_castsi128_ps(_mm_cmplt_epi32(v, _mm_setzero_si128())));
0120 }
0121 Vc_INTRINSIC __m128  convert(__m128i v, ConvertTag<short , float >) { return convert(convert(v, ConvertTag<short, int>()), ConvertTag<int, float>()); }
0122 Vc_INTRINSIC __m128  convert(__m128i v, ConvertTag<ushort, float >) { return convert(convert(v, ConvertTag<ushort, int>()), ConvertTag<int, float>()); }
0123 Vc_INTRINSIC __m128d convert(__m128  v, ConvertTag<float , double>) { return _mm_cvtps_pd(v); }
0124 Vc_INTRINSIC __m128d convert(__m128d v, ConvertTag<double, double>) { return v; }
0125 Vc_INTRINSIC __m128d convert(__m128i v, ConvertTag<int   , double>) { return _mm_cvtepi32_pd(v); }
0126 Vc_INTRINSIC __m128d convert(__m128i v, ConvertTag<uint  , double>) { return _mm_add_pd(_mm_cvtepi32_pd(_mm_xor_si128(v, setmin_epi32())), _mm_set1_pd(1u << 31)); }
0127 Vc_INTRINSIC __m128d convert(__m128i v, ConvertTag<short , double>) { return convert(convert(v, ConvertTag<short, int>()), ConvertTag<int, double>()); }
0128 Vc_INTRINSIC __m128d convert(__m128i v, ConvertTag<ushort, double>) { return convert(convert(v, ConvertTag<ushort, int>()), ConvertTag<int, double>()); }
0129 Vc_INTRINSIC __m128i convert(__m128  v, ConvertTag<float , short >) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); }
0130 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int   , short >) { return _mm_packs_epi32(v, _mm_setzero_si128()); }
0131 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint  , short >) { return _mm_packs_epi32(v, _mm_setzero_si128()); }
0132 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , short >) { return v; }
0133 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, short >) { return v; }
0134 Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, short >) { return convert(convert(v, ConvertTag<double, int>()), ConvertTag<int, short>()); }
0135 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int   , ushort>) {
0136     auto tmp0 = _mm_unpacklo_epi16(v, _mm_setzero_si128());  // 0 4 X X 1 5 X X
0137     auto tmp1 = _mm_unpackhi_epi16(v, _mm_setzero_si128());  // 2 6 X X 3 7 X X
0138     auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1);              // 0 2 4 6 X X X X
0139     auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1);              // 1 3 5 7 X X X X
0140     return _mm_unpacklo_epi16(tmp2, tmp3);                   // 0 1 2 3 4 5 6 7
0141 }
0142 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint  , ushort>) {
0143     auto tmp0 = _mm_unpacklo_epi16(v, _mm_setzero_si128());  // 0 4 X X 1 5 X X
0144     auto tmp1 = _mm_unpackhi_epi16(v, _mm_setzero_si128());  // 2 6 X X 3 7 X X
0145     auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1);              // 0 2 4 6 X X X X
0146     auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1);              // 1 3 5 7 X X X X
0147     return _mm_unpacklo_epi16(tmp2, tmp3);                   // 0 1 2 3 4 5 6 7
0148 }
0149 Vc_INTRINSIC __m128i convert(__m128  v, ConvertTag<float , ushort>) { return convert(_mm_cvttps_epi32(v), ConvertTag<int, ushort>()); }
0150 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , ushort>) { return v; }
0151 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, ushort>) { return v; }
0152 Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, ushort>) { return convert(convert(v, ConvertTag<double, int>()), ConvertTag<int, ushort>()); }
0153 
0154 // }}}1
0155 }  // namespace SSE
0156 }  // namespace Vc
0157 
0158 #endif // VC_SSE_CASTS_H_
0159 
0160 // vim: foldmethod=marker