Warning, file /include/Vc/sse/casts.h was not indexed
or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #ifndef VC_SSE_CASTS_H_
0029 #define VC_SSE_CASTS_H_
0030
0031 #include "intrinsics.h"
0032 #include "types.h"
0033 #include "macros.h"
0034
0035 namespace Vc_VERSIONED_NAMESPACE
0036 {
0037 namespace SSE
0038 {
// Short aliases for the integer element types. `uint` and `ushort` are the
// names used in the ConvertTag<...> overloads of convert() in this header.
using schar = signed char;
using uchar = unsigned char;
using ushort = unsigned short;
using uint = unsigned int;
0043
0044
0045 template <typename To, typename From> Vc_ALWAYS_INLINE Vc_CONST To sse_cast(From v)
0046 {
0047 return v;
0048 }
0049 template<> Vc_ALWAYS_INLINE Vc_CONST __m128i sse_cast<__m128i, __m128 >(__m128 v) { return _mm_castps_si128(v); }
0050 template<> Vc_ALWAYS_INLINE Vc_CONST __m128i sse_cast<__m128i, __m128d>(__m128d v) { return _mm_castpd_si128(v); }
0051 template<> Vc_ALWAYS_INLINE Vc_CONST __m128 sse_cast<__m128 , __m128d>(__m128d v) { return _mm_castpd_ps(v); }
0052 template<> Vc_ALWAYS_INLINE Vc_CONST __m128 sse_cast<__m128 , __m128i>(__m128i v) { return _mm_castsi128_ps(v); }
0053 template<> Vc_ALWAYS_INLINE Vc_CONST __m128d sse_cast<__m128d, __m128i>(__m128i v) { return _mm_castsi128_pd(v); }
0054 template<> Vc_ALWAYS_INLINE Vc_CONST __m128d sse_cast<__m128d, __m128 >(__m128 v) { return _mm_castps_pd(v); }
0055
0056
// Empty tag type identifying a (From, To) element-type pair. An object of this
// type is passed as the second argument of convert() so that overload
// resolution picks the implementation for that particular pair.
template <typename From, typename To>
struct ConvertTag {
};
0060 template <typename From, typename To>
0061 Vc_INTRINSIC typename VectorTraits<To>::VectorType convert(
0062 typename VectorTraits<From>::VectorType v)
0063 {
0064 return convert(v, ConvertTag<From, To>());
0065 }
0066
0067 Vc_INTRINSIC __m128i convert(__m128 v, ConvertTag<float , int >) { return _mm_cvttps_epi32(v); }
0068 Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, int >) { return _mm_cvttpd_epi32(v); }
0069 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int , int >) { return v; }
0070 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint , int >) { return v; }
0071 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , int >) {
0072 #ifdef Vc_IMPL_SSE4_1
0073 return _mm_cvtepi16_epi32(v);
0074 #else
0075 return _mm_srai_epi32(_mm_unpacklo_epi16(v, v), 16);
0076 #endif
0077 }
0078 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, int >) {
0079 #ifdef Vc_IMPL_SSE4_1
0080 return _mm_cvtepu16_epi32(v);
0081 #else
0082 return _mm_srli_epi32(_mm_unpacklo_epi16(v, v), 16);
0083 #endif
0084 }
0085 Vc_INTRINSIC __m128i convert(__m128 v, ConvertTag<float , uint >) {
0086 return _mm_castps_si128(
0087 blendv_ps(_mm_castsi128_ps(_mm_cvttps_epi32(v)),
0088 _mm_castsi128_ps(_mm_xor_si128(
0089 _mm_cvttps_epi32(_mm_sub_ps(v, _mm_set1_ps(1u << 31))),
0090 _mm_set1_epi32(1 << 31))),
0091 _mm_cmpge_ps(v, _mm_set1_ps(1u << 31))));
0092 }
0093 Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, uint >) {
0094 #ifdef Vc_IMPL_SSE4_1
0095 return _mm_xor_si128(_mm_cvttpd_epi32(_mm_sub_pd(_mm_floor_pd(v), _mm_set1_pd(0x80000000u))),
0096 _mm_cvtsi64_si128(0x8000000080000000ull));
0097 #else
0098 return blendv_epi8(_mm_cvttpd_epi32(v),
0099 _mm_xor_si128(_mm_cvttpd_epi32(_mm_sub_pd(v, _mm_set1_pd(0x80000000u))),
0100 _mm_cvtsi64_si128(0x8000000080000000ull)),
0101 _mm_castpd_si128(_mm_cmpge_pd(v, _mm_set1_pd(0x80000000u))));
0102 #endif
0103 }
0104 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int , uint >) { return v; }
0105 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint , uint >) { return v; }
0106 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , uint >) { return convert(v, ConvertTag<short, int>()); }
0107 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, uint >) { return convert(v, ConvertTag<ushort, int>()); }
0108 Vc_INTRINSIC __m128 convert(__m128 v, ConvertTag<float , float >) { return v; }
0109 Vc_INTRINSIC __m128 convert(__m128d v, ConvertTag<double, float >) { return _mm_cvtpd_ps(v); }
0110 Vc_INTRINSIC __m128 convert(__m128i v, ConvertTag<int , float >) { return _mm_cvtepi32_ps(v); }
0111 Vc_INTRINSIC __m128 convert(__m128i v, ConvertTag<uint , float >) {
0112
0113
0114 using namespace SSE;
0115 return blendv_ps(_mm_cvtepi32_ps(v),
0116 _mm_add_ps(_mm_cvtepi32_ps(_mm_and_si128(v, _mm_set1_epi32(0x7ffffe00))),
0117 _mm_add_ps(_mm_set1_ps(1u << 31), _mm_cvtepi32_ps(_mm_and_si128(
0118 v, _mm_set1_epi32(0x000001ff))))),
0119 _mm_castsi128_ps(_mm_cmplt_epi32(v, _mm_setzero_si128())));
0120 }
0121 Vc_INTRINSIC __m128 convert(__m128i v, ConvertTag<short , float >) { return convert(convert(v, ConvertTag<short, int>()), ConvertTag<int, float>()); }
0122 Vc_INTRINSIC __m128 convert(__m128i v, ConvertTag<ushort, float >) { return convert(convert(v, ConvertTag<ushort, int>()), ConvertTag<int, float>()); }
0123 Vc_INTRINSIC __m128d convert(__m128 v, ConvertTag<float , double>) { return _mm_cvtps_pd(v); }
0124 Vc_INTRINSIC __m128d convert(__m128d v, ConvertTag<double, double>) { return v; }
0125 Vc_INTRINSIC __m128d convert(__m128i v, ConvertTag<int , double>) { return _mm_cvtepi32_pd(v); }
0126 Vc_INTRINSIC __m128d convert(__m128i v, ConvertTag<uint , double>) { return _mm_add_pd(_mm_cvtepi32_pd(_mm_xor_si128(v, setmin_epi32())), _mm_set1_pd(1u << 31)); }
0127 Vc_INTRINSIC __m128d convert(__m128i v, ConvertTag<short , double>) { return convert(convert(v, ConvertTag<short, int>()), ConvertTag<int, double>()); }
0128 Vc_INTRINSIC __m128d convert(__m128i v, ConvertTag<ushort, double>) { return convert(convert(v, ConvertTag<ushort, int>()), ConvertTag<int, double>()); }
0129 Vc_INTRINSIC __m128i convert(__m128 v, ConvertTag<float , short >) { return _mm_packs_epi32(_mm_cvttps_epi32(v), _mm_setzero_si128()); }
0130 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int , short >) { return _mm_packs_epi32(v, _mm_setzero_si128()); }
0131 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint , short >) { return _mm_packs_epi32(v, _mm_setzero_si128()); }
0132 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , short >) { return v; }
0133 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, short >) { return v; }
0134 Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, short >) { return convert(convert(v, ConvertTag<double, int>()), ConvertTag<int, short>()); }
0135 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<int , ushort>) {
0136 auto tmp0 = _mm_unpacklo_epi16(v, _mm_setzero_si128());
0137 auto tmp1 = _mm_unpackhi_epi16(v, _mm_setzero_si128());
0138 auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1);
0139 auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1);
0140 return _mm_unpacklo_epi16(tmp2, tmp3);
0141 }
0142 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<uint , ushort>) {
0143 auto tmp0 = _mm_unpacklo_epi16(v, _mm_setzero_si128());
0144 auto tmp1 = _mm_unpackhi_epi16(v, _mm_setzero_si128());
0145 auto tmp2 = _mm_unpacklo_epi16(tmp0, tmp1);
0146 auto tmp3 = _mm_unpackhi_epi16(tmp0, tmp1);
0147 return _mm_unpacklo_epi16(tmp2, tmp3);
0148 }
0149 Vc_INTRINSIC __m128i convert(__m128 v, ConvertTag<float , ushort>) { return convert(_mm_cvttps_epi32(v), ConvertTag<int, ushort>()); }
0150 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<short , ushort>) { return v; }
0151 Vc_INTRINSIC __m128i convert(__m128i v, ConvertTag<ushort, ushort>) { return v; }
0152 Vc_INTRINSIC __m128i convert(__m128d v, ConvertTag<double, ushort>) { return convert(convert(v, ConvertTag<double, int>()), ConvertTag<int, ushort>()); }
0153
0154
0155 }
0156 }
0157
0158 #endif
0159
0160