Warning: the file /include/Vc/sse/shuffle.h was not indexed, or it was modified since the last indexing; in either case, cross-reference links may be missing, inaccurate, or erroneous.
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026
0027
0028 #ifndef VC_SSE_SHUFFLE_H_
0029 #define VC_SSE_SHUFFLE_H_
0030
0031 #include "intrinsics.h"
0032 #include "macros.h"
0033
0034 namespace Vc_VERSIONED_NAMESPACE
0035 {
// Named element positions used as template arguments for the shuffle, blend
// and permute helpers below. X0..X7 refer to elements of the first (or only)
// operand, Y0..Y7 to elements of the second operand.
// The helpers rely on the exact numeric values: X0 == 0 ... X7 == 7 and
// Y0 == 8 ... Y7 == 15 (e.g. the `Dst - Y0` rebasing and `Dst / Y0` mask
// arithmetic). Do not reorder.
// Const0: presumably a constant-zero placeholder — not used in this file;
// TODO confirm against the other shuffle headers.
enum VecPos {
    X0, X1, X2, X3, X4, X5, X6, X7,
    Y0, Y1, Y2, Y3, Y4, Y5, Y6, Y7,
    Const0
};
0041
0042 namespace Mem
0043 {
0044
0045 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m128 Vc_CONST shuffle(__m128 x, __m128 y) {
0046 static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= Y0 && Dst3 >= Y0, "Incorrect_Range");
0047 static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= Y3 && Dst3 <= Y3, "Incorrect_Range");
0048 return _mm_shuffle_ps(x, y, Dst0 + Dst1 * 4 + (Dst2 - Y0) * 16 + (Dst3 - Y0) * 64);
0049 }
0050
0051
0052 template<VecPos Dst0, VecPos Dst1> static Vc_ALWAYS_INLINE __m128d Vc_CONST shuffle(__m128d x, __m128d y) {
0053 static_assert(Dst0 >= X0 && Dst1 >= Y0, "Incorrect_Range");
0054 static_assert(Dst0 <= X1 && Dst1 <= Y1, "Incorrect_Range");
0055 return _mm_shuffle_pd(x, y, Dst0 + (Dst1 - Y0) * 2);
0056 }
0057
0058
0059 template <VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3>
0060 Vc_INTRINSIC Vc_CONST __m128i shuffle(__m128i x, __m128i y)
0061 {
0062 return _mm_castps_si128(shuffle<Dst0, Dst1, Dst2, Dst3>(_mm_castsi128_ps(x),
0063 _mm_castsi128_ps(y)));
0064 }
0065
0066
0067 template<VecPos Dst0, VecPos Dst1> static Vc_ALWAYS_INLINE __m128d Vc_CONST blend(__m128d x, __m128d y) {
0068 static_assert(Dst0 == X0 || Dst0 == Y0, "Incorrect_Range");
0069 static_assert(Dst1 == X1 || Dst1 == Y1, "Incorrect_Range");
0070 return Vc::SseIntrinsics::blend_pd<(Dst0 / Y0) + (Dst1 / Y0) * 2>(x, y);
0071 }
0072
0073
0074 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m128 Vc_CONST blend(__m128 x, __m128 y) {
0075 static_assert(Dst0 == X0 || Dst0 == Y0, "Incorrect_Range");
0076 static_assert(Dst1 == X1 || Dst1 == Y1, "Incorrect_Range");
0077 static_assert(Dst2 == X2 || Dst2 == Y2, "Incorrect_Range");
0078 static_assert(Dst3 == X3 || Dst3 == Y3, "Incorrect_Range");
0079 return Vc::SseIntrinsics::blend_ps<(Dst0 / Y0) * 1 + (Dst1 / Y1) * 2 +
0080 (Dst2 / Y2) * 4 + (Dst3 / Y3) * 8>(x, y);
0081 }
0082
0083 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3, VecPos Dst4, VecPos Dst5, VecPos Dst6, VecPos Dst7>
0084 static Vc_ALWAYS_INLINE __m128i Vc_CONST blend(__m128i x, __m128i y) {
0085 static_assert(Dst0 == X0 || Dst0 == Y0, "Incorrect_Range");
0086 static_assert(Dst1 == X1 || Dst1 == Y1, "Incorrect_Range");
0087 static_assert(Dst2 == X2 || Dst2 == Y2, "Incorrect_Range");
0088 static_assert(Dst3 == X3 || Dst3 == Y3, "Incorrect_Range");
0089 static_assert(Dst4 == X4 || Dst4 == Y4, "Incorrect_Range");
0090 static_assert(Dst5 == X5 || Dst5 == Y5, "Incorrect_Range");
0091 static_assert(Dst6 == X6 || Dst6 == Y6, "Incorrect_Range");
0092 static_assert(Dst7 == X7 || Dst7 == Y7, "Incorrect_Range");
0093 return Vc::SseIntrinsics::blend_epi16<
0094 (Dst0 / Y0) * 1 + (Dst1 / Y1) * 2 + (Dst2 / Y2) * 4 + (Dst3 / Y3) * 8 +
0095 (Dst4 / Y4) * 16 + (Dst5 / Y5) * 32 + (Dst6 / Y6) * 64 +
0096 (Dst7 / Y7) * 128>(x, y);
0097 }
0098
0099
0100 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m128 Vc_CONST permute(__m128 x) {
0101 static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
0102 static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
0103 return _mm_shuffle_ps(x, x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
0104 }
0105
0106 template<VecPos Dst0, VecPos Dst1> static Vc_ALWAYS_INLINE Vc_CONST __m128d permute(__m128d x) {
0107 static_assert(Dst0 >= X0 && Dst1 >= X0, "Incorrect_Range");
0108 static_assert(Dst0 <= X1 && Dst1 <= X1, "Incorrect_Range");
0109 return _mm_shuffle_pd(x, x, Dst0 + Dst1 * 4);
0110 }
0111
0112 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m128i Vc_CONST permute(__m128i x) {
0113 static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
0114 static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
0115 return _mm_shuffle_epi32(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
0116 }
0117
0118 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m128i Vc_CONST permuteLo(__m128i x) {
0119 static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
0120 static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
0121 return _mm_shufflelo_epi16(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
0122 }
0123
0124 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3> static Vc_ALWAYS_INLINE __m128i Vc_CONST permuteHi(__m128i x) {
0125 static_assert(Dst0 >= X4 && Dst1 >= X4 && Dst2 >= X4 && Dst3 >= X4, "Incorrect_Range");
0126 static_assert(Dst0 <= X7 && Dst1 <= X7 && Dst2 <= X7 && Dst3 <= X7, "Incorrect_Range");
0127 return _mm_shufflehi_epi16(x, (Dst0 - X4) + (Dst1 - X4) * 4 + (Dst2 - X4) * 16 + (Dst3 - X4) * 64);
0128 }
0129
0130 template<VecPos Dst0, VecPos Dst1, VecPos Dst2, VecPos Dst3, VecPos Dst4, VecPos Dst5, VecPos Dst6, VecPos Dst7>
0131 static Vc_ALWAYS_INLINE __m128i Vc_CONST permute(__m128i x) {
0132 static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
0133 static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
0134 static_assert(Dst4 >= X4 && Dst5 >= X4 && Dst6 >= X4 && Dst7 >= X4, "Incorrect_Range");
0135 static_assert(Dst4 <= X7 && Dst5 <= X7 && Dst6 <= X7 && Dst7 <= X7, "Incorrect_Range");
0136 if (Dst0 != X0 || Dst1 != X1 || Dst2 != X2 || Dst3 != X3) {
0137 x = _mm_shufflelo_epi16(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
0138 }
0139 if (Dst4 != X4 || Dst5 != X5 || Dst6 != X6 || Dst7 != X7) {
0140 x = _mm_shufflehi_epi16(x, (Dst4 - X4) + (Dst5 - X4) * 4 + (Dst6 - X4) * 16 + (Dst7 - X4) * 64);
0141 }
0142 return x;
0143 }
0144 }
0145
0146
0147 namespace Reg
0148 {
0149
0150 template<VecPos Dst3, VecPos Dst2, VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m128 Vc_CONST shuffle(__m128 x, __m128 y) {
0151 return Mem::shuffle<Dst0, Dst1, Dst2, Dst3>(x, y);
0152 }
0153
0154
0155 template<VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m128d Vc_CONST shuffle(__m128d x, __m128d y) {
0156 return Mem::shuffle<Dst0, Dst1>(x, y);
0157 }
0158
0159
0160 template<VecPos Dst3, VecPos Dst2, VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m128i Vc_CONST permute(__m128i x) {
0161 static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= X0 && Dst3 >= X0, "Incorrect_Range");
0162 static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= X3 && Dst3 <= X3, "Incorrect_Range");
0163 return _mm_shuffle_epi32(x, Dst0 + Dst1 * 4 + Dst2 * 16 + Dst3 * 64);
0164 }
0165
0166
0167 template<VecPos Dst3, VecPos Dst2, VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m128i Vc_CONST shuffle(__m128i x, __m128i y) {
0168 static_assert(Dst0 >= X0 && Dst1 >= X0 && Dst2 >= Y0 && Dst3 >= Y0, "Incorrect_Range");
0169 static_assert(Dst0 <= X3 && Dst1 <= X3 && Dst2 <= Y3 && Dst3 <= Y3, "Incorrect_Range");
0170 return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x), _mm_castsi128_ps(y), Dst0 + Dst1 * 4 + (Dst2 - Y0) * 16 + (Dst3 - Y0) * 64));
0171 }
0172
0173
0174 template<VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m128d Vc_CONST blend(__m128d x, __m128d y) {
0175 return Mem::blend<Dst0, Dst1>(x, y);
0176 }
0177
0178 template<VecPos Dst3, VecPos Dst2, VecPos Dst1, VecPos Dst0> static Vc_ALWAYS_INLINE __m128 Vc_CONST blend(__m128 x, __m128 y) {
0179 return Mem::blend<Dst0, Dst1, Dst2, Dst3>(x, y);
0180 }
0181 }
0182 }
0183
0184 #endif