File indexing completed on 2025-01-19 09:51:45
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #ifndef EIGEN_COMPLEX_SSE_H
0011 #define EIGEN_COMPLEX_SSE_H
0012
0013 namespace Eigen {
0014
0015 namespace internal {
0016
0017
0018 struct Packet2cf
0019 {
0020 EIGEN_STRONG_INLINE Packet2cf() {}
0021 EIGEN_STRONG_INLINE explicit Packet2cf(const __m128& a) : v(a) {}
0022 Packet4f v;
0023 };
0024
0025
0026
0027 #ifndef EIGEN_VECTORIZE_AVX
0028 template<> struct packet_traits<std::complex<float> > : default_packet_traits
0029 {
0030 typedef Packet2cf type;
0031 typedef Packet2cf half;
0032 enum {
0033 Vectorizable = 1,
0034 AlignedOnScalar = 1,
0035 size = 2,
0036 HasHalfPacket = 0,
0037
0038 HasAdd = 1,
0039 HasSub = 1,
0040 HasMul = 1,
0041 HasDiv = 1,
0042 HasNegate = 1,
0043 HasSqrt = 1,
0044 HasAbs = 0,
0045 HasAbs2 = 0,
0046 HasMin = 0,
0047 HasMax = 0,
0048 HasSetLinear = 0,
0049 HasBlend = 1
0050 };
0051 };
0052 #endif
0053
0054 template<> struct unpacket_traits<Packet2cf> {
0055 typedef std::complex<float> type;
0056 typedef Packet2cf half;
0057 typedef Packet4f as_real;
0058 enum {
0059 size=2,
0060 alignment=Aligned16,
0061 vectorizable=true,
0062 masked_load_available=false,
0063 masked_store_available=false
0064 };
0065 };
0066
0067 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_add_ps(a.v,b.v)); }
0068 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_sub_ps(a.v,b.v)); }
0069
0070 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
0071 {
0072 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
0073 return Packet2cf(_mm_xor_ps(a.v,mask));
0074 }
0075 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
0076 {
0077 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000));
0078 return Packet2cf(_mm_xor_ps(a.v,mask));
0079 }
0080
0081 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0082 {
0083 #ifdef EIGEN_VECTORIZE_SSE3
0084 return Packet2cf(_mm_addsub_ps(_mm_mul_ps(_mm_moveldup_ps(a.v), b.v),
0085 _mm_mul_ps(_mm_movehdup_ps(a.v),
0086 vec4f_swizzle1(b.v, 1, 0, 3, 2))));
0087
0088
0089
0090 #else
0091 const __m128 mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x00000000,0x80000000,0x00000000));
0092 return Packet2cf(_mm_add_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 0, 0, 2, 2), b.v),
0093 _mm_xor_ps(_mm_mul_ps(vec4f_swizzle1(a.v, 1, 1, 3, 3),
0094 vec4f_swizzle1(b.v, 1, 0, 3, 2)), mask)));
0095 #endif
0096 }
0097
0098 template<> EIGEN_STRONG_INLINE Packet2cf ptrue <Packet2cf>(const Packet2cf& a) { return Packet2cf(ptrue(Packet4f(a.v))); }
0099 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_and_ps(a.v,b.v)); }
0100 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_or_ps(a.v,b.v)); }
0101 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_xor_ps(a.v,b.v)); }
0102 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(_mm_andnot_ps(b.v,a.v)); }
0103
0104 template<> EIGEN_STRONG_INLINE Packet2cf pload <Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(&numext::real_ref(*from))); }
0105 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(&numext::real_ref(*from))); }
0106
0107 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
0108 {
0109 Packet2cf res;
0110 #ifdef EIGEN_VECTORIZE_SSE3
0111 res.v = _mm_castpd_ps(_mm_loaddup_pd(reinterpret_cast<double const*>(&from)));
0112 #else
0113 res.v = _mm_castpd_ps(_mm_load_sd(reinterpret_cast<double const*>(&from)));
0114 res.v = _mm_movelh_ps(res.v, res.v);
0115 #endif
0116 return res;
0117 }
0118
0119 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
0120
0121 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore(&numext::real_ref(*to), Packet4f(from.v)); }
0122 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(&numext::real_ref(*to), Packet4f(from.v)); }
0123
0124
0125 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(const std::complex<float>* from, Index stride)
0126 {
0127 return Packet2cf(_mm_set_ps(std::imag(from[1*stride]), std::real(from[1*stride]),
0128 std::imag(from[0*stride]), std::real(from[0*stride])));
0129 }
0130
0131 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(std::complex<float>* to, const Packet2cf& from, Index stride)
0132 {
0133 to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 0)),
0134 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 1)));
0135 to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 2)),
0136 _mm_cvtss_f32(_mm_shuffle_ps(from.v, from.v, 3)));
0137 }
0138
0139 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
0140
0141 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
0142 {
0143 #if EIGEN_GNUC_AT_MOST(4,3)
0144
0145
0146 EIGEN_ALIGN16 std::complex<float> res[2];
0147 _mm_store_ps((float*)res, a.v);
0148 return res[0];
0149 #else
0150 std::complex<float> res;
0151 _mm_storel_pi((__m64*)&res, a.v);
0152 return res;
0153 #endif
0154 }
0155
0156 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a) { return Packet2cf(_mm_castpd_ps(preverse(Packet2d(_mm_castps_pd(a.v))))); }
0157
0158 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
0159 {
0160 return pfirst(Packet2cf(_mm_add_ps(a.v, _mm_movehl_ps(a.v,a.v))));
0161 }
0162
0163 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
0164 {
0165 return pfirst(pmul(a, Packet2cf(_mm_movehl_ps(a.v,a.v))));
0166 }
0167
0168 EIGEN_STRONG_INLINE Packet2cf pcplxflip(const Packet2cf& x)
0169 {
0170 return Packet2cf(vec4f_swizzle1(x.v, 1, 0, 3, 2));
0171 }
0172
0173 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
0174
0175 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0176 {
0177
0178 Packet2cf res = pmul(a, pconj(b));
0179 __m128 s = _mm_mul_ps(b.v,b.v);
0180 return Packet2cf(_mm_div_ps(res.v,_mm_add_ps(s,vec4f_swizzle1(s, 1, 0, 3, 2))));
0181 }
0182
0183
0184
0185
0186 struct Packet1cd
0187 {
0188 EIGEN_STRONG_INLINE Packet1cd() {}
0189 EIGEN_STRONG_INLINE explicit Packet1cd(const __m128d& a) : v(a) {}
0190 Packet2d v;
0191 };
0192
0193
0194
0195 #ifndef EIGEN_VECTORIZE_AVX
0196 template<> struct packet_traits<std::complex<double> > : default_packet_traits
0197 {
0198 typedef Packet1cd type;
0199 typedef Packet1cd half;
0200 enum {
0201 Vectorizable = 1,
0202 AlignedOnScalar = 0,
0203 size = 1,
0204 HasHalfPacket = 0,
0205
0206 HasAdd = 1,
0207 HasSub = 1,
0208 HasMul = 1,
0209 HasDiv = 1,
0210 HasNegate = 1,
0211 HasSqrt = 1,
0212 HasAbs = 0,
0213 HasAbs2 = 0,
0214 HasMin = 0,
0215 HasMax = 0,
0216 HasSetLinear = 0
0217 };
0218 };
0219 #endif
0220
0221 template<> struct unpacket_traits<Packet1cd> {
0222 typedef std::complex<double> type;
0223 typedef Packet1cd half;
0224 typedef Packet2d as_real;
0225 enum {
0226 size=1,
0227 alignment=Aligned16,
0228 vectorizable=true,
0229 masked_load_available=false,
0230 masked_store_available=false
0231 };
0232 };
0233
0234 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_add_pd(a.v,b.v)); }
0235 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_sub_pd(a.v,b.v)); }
0236 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a) { return Packet1cd(pnegate(Packet2d(a.v))); }
0237 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
0238 {
0239 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x80000000,0x0,0x0,0x0));
0240 return Packet1cd(_mm_xor_pd(a.v,mask));
0241 }
0242
0243 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0244 {
0245 #ifdef EIGEN_VECTORIZE_SSE3
0246 return Packet1cd(_mm_addsub_pd(_mm_mul_pd(_mm_movedup_pd(a.v), b.v),
0247 _mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
0248 vec2d_swizzle1(b.v, 1, 0))));
0249 #else
0250 const __m128d mask = _mm_castsi128_pd(_mm_set_epi32(0x0,0x0,0x80000000,0x0));
0251 return Packet1cd(_mm_add_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 0, 0), b.v),
0252 _mm_xor_pd(_mm_mul_pd(vec2d_swizzle1(a.v, 1, 1),
0253 vec2d_swizzle1(b.v, 1, 0)), mask)));
0254 #endif
0255 }
0256
0257 template<> EIGEN_STRONG_INLINE Packet1cd ptrue <Packet1cd>(const Packet1cd& a) { return Packet1cd(ptrue(Packet2d(a.v))); }
0258 template<> EIGEN_STRONG_INLINE Packet1cd pand <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_and_pd(a.v,b.v)); }
0259 template<> EIGEN_STRONG_INLINE Packet1cd por <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_or_pd(a.v,b.v)); }
0260 template<> EIGEN_STRONG_INLINE Packet1cd pxor <Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_xor_pd(a.v,b.v)); }
0261 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b) { return Packet1cd(_mm_andnot_pd(b.v,a.v)); }
0262
0263
0264 template<> EIGEN_STRONG_INLINE Packet1cd pload <Packet1cd>(const std::complex<double>* from)
0265 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>((const double*)from)); }
0266 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
0267 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>((const double*)from)); }
0268 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
0269 { return ploadu<Packet1cd>(&from); }
0270
0271 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from) { return pset1<Packet1cd>(*from); }
0272
0273
0274 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((double*)to, Packet2d(from.v)); }
0275 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> * to, const Packet1cd& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((double*)to, Packet2d(from.v)); }
0276
0277 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> * addr) { _mm_prefetch((SsePrefetchPtrType)(addr), _MM_HINT_T0); }
0278
0279 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
0280 {
0281 EIGEN_ALIGN16 double res[2];
0282 _mm_store_pd(res, a.v);
0283 return std::complex<double>(res[0],res[1]);
0284 }
0285
0286 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
0287
0288 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a)
0289 {
0290 return pfirst(a);
0291 }
0292
0293 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a)
0294 {
0295 return pfirst(a);
0296 }
0297
0298 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
0299
0300 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0301 {
0302
0303 Packet1cd res = pmul(a,pconj(b));
0304 __m128d s = _mm_mul_pd(b.v,b.v);
0305 return Packet1cd(_mm_div_pd(res.v, _mm_add_pd(s,_mm_shuffle_pd(s, s, 0x1))));
0306 }
0307
0308 EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
0309 {
0310 return Packet1cd(preverse(Packet2d(x.v)));
0311 }
0312
0313 EIGEN_DEVICE_FUNC inline void
0314 ptranspose(PacketBlock<Packet2cf,2>& kernel) {
0315 __m128d w1 = _mm_castps_pd(kernel.packet[0].v);
0316 __m128d w2 = _mm_castps_pd(kernel.packet[1].v);
0317
0318 __m128 tmp = _mm_castpd_ps(_mm_unpackhi_pd(w1, w2));
0319 kernel.packet[0].v = _mm_castpd_ps(_mm_unpacklo_pd(w1, w2));
0320 kernel.packet[1].v = tmp;
0321 }
0322
0323 template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)
0324 {
0325 __m128 eq = _mm_cmpeq_ps(a.v, b.v);
0326 return Packet2cf(pand<Packet4f>(eq, vec4f_swizzle1(eq, 1, 0, 3, 2)));
0327 }
0328
0329 template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)
0330 {
0331 __m128d eq = _mm_cmpeq_pd(a.v, b.v);
0332 return Packet1cd(pand<Packet2d>(eq, vec2d_swizzle1(eq, 1, 0)));
0333 }
0334
0335 template<> EIGEN_STRONG_INLINE Packet2cf pblend(const Selector<2>& ifPacket, const Packet2cf& thenPacket, const Packet2cf& elsePacket) {
0336 __m128d result = pblend<Packet2d>(ifPacket, _mm_castps_pd(thenPacket.v), _mm_castps_pd(elsePacket.v));
0337 return Packet2cf(_mm_castpd_ps(result));
0338 }
0339
0340 template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
0341 return psqrt_complex<Packet1cd>(a);
0342 }
0343
0344 template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
0345 return psqrt_complex<Packet2cf>(a);
0346 }
0347
0348 }
0349 }
0350
0351 #endif