File indexing completed on 2025-01-19 09:51:42
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011 #ifndef EIGEN_COMPLEX_NEON_H
0012 #define EIGEN_COMPLEX_NEON_H
0013
0014 namespace Eigen {
0015
0016 namespace internal {
0017
0018 inline uint32x4_t p4ui_CONJ_XOR()
0019 {
0020
0021 #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
0022 uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
0023 return ret;
0024 #else
0025 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
0026 return vld1q_u32( conj_XOR_DATA );
0027 #endif
0028 }
0029
0030 inline uint32x2_t p2ui_CONJ_XOR()
0031 {
0032 static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
0033 return vld1_u32( conj_XOR_DATA );
0034 }
0035
0036
0037
0038 struct Packet1cf
0039 {
0040 EIGEN_STRONG_INLINE Packet1cf() {}
0041 EIGEN_STRONG_INLINE explicit Packet1cf(const Packet2f& a) : v(a) {}
0042 Packet2f v;
0043 };
0044 struct Packet2cf
0045 {
0046 EIGEN_STRONG_INLINE Packet2cf() {}
0047 EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
0048 Packet4f v;
0049 };
0050
0051 template<> struct packet_traits<std::complex<float> > : default_packet_traits
0052 {
0053 typedef Packet2cf type;
0054 typedef Packet1cf half;
0055 enum
0056 {
0057 Vectorizable = 1,
0058 AlignedOnScalar = 1,
0059 size = 2,
0060 HasHalfPacket = 1,
0061
0062 HasAdd = 1,
0063 HasSub = 1,
0064 HasMul = 1,
0065 HasDiv = 1,
0066 HasNegate = 1,
0067 HasAbs = 0,
0068 HasAbs2 = 0,
0069 HasMin = 0,
0070 HasMax = 0,
0071 HasSetLinear = 0
0072 };
0073 };
0074
0075 template<> struct unpacket_traits<Packet1cf>
0076 {
0077 typedef std::complex<float> type;
0078 typedef Packet1cf half;
0079 typedef Packet2f as_real;
0080 enum
0081 {
0082 size = 1,
0083 alignment = Aligned16,
0084 vectorizable = true,
0085 masked_load_available = false,
0086 masked_store_available = false
0087 };
0088 };
0089 template<> struct unpacket_traits<Packet2cf>
0090 {
0091 typedef std::complex<float> type;
0092 typedef Packet1cf half;
0093 typedef Packet4f as_real;
0094 enum
0095 {
0096 size = 2,
0097 alignment = Aligned16,
0098 vectorizable = true,
0099 masked_load_available = false,
0100 masked_store_available = false
0101 };
0102 };
0103
0104 template<> EIGEN_STRONG_INLINE Packet1cf pcast<float,Packet1cf>(const float& a)
0105 { return Packet1cf(vset_lane_f32(a, vdup_n_f32(0.f), 0)); }
0106 template<> EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f,Packet2cf>(const Packet2f& a)
0107 { return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a)))); }
0108
0109 template<> EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from)
0110 { return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from))); }
0111 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
0112 {
0113 const float32x2_t r64 = vld1_f32(reinterpret_cast<const float*>(&from));
0114 return Packet2cf(vcombine_f32(r64, r64));
0115 }
0116
0117 template<> EIGEN_STRONG_INLINE Packet1cf padd<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
0118 { return Packet1cf(padd<Packet2f>(a.v, b.v)); }
0119 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0120 { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
0121
0122 template<> EIGEN_STRONG_INLINE Packet1cf psub<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
0123 { return Packet1cf(psub<Packet2f>(a.v, b.v)); }
0124 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0125 { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
0126
0127 template<> EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a) { return Packet1cf(pnegate<Packet2f>(a.v)); }
0128 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
0129
0130 template<> EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a)
0131 {
0132 const Packet2ui b = vreinterpret_u32_f32(a.v);
0133 return Packet1cf(vreinterpret_f32_u32(veor_u32(b, p2ui_CONJ_XOR())));
0134 }
0135 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
0136 {
0137 const Packet4ui b = vreinterpretq_u32_f32(a.v);
0138 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
0139 }
0140
0141 template<> EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
0142 {
0143 Packet2f v1, v2;
0144
0145
0146 v1 = vdup_lane_f32(a.v, 0);
0147
0148 v2 = vdup_lane_f32(a.v, 1);
0149
0150 v1 = vmul_f32(v1, b.v);
0151
0152 v2 = vmul_f32(v2, b.v);
0153
0154 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
0155
0156 v2 = vrev64_f32(v2);
0157
0158 return Packet1cf(vadd_f32(v1, v2));
0159 }
0160 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0161 {
0162 Packet4f v1, v2;
0163
0164
0165 v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
0166
0167 v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
0168
0169 v1 = vmulq_f32(v1, b.v);
0170
0171 v2 = vmulq_f32(v2, b.v);
0172
0173 v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
0174
0175 v2 = vrev64q_f32(v2);
0176
0177 return Packet2cf(vaddq_f32(v1, v2));
0178 }
0179
0180 template<> EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b)
0181 {
0182
0183
0184 Packet2f eq = pcmp_eq<Packet2f>(a.v, b.v);
0185
0186
0187 Packet2f eq_swapped = vrev64_f32(eq);
0188
0189 return Packet1cf(pand<Packet2f>(eq, eq_swapped));
0190 }
0191 template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)
0192 {
0193
0194
0195 Packet4f eq = pcmp_eq<Packet4f>(a.v, b.v);
0196
0197
0198 Packet4f eq_swapped = vrev64q_f32(eq);
0199
0200 return Packet2cf(pand<Packet4f>(eq, eq_swapped));
0201 }
0202
0203 template<> EIGEN_STRONG_INLINE Packet1cf pand<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
0204 { return Packet1cf(vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
0205 template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0206 { return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
0207
0208 template<> EIGEN_STRONG_INLINE Packet1cf por<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
0209 { return Packet1cf(vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
0210 template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0211 { return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
0212
0213 template<> EIGEN_STRONG_INLINE Packet1cf pxor<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
0214 { return Packet1cf(vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
0215 template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0216 { return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
0217
0218 template<> EIGEN_STRONG_INLINE Packet1cf pandnot<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
0219 { return Packet1cf(vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
0220 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0221 { return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
0222
0223 template<> EIGEN_STRONG_INLINE Packet1cf pload<Packet1cf>(const std::complex<float>* from)
0224 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(pload<Packet2f>((const float*)from)); }
0225 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
0226 { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(reinterpret_cast<const float*>(from))); }
0227
0228 template<> EIGEN_STRONG_INLINE Packet1cf ploadu<Packet1cf>(const std::complex<float>* from)
0229 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cf(ploadu<Packet2f>((const float*)from)); }
0230 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
0231 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(reinterpret_cast<const float*>(from))); }
0232
0233 template<> EIGEN_STRONG_INLINE Packet1cf ploaddup<Packet1cf>(const std::complex<float>* from)
0234 { return pset1<Packet1cf>(*from); }
0235 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
0236 { return pset1<Packet2cf>(*from); }
0237
0238 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
0239 { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
0240 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
0241 { EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<float*>(to), from.v); }
0242
0243 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
0244 { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
0245 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
0246 { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v); }
0247
0248 template<> EIGEN_DEVICE_FUNC inline Packet1cf pgather<std::complex<float>, Packet1cf>(
0249 const std::complex<float>* from, Index stride)
0250 {
0251 const Packet2f tmp = vdup_n_f32(std::real(from[0*stride]));
0252 return Packet1cf(vset_lane_f32(std::imag(from[0*stride]), tmp, 1));
0253 }
0254 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(
0255 const std::complex<float>* from, Index stride)
0256 {
0257 Packet4f res = vdupq_n_f32(std::real(from[0*stride]));
0258 res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
0259 res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
0260 res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
0261 return Packet2cf(res);
0262 }
0263
0264 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet1cf>(
0265 std::complex<float>* to, const Packet1cf& from, Index stride)
0266 { to[stride*0] = std::complex<float>(vget_lane_f32(from.v, 0), vget_lane_f32(from.v, 1)); }
0267 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(
0268 std::complex<float>* to, const Packet2cf& from, Index stride)
0269 {
0270 to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
0271 to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
0272 }
0273
0274 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *addr)
0275 { EIGEN_ARM_PREFETCH(reinterpret_cast<const float*>(addr)); }
0276
0277 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet1cf>(const Packet1cf& a)
0278 {
0279 EIGEN_ALIGN16 std::complex<float> x;
0280 vst1_f32(reinterpret_cast<float*>(&x), a.v);
0281 return x;
0282 }
0283 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
0284 {
0285 EIGEN_ALIGN16 std::complex<float> x[2];
0286 vst1q_f32(reinterpret_cast<float*>(x), a.v);
0287 return x[0];
0288 }
0289
0290 template<> EIGEN_STRONG_INLINE Packet1cf preverse(const Packet1cf& a) { return a; }
0291 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
0292 { return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v))); }
0293
0294 template<> EIGEN_STRONG_INLINE Packet1cf pcplxflip<Packet1cf>(const Packet1cf& a)
0295 { return Packet1cf(vrev64_f32(a.v)); }
0296 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
0297 { return Packet2cf(vrev64q_f32(a.v)); }
0298
0299 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet1cf>(const Packet1cf& a)
0300 {
0301 std::complex<float> s;
0302 vst1_f32((float *)&s, a.v);
0303 return s;
0304 }
0305 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
0306 {
0307 std::complex<float> s;
0308 vst1_f32(reinterpret_cast<float*>(&s), vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v)));
0309 return s;
0310 }
0311
0312 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet1cf>(const Packet1cf& a)
0313 {
0314 std::complex<float> s;
0315 vst1_f32((float *)&s, a.v);
0316 return s;
0317 }
0318 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
0319 {
0320 float32x2_t a1, a2, v1, v2, prod;
0321 std::complex<float> s;
0322
0323 a1 = vget_low_f32(a.v);
0324 a2 = vget_high_f32(a.v);
0325
0326 v1 = vdup_lane_f32(a1, 0);
0327
0328 v2 = vdup_lane_f32(a1, 1);
0329
0330 v1 = vmul_f32(v1, a2);
0331
0332 v2 = vmul_f32(v2, a2);
0333
0334 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
0335
0336 v2 = vrev64_f32(v2);
0337
0338 prod = vadd_f32(v1, v2);
0339
0340 vst1_f32(reinterpret_cast<float*>(&s), prod);
0341
0342 return s;
0343 }
0344
0345 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cf,Packet2f)
0346 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet2cf,Packet4f)
0347
0348 template<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
0349 {
0350
0351 Packet1cf res = pmul(a, pconj(b));
0352 Packet2f s, rev_s;
0353
0354
0355 s = vmul_f32(b.v, b.v);
0356 rev_s = vrev64_f32(s);
0357
0358 return Packet1cf(pdiv<Packet2f>(res.v, vadd_f32(s, rev_s)));
0359 }
0360 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
0361 {
0362
0363 Packet2cf res = pmul(a,pconj(b));
0364 Packet4f s, rev_s;
0365
0366
0367 s = vmulq_f32(b.v, b.v);
0368 rev_s = vrev64q_f32(s);
0369
0370 return Packet2cf(pdiv<Packet4f>(res.v, vaddq_f32(s, rev_s)));
0371 }
0372
0373 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1cf, 1>& ) {}
0374 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel)
0375 {
0376 Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
0377 kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
0378 kernel.packet[1].v = tmp;
0379 }
0380
0381 template<> EIGEN_STRONG_INLINE Packet1cf psqrt<Packet1cf>(const Packet1cf& a) {
0382 return psqrt_complex<Packet1cf>(a);
0383 }
0384
0385 template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
0386 return psqrt_complex<Packet2cf>(a);
0387 }
0388
0389
0390 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
0391
0392
0393 #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
0394 static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
0395 #else
0396 const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
0397 static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
0398 #endif
0399
0400 struct Packet1cd
0401 {
0402 EIGEN_STRONG_INLINE Packet1cd() {}
0403 EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
0404 Packet2d v;
0405 };
0406
0407 template<> struct packet_traits<std::complex<double> > : default_packet_traits
0408 {
0409 typedef Packet1cd type;
0410 typedef Packet1cd half;
0411 enum
0412 {
0413 Vectorizable = 1,
0414 AlignedOnScalar = 0,
0415 size = 1,
0416 HasHalfPacket = 0,
0417
0418 HasAdd = 1,
0419 HasSub = 1,
0420 HasMul = 1,
0421 HasDiv = 1,
0422 HasNegate = 1,
0423 HasAbs = 0,
0424 HasAbs2 = 0,
0425 HasMin = 0,
0426 HasMax = 0,
0427 HasSetLinear = 0
0428 };
0429 };
0430
0431 template<> struct unpacket_traits<Packet1cd>
0432 {
0433 typedef std::complex<double> type;
0434 typedef Packet1cd half;
0435 typedef Packet2d as_real;
0436 enum
0437 {
0438 size=1,
0439 alignment=Aligned16,
0440 vectorizable=true,
0441 masked_load_available=false,
0442 masked_store_available=false
0443 };
0444 };
0445
0446 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from)
0447 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>(reinterpret_cast<const double*>(from))); }
0448
0449 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
0450 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from))); }
0451
0452 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
0453 {
0454
0455 return ploadu<Packet1cd>(&from);
0456 }
0457
0458 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0459 { return Packet1cd(padd<Packet2d>(a.v, b.v)); }
0460
0461 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0462 { return Packet1cd(psub<Packet2d>(a.v, b.v)); }
0463
0464 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
0465 { return Packet1cd(pnegate<Packet2d>(a.v)); }
0466
0467 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
0468 { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
0469
0470 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0471 {
0472 Packet2d v1, v2;
0473
0474
0475 v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
0476
0477 v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
0478
0479 v1 = vmulq_f64(v1, b.v);
0480
0481 v2 = vmulq_f64(v2, b.v);
0482
0483 v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
0484
0485 v2 = preverse<Packet2d>(v2);
0486
0487 return Packet1cd(vaddq_f64(v1, v2));
0488 }
0489
0490 template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)
0491 {
0492
0493
0494 Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);
0495
0496
0497 Packet2d eq_swapped = vreinterpretq_f64_u32(vrev64q_u32(vreinterpretq_u32_f64(eq)));
0498
0499 return Packet1cd(pand<Packet2d>(eq, eq_swapped));
0500 }
0501
0502 template<> EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0503 { return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
0504
0505 template<> EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0506 { return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
0507
0508 template<> EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0509 { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
0510
0511 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0512 { return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
0513
0514 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
0515 { return pset1<Packet1cd>(*from); }
0516
0517 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
0518 { EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<double*>(to), from.v); }
0519
0520 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
0521 { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), from.v); }
0522
0523 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *addr)
0524 { EIGEN_ARM_PREFETCH(reinterpret_cast<const double*>(addr)); }
0525
0526 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(
0527 const std::complex<double>* from, Index stride)
0528 {
0529 Packet2d res = pset1<Packet2d>(0.0);
0530 res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
0531 res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
0532 return Packet1cd(res);
0533 }
0534
0535 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(
0536 std::complex<double>* to, const Packet1cd& from, Index stride)
0537 { to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); }
0538
0539 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
0540 {
0541 EIGEN_ALIGN16 std::complex<double> res;
0542 pstore<std::complex<double> >(&res, a);
0543 return res;
0544 }
0545
0546 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
0547
0548 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
0549
0550 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
0551
0552 EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(Packet1cd,Packet2d)
0553
0554 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
0555 {
0556
0557 Packet1cd res = pmul(a,pconj(b));
0558 Packet2d s = pmul<Packet2d>(b.v, b.v);
0559 Packet2d rev_s = preverse<Packet2d>(s);
0560
0561 return Packet1cd(pdiv(res.v, padd<Packet2d>(s,rev_s)));
0562 }
0563
0564 EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd& x)
0565 { return Packet1cd(preverse(Packet2d(x.v))); }
0566
0567 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
0568 {
0569 Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
0570 kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
0571 kernel.packet[1].v = tmp;
0572 }
0573
0574 template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
0575 return psqrt_complex<Packet1cd>(a);
0576 }
0577
0578 #endif
0579
0580 }
0581
0582 }
0583
0584 #endif