File indexing completed on 2025-01-19 09:51:41
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013 #ifndef EIGEN_PACKET_MATH_MSA_H
0014 #define EIGEN_PACKET_MATH_MSA_H
0015
0016 #include <iostream>
0017 #include <string>
0018
0019 namespace Eigen {
0020
0021 namespace internal {
0022
// Architecture tuning knobs for the MSA backend. Each one is defined here only
// when the build has not already supplied its own definition.

// Defaults to 8.
#if !defined(EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD)
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
#endif

// Presence-only flag: defined without a value.
#if !defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
#define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
#endif

// Defaults to 32.
#if !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
#define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32
#endif
0034
// Optional call tracing. With the condition below switched on, every function
// that invokes EIGEN_MSA_DEBUG prints "file:line:function" to std::cout the
// first time it runs (tracked by a per-expansion static flag). With it off,
// the macro expands to nothing.
#if 0
#define EIGEN_MSA_DEBUG                                                             \
  static bool firstTime = true;                                                     \
  do {                                                                              \
    if (firstTime) {                                                                \
      std::cout << __FILE__ << ':' << __LINE__ << ':' << __FUNCTION__ << std::endl; \
      firstTime = false;                                                            \
    }                                                                               \
  } while (0)
#else
#define EIGEN_MSA_DEBUG
#endif
0047
// Packs four 2-bit lane selectors into one 8-bit immediate: `a` lands in bits
// 1:0, `b` in bits 3:2, `c` in bits 5:4 and `d` in bits 7:6.
#define EIGEN_MSA_SHF_I8(a, b, c, d) ((a) | ((b) << 2) | ((c) << 4) | ((d) << 6))
0049
0050 typedef v4f32 Packet4f;
0051 typedef v4i32 Packet4i;
0052 typedef v4u32 Packet4ui;
0053
0054 #define _EIGEN_DECLARE_CONST_Packet4f(NAME, X) const Packet4f p4f_##NAME = { X, X, X, X }
0055 #define _EIGEN_DECLARE_CONST_Packet4i(NAME, X) const Packet4i p4i_##NAME = { X, X, X, X }
0056 #define _EIGEN_DECLARE_CONST_Packet4ui(NAME, X) const Packet4ui p4ui_##NAME = { X, X, X, X }
0057
0058 inline std::ostream& operator<<(std::ostream& os, const Packet4f& value) {
0059 os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
0060 return os;
0061 }
0062
0063 inline std::ostream& operator<<(std::ostream& os, const Packet4i& value) {
0064 os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
0065 return os;
0066 }
0067
0068 inline std::ostream& operator<<(std::ostream& os, const Packet4ui& value) {
0069 os << "[ " << value[0] << ", " << value[1] << ", " << value[2] << ", " << value[3] << " ]";
0070 return os;
0071 }
0072
0073 template <>
0074 struct packet_traits<float> : default_packet_traits {
0075 typedef Packet4f type;
0076 typedef Packet4f half;
0077 enum {
0078 Vectorizable = 1,
0079 AlignedOnScalar = 1,
0080 size = 4,
0081 HasHalfPacket = 0,
0082
0083 HasDiv = 1,
0084 HasSin = EIGEN_FAST_MATH,
0085 HasCos = EIGEN_FAST_MATH,
0086 HasTanh = EIGEN_FAST_MATH,
0087 HasErf = EIGEN_FAST_MATH,
0088 HasLog = 1,
0089 HasExp = 1,
0090 HasSqrt = 1,
0091 HasRsqrt = 1,
0092 HasRound = 1,
0093 HasFloor = 1,
0094 HasCeil = 1,
0095 HasBlend = 1
0096 };
0097 };
0098
0099 template <>
0100 struct packet_traits<int32_t> : default_packet_traits {
0101 typedef Packet4i type;
0102 typedef Packet4i half;
0103 enum {
0104 Vectorizable = 1,
0105 AlignedOnScalar = 1,
0106 size = 4,
0107 HasHalfPacket = 0,
0108
0109 HasDiv = 1,
0110 HasBlend = 1
0111 };
0112 };
0113
0114 template <>
0115 struct unpacket_traits<Packet4f> {
0116 typedef float type;
0117 enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
0118 typedef Packet4f half;
0119 };
0120
0121 template <>
0122 struct unpacket_traits<Packet4i> {
0123 typedef int32_t type;
0124 enum { size = 4, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
0125 typedef Packet4i half;
0126 };
0127
0128 template <>
0129 EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from) {
0130 EIGEN_MSA_DEBUG;
0131
0132 Packet4f v = { from, from, from, from };
0133 return v;
0134 }
0135
0136 template <>
0137 EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int32_t& from) {
0138 EIGEN_MSA_DEBUG;
0139
0140 return __builtin_msa_fill_w(from);
0141 }
0142
0143 template <>
0144 EIGEN_STRONG_INLINE Packet4f pload1<Packet4f>(const float* from) {
0145 EIGEN_MSA_DEBUG;
0146
0147 float f = *from;
0148 Packet4f v = { f, f, f, f };
0149 return v;
0150 }
0151
0152 template <>
0153 EIGEN_STRONG_INLINE Packet4i pload1<Packet4i>(const int32_t* from) {
0154 EIGEN_MSA_DEBUG;
0155
0156 return __builtin_msa_fill_w(*from);
0157 }
0158
0159 template <>
0160 EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) {
0161 EIGEN_MSA_DEBUG;
0162
0163 return __builtin_msa_fadd_w(a, b);
0164 }
0165
0166 template <>
0167 EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) {
0168 EIGEN_MSA_DEBUG;
0169
0170 return __builtin_msa_addv_w(a, b);
0171 }
0172
0173 template <>
0174 EIGEN_STRONG_INLINE Packet4f plset<Packet4f>(const float& a) {
0175 EIGEN_MSA_DEBUG;
0176
0177 static const Packet4f countdown = { 0.0f, 1.0f, 2.0f, 3.0f };
0178 return padd(pset1<Packet4f>(a), countdown);
0179 }
0180
0181 template <>
0182 EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int32_t& a) {
0183 EIGEN_MSA_DEBUG;
0184
0185 static const Packet4i countdown = { 0, 1, 2, 3 };
0186 return padd(pset1<Packet4i>(a), countdown);
0187 }
0188
0189 template <>
0190 EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) {
0191 EIGEN_MSA_DEBUG;
0192
0193 return __builtin_msa_fsub_w(a, b);
0194 }
0195
0196 template <>
0197 EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) {
0198 EIGEN_MSA_DEBUG;
0199
0200 return __builtin_msa_subv_w(a, b);
0201 }
0202
0203 template <>
0204 EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a) {
0205 EIGEN_MSA_DEBUG;
0206
0207 return (Packet4f)__builtin_msa_bnegi_w((v4u32)a, 31);
0208 }
0209
0210 template <>
0211 EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) {
0212 EIGEN_MSA_DEBUG;
0213
0214 return __builtin_msa_addvi_w((v4i32)__builtin_msa_nori_b((v16u8)a, 0), 1);
0215 }
0216
0217 template <>
0218 EIGEN_STRONG_INLINE Packet4f pconj(const Packet4f& a) {
0219 EIGEN_MSA_DEBUG;
0220
0221 return a;
0222 }
0223
0224 template <>
0225 EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) {
0226 EIGEN_MSA_DEBUG;
0227
0228 return a;
0229 }
0230
0231 template <>
0232 EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) {
0233 EIGEN_MSA_DEBUG;
0234
0235 return __builtin_msa_fmul_w(a, b);
0236 }
0237
0238 template <>
0239 EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) {
0240 EIGEN_MSA_DEBUG;
0241
0242 return __builtin_msa_mulv_w(a, b);
0243 }
0244
0245 template <>
0246 EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) {
0247 EIGEN_MSA_DEBUG;
0248
0249 return __builtin_msa_fdiv_w(a, b);
0250 }
0251
0252 template <>
0253 EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) {
0254 EIGEN_MSA_DEBUG;
0255
0256 return __builtin_msa_div_s_w(a, b);
0257 }
0258
0259 template <>
0260 EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c) {
0261 EIGEN_MSA_DEBUG;
0262
0263 return __builtin_msa_fmadd_w(c, a, b);
0264 }
0265
0266 template <>
0267 EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) {
0268 EIGEN_MSA_DEBUG;
0269
0270
0271 Packet4i value = c;
0272 __asm__("maddv.w %w[value], %w[a], %w[b]\n"
0273
0274 : [value] "+f"(value)
0275
0276 : [a] "f"(a), [b] "f"(b));
0277 return value;
0278 }
0279
0280 template <>
0281 EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b) {
0282 EIGEN_MSA_DEBUG;
0283
0284 return (Packet4f)__builtin_msa_and_v((v16u8)a, (v16u8)b);
0285 }
0286
0287 template <>
0288 EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) {
0289 EIGEN_MSA_DEBUG;
0290
0291 return (Packet4i)__builtin_msa_and_v((v16u8)a, (v16u8)b);
0292 }
0293
0294 template <>
0295 EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b) {
0296 EIGEN_MSA_DEBUG;
0297
0298 return (Packet4f)__builtin_msa_or_v((v16u8)a, (v16u8)b);
0299 }
0300
0301 template <>
0302 EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) {
0303 EIGEN_MSA_DEBUG;
0304
0305 return (Packet4i)__builtin_msa_or_v((v16u8)a, (v16u8)b);
0306 }
0307
0308 template <>
0309 EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b) {
0310 EIGEN_MSA_DEBUG;
0311
0312 return (Packet4f)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
0313 }
0314
0315 template <>
0316 EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) {
0317 EIGEN_MSA_DEBUG;
0318
0319 return (Packet4i)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
0320 }
0321
0322 template <>
0323 EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) {
0324 EIGEN_MSA_DEBUG;
0325
0326 return pand(a, (Packet4f)__builtin_msa_xori_b((v16u8)b, 255));
0327 }
0328
0329 template <>
0330 EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) {
0331 EIGEN_MSA_DEBUG;
0332
0333 return pand(a, (Packet4i)__builtin_msa_xori_b((v16u8)b, 255));
0334 }
0335
0336 template <>
0337 EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b) {
0338 EIGEN_MSA_DEBUG;
0339
0340 #if EIGEN_FAST_MATH
0341
0342 return __builtin_msa_fmin_w(a, b);
0343 #else
0344
0345 Packet4i aNaN = __builtin_msa_fcun_w(a, a);
0346 Packet4i aMinOrNaN = por(__builtin_msa_fclt_w(a, b), aNaN);
0347 return (Packet4f)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
0348 #endif
0349 }
0350
0351 template <>
0352 EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) {
0353 EIGEN_MSA_DEBUG;
0354
0355 return __builtin_msa_min_s_w(a, b);
0356 }
0357
0358 template <>
0359 EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b) {
0360 EIGEN_MSA_DEBUG;
0361
0362 #if EIGEN_FAST_MATH
0363
0364 return __builtin_msa_fmax_w(a, b);
0365 #else
0366
0367 Packet4i aNaN = __builtin_msa_fcun_w(a, a);
0368 Packet4i aMaxOrNaN = por(__builtin_msa_fclt_w(b, a), aNaN);
0369 return (Packet4f)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
0370 #endif
0371 }
0372
0373 template <>
0374 EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) {
0375 EIGEN_MSA_DEBUG;
0376
0377 return __builtin_msa_max_s_w(a, b);
0378 }
0379
0380 template <>
0381 EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from) {
0382 EIGEN_MSA_DEBUG;
0383
0384 EIGEN_DEBUG_ALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
0385 }
0386
0387 template <>
0388 EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int32_t* from) {
0389 EIGEN_MSA_DEBUG;
0390
0391 EIGEN_DEBUG_ALIGNED_LOAD return __builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
0392 }
0393
0394 template <>
0395 EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f>(const float* from) {
0396 EIGEN_MSA_DEBUG;
0397
0398 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4f)__builtin_msa_ld_w(const_cast<float*>(from), 0);
0399 }
0400
0401 template <>
0402 EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int32_t* from) {
0403 EIGEN_MSA_DEBUG;
0404
0405 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet4i)__builtin_msa_ld_w(const_cast<int32_t*>(from), 0);
0406 }
0407
0408 template <>
0409 EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from) {
0410 EIGEN_MSA_DEBUG;
0411
0412 float f0 = from[0], f1 = from[1];
0413 Packet4f v0 = { f0, f0, f0, f0 };
0414 Packet4f v1 = { f1, f1, f1, f1 };
0415 return (Packet4f)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
0416 }
0417
0418 template <>
0419 EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int32_t* from) {
0420 EIGEN_MSA_DEBUG;
0421
0422 int32_t i0 = from[0], i1 = from[1];
0423 Packet4i v0 = { i0, i0, i0, i0 };
0424 Packet4i v1 = { i1, i1, i1, i1 };
0425 return (Packet4i)__builtin_msa_ilvr_d((v2i64)v1, (v2i64)v0);
0426 }
0427
0428 template <>
0429 EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from) {
0430 EIGEN_MSA_DEBUG;
0431
0432 EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
0433 }
0434
0435 template <>
0436 EIGEN_STRONG_INLINE void pstore<int32_t>(int32_t* to, const Packet4i& from) {
0437 EIGEN_MSA_DEBUG;
0438
0439 EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_w(from, to, 0);
0440 }
0441
0442 template <>
0443 EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) {
0444 EIGEN_MSA_DEBUG;
0445
0446 EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w((Packet4i)from, to, 0);
0447 }
0448
0449 template <>
0450 EIGEN_STRONG_INLINE void pstoreu<int32_t>(int32_t* to, const Packet4i& from) {
0451 EIGEN_MSA_DEBUG;
0452
0453 EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_w(from, to, 0);
0454 }
0455
0456 template <>
0457 EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride) {
0458 EIGEN_MSA_DEBUG;
0459
0460 float f = *from;
0461 Packet4f v = { f, f, f, f };
0462 v[1] = from[stride];
0463 v[2] = from[2 * stride];
0464 v[3] = from[3 * stride];
0465 return v;
0466 }
0467
0468 template <>
0469 EIGEN_DEVICE_FUNC inline Packet4i pgather<int32_t, Packet4i>(const int32_t* from, Index stride) {
0470 EIGEN_MSA_DEBUG;
0471
0472 int32_t i = *from;
0473 Packet4i v = { i, i, i, i };
0474 v[1] = from[stride];
0475 v[2] = from[2 * stride];
0476 v[3] = from[3 * stride];
0477 return v;
0478 }
0479
0480 template <>
0481 EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from,
0482 Index stride) {
0483 EIGEN_MSA_DEBUG;
0484
0485 *to = from[0];
0486 to += stride;
0487 *to = from[1];
0488 to += stride;
0489 *to = from[2];
0490 to += stride;
0491 *to = from[3];
0492 }
0493
0494 template <>
0495 EIGEN_DEVICE_FUNC inline void pscatter<int32_t, Packet4i>(int32_t* to, const Packet4i& from,
0496 Index stride) {
0497 EIGEN_MSA_DEBUG;
0498
0499 *to = from[0];
0500 to += stride;
0501 *to = from[1];
0502 to += stride;
0503 *to = from[2];
0504 to += stride;
0505 *to = from[3];
0506 }
0507
0508 template <>
0509 EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) {
0510 EIGEN_MSA_DEBUG;
0511
0512 __builtin_prefetch(addr);
0513 }
0514
0515 template <>
0516 EIGEN_STRONG_INLINE void prefetch<int32_t>(const int32_t* addr) {
0517 EIGEN_MSA_DEBUG;
0518
0519 __builtin_prefetch(addr);
0520 }
0521
0522 template <>
0523 EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) {
0524 EIGEN_MSA_DEBUG;
0525
0526 return a[0];
0527 }
0528
0529 template <>
0530 EIGEN_STRONG_INLINE int32_t pfirst<Packet4i>(const Packet4i& a) {
0531 EIGEN_MSA_DEBUG;
0532
0533 return a[0];
0534 }
0535
0536 template <>
0537 EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a) {
0538 EIGEN_MSA_DEBUG;
0539
0540 return (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
0541 }
0542
0543 template <>
0544 EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a) {
0545 EIGEN_MSA_DEBUG;
0546
0547 return __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(3, 2, 1, 0));
0548 }
0549
0550 template <>
0551 EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f& a) {
0552 EIGEN_MSA_DEBUG;
0553
0554 return (Packet4f)__builtin_msa_bclri_w((v4u32)a, 31);
0555 }
0556
0557 template <>
0558 EIGEN_STRONG_INLINE Packet4i pabs(const Packet4i& a) {
0559 EIGEN_MSA_DEBUG;
0560
0561 Packet4i zero = __builtin_msa_ldi_w(0);
0562 return __builtin_msa_add_a_w(zero, a);
0563 }
0564
0565 template <>
0566 EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a) {
0567 EIGEN_MSA_DEBUG;
0568
0569 Packet4f s = padd(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
0570 s = padd(s, (Packet4f)__builtin_msa_shf_w((v4i32)s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
0571 return s[0];
0572 }
0573
0574
0575 template <>
0576 EIGEN_STRONG_INLINE int32_t predux<Packet4i>(const Packet4i& a) {
0577 EIGEN_MSA_DEBUG;
0578
0579 Packet4i s = padd(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
0580 s = padd(s, __builtin_msa_shf_w(s, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
0581 return s[0];
0582 }
0583
0584
0585
0586 template <>
0587 EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a) {
0588 EIGEN_MSA_DEBUG;
0589
0590 Packet4f p = pmul(a, (Packet4f)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
0591 p = pmul(p, (Packet4f)__builtin_msa_shf_w((v4i32)p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
0592 return p[0];
0593 }
0594
0595 template <>
0596 EIGEN_STRONG_INLINE int32_t predux_mul<Packet4i>(const Packet4i& a) {
0597 EIGEN_MSA_DEBUG;
0598
0599 Packet4i p = pmul(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
0600 p = pmul(p, __builtin_msa_shf_w(p, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
0601 return p[0];
0602 }
0603
0604
0605 template <>
0606 EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a) {
0607 EIGEN_MSA_DEBUG;
0608
0609
0610 Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
0611 #if !EIGEN_FAST_MATH
0612
0613
0614 v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
0615
0616 unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
0617 #endif
0618
0619 Packet4f v = __builtin_msa_fmin_w(a, swapped);
0620 v = __builtin_msa_fmin_w(
0621 v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
0622 #if !EIGEN_FAST_MATH
0623
0624 v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
0625 v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
0626 #endif
0627 return v[0];
0628 }
0629
0630 template <>
0631 EIGEN_STRONG_INLINE int32_t predux_min<Packet4i>(const Packet4i& a) {
0632 EIGEN_MSA_DEBUG;
0633
0634 Packet4i m = pmin(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
0635 m = pmin(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
0636 return m[0];
0637 }
0638
0639
0640 template <>
0641 EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a) {
0642 EIGEN_MSA_DEBUG;
0643
0644
0645 Packet4f swapped = (Packet4f)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
0646 #if !EIGEN_FAST_MATH
0647
0648
0649 v16u8 unord = (v16u8)__builtin_msa_fcun_w(a, swapped);
0650
0651 unord = (v16u8)__builtin_msa_ceqi_d((v2i64)unord, 0);
0652 #endif
0653
0654 Packet4f v = __builtin_msa_fmax_w(a, swapped);
0655 v = __builtin_msa_fmax_w(
0656 v, (Packet4f)__builtin_msa_shf_w((Packet4i)v, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
0657 #if !EIGEN_FAST_MATH
0658
0659 v16u8 qnans = (v16u8)__builtin_msa_fill_w(0x7FC00000);
0660 v = (Packet4f)__builtin_msa_bsel_v(unord, qnans, (v16u8)v);
0661 #endif
0662 return v[0];
0663 }
0664
0665 template <>
0666 EIGEN_STRONG_INLINE int32_t predux_max<Packet4i>(const Packet4i& a) {
0667 EIGEN_MSA_DEBUG;
0668
0669 Packet4i m = pmax(a, __builtin_msa_shf_w(a, EIGEN_MSA_SHF_I8(2, 3, 0, 1)));
0670 m = pmax(m, __builtin_msa_shf_w(m, EIGEN_MSA_SHF_I8(1, 0, 3, 2)));
0671 return m[0];
0672 }
0673
0674 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4f, 4>& value) {
0675 os << "[ " << value.packet[0] << "," << std::endl
0676 << " " << value.packet[1] << "," << std::endl
0677 << " " << value.packet[2] << "," << std::endl
0678 << " " << value.packet[3] << " ]";
0679 return os;
0680 }
0681
0682 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f, 4>& kernel) {
0683 EIGEN_MSA_DEBUG;
0684
0685 v4i32 tmp1, tmp2, tmp3, tmp4;
0686
0687 tmp1 = __builtin_msa_ilvr_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
0688 tmp2 = __builtin_msa_ilvr_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
0689 tmp3 = __builtin_msa_ilvl_w((v4i32)kernel.packet[1], (v4i32)kernel.packet[0]);
0690 tmp4 = __builtin_msa_ilvl_w((v4i32)kernel.packet[3], (v4i32)kernel.packet[2]);
0691
0692 kernel.packet[0] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
0693 kernel.packet[1] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
0694 kernel.packet[2] = (Packet4f)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
0695 kernel.packet[3] = (Packet4f)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
0696 }
0697
0698 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet4i, 4>& value) {
0699 os << "[ " << value.packet[0] << "," << std::endl
0700 << " " << value.packet[1] << "," << std::endl
0701 << " " << value.packet[2] << "," << std::endl
0702 << " " << value.packet[3] << " ]";
0703 return os;
0704 }
0705
0706 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4i, 4>& kernel) {
0707 EIGEN_MSA_DEBUG;
0708
0709 v4i32 tmp1, tmp2, tmp3, tmp4;
0710
0711 tmp1 = __builtin_msa_ilvr_w(kernel.packet[1], kernel.packet[0]);
0712 tmp2 = __builtin_msa_ilvr_w(kernel.packet[3], kernel.packet[2]);
0713 tmp3 = __builtin_msa_ilvl_w(kernel.packet[1], kernel.packet[0]);
0714 tmp4 = __builtin_msa_ilvl_w(kernel.packet[3], kernel.packet[2]);
0715
0716 kernel.packet[0] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp2, (v2i64)tmp1);
0717 kernel.packet[1] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp2, (v2i64)tmp1);
0718 kernel.packet[2] = (Packet4i)__builtin_msa_ilvr_d((v2i64)tmp4, (v2i64)tmp3);
0719 kernel.packet[3] = (Packet4i)__builtin_msa_ilvod_d((v2i64)tmp4, (v2i64)tmp3);
0720 }
0721
0722 template <>
0723 EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f& a) {
0724 EIGEN_MSA_DEBUG;
0725
0726 return __builtin_msa_fsqrt_w(a);
0727 }
0728
0729 template <>
0730 EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f& a) {
0731 EIGEN_MSA_DEBUG;
0732
0733 #if EIGEN_FAST_MATH
0734 return __builtin_msa_frsqrt_w(a);
0735 #else
0736 Packet4f ones = __builtin_msa_ffint_s_w(__builtin_msa_ldi_w(1));
0737 return pdiv(ones, psqrt(a));
0738 #endif
0739 }
0740
0741 template <>
0742 EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a) {
0743 Packet4f v = a;
0744 int32_t old_mode, new_mode;
0745 asm volatile(
0746 "cfcmsa %[old_mode], $1\n"
0747 "ori %[new_mode], %[old_mode], 3\n"
0748 "ctcmsa $1, %[new_mode]\n"
0749 "frint.w %w[v], %w[v]\n"
0750 "ctcmsa $1, %[old_mode]\n"
0751 :
0752 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
0753 [v] "+f"(v)
0754 :
0755 :
0756 );
0757 return v;
0758 }
0759
0760 template <>
0761 EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a) {
0762 Packet4f v = a;
0763 int32_t old_mode, new_mode;
0764 asm volatile(
0765 "cfcmsa %[old_mode], $1\n"
0766 "ori %[new_mode], %[old_mode], 3\n"
0767 "xori %[new_mode], %[new_mode], 1\n"
0768 "ctcmsa $1, %[new_mode]\n"
0769 "frint.w %w[v], %w[v]\n"
0770 "ctcmsa $1, %[old_mode]\n"
0771 :
0772 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
0773 [v] "+f"(v)
0774 :
0775 :
0776 );
0777 return v;
0778 }
0779
0780 template <>
0781 EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a) {
0782 Packet4f v = a;
0783 int32_t old_mode, new_mode;
0784 asm volatile(
0785 "cfcmsa %[old_mode], $1\n"
0786 "ori %[new_mode], %[old_mode], 3\n"
0787 "xori %[new_mode], %[new_mode], 3\n"
0788 "ctcmsa $1, %[new_mode]\n"
0789 "frint.w %w[v], %w[v]\n"
0790 "ctcmsa $1, %[old_mode]\n"
0791 :
0792 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
0793 [v] "+f"(v)
0794 :
0795 :
0796 );
0797 return v;
0798 }
0799
0800 template <>
0801 EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket,
0802 const Packet4f& elsePacket) {
0803 Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],
0804 ifPacket.select[3] };
0805 Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
0806 return (Packet4f)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
0807 }
0808
0809 template <>
0810 EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket,
0811 const Packet4i& elsePacket) {
0812 Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2],
0813 ifPacket.select[3] };
0814 Packet4i mask = __builtin_msa_ceqi_w((Packet4i)select, 0);
0815 return (Packet4i)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
0816 }
0817
0818
0819
0820 typedef v2f64 Packet2d;
0821 typedef v2i64 Packet2l;
0822 typedef v2u64 Packet2ul;
0823
0824 #define _EIGEN_DECLARE_CONST_Packet2d(NAME, X) const Packet2d p2d_##NAME = { X, X }
0825 #define _EIGEN_DECLARE_CONST_Packet2l(NAME, X) const Packet2l p2l_##NAME = { X, X }
0826 #define _EIGEN_DECLARE_CONST_Packet2ul(NAME, X) const Packet2ul p2ul_##NAME = { X, X }
0827
0828 inline std::ostream& operator<<(std::ostream& os, const Packet2d& value) {
0829 os << "[ " << value[0] << ", " << value[1] << " ]";
0830 return os;
0831 }
0832
0833 inline std::ostream& operator<<(std::ostream& os, const Packet2l& value) {
0834 os << "[ " << value[0] << ", " << value[1] << " ]";
0835 return os;
0836 }
0837
0838 inline std::ostream& operator<<(std::ostream& os, const Packet2ul& value) {
0839 os << "[ " << value[0] << ", " << value[1] << " ]";
0840 return os;
0841 }
0842
0843 template <>
0844 struct packet_traits<double> : default_packet_traits {
0845 typedef Packet2d type;
0846 typedef Packet2d half;
0847 enum {
0848 Vectorizable = 1,
0849 AlignedOnScalar = 1,
0850 size = 2,
0851 HasHalfPacket = 0,
0852
0853 HasDiv = 1,
0854 HasExp = 1,
0855 HasSqrt = 1,
0856 HasRsqrt = 1,
0857 HasRound = 1,
0858 HasFloor = 1,
0859 HasCeil = 1,
0860 HasBlend = 1
0861 };
0862 };
0863
0864 template <>
0865 struct unpacket_traits<Packet2d> {
0866 typedef double type;
0867 enum { size = 2, alignment = Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false };
0868 typedef Packet2d half;
0869 };
0870
0871 template <>
0872 EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
0873 EIGEN_MSA_DEBUG;
0874
0875 Packet2d value = { from, from };
0876 return value;
0877 }
0878
0879 template <>
0880 EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) {
0881 EIGEN_MSA_DEBUG;
0882
0883 return __builtin_msa_fadd_d(a, b);
0884 }
0885
0886 template <>
0887 EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) {
0888 EIGEN_MSA_DEBUG;
0889
0890 static const Packet2d countdown = { 0.0, 1.0 };
0891 return padd(pset1<Packet2d>(a), countdown);
0892 }
0893
0894 template <>
0895 EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) {
0896 EIGEN_MSA_DEBUG;
0897
0898 return __builtin_msa_fsub_d(a, b);
0899 }
0900
0901 template <>
0902 EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) {
0903 EIGEN_MSA_DEBUG;
0904
0905 return (Packet2d)__builtin_msa_bnegi_d((v2u64)a, 63);
0906 }
0907
0908 template <>
0909 EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) {
0910 EIGEN_MSA_DEBUG;
0911
0912 return a;
0913 }
0914
0915 template <>
0916 EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) {
0917 EIGEN_MSA_DEBUG;
0918
0919 return __builtin_msa_fmul_d(a, b);
0920 }
0921
0922 template <>
0923 EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) {
0924 EIGEN_MSA_DEBUG;
0925
0926 return __builtin_msa_fdiv_d(a, b);
0927 }
0928
0929 template <>
0930 EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) {
0931 EIGEN_MSA_DEBUG;
0932
0933 return __builtin_msa_fmadd_d(c, a, b);
0934 }
0935
0936
0937
0938 template <>
0939 EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) {
0940 EIGEN_MSA_DEBUG;
0941
0942 return (Packet2d)__builtin_msa_and_v((v16u8)a, (v16u8)b);
0943 }
0944
0945 template <>
0946 EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) {
0947 EIGEN_MSA_DEBUG;
0948
0949 return (Packet2d)__builtin_msa_or_v((v16u8)a, (v16u8)b);
0950 }
0951
0952 template <>
0953 EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) {
0954 EIGEN_MSA_DEBUG;
0955
0956 return (Packet2d)__builtin_msa_xor_v((v16u8)a, (v16u8)b);
0957 }
0958
0959 template <>
0960 EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) {
0961 EIGEN_MSA_DEBUG;
0962
0963 return pand(a, (Packet2d)__builtin_msa_xori_b((v16u8)b, 255));
0964 }
0965
0966 template <>
0967 EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from) {
0968 EIGEN_MSA_DEBUG;
0969
0970 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
0971 }
0972
0973 template <>
0974 EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) {
0975 EIGEN_MSA_DEBUG;
0976
0977 #if EIGEN_FAST_MATH
0978
0979 return __builtin_msa_fmin_d(a, b);
0980 #else
0981
0982 v2i64 aNaN = __builtin_msa_fcun_d(a, a);
0983 v2i64 aMinOrNaN = por(__builtin_msa_fclt_d(a, b), aNaN);
0984 return (Packet2d)__builtin_msa_bsel_v((v16u8)aMinOrNaN, (v16u8)b, (v16u8)a);
0985 #endif
0986 }
0987
0988 template <>
0989 EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) {
0990 EIGEN_MSA_DEBUG;
0991
0992 #if EIGEN_FAST_MATH
0993
0994 return __builtin_msa_fmax_d(a, b);
0995 #else
0996
0997 v2i64 aNaN = __builtin_msa_fcun_d(a, a);
0998 v2i64 aMaxOrNaN = por(__builtin_msa_fclt_d(b, a), aNaN);
0999 return (Packet2d)__builtin_msa_bsel_v((v16u8)aMaxOrNaN, (v16u8)b, (v16u8)a);
1000 #endif
1001 }
1002
1003 template <>
1004 EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) {
1005 EIGEN_MSA_DEBUG;
1006
1007 EIGEN_DEBUG_UNALIGNED_LOAD return (Packet2d)__builtin_msa_ld_d(const_cast<double*>(from), 0);
1008 }
1009
1010 template <>
1011 EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from) {
1012 EIGEN_MSA_DEBUG;
1013
1014 Packet2d value = { *from, *from };
1015 return value;
1016 }
1017
1018 template <>
1019 EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from) {
1020 EIGEN_MSA_DEBUG;
1021
1022 EIGEN_DEBUG_ALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1023 }
1024
1025 template <>
1026 EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) {
1027 EIGEN_MSA_DEBUG;
1028
1029 EIGEN_DEBUG_UNALIGNED_STORE __builtin_msa_st_d((v2i64)from, to, 0);
1030 }
1031
1032 template <>
1033 EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride) {
1034 EIGEN_MSA_DEBUG;
1035
1036 Packet2d value;
1037 value[0] = *from;
1038 from += stride;
1039 value[1] = *from;
1040 return value;
1041 }
1042
1043 template <>
1044 EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from,
1045 Index stride) {
1046 EIGEN_MSA_DEBUG;
1047
1048 *to = from[0];
1049 to += stride;
1050 *to = from[1];
1051 }
1052
1053 template <>
1054 EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) {
1055 EIGEN_MSA_DEBUG;
1056
1057 __builtin_prefetch(addr);
1058 }
1059
1060 template <>
1061 EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) {
1062 EIGEN_MSA_DEBUG;
1063
1064 return a[0];
1065 }
1066
1067 template <>
1068 EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a) {
1069 EIGEN_MSA_DEBUG;
1070
1071 return (Packet2d)__builtin_msa_shf_w((v4i32)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1072 }
1073
1074 template <>
1075 EIGEN_STRONG_INLINE Packet2d pabs(const Packet2d& a) {
1076 EIGEN_MSA_DEBUG;
1077
1078 return (Packet2d)__builtin_msa_bclri_d((v2u64)a, 63);
1079 }
1080
1081 template <>
1082 EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a) {
1083 EIGEN_MSA_DEBUG;
1084
1085 Packet2d s = padd(a, preverse(a));
1086 return s[0];
1087 }
1088
1089
1090
1091 template <>
1092 EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a) {
1093 EIGEN_MSA_DEBUG;
1094
1095 Packet2d p = pmul(a, preverse(a));
1096 return p[0];
1097 }
1098
1099
1100 template <>
1101 EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a) {
1102 EIGEN_MSA_DEBUG;
1103
1104 #if EIGEN_FAST_MATH
1105 Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1106 Packet2d v = __builtin_msa_fmin_d(a, swapped);
1107 return v[0];
1108 #else
1109 double a0 = a[0], a1 = a[1];
1110 return ((numext::isnan)(a0) || a0 < a1) ? a0 : a1;
1111 #endif
1112 }
1113
1114
1115 template <>
1116 EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a) {
1117 EIGEN_MSA_DEBUG;
1118
1119 #if EIGEN_FAST_MATH
1120 Packet2d swapped = (Packet2d)__builtin_msa_shf_w((Packet4i)a, EIGEN_MSA_SHF_I8(2, 3, 0, 1));
1121 Packet2d v = __builtin_msa_fmax_d(a, swapped);
1122 return v[0];
1123 #else
1124 double a0 = a[0], a1 = a[1];
1125 return ((numext::isnan)(a0) || a0 > a1) ? a0 : a1;
1126 #endif
1127 }
1128
1129 template <>
1130 EIGEN_STRONG_INLINE Packet2d psqrt(const Packet2d& a) {
1131 EIGEN_MSA_DEBUG;
1132
1133 return __builtin_msa_fsqrt_d(a);
1134 }
1135
1136 template <>
1137 EIGEN_STRONG_INLINE Packet2d prsqrt(const Packet2d& a) {
1138 EIGEN_MSA_DEBUG;
1139
1140 #if EIGEN_FAST_MATH
1141 return __builtin_msa_frsqrt_d(a);
1142 #else
1143 Packet2d ones = __builtin_msa_ffint_s_d(__builtin_msa_ldi_d(1));
1144 return pdiv(ones, psqrt(a));
1145 #endif
1146 }
1147
1148 inline std::ostream& operator<<(std::ostream& os, const PacketBlock<Packet2d, 2>& value) {
1149 os << "[ " << value.packet[0] << "," << std::endl << " " << value.packet[1] << " ]";
1150 return os;
1151 }
1152
1153 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2d, 2>& kernel) {
1154 EIGEN_MSA_DEBUG;
1155
1156 Packet2d trn1 = (Packet2d)__builtin_msa_ilvev_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1157 Packet2d trn2 = (Packet2d)__builtin_msa_ilvod_d((v2i64)kernel.packet[1], (v2i64)kernel.packet[0]);
1158 kernel.packet[0] = trn1;
1159 kernel.packet[1] = trn2;
1160 }
1161
1162 template <>
1163 EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) {
1164 Packet2d v = a;
1165 int32_t old_mode, new_mode;
1166 asm volatile(
1167 "cfcmsa %[old_mode], $1\n"
1168 "ori %[new_mode], %[old_mode], 3\n"
1169 "ctcmsa $1, %[new_mode]\n"
1170 "frint.d %w[v], %w[v]\n"
1171 "ctcmsa $1, %[old_mode]\n"
1172 :
1173 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1174 [v] "+f"(v)
1175 :
1176 :
1177 );
1178 return v;
1179 }
1180
1181 template <>
1182 EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) {
1183 Packet2d v = a;
1184 int32_t old_mode, new_mode;
1185 asm volatile(
1186 "cfcmsa %[old_mode], $1\n"
1187 "ori %[new_mode], %[old_mode], 3\n"
1188 "xori %[new_mode], %[new_mode], 1\n"
1189 "ctcmsa $1, %[new_mode]\n"
1190 "frint.d %w[v], %w[v]\n"
1191 "ctcmsa $1, %[old_mode]\n"
1192 :
1193 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1194 [v] "+f"(v)
1195 :
1196 :
1197 );
1198 return v;
1199 }
1200
1201 template <>
1202 EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) {
1203 Packet2d v = a;
1204 int32_t old_mode, new_mode;
1205 asm volatile(
1206 "cfcmsa %[old_mode], $1\n"
1207 "ori %[new_mode], %[old_mode], 3\n"
1208 "xori %[new_mode], %[new_mode], 3\n"
1209 "ctcmsa $1, %[new_mode]\n"
1210 "frint.d %w[v], %w[v]\n"
1211 "ctcmsa $1, %[old_mode]\n"
1212 :
1213 [old_mode] "=r"(old_mode), [new_mode] "=r"(new_mode),
1214 [v] "+f"(v)
1215 :
1216 :
1217 );
1218 return v;
1219 }
1220
1221 template <>
1222 EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket,
1223 const Packet2d& elsePacket) {
1224 Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
1225 Packet2l mask = __builtin_msa_ceqi_d((Packet2l)select, 0);
1226 return (Packet2d)__builtin_msa_bsel_v((v16u8)mask, (v16u8)thenPacket, (v16u8)elsePacket);
1227 }
1228
1229 }
1230
1231 }
1232
1233 #endif