Warning, file /include/Rivet/Math/eigen3/src/Core/arch/MSA/MathFunctions.h was not indexed
or was modified since last indexation (in which case cross-reference links may be missing, inaccurate or erroneous).
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026 #ifndef EIGEN_MATH_FUNCTIONS_MSA_H
0027 #define EIGEN_MATH_FUNCTIONS_MSA_H
0028
0029 namespace RivetEigen {
0030
0031 namespace internal {
0032
// plog specialization for Packet4f: natural logarithm of each of the four
// float lanes of _x, written with MIPS MSA builtins.
// Scheme visible below: reduce x to a mantissa near 1 and an integer exponent
// e, evaluate a nine-coefficient polynomial (cephes_log_p0..p8) on the reduced
// mantissa, then add e * ln(2).
// Special lanes as coded below: x < 0 is turned into NaN, x == 0 is replaced
// by zero_mask << 23 (intended as -infinity), and the final fmin is there so
// that a +infinity argument comes back as +infinity.
// NOTE(review): remarks about individual __builtin_msa_* calls follow the MIPS
// MSA documentation, and pmadd(a, b, c) is assumed to be a * b + c; neither is
// defined in this file -- confirm before relying on them.
0033 template <>
0034 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
0035 plog<Packet4f>(const Packet4f& _x) {
0036 static _EIGEN_DECLARE_CONST_Packet4f(cephes_SQRTHF, 0.707106781186547524f);
0037 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p0, 7.0376836292e-2f);
0038 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p1, -1.1514610310e-1f);
0039 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p2, 1.1676998740e-1f);
0040 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p3, -1.2420140846e-1f);
0041 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p4, +1.4249322787e-1f);
0042 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p5, -1.6668057665e-1f);
0043 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p6, +2.0000714765e-1f);
0044 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p7, -2.4999993993e-1f);
0045 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_p8, +3.3333331174e-1f);
0046 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q1, -2.12194440e-4f);
0047 static _EIGEN_DECLARE_CONST_Packet4f(cephes_log_q2, 0.693359375f);
0048 static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
0049 static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
0050 // The constants declared above are referenced below as p4f_<name>.
0051 // Turn negative lanes into NaN and remember which lanes compare equal to zero.
0052 Packet4f zero = (Packet4f)__builtin_msa_ldi_w(0);
0053 Packet4i neg_mask = __builtin_msa_fclt_w(_x, zero);
0054 Packet4i zero_mask = __builtin_msa_fceq_w(_x, zero);
0055 Packet4f non_neg_x_or_nan = padd(_x, (Packet4f)neg_mask);  // adds 0.0, or the mask bits read as a float (presumably a NaN pattern)
0056 Packet4f x = non_neg_x_or_nan;
0057
0058 // e_int: integer exponent of x, so that x = m * 2**e_int.
0059 // NOTE(review): presumably 1 <= m < 2 (flog2_w acting like logb) -- confirm.
0060 Packet4i e_int = __builtin_msa_ftint_s_w(__builtin_msa_flog2_w(x));
0061 // nori_b(e_int, 0) is a bytewise NOT, i.e. -e_int - 1: rescale x by 2**(-e_int - 1).
0062 x = __builtin_msa_fexp2_w(x, (Packet4i)__builtin_msa_nori_b((v16u8)e_int, 0));
0063 // What the masked code below computes, lane by lane:
0064 //   if (x < SQRTHF) {
0065 //     x = x + x - 1.0;
0066 //   } else {
0067 //     e_int += 1;   // done as e_int - ge_mask, the mask presumably being -1 here
0068 //     x = x - 1.0;
0069 //   }
0070 // bsel_v keeps xx where ge_mask is clear and x where it is set; the shared
0071 // "- 1.0" is then applied to every lane.
0072 Packet4f xx = padd(x, x);
0073 Packet4i ge_mask = __builtin_msa_fcle_w(p4f_cephes_SQRTHF, x);
0074 e_int = psub(e_int, ge_mask);
0075 x = (Packet4f)__builtin_msa_bsel_v((v16u8)ge_mask, (v16u8)xx, (v16u8)x);
0076 x = psub(x, p4f_1);
0077 Packet4f e = __builtin_msa_ffint_s_w(e_int);
0078 // e is the exponent converted to float; x2 and x3 are powers of the reduced x.
0079 Packet4f x2 = pmul(x, x);
0080 Packet4f x3 = pmul(x2, x);
0081 // Polynomial in x: three short Horner chains (y, y1, y2) combined through x**3, then scaled by x**3.
0082 Packet4f y, y1, y2;
0083 y = pmadd(p4f_cephes_log_p0, x, p4f_cephes_log_p1);
0084 y1 = pmadd(p4f_cephes_log_p3, x, p4f_cephes_log_p4);
0085 y2 = pmadd(p4f_cephes_log_p6, x, p4f_cephes_log_p7);
0086 y = pmadd(y, x, p4f_cephes_log_p2);
0087 y1 = pmadd(y1, x, p4f_cephes_log_p5);
0088 y2 = pmadd(y2, x, p4f_cephes_log_p8);
0089 y = pmadd(y, x3, y1);
0090 y = pmadd(y, x3, y2);
0091 y = pmul(y, x3);
0092 // Add e * ln(2) in two parts: q1 + q2 = 0.693147180..., q2 being the coarse part.
0093 y = pmadd(e, p4f_cephes_log_q1, y);
0094 x = __builtin_msa_fmsub_w(x, x2, p4f_half);  // x - 0.5 * x**2, given fmsub_w(a, b, c) = a - b * c
0095 x = padd(x, y);
0096 x = pmadd(e, p4f_cephes_log_q2, x);
0097
0098 // x now holds the candidate result. Zero and +infinity arguments still
0099 // need fixing up.
0100 //
0101 // +infinity: taking the smaller of the candidate and the (NaN-poisoned)
0102 // argument leaves finite results untouched as long as log(x) < x.
0103 // NOTE(review): for a +infinity argument the candidate is presumably NaN
0104 // (infinities of both signs meet in the polynomial terms), and the code
0105 // relies on fmin_w returning the non-NaN operand in that case -- confirm
0106 // against the MSA fmin definition.
0107 // Negative arguments: both operands are presumably NaN at this point, so
0108 // the result stays NaN -- confirm.
0109
0110
0111
0112 x = __builtin_msa_fmin_w(x, non_neg_x_or_nan);
0113
0114 // Zero lanes (fceq also matches -0.0): mask << 23 is presumably 0xFF800000, the bits of -infinity.
0115 Packet4i neg_infs = __builtin_msa_slli_w(zero_mask, 23);
0116 x = (Packet4f)__builtin_msa_bsel_v((v16u8)zero_mask, (v16u8)x, (v16u8)neg_infs);
0117
0118 return x;
0119 }
0120
// pexp specialization for Packet4f: e**x for each of the four float lanes of
// _x, written with MIPS MSA builtins.
// Scheme visible below: clamp x to [-128, 128], pick n = round(x * log2(e)),
// subtract n * ln(2) from x in two steps (C1 + C2 = 0.693147180...), evaluate
// a polynomial on the remainder and scale the result by 2**n.
// NOTE(review): remarks about individual __builtin_msa_* calls follow the MIPS
// MSA documentation, and pmadd(a, b, c) is assumed to be a * b + c; neither is
// defined in this file -- confirm before relying on them.
0121 template <>
0122 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
0123 pexp<Packet4f>(const Packet4f& _x) {
0124 // NOTE(review): the +/-128 clamp bounds presumably still let the final 2**n
0125 // scaling underflow to 0 and overflow to infinity on its own -- confirm.
0126 static _EIGEN_DECLARE_CONST_Packet4f(exp_lo, -128.0f);
0127 static _EIGEN_DECLARE_CONST_Packet4f(exp_hi, +128.0f);
0128 static _EIGEN_DECLARE_CONST_Packet4f(cephes_LOG2EF, 1.44269504088896341f);
0129 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C1, 0.693359375f);
0130 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_C2, -2.12194440e-4f);
0131 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p0, 1.9875691500e-4f);
0132 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p1, 1.3981999507e-3f);
0133 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p2, 8.3334519073e-3f);
0134 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p3, 4.1665795894e-2f);
0135 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459e-1f);
0136 static _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201e-1f);
0137 static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
0138 static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
0139 // The constants declared above are referenced below as p4f_<name>.
0140 Packet4f x = _x;
0141
0142 // Clamp x: lanes below exp_lo take exp_lo, lanes above exp_hi take exp_hi (bsel_v picks its third operand where the mask is set).
0143 x = (Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(x, p4f_exp_lo), (v16u8)x,
0144 (v16u8)p4f_exp_lo);
0145 x = (Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(p4f_exp_hi, x), (v16u8)x,
0146 (v16u8)p4f_exp_hi);
0147
0148 // Round x * log2(e) to the nearest integer: add 0.5 carrying x's sign bit (binsli_w with 0 copies the top bit), then truncate.
0149 Packet4f x2_add = (Packet4f)__builtin_msa_binsli_w((v4u32)p4f_half, (v4u32)x, 0);
0150 Packet4f x2 = pmadd(x, p4f_cephes_LOG2EF, x2_add);
0151 Packet4i x2_int = __builtin_msa_ftrunc_s_w(x2);
0152 Packet4f x2_int_f = __builtin_msa_ffint_s_w(x2_int);
0153 // Remainder: x - n * C1 - n * C2, given fmsub_w(a, b, c) = a - b * c.
0154 x = __builtin_msa_fmsub_w(x, x2_int_f, p4f_cephes_exp_C1);
0155 x = __builtin_msa_fmsub_w(x, x2_int_f, p4f_cephes_exp_C2);
0156
0157 Packet4f z = pmul(x, x);
0158 // y = 1 + x + x**2 * P(x), with P evaluated by Horner's rule over p0..p5.
0159 Packet4f y = p4f_cephes_exp_p0;
0160 y = pmadd(y, x, p4f_cephes_exp_p1);
0161 y = pmadd(y, x, p4f_cephes_exp_p2);
0162 y = pmadd(y, x, p4f_cephes_exp_p3);
0163 y = pmadd(y, x, p4f_cephes_exp_p4);
0164 y = pmadd(y, x, p4f_cephes_exp_p5);
0165 y = pmadd(y, z, x);
0166 y = padd(y, p4f_1);
0167
0168 // Scale by 2**n (fexp2_w presumably multiplies its first operand by 2 to the integer second operand -- confirm).
0169 y = __builtin_msa_fexp2_w(y, x2_int);
0170
0171 return y;
0172 }
0173
// ptanh specialization for Packet4f: approximates tanh for each of the four
// float lanes of _x as a ratio p(|x|) / q(|x|), where p is an odd polynomial
// (alpha_1..alpha_13) and q an even polynomial (beta_0..beta_6).
// |x| is limited to at most 9 before evaluation, the sign of _x is copied back
// onto the quotient, and lanes with |x| < 1e-4 return _x itself.
// NOTE(review): remarks about individual __builtin_msa_* calls follow the MIPS
// MSA documentation, and pmadd(a, b, c) is assumed to be a * b + c; neither is
// defined in this file -- confirm before relying on them.
0174 template <>
0175 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
0176 ptanh<Packet4f>(const Packet4f& _x) {
0177 static _EIGEN_DECLARE_CONST_Packet4f(tanh_tiny, 1e-4f);
0178 static _EIGEN_DECLARE_CONST_Packet4f(tanh_hi, 9.0f);
0179 // Coefficients of the numerator (odd powers of x).
0180 static _EIGEN_DECLARE_CONST_Packet4f(alpha_1, 4.89352455891786e-3f);
0181 static _EIGEN_DECLARE_CONST_Packet4f(alpha_3, 6.37261928875436e-4f);
0182 static _EIGEN_DECLARE_CONST_Packet4f(alpha_5, 1.48572235717979e-5f);
0183 static _EIGEN_DECLARE_CONST_Packet4f(alpha_7, 5.12229709037114e-8f);
0184 static _EIGEN_DECLARE_CONST_Packet4f(alpha_9, -8.60467152213735e-11f);
0185 static _EIGEN_DECLARE_CONST_Packet4f(alpha_11, 2.00018790482477e-13f);
0186 static _EIGEN_DECLARE_CONST_Packet4f(alpha_13, -2.76076847742355e-16f);
0187 // Coefficients of the denominator (even powers of x).
0188 static _EIGEN_DECLARE_CONST_Packet4f(beta_0, 4.89352518554385e-3f);
0189 static _EIGEN_DECLARE_CONST_Packet4f(beta_2, 2.26843463243900e-3f);
0190 static _EIGEN_DECLARE_CONST_Packet4f(beta_4, 1.18534705686654e-4f);
0191 static _EIGEN_DECLARE_CONST_Packet4f(beta_6, 1.19825839466702e-6f);
0192 // Work on |x| and record the lanes that are below tanh_tiny before clamping.
0193 Packet4f x = pabs(_x);
0194 Packet4i tiny_mask = __builtin_msa_fclt_w(x, p4f_tanh_tiny);
0195
0196 // Lanes with |x| > 9 take the value 9 (bsel_v picks its third operand where the mask is set).
0197 // NOTE(review): presumably the approximation is already +/-1.0f in float beyond that point -- confirm.
0198 x = (Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(p4f_tanh_hi, x), (v16u8)x,
0199 (v16u8)p4f_tanh_hi);
0200
0201 // Both polynomials are evaluated in x**2.
0202 Packet4f x2 = pmul(x, x);
0203
0204 // Numerator: Horner's rule in x2 from alpha_13 down to alpha_1, then one multiplication by x.
0205 Packet4f p = pmadd(x2, p4f_alpha_13, p4f_alpha_11);
0206 p = pmadd(x2, p, p4f_alpha_9);
0207 p = pmadd(x2, p, p4f_alpha_7);
0208 p = pmadd(x2, p, p4f_alpha_5);
0209 p = pmadd(x2, p, p4f_alpha_3);
0210 p = pmadd(x2, p, p4f_alpha_1);
0211 p = pmul(x, p);
0212
0213 // Denominator: Horner's rule in x2 from beta_6 down to beta_0.
0214 Packet4f q = pmadd(x2, p4f_beta_6, p4f_beta_4);
0215 q = pmadd(x2, q, p4f_beta_2);
0216 q = pmadd(x2, q, p4f_beta_0);
0217
0218 // Quotient of the two polynomials.
0219 p = pdiv(p, q);
0220
0221 // Copy the sign bit of the original argument onto the result (binsli_w with 0 inserts only the top bit).
0222 p = (Packet4f)__builtin_msa_binsli_w((v4u32)p, (v4u32)_x, 0);
0223
0224 // Lanes flagged in tiny_mask return the argument itself instead of the quotient.
0225 p = (Packet4f)__builtin_msa_bsel_v((v16u8)tiny_mask, (v16u8)p, (v16u8)_x);
0226
0227 return p;
0228 }
0229
// Shared kernel behind psin<Packet4f> and pcos<Packet4f>: returns the sine of
// each float lane of _x when the template argument `sine` is true and the
// cosine when it is false.
// Steps visible below: take |x|, turn infinities into NaN, zero out lanes that
// are not below sincos_max_arg, find the octant of x, subtract an even number
// of octants (Pi/4 split into DP1 + DP2 + DP3), evaluate both a sine and a
// cosine polynomial on the reduced x, select one per lane, and set the sign.
// NOTE(review): remarks about individual __builtin_msa_* calls follow the MIPS
// MSA documentation, and pmadd(a, b, c) is assumed to be a * b + c; neither is
// defined in this file -- confirm before relying on them.
0230 template <bool sine>
0231 Packet4f psincos_inner_msa_float(const Packet4f& _x) {
0232 static _EIGEN_DECLARE_CONST_Packet4f(sincos_max_arg, 13176795.0f);  // roughly 2**24 * (Pi/4)
0233 static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP1, -0.78515625f);
0234 static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP2, -2.4187564849853515625e-4f);
0235 static _EIGEN_DECLARE_CONST_Packet4f(minus_cephes_DP3, -3.77489497744594108e-8f);
0236 static _EIGEN_DECLARE_CONST_Packet4f(sincof_p0, -1.9515295891e-4f);
0237 static _EIGEN_DECLARE_CONST_Packet4f(sincof_p1, 8.3321608736e-3f);
0238 static _EIGEN_DECLARE_CONST_Packet4f(sincof_p2, -1.6666654611e-1f);
0239 static _EIGEN_DECLARE_CONST_Packet4f(coscof_p0, 2.443315711809948e-5f);
0240 static _EIGEN_DECLARE_CONST_Packet4f(coscof_p1, -1.388731625493765e-3f);
0241 static _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827e-2f);
0242 static _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f);  // 4/Pi
0243 static _EIGEN_DECLARE_CONST_Packet4f(half, 0.5f);
0244 static _EIGEN_DECLARE_CONST_Packet4f(1, 1.0f);
0245 // Work on |x|; for sine the sign of _x is folded back in through sign_mask below.
0246 Packet4f x = pabs(_x);
0247
0248 // _x - _x is 0 for finite lanes and NaN for infinite ones; adding it turns infinite arguments into NaN.
0249 Packet4f zero_or_nan_if_inf = psub(_x, _x);
0250 x = padd(x, zero_or_nan_if_inf);
0251 // Lanes that are not below sincos_max_arg are ANDed to 0.0. NaN lanes presumably pass the
0252 // compare (fcult_w: "unordered or less than") and therefore stay NaN -- confirm.
0253 Packet4i small_or_nan_mask = __builtin_msa_fcult_w(x, p4f_sincos_max_arg);
0254 x = pand(x, (Packet4f)small_or_nan_mask);
0255
0256 // Scale by 4/Pi so that the integer part of y is the octant index of x.
0257 Packet4f y = pmul(x, p4f_cephes_FOPI);
0258 // Truncate to get that octant index.
0259 Packet4i y_int = __builtin_msa_ftrunc_s_w(y);
0260 // y_int1 = octant + 1; y_int2 clears bit 0 of that, i.e. (octant + 1) & ~1.
0261 // y_int2 is the even number of octants that is subtracted from x further down,
0262 // so even octants are reduced by `octant` and odd ones by `octant + 1`.
0263 Packet4i y_int1 = __builtin_msa_addvi_w(y_int, 1);
0264 Packet4i y_int2 = (Packet4i)__builtin_msa_bclri_w((Packet4ui)y_int1, 0);
0265 y = __builtin_msa_ffint_s_w(y_int2);
0266 // Sign selection: the shift by 29 moves bit 2 of (octant + 1), or of (octant + 3) for cosine,
0267 // into the sign position; for sine the bits of _x (hence its sign bit) are XORed in as well.
0268 Packet4i sign_mask = sine ? pxor(__builtin_msa_slli_w(y_int1, 29), (Packet4i)_x)
0269 : __builtin_msa_slli_w(__builtin_msa_addvi_w(y_int, 3), 29);
0270 // poly_mask is set where (y_int2 << 30) == 0, i.e. where bit 1 of y_int2 is clear
0271 // (bit 0 was already cleared above). Both polynomials are evaluated for every lane
0272 // and this mask picks between them at the end.
0273 Packet4i poly_mask = __builtin_msa_ceqi_w(__builtin_msa_slli_w(y_int2, 30), 0);
0274 // Range reduction with Pi/4 split into three constants:
0275 //   x = ((x - y * DP1) - y * DP2) - y * DP3,   where DP1 + DP2 + DP3 = Pi/4
0276 // The constants are stored negated, hence the additions below.
0277
0278 Packet4f tmp1 = pmul(y, p4f_minus_cephes_DP1);
0279 Packet4f tmp2 = pmul(y, p4f_minus_cephes_DP2);
0280 Packet4f tmp3 = pmul(y, p4f_minus_cephes_DP3);
0281 x = padd(x, tmp1);
0282 x = padd(x, tmp2);
0283 x = padd(x, tmp3);
0284
0285 // Cosine polynomial: 1 - z/2 + z**2 * ((c0 * z + c1) * z + c2), with z = x**2.
0286 y = p4f_coscof_p0;
0287 Packet4f z = pmul(x, x);
0288 y = pmadd(y, z, p4f_coscof_p1);
0289 y = pmadd(y, z, p4f_coscof_p2);
0290 y = pmul(y, z);
0291 y = pmul(y, z);
0292 y = __builtin_msa_fmsub_w(y, z, p4f_half);
0293 y = padd(y, p4f_1);
0294
0295 // Sine polynomial: x + x * z * ((s0 * z + s1) * z + s2).
0296 Packet4f y2 = p4f_sincof_p0;
0297 y2 = pmadd(y2, z, p4f_sincof_p1);
0298 y2 = pmadd(y2, z, p4f_sincof_p2);
0299 y2 = pmul(y2, z);
0300 y2 = pmadd(y2, x, x);
0301 // Per-lane choice (bsel_v takes its third operand where the mask is set -- confirm):
0302 // sine uses y2 where poly_mask is set and y elsewhere; cosine does the opposite.
0303 y = sine ? (Packet4f)__builtin_msa_bsel_v((v16u8)poly_mask, (v16u8)y, (v16u8)y2)
0304 : (Packet4f)__builtin_msa_bsel_v((v16u8)poly_mask, (v16u8)y2, (v16u8)y);
0305 // Final sign bit = sign bit of sign_mask XOR sign bit of y;
0306 // binsli_w(..., 0) copies just that top bit into y and leaves the other bits alone.
0307 sign_mask = pxor(sign_mask, (Packet4i)y);
0308 y = (Packet4f)__builtin_msa_binsli_w((v4u32)y, (v4u32)sign_mask, 0);
0309 return y;
0310 }
0311
// psin specialization for Packet4f: forwards to psincos_inner_msa_float with
// its `sine` template argument set to true and returns that result unchanged.
0312 template <>
0313 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
0314 psin<Packet4f>(const Packet4f& x) {
0315 return psincos_inner_msa_float< true>(x);
0316 }
0317
// pcos specialization for Packet4f: forwards to psincos_inner_msa_float with
// its `sine` template argument set to false and returns that result unchanged.
0318 template <>
0319 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
0320 pcos<Packet4f>(const Packet4f& x) {
0321 return psincos_inner_msa_float< false>(x);
0322 }
0323
// pexp specialization for Packet2d: e**x for each of the two double lanes of
// _x, written with MIPS MSA builtins.
// Scheme visible below: clamp x to [-1024, 1024], pick n = round(x * log2(e)),
// subtract n * ln(2) from x in two steps (C1 + C2 = 0.693147180...), evaluate
// a rational function px / (qx - px) on the remainder, form 1 + 2 * that, and
// scale the result by 2**n.
// NOTE(review): remarks about individual __builtin_msa_* calls follow the MIPS
// MSA documentation, and pmadd(a, b, c) is assumed to be a * b + c; neither is
// defined in this file -- confirm before relying on them.
0324 template <>
0325 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d
0326 pexp<Packet2d>(const Packet2d& _x) {
0327 // NOTE(review): the +/-1024 clamp bounds presumably still let the final 2**n
0328 // scaling underflow to 0 and overflow to infinity on its own -- confirm.
0329 static _EIGEN_DECLARE_CONST_Packet2d(exp_lo, -1024.0);
0330 static _EIGEN_DECLARE_CONST_Packet2d(exp_hi, +1024.0);
0331 static _EIGEN_DECLARE_CONST_Packet2d(cephes_LOG2EF, 1.4426950408889634073599);
0332 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C1, 0.693145751953125);
0333 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
0334 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p0, 1.26177193074810590878e-4);
0335 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p1, 3.02994407707441961300e-2);
0336 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_p2, 9.99999999999999999910e-1);
0337 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q0, 3.00198505138664455042e-6);
0338 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q1, 2.52448340349684104192e-3);
0339 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q2, 2.27265548208155028766e-1);
0340 static _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_q3, 2.00000000000000000009e0);
0341 static _EIGEN_DECLARE_CONST_Packet2d(half, 0.5);
0342 static _EIGEN_DECLARE_CONST_Packet2d(1, 1.0);
0343 static _EIGEN_DECLARE_CONST_Packet2d(2, 2.0);
0344 // The constants declared above are referenced below as p2d_<name>.
0345 Packet2d x = _x;
0346
0347 // Clamp x: lanes below exp_lo take exp_lo, lanes above exp_hi take exp_hi (bsel_v picks its third operand where the mask is set).
0348 x = (Packet2d)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_d(x, p2d_exp_lo), (v16u8)x,
0349 (v16u8)p2d_exp_lo);
0350 x = (Packet2d)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_d(p2d_exp_hi, x), (v16u8)x,
0351 (v16u8)p2d_exp_hi);
0352
0353 // Round x * log2(e) to the nearest integer: add 0.5 carrying x's sign bit (binsli_d with 0 copies the top bit), then truncate.
0354 Packet2d x2_add = (Packet2d)__builtin_msa_binsli_d((v2u64)p2d_half, (v2u64)x, 0);
0355 Packet2d x2 = pmadd(x, p2d_cephes_LOG2EF, x2_add);
0356 Packet2l x2_long = __builtin_msa_ftrunc_s_d(x2);
0357 Packet2d x2_long_d = __builtin_msa_ffint_s_d(x2_long);
0358 // Remainder: x - n * C1 - n * C2, given fmsub_d(a, b, c) = a - b * c.
0359 x = __builtin_msa_fmsub_d(x, x2_long_d, p2d_cephes_exp_C1);
0360 x = __builtin_msa_fmsub_d(x, x2_long_d, p2d_cephes_exp_C2);
0361 // x2 is reused from here on as the square of the remainder.
0362 x2 = pmul(x, x);
0363 // px = x * ((p0 * x2 + p1) * x2 + p2)
0364 Packet2d px = p2d_cephes_exp_p0;
0365 px = pmadd(px, x2, p2d_cephes_exp_p1);
0366 px = pmadd(px, x2, p2d_cephes_exp_p2);
0367 px = pmul(px, x);
0368 // qx = ((q0 * x2 + q1) * x2 + q2) * x2 + q3
0369 Packet2d qx = p2d_cephes_exp_q0;
0370 qx = pmadd(qx, x2, p2d_cephes_exp_q1);
0371 qx = pmadd(qx, x2, p2d_cephes_exp_q2);
0372 qx = pmadd(qx, x2, p2d_cephes_exp_q3);
0373 // Value for the remainder: 1 + 2 * px / (qx - px).
0374 x = pdiv(px, psub(qx, px));
0375 x = pmadd(p2d_2, x, p2d_1);
0376
0377 // Scale by 2**n (fexp2_d presumably multiplies its first operand by 2 to the integer second operand -- confirm).
0378 x = __builtin_msa_fexp2_d(x, x2_long);
0379
0380 return x;
0381 }
0382
0383 }
0384
0385 }
0386
0387 #endif