File indexing completed on 2025-01-19 09:51:45
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015 #ifndef EIGEN_MATH_FUNCTIONS_SSE_H
0016 #define EIGEN_MATH_FUNCTIONS_SSE_H
0017
0018 namespace Eigen {
0019
0020 namespace internal {
0021
0022 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0023 Packet4f plog<Packet4f>(const Packet4f& _x) {
0024 return plog_float(_x);
0025 }
0026
0027 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0028 Packet2d plog<Packet2d>(const Packet2d& _x) {
0029 return plog_double(_x);
0030 }
0031
0032 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0033 Packet4f plog2<Packet4f>(const Packet4f& _x) {
0034 return plog2_float(_x);
0035 }
0036
0037 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0038 Packet2d plog2<Packet2d>(const Packet2d& _x) {
0039 return plog2_double(_x);
0040 }
0041
0042 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0043 Packet4f plog1p<Packet4f>(const Packet4f& _x) {
0044 return generic_plog1p(_x);
0045 }
0046
0047 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0048 Packet4f pexpm1<Packet4f>(const Packet4f& _x) {
0049 return generic_expm1(_x);
0050 }
0051
0052 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0053 Packet4f pexp<Packet4f>(const Packet4f& _x)
0054 {
0055 return pexp_float(_x);
0056 }
0057
0058 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0059 Packet2d pexp<Packet2d>(const Packet2d& x)
0060 {
0061 return pexp_double(x);
0062 }
0063
0064 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0065 Packet4f psin<Packet4f>(const Packet4f& _x)
0066 {
0067 return psin_float(_x);
0068 }
0069
0070 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0071 Packet4f pcos<Packet4f>(const Packet4f& _x)
0072 {
0073 return pcos_float(_x);
0074 }
0075
0076 #if EIGEN_FAST_MATH
0077
0078
0079
0080
0081
0082
0083
0084
0085
0086 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0087 Packet4f psqrt<Packet4f>(const Packet4f& _x)
0088 {
0089 Packet4f minus_half_x = pmul(_x, pset1<Packet4f>(-0.5f));
0090 Packet4f denormal_mask = pandnot(
0091 pcmp_lt(_x, pset1<Packet4f>((std::numeric_limits<float>::min)())),
0092 pcmp_lt(_x, pzero(_x)));
0093
0094
0095 Packet4f x = _mm_rsqrt_ps(_x);
0096
0097 x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1<Packet4f>(1.5f)));
0098
0099 return pandnot(pmul(_x,x), denormal_mask);
0100 }
0101
0102 #else
0103
0104 template<>EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0105 Packet4f psqrt<Packet4f>(const Packet4f& x) { return _mm_sqrt_ps(x); }
0106
0107 #endif
0108
0109 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0110 Packet2d psqrt<Packet2d>(const Packet2d& x) { return _mm_sqrt_pd(x); }
0111
0112 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0113 Packet16b psqrt<Packet16b>(const Packet16b& x) { return x; }
0114
0115 #if EIGEN_FAST_MATH
0116
0117 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0118 Packet4f prsqrt<Packet4f>(const Packet4f& _x) {
0119 _EIGEN_DECLARE_CONST_Packet4f(one_point_five, 1.5f);
0120 _EIGEN_DECLARE_CONST_Packet4f(minus_half, -0.5f);
0121 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(inf, 0x7f800000u);
0122 _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(flt_min, 0x00800000u);
0123
0124 Packet4f neg_half = pmul(_x, p4f_minus_half);
0125
0126
0127 Packet4f lt_min_mask = _mm_cmplt_ps(_x, p4f_flt_min);
0128 Packet4f inf_mask = _mm_cmpeq_ps(_x, p4f_inf);
0129 Packet4f not_normal_finite_mask = _mm_or_ps(lt_min_mask, inf_mask);
0130
0131
0132 Packet4f y_approx = _mm_rsqrt_ps(_x);
0133
0134
0135
0136
0137
0138 Packet4f y_newton = pmul(
0139 y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p4f_one_point_five));
0140
0141
0142
0143
0144
0145
0146 return pselect<Packet4f>(not_normal_finite_mask, y_approx, y_newton);
0147 }
0148
0149 #else
0150
0151 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0152 Packet4f prsqrt<Packet4f>(const Packet4f& x) {
0153
0154 return _mm_div_ps(pset1<Packet4f>(1.0f), _mm_sqrt_ps(x));
0155 }
0156
0157 #endif
0158
0159 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0160 Packet2d prsqrt<Packet2d>(const Packet2d& x) {
0161 return _mm_div_pd(pset1<Packet2d>(1.0), _mm_sqrt_pd(x));
0162 }
0163
0164
0165 template <>
0166 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4f
0167 ptanh<Packet4f>(const Packet4f& x) {
0168 return internal::generic_fast_tanh_float(x);
0169 }
0170
0171 }
0172
0173 namespace numext {
0174
0175 template<>
0176 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0177 float sqrt(const float &x)
0178 {
0179 return internal::pfirst(internal::Packet4f(_mm_sqrt_ss(_mm_set_ss(x))));
0180 }
0181
0182 template<>
0183 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
0184 double sqrt(const double &x)
0185 {
0186 #if EIGEN_COMP_GNUC_STRICT
0187
0188
0189 return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));
0190 #else
0191 return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));
0192 #endif
0193 }
0194
0195 }
0196
0197 }
0198
0199 #endif