File indexing completed on 2025-01-19 09:51:36
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #ifndef EIGEN_MATH_FUNCTIONS_AVX_H
0011 #define EIGEN_MATH_FUNCTIONS_AVX_H
0012
0013
0014
0015
0016
0017 namespace Eigen {
0018
0019 namespace internal {
0020
0021 template <>
0022 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
0023 psin<Packet8f>(const Packet8f& _x) {
0024 return psin_float(_x);
0025 }
0026
0027 template <>
0028 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
0029 pcos<Packet8f>(const Packet8f& _x) {
0030 return pcos_float(_x);
0031 }
0032
0033 template <>
0034 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
0035 plog<Packet8f>(const Packet8f& _x) {
0036 return plog_float(_x);
0037 }
0038
0039 template <>
0040 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
0041 plog<Packet4d>(const Packet4d& _x) {
0042 return plog_double(_x);
0043 }
0044
0045 template <>
0046 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
0047 plog2<Packet8f>(const Packet8f& _x) {
0048 return plog2_float(_x);
0049 }
0050
0051 template <>
0052 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
0053 plog2<Packet4d>(const Packet4d& _x) {
0054 return plog2_double(_x);
0055 }
0056
0057 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0058 Packet8f plog1p<Packet8f>(const Packet8f& _x) {
0059 return generic_plog1p(_x);
0060 }
0061
0062 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0063 Packet8f pexpm1<Packet8f>(const Packet8f& _x) {
0064 return generic_expm1(_x);
0065 }
0066
0067
0068
0069
0070 template <>
0071 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
0072 pexp<Packet8f>(const Packet8f& _x) {
0073 return pexp_float(_x);
0074 }
0075
0076
0077 template <>
0078 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet8f
0079 ptanh<Packet8f>(const Packet8f& _x) {
0080 return internal::generic_fast_tanh_float(_x);
0081 }
0082
0083
0084 template <>
0085 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet4d
0086 pexp<Packet4d>(const Packet4d& _x) {
0087 return pexp_double(_x);
0088 }
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098 #if EIGEN_FAST_MATH
0099 template <>
0100 EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0101 Packet8f psqrt<Packet8f>(const Packet8f& _x) {
0102 Packet8f minus_half_x = pmul(_x, pset1<Packet8f>(-0.5f));
0103 Packet8f denormal_mask = pandnot(
0104 pcmp_lt(_x, pset1<Packet8f>((std::numeric_limits<float>::min)())),
0105 pcmp_lt(_x, pzero(_x)));
0106
0107
0108 Packet8f x = _mm256_rsqrt_ps(_x);
0109
0110 x = pmul(x, pmadd(minus_half_x, pmul(x,x), pset1<Packet8f>(1.5f)));
0111
0112 return pandnot(pmul(_x,x), denormal_mask);
0113 }
0114
0115 #else
0116
0117 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0118 Packet8f psqrt<Packet8f>(const Packet8f& _x) {
0119 return _mm256_sqrt_ps(_x);
0120 }
0121
0122 #endif
0123
0124 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0125 Packet4d psqrt<Packet4d>(const Packet4d& _x) {
0126 return _mm256_sqrt_pd(_x);
0127 }
0128
0129 #if EIGEN_FAST_MATH
0130 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0131 Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
0132 _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
0133 _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
0134 _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
0135 _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(flt_min, 0x00800000);
0136
0137 Packet8f neg_half = pmul(_x, p8f_minus_half);
0138
0139
0140
0141 Packet8f lt_min_mask = _mm256_cmp_ps(_x, p8f_flt_min, _CMP_LT_OQ);
0142 Packet8f inf_mask = _mm256_cmp_ps(_x, p8f_inf, _CMP_EQ_OQ);
0143 Packet8f not_normal_finite_mask = _mm256_or_ps(lt_min_mask, inf_mask);
0144
0145
0146 Packet8f y_approx = _mm256_rsqrt_ps(_x);
0147
0148
0149
0150
0151
0152 Packet8f y_newton = pmul(y_approx, pmadd(y_approx, pmul(neg_half, y_approx), p8f_one_point_five));
0153
0154
0155
0156
0157
0158
0159 return pselect<Packet8f>(not_normal_finite_mask, y_approx, y_newton);
0160 }
0161
0162 #else
0163 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0164 Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
0165 _EIGEN_DECLARE_CONST_Packet8f(one, 1.0f);
0166 return _mm256_div_ps(p8f_one, _mm256_sqrt_ps(_x));
0167 }
0168 #endif
0169
0170 template <> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
0171 Packet4d prsqrt<Packet4d>(const Packet4d& _x) {
0172 _EIGEN_DECLARE_CONST_Packet4d(one, 1.0);
0173 return _mm256_div_pd(p4d_one, _mm256_sqrt_pd(_x));
0174 }
0175
0176 F16_PACKET_FUNCTION(Packet8f, Packet8h, psin)
0177 F16_PACKET_FUNCTION(Packet8f, Packet8h, pcos)
0178 F16_PACKET_FUNCTION(Packet8f, Packet8h, plog)
0179 F16_PACKET_FUNCTION(Packet8f, Packet8h, plog2)
0180 F16_PACKET_FUNCTION(Packet8f, Packet8h, plog1p)
0181 F16_PACKET_FUNCTION(Packet8f, Packet8h, pexpm1)
0182 F16_PACKET_FUNCTION(Packet8f, Packet8h, pexp)
0183 F16_PACKET_FUNCTION(Packet8f, Packet8h, ptanh)
0184 F16_PACKET_FUNCTION(Packet8f, Packet8h, psqrt)
0185 F16_PACKET_FUNCTION(Packet8f, Packet8h, prsqrt)
0186
0187 template <>
0188 EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
0189 Packet8f fexponent;
0190 const Packet8h out = float2half(pfrexp<Packet8f>(half2float(a), fexponent));
0191 exponent = float2half(fexponent);
0192 return out;
0193 }
0194
0195 template <>
0196 EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h& a, const Packet8h& exponent) {
0197 return float2half(pldexp<Packet8f>(half2float(a), half2float(exponent)));
0198 }
0199
0200 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psin)
0201 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pcos)
0202 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog)
0203 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog2)
0204 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, plog1p)
0205 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexpm1)
0206 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, pexp)
0207 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, ptanh)
0208 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, psqrt)
0209 BF16_PACKET_FUNCTION(Packet8f, Packet8bf, prsqrt)
0210
0211 template <>
0212 EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {
0213 Packet8f fexponent;
0214 const Packet8bf out = F32ToBf16(pfrexp<Packet8f>(Bf16ToF32(a), fexponent));
0215 exponent = F32ToBf16(fexponent);
0216 return out;
0217 }
0218
0219 template <>
0220 EIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& exponent) {
0221 return F32ToBf16(pldexp<Packet8f>(Bf16ToF32(a), Bf16ToF32(exponent)));
0222 }
0223
0224 }
0225
0226 }
0227
0228 #endif