|
|
|||
File indexing completed on 2025-12-16 10:33:07
0001 /* This file is part of the Vc library. {{{ 0002 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org> 0003 0004 Redistribution and use in source and binary forms, with or without 0005 modification, are permitted provided that the following conditions are met: 0006 * Redistributions of source code must retain the above copyright 0007 notice, this list of conditions and the following disclaimer. 0008 * Redistributions in binary form must reproduce the above copyright 0009 notice, this list of conditions and the following disclaimer in the 0010 documentation and/or other materials provided with the distribution. 0011 * Neither the names of contributing organizations nor the 0012 names of its contributors may be used to endorse or promote products 0013 derived from this software without specific prior written permission. 0014 0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY 0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 0025 0026 }}}*/ 0027 0028 /* The log implementations are based on code from Julien Pommier which carries the following 0029 copyright information: 0030 */ 0031 /* 0032 Inspired by Intel Approximate Math library, and based on the 0033 corresponding algorithms of the cephes math library 0034 */ 0035 /* Copyright (C) 2007 Julien Pommier 0036 0037 This software is provided 'as-is', without any express or implied 0038 warranty. In no event will the authors be held liable for any damages 0039 arising from the use of this software. 0040 0041 Permission is granted to anyone to use this software for any purpose, 0042 including commercial applications, and to alter it and redistribute it 0043 freely, subject to the following restrictions: 0044 0045 1. The origin of this software must not be misrepresented; you must not 0046 claim that you wrote the original software. If you use this software 0047 in a product, an acknowledgment in the product documentation would be 0048 appreciated but is not required. 0049 2. Altered source versions must be plainly marked as such, and must not be 0050 misrepresented as being the original software. 0051 3. This notice may not be removed or altered from any source distribution. 0052 0053 (this is the zlib license) 0054 */ 0055 0056 #ifdef Vc_COMMON_MATH_H_INTERNAL 0057 0058 enum LogarithmBase { 0059 BaseE, Base10, Base2 0060 }; 0061 0062 namespace Detail 0063 { 0064 template <typename T, typename Abi> 0065 using Const = typename std::conditional<std::is_same<Abi, VectorAbi::Avx>::value, 0066 AVX::Const<T>, SSE::Const<T>>::type; 0067 0068 template<LogarithmBase Base> 0069 struct LogImpl 0070 { 0071 template<typename T, typename Abi> static Vc_ALWAYS_INLINE void log_series(Vector<T, Abi> &Vc_RESTRICT x, typename Vector<T, Abi>::AsArg exponent) { 0072 typedef Vector<T, Abi> V; 0073 typedef Detail::Const<T, Abi> C; 0074 // Taylor series around x = 2^exponent 0075 // f(x) = ln(x) → exponent * ln(2) → C::ln2_small + C::ln2_large 0076 // f'(x) = x⁻¹ → x → 1 0077 // f''(x) = - x⁻² → -x² / 2 → C::_1_2() 0078 // = 2!x⁻³ → x³ / 3 → C::P(8) 0079 // = -3!x⁻⁴ → -x⁴ / 4 → C::P(7) 0080 // = 4!x⁻⁵ → x⁵ / 5 → C::P(6) 0081 // ... 0082 // The high order coefficients are adjusted to reduce the error that occurs from ommission 0083 // of higher order terms. 0084 // P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹| 0085 // The order of additions must go from smallest to largest terms 0086 const V x2 = x * x; // 0 → 4 0087 #ifdef Vc_LOG_ILP 0088 V y2 = (C::P(6) * /*4 → 8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8); 0089 V y0 = (C::P(0) * /*5 → 9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2); 0090 V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5); 0091 const V x3 = x2 * x; // 7 → 11 0092 const V x6 = x3 * x3; // 11 → 15 0093 const V x9 = x6 * x3; // 15 → 19 0094 V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3; 0095 #elif defined Vc_LOG_ILP2 0096 /* 0097 * name start done 0098 * movaps %xmm0, %xmm1 ; x 0 1 0099 * movaps %xmm0, %xmm2 ; x 0 1 0100 * mulps %xmm1, %xmm1 ; x2 1 5 *xmm1 0101 * movaps <P8>, %xmm15 ; y8 1 2 0102 * mulps %xmm1, %xmm2 ; x3 5 9 *xmm2 0103 * movaps %xmm1, %xmm3 ; x2 5 6 0104 * movaps %xmm1, %xmm4 ; x2 5 6 0105 * mulps %xmm3, %xmm3 ; x4 6 10 *xmm3 0106 * movaps %xmm2, %xmm5 ; x3 9 10 0107 * movaps %xmm2, %xmm6 ; x3 9 10 0108 * mulps %xmm2, %xmm4 ; x5 9 13 *xmm4 0109 * movaps %xmm3, %xmm7 ; x4 10 11 0110 * movaps %xmm3, %xmm8 ; x4 10 11 0111 * movaps %xmm3, %xmm9 ; x4 10 11 0112 * mulps %xmm5, %xmm5 ; x6 10 14 *xmm5 0113 * mulps %xmm3, %xmm6 ; x7 11 15 *xmm6 0114 * mulps %xmm7, %xmm7 ; x8 12 16 *xmm7 0115 * movaps %xmm4, %xmm10 ; x5 13 14 0116 * mulps %xmm4, %xmm8 ; x9 13 17 *xmm8 0117 * mulps %xmm5, %xmm10 ; x11 14 18 *xmm10 0118 * mulps %xmm5, %xmm9 ; x10 15 19 *xmm9 0119 * mulps <P0>, %xmm10 ; y0 18 22 0120 * mulps <P1>, %xmm9 ; y1 19 23 0121 * mulps <P2>, %xmm8 ; y2 20 24 0122 * mulps <P3>, %xmm7 ; y3 21 25 0123 * addps %xmm10, %xmm9 ; y 23 26 0124 * addps %xmm9, %xmm8 ; y 26 29 0125 * addps %xmm8, %xmm7 ; y 29 32 0126 */ 0127 const V x3 = x2 * x; // 4 → 8 0128 const V x4 = x2 * x2; // 5 → 9 0129 const V x5 = x2 * x3; // 8 → 12 0130 const V x6 = x3 * x3; // 9 → 13 0131 const V x7 = x4 * x3; // 0132 const V x8 = x4 * x4; 0133 const V x9 = x5 * x4; 0134 const V x10 = x5 * x5; 0135 const V x11 = x5 * x6; // 13 → 17 0136 V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7 0137 + C::P(5) * x6 + C::P(6) * x5 + C::P(7) * x4 + C::P(8) * x3; 0138 #else 0139 V y = C::P(0); 0140 Vc::Common::unrolled_loop<int, 1, 9>([&](int i) { y = y * x + C::P(i); }); 0141 y *= x * x2; 0142 #endif 0143 switch (Base) { 0144 case BaseE: 0145 // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2)) 0146 y += exponent * C::ln2_small(); 0147 y -= x2 * C::_1_2(); // [0, 0.25[ 0148 x += y; 0149 x += exponent * C::ln2_large(); 0150 break; 0151 case Base10: 0152 y += exponent * C::ln2_small(); 0153 y -= x2 * C::_1_2(); // [0, 0.25[ 0154 x += y; 0155 x += exponent * C::ln2_large(); 0156 x *= C::log10_e(); 0157 break; 0158 case Base2: 0159 { 0160 const V x_ = x; 0161 x *= C::log2_e(); 0162 y *= C::log2_e(); 0163 y -= x_ * x * C::_1_2(); // [0, 0.25[ 0164 x += y; 0165 x += exponent; 0166 break; 0167 } 0168 } 0169 } 0170 0171 template <typename Abi> 0172 static Vc_ALWAYS_INLINE void log_series(Vector<double, Abi> &Vc_RESTRICT x, 0173 typename Vector<double, Abi>::AsArg exponent) 0174 { 0175 typedef Vector<double, Abi> V; 0176 typedef Detail::Const<double, Abi> C; 0177 const V x2 = x * x; 0178 V y = C::P(0); 0179 V y2 = C::Q(0) + x; 0180 Vc::Common::unrolled_loop<int, 1, 5>([&](int i) { 0181 y = y * x + C::P(i); 0182 y2 = y2 * x + C::Q(i); 0183 }); 0184 y2 = x / y2; 0185 y = y * x + C::P(5); 0186 y = x2 * y * y2; 0187 // TODO: refactor the following with the float implementation: 0188 switch (Base) { 0189 case BaseE: 0190 // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2)) 0191 y += exponent * C::ln2_small(); 0192 y -= x2 * C::_1_2(); // [0, 0.25[ 0193 x += y; 0194 x += exponent * C::ln2_large(); 0195 break; 0196 case Base10: 0197 y += exponent * C::ln2_small(); 0198 y -= x2 * C::_1_2(); // [0, 0.25[ 0199 x += y; 0200 x += exponent * C::ln2_large(); 0201 x *= C::log10_e(); 0202 break; 0203 case Base2: 0204 { 0205 const V x_ = x; 0206 x *= C::log2_e(); 0207 y *= C::log2_e(); 0208 y -= x_ * x * C::_1_2(); // [0, 0.25[ 0209 x += y; 0210 x += exponent; 0211 break; 0212 } 0213 } 0214 } 0215 0216 template <typename T, typename Abi, typename V = Vector<T, Abi>> 0217 static inline Vector<T, Abi> calc(V _x) 0218 { 0219 typedef typename V::Mask M; 0220 typedef Detail::Const<T, Abi> C; 0221 0222 V x(_x); 0223 0224 const M invalidMask = x < V::Zero(); 0225 const M infinityMask = x == V::Zero(); 0226 const M denormal = x <= C::min(); 0227 0228 x(denormal) *= V(Vc::Detail::doubleConstant<1, 0, 54>()); // 2²⁵ 0229 V exponent = Detail::exponent(x.data()); // = ⎣log₂(x)⎦ 0230 exponent(denormal) -= 54; 0231 0232 x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[ 0233 x = Detail::operator|(x, 0234 C::_1_2()); // and set the exponent to 2⁻¹ ⇒ x ∈ [½, 1[ 0235 0236 // split calculation in two cases: 0237 // A: x ∈ [½, √½[ 0238 // B: x ∈ [√½, 1[ 0239 // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e. 0240 // log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½ 0241 0242 const M smallX = x < C::_1_sqrt2(); 0243 x(smallX) += x; // => x ∈ [√½, 1[ ∪ [1.5, 1 + √½[ 0244 x -= V::One(); // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[ 0245 exponent(!smallX) += V::One(); 0246 0247 log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e) B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1) 0248 0249 x.setQnan(invalidMask); // x < 0 → NaN 0250 x(infinityMask) = C::neginf(); // x = 0 → -∞ 0251 0252 return x; 0253 } 0254 }; 0255 } // namespace Detail 0256 0257 template <typename T, typename Abi> 0258 Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log( 0259 const Vector<T, Abi> &x) 0260 { 0261 return Detail::LogImpl<BaseE>::calc<T, Abi>(x); 0262 } 0263 template <typename T, typename Abi> 0264 Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log10( 0265 const Vector<T, Abi> &x) 0266 { 0267 return Detail::LogImpl<Base10>::calc<T, Abi>(x); 0268 } 0269 template <typename T, typename Abi> 0270 Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log2( 0271 const Vector<T, Abi> &x) 0272 { 0273 return Detail::LogImpl<Base2>::calc<T, Abi>(x); 0274 } 0275 0276 #endif // Vc_COMMON_MATH_H_INTERNAL
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|