|
||||
File indexing completed on 2025-01-30 10:25:46
0001 /* This file is part of the Vc library. {{{ 0002 Copyright © 2012-2015 Matthias Kretz <kretz@kde.org> 0003 0004 Redistribution and use in source and binary forms, with or without 0005 modification, are permitted provided that the following conditions are met: 0006 * Redistributions of source code must retain the above copyright 0007 notice, this list of conditions and the following disclaimer. 0008 * Redistributions in binary form must reproduce the above copyright 0009 notice, this list of conditions and the following disclaimer in the 0010 documentation and/or other materials provided with the distribution. 0011 * Neither the names of contributing organizations nor the 0012 names of its contributors may be used to endorse or promote products 0013 derived from this software without specific prior written permission. 0014 0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY 0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 0025 0026 ------------------------------------------------------------------- 0027 0028 The exp implementation is derived from Cephes, which carries the 0029 following Copyright notice: 0030 0031 Cephes Math Library Release 2.2: June, 1992 0032 Copyright 1984, 1987, 1989 by Stephen L. Moshier 0033 Direct inquiries to 30 Frost Street, Cambridge, MA 02140 0034 0035 }}}*/ 0036 0037 #ifdef Vc_COMMON_MATH_H_INTERNAL 0038 0039 constexpr float log2_e = 1.44269504088896341f; 0040 0041 // These constants are adjusted to account for single-precision floating point. 0042 // The original are for double precision: 0043 // 0044 // constexpr float MAXLOGF = 88.72283905206835f; 0045 // constexpr float MINLOGF = -103.278929903431851103f; /* log(2^-149) */ 0046 0047 constexpr float MAXLOGF = 88.722831726074219f; /* log(2^127.99998474121094f) */ 0048 constexpr float MINLOGF = -88.029685974121094f; /* log(2^-126.99999237060547f) */ 0049 constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f; 0050 0051 template <typename Abi, typename = enable_if<std::is_same<Abi, VectorAbi::Sse>::value || 0052 std::is_same<Abi, VectorAbi::Avx>::value>> 0053 inline Vector<float, detail::not_fixed_size_abi<Abi>> exp(Vector<float, Abi> x) 0054 { 0055 using V = Vector<float, Abi>; 0056 typedef typename V::Mask M; 0057 typedef Detail::Const<float, Abi> C; 0058 0059 const M overflow = x > MAXLOGF; 0060 const M underflow = x < MINLOGF; 0061 0062 // log₂(eˣ) = x * log₂(e) * log₂(2) 0063 // = log₂(2^(x * log₂(e))) 0064 // => eˣ = 2^(x * log₂(e)) 0065 // => n = ⌊x * log₂(e) + ½⌋ 0066 // => y = x - n * ln(2) | recall that: ln(2) * log₂(e) == 1 0067 // <=> eˣ = 2ⁿ * eʸ 0068 V z = floor(C::log2_e() * x + 0.5f); 0069 const auto n = static_cast<Vc::SimdArray<int, V::Size>>(z); 0070 x -= z * C::ln2_large(); 0071 x -= z * C::ln2_small(); 0072 0073 /* Theoretical peak relative error in [-0.5, +0.5] is 4.2e-9. */ 0074 z = ((((( 1.9875691500E-4f * x 0075 + 1.3981999507E-3f) * x 0076 + 8.3334519073E-3f) * x 0077 + 4.1665795894E-2f) * x 0078 + 1.6666665459E-1f) * x 0079 + 5.0000001201E-1f) * (x * x) 0080 + x 0081 + 1.0f; 0082 0083 x = ldexp(z, n); // == z * 2ⁿ 0084 0085 x(overflow) = std::numeric_limits<typename V::EntryType>::infinity(); 0086 x.setZero(underflow); 0087 0088 return x; 0089 } 0090 0091 #endif // Vc_COMMON_MATH_H_INTERNAL
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |