Vc/common/exponential.h

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2012-2015 Matthias Kretz <kretz@kde.org>
0003
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025
0026 -------------------------------------------------------------------
0027
0028 The exp implementation is derived from Cephes, which carries the
0029 following Copyright notice:
0030
0031 Cephes Math Library Release 2.2:  June, 1992
0032 Copyright 1984, 1987, 1989 by Stephen L. Moshier
0033 Direct inquiries to 30 Frost Street, Cambridge, MA 02140
0034
0035 }}}*/
0036
0037 #ifdef Vc_COMMON_MATH_H_INTERNAL
0038
0039 constexpr float log2_e = 1.44269504088896341f;
0040
0041 // These constants are adjusted to account for single-precision floating point.
0042 // The original are for double precision:
0043 //
0044 // constexpr float MAXLOGF = 88.72283905206835f;
0045 // constexpr float MINLOGF = -103.278929903431851103f; /* log(2^-149) */
0046
0047 constexpr float MAXLOGF = 88.722831726074219f; /* log(2^127.99998474121094f) */
0048 constexpr float MINLOGF = -88.029685974121094f; /* log(2^-126.99999237060547f) */
0049 constexpr float MAXNUMF = 3.4028234663852885981170418348451692544e38f;
0050
0051 template <typename Abi, typename = enable_if<std::is_same<Abi, VectorAbi::Sse>::value ||
0052                                              std::is_same<Abi, VectorAbi::Avx>::value>>
0053 inline Vector<float, detail::not_fixed_size_abi<Abi>> exp(Vector<float, Abi> x)
0054 {
0055     using V = Vector<float, Abi>;
0056     typedef typename V::Mask M;
0057     typedef Detail::Const<float, Abi> C;
0058
0059         const M overflow  = x > MAXLOGF;
0060         const M underflow = x < MINLOGF;
0061
0062         // log₂(eˣ) = x * log₂(e) * log₂(2)
0063         //          = log₂(2^(x * log₂(e)))
0064         // => eˣ = 2^(x * log₂(e))
0065         // => n  = ⌊x * log₂(e) + ½⌋
0066         // => y  = x - n * ln(2)       | recall that: ln(2) * log₂(e) == 1
0067         // <=> eˣ = 2ⁿ * eʸ
0068         V z = floor(C::log2_e() * x + 0.5f);
0069         const auto n = static_cast<Vc::SimdArray<int, V::Size>>(z);
0070         x -= z * C::ln2_large();
0071         x -= z * C::ln2_small();
0072
0073         /* Theoretical peak relative error in [-0.5, +0.5] is 4.2e-9. */
0074         z = ((((( 1.9875691500E-4f  * x
0075                 + 1.3981999507E-3f) * x
0076                 + 8.3334519073E-3f) * x
0077                 + 4.1665795894E-2f) * x
0078                 + 1.6666665459E-1f) * x
0079                 + 5.0000001201E-1f) * (x * x)
0080                 + x
0081                 + 1.0f;
0082
0083         x = ldexp(z, n); // == z * 2ⁿ
0084
0085         x(overflow) = std::numeric_limits<typename V::EntryType>::infinity();
0086         x.setZero(underflow);
0087
0088         return x;
0089     }
0090
0091 #endif // Vc_COMMON_MATH_H_INTERNAL