Vc/common/math.h

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
0003
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025
0026 }}}*/
0027
0028 #ifndef VC_COMMON_MATH_H_
0029 #define VC_COMMON_MATH_H_
0030
0031 #define Vc_COMMON_MATH_H_INTERNAL 1
0032
0033 #include "trigonometric.h"
0034
0035 #include "const.h"
0036 #include "macros.h"
0037
0038 namespace Vc_VERSIONED_NAMESPACE
0039 {
0040 // TODO, not vectorized:
0041 template <class T, class Abi>
0042 SimdArray<int, Vector<T, Abi>::size()> fpclassify(const Vector<T, Abi> &x)
0043 {
0044     return SimdArray<int, Vector<T, Abi>::size()>(
0045         [&](std::size_t i) { return std::fpclassify(x[i]); });
0046 }
0047 template <class T, size_t N> SimdArray<int, N> fpclassify(const SimdArray<T, N> &x)
0048 {
0049     return SimdArray<int, N>([&](std::size_t i) { return std::fpclassify(x[i]); });
0050 }
0051
0052 #ifdef Vc_IMPL_SSE
0053 // for SSE, AVX, and AVX2
0054 #include "logarithm.h"
0055 #include "exponential.h"
0056 #ifdef Vc_IMPL_AVX
0057 inline AVX::double_v exp(AVX::double_v _x)
0058 {
0059     AVX::Vector<double> x = _x;
0060     typedef AVX::Vector<double> V;
0061         typedef V::Mask M;
0062     typedef AVX::Const<double> C;
0063
0064         const M overflow  = x > Vc::Detail::doubleConstant< 1, 0x0006232bdd7abcd2ull, 9>(); // max log
0065         const M underflow = x < Vc::Detail::doubleConstant<-1, 0x0006232bdd7abcd2ull, 9>(); // min log
0066
0067         V px = floor(C::log2_e() * x + 0.5);
0068         __m128i tmp = _mm256_cvttpd_epi32(px.data());
0069     const SimdArray<int, V::Size> n = SSE::int_v{tmp};
0070         x -= px * C::ln2_large(); //Vc::Detail::doubleConstant<1, 0x00062e4000000000ull, -1>();  // ln2
0071         x -= px * C::ln2_small(); //Vc::Detail::doubleConstant<1, 0x0007f7d1cf79abcaull, -20>(); // ln2
0072
0073         const double P[] = {
0074             Vc::Detail::doubleConstant<1, 0x000089cdd5e44be8ull, -13>(),
0075             Vc::Detail::doubleConstant<1, 0x000f06d10cca2c7eull,  -6>(),
0076             Vc::Detail::doubleConstant<1, 0x0000000000000000ull,   0>()
0077         };
0078         const double Q[] = {
0079             Vc::Detail::doubleConstant<1, 0x00092eb6bc365fa0ull, -19>(),
0080             Vc::Detail::doubleConstant<1, 0x0004ae39b508b6c0ull,  -9>(),
0081             Vc::Detail::doubleConstant<1, 0x000d17099887e074ull,  -3>(),
0082             Vc::Detail::doubleConstant<1, 0x0000000000000000ull,   1>()
0083         };
0084         const V x2 = x * x;
0085         px = x * ((P[0] * x2 + P[1]) * x2 + P[2]);
0086         x =  px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px);
0087         x = V::One() + 2.0 * x;
0088
0089         x = ldexp(x, n); // == x * 2ⁿ
0090
0091         x(overflow) = std::numeric_limits<double>::infinity();
0092         x.setZero(underflow);
0093
0094         return x;
0095     }
0096 #endif  // Vc_IMPL_AVX
0097
0098 inline SSE::double_v exp(SSE::double_v::AsArg _x) {
0099     SSE::Vector<double> x = _x;
0100     typedef SSE::Vector<double> V;
0101         typedef V::Mask M;
0102     typedef SSE::Const<double> C;
0103
0104         const M overflow  = x > Vc::Detail::doubleConstant< 1, 0x0006232bdd7abcd2ull, 9>(); // max log
0105         const M underflow = x < Vc::Detail::doubleConstant<-1, 0x0006232bdd7abcd2ull, 9>(); // min log
0106
0107         V px = floor(C::log2_e() * x + 0.5);
0108     SimdArray<int, V::Size> n;
0109         _mm_storel_epi64(reinterpret_cast<__m128i *>(&n), _mm_cvttpd_epi32(px.data()));
0110         x -= px * C::ln2_large(); //Vc::Detail::doubleConstant<1, 0x00062e4000000000ull, -1>();  // ln2
0111         x -= px * C::ln2_small(); //Vc::Detail::doubleConstant<1, 0x0007f7d1cf79abcaull, -20>(); // ln2
0112
0113         const double P[] = {
0114             Vc::Detail::doubleConstant<1, 0x000089cdd5e44be8ull, -13>(),
0115             Vc::Detail::doubleConstant<1, 0x000f06d10cca2c7eull,  -6>(),
0116             Vc::Detail::doubleConstant<1, 0x0000000000000000ull,   0>()
0117         };
0118         const double Q[] = {
0119             Vc::Detail::doubleConstant<1, 0x00092eb6bc365fa0ull, -19>(),
0120             Vc::Detail::doubleConstant<1, 0x0004ae39b508b6c0ull,  -9>(),
0121             Vc::Detail::doubleConstant<1, 0x000d17099887e074ull,  -3>(),
0122             Vc::Detail::doubleConstant<1, 0x0000000000000000ull,   1>()
0123         };
0124         const V x2 = x * x;
0125         px = x * ((P[0] * x2 + P[1]) * x2 + P[2]);
0126         x =  px / ((((Q[0] * x2 + Q[1]) * x2 + Q[2]) * x2 + Q[3]) - px);
0127         x = V::One() + 2.0 * x;
0128
0129         x = ldexp(x, n); // == x * 2ⁿ
0130
0131         x(overflow) = std::numeric_limits<double>::infinity();
0132         x.setZero(underflow);
0133
0134         return x;
0135     }
0136
0137 #endif
0138 }  // namespace Vc
0139
0140 #undef Vc_COMMON_MATH_H_INTERNAL
0141
0142 #endif // VC_COMMON_MATH_H_