Vc/common/logarithm.h

0001 /*  This file is part of the Vc library. {{{
0002 Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
0003
0004 Redistribution and use in source and binary forms, with or without
0005 modification, are permitted provided that the following conditions are met:
0006     * Redistributions of source code must retain the above copyright
0007       notice, this list of conditions and the following disclaimer.
0008     * Redistributions in binary form must reproduce the above copyright
0009       notice, this list of conditions and the following disclaimer in the
0010       documentation and/or other materials provided with the distribution.
0011     * Neither the names of contributing organizations nor the
0012       names of its contributors may be used to endorse or promote products
0013       derived from this software without specific prior written permission.
0014
0015 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
0016 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
0017 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
0018 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
0019 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
0020 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
0021 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
0022 ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
0023 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
0024 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0025
0026 }}}*/
0027
0028 /* The log implementations are based on code from Julien Pommier which carries the following
0029    copyright information:
0030  */
0031 /*
0032    Inspired by Intel Approximate Math library, and based on the
0033    corresponding algorithms of the cephes math library
0034 */
0035 /* Copyright (C) 2007  Julien Pommier
0036
0037   This software is provided 'as-is', without any express or implied
0038   warranty.  In no event will the authors be held liable for any damages
0039   arising from the use of this software.
0040
0041   Permission is granted to anyone to use this software for any purpose,
0042   including commercial applications, and to alter it and redistribute it
0043   freely, subject to the following restrictions:
0044
0045   1. The origin of this software must not be misrepresented; you must not
0046      claim that you wrote the original software. If you use this software
0047      in a product, an acknowledgment in the product documentation would be
0048      appreciated but is not required.
0049   2. Altered source versions must be plainly marked as such, and must not be
0050      misrepresented as being the original software.
0051   3. This notice may not be removed or altered from any source distribution.
0052
0053   (this is the zlib license)
0054 */
0055
0056 #ifdef Vc_COMMON_MATH_H_INTERNAL
0057
0058 enum LogarithmBase {
0059     BaseE, Base10, Base2
0060 };
0061
0062 namespace Detail
0063 {
0064 template <typename T, typename Abi>
0065 using Const = typename std::conditional<std::is_same<Abi, VectorAbi::Avx>::value,
0066                                         AVX::Const<T>, SSE::Const<T>>::type;
0067
0068 template<LogarithmBase Base>
0069 struct LogImpl
0070 {
0071     template<typename T, typename Abi> static Vc_ALWAYS_INLINE void log_series(Vector<T, Abi> &Vc_RESTRICT x, typename Vector<T, Abi>::AsArg exponent) {
0072         typedef Vector<T, Abi> V;
0073         typedef Detail::Const<T, Abi> C;
0074         // Taylor series around x = 2^exponent
0075         //   f(x) = ln(x)   → exponent * ln(2) → C::ln2_small + C::ln2_large
0076         //  f'(x) =    x⁻¹  →  x               → 1
0077         // f''(x) = -  x⁻²  → -x² / 2          → C::_1_2()
0078         //        =  2!x⁻³  →  x³ / 3          → C::P(8)
0079         //        = -3!x⁻⁴  → -x⁴ / 4          → C::P(7)
0080         //        =  4!x⁻⁵  →  x⁵ / 5          → C::P(6)
0081         // ...
0082         // The high order coefficients are adjusted to reduce the error that occurs from ommission
0083         // of higher order terms.
0084         // P(0) is the smallest term and |x| < 1 ⇒ |xⁿ| > |xⁿ⁺¹|
0085         // The order of additions must go from smallest to largest terms
0086         const V x2 = x * x; // 0 → 4
0087 #ifdef Vc_LOG_ILP
0088         V y2 = (C::P(6) * /*4 →  8*/ x2 + /* 8 → 11*/ C::P(7) * /*1 → 5*/ x) + /*11 → 14*/ C::P(8);
0089         V y0 = (C::P(0) * /*5 →  9*/ x2 + /* 9 → 12*/ C::P(1) * /*2 → 6*/ x) + /*12 → 15*/ C::P(2);
0090         V y1 = (C::P(3) * /*6 → 10*/ x2 + /*10 → 13*/ C::P(4) * /*3 → 7*/ x) + /*13 → 16*/ C::P(5);
0091         const V x3 = x2 * x;  // 7 → 11
0092         const V x6 = x3 * x3; // 11 → 15
0093         const V x9 = x6 * x3; // 15 → 19
0094         V y = (y0 * /*19 → 23*/ x9 + /*23 → 26*/ y1 * /*16 → 20*/ x6) + /*26 → 29*/ y2 * /*14 → 18*/ x3;
0095 #elif defined Vc_LOG_ILP2
0096         /*
0097          *                            name start done
0098          *  movaps %xmm0, %xmm1     ; x     0     1
0099          *  movaps %xmm0, %xmm2     ; x     0     1
0100          *  mulps %xmm1, %xmm1      ; x2    1     5 *xmm1
0101          *  movaps <P8>, %xmm15     ; y8    1     2
0102          *  mulps %xmm1, %xmm2      ; x3    5     9 *xmm2
0103          *  movaps %xmm1, %xmm3     ; x2    5     6
0104          *  movaps %xmm1, %xmm4     ; x2    5     6
0105          *  mulps %xmm3, %xmm3      ; x4    6    10 *xmm3
0106          *  movaps %xmm2, %xmm5     ; x3    9    10
0107          *  movaps %xmm2, %xmm6     ; x3    9    10
0108          *  mulps %xmm2, %xmm4      ; x5    9    13 *xmm4
0109          *  movaps %xmm3, %xmm7     ; x4   10    11
0110          *  movaps %xmm3, %xmm8     ; x4   10    11
0111          *  movaps %xmm3, %xmm9     ; x4   10    11
0112          *  mulps %xmm5, %xmm5      ; x6   10    14 *xmm5
0113          *  mulps %xmm3, %xmm6      ; x7   11    15 *xmm6
0114          *  mulps %xmm7, %xmm7      ; x8   12    16 *xmm7
0115          *  movaps %xmm4, %xmm10    ; x5   13    14
0116          *  mulps %xmm4, %xmm8      ; x9   13    17 *xmm8
0117          *  mulps %xmm5, %xmm10     ; x11  14    18 *xmm10
0118          *  mulps %xmm5, %xmm9      ; x10  15    19 *xmm9
0119          *  mulps <P0>, %xmm10      ; y0   18    22
0120          *  mulps <P1>, %xmm9       ; y1   19    23
0121          *  mulps <P2>, %xmm8       ; y2   20    24
0122          *  mulps <P3>, %xmm7       ; y3   21    25
0123          *  addps %xmm10, %xmm9     ; y    23    26
0124          *  addps %xmm9, %xmm8      ; y    26    29
0125          *  addps %xmm8, %xmm7      ; y    29    32
0126          */
0127         const V x3 = x2 * x;  // 4  → 8
0128         const V x4 = x2 * x2; // 5  → 9
0129         const V x5 = x2 * x3; // 8  → 12
0130         const V x6 = x3 * x3; // 9  → 13
0131         const V x7 = x4 * x3; //
0132         const V x8 = x4 * x4;
0133         const V x9 = x5 * x4;
0134         const V x10 = x5 * x5;
0135         const V x11 = x5 * x6; // 13 → 17
0136         V y = C::P(0) * x11 + C::P(1) * x10 + C::P(2) * x9 + C::P(3) * x8 + C::P(4) * x7
0137             + C::P(5) * x6  + C::P(6) * x5  + C::P(7) * x4 + C::P(8) * x3;
0138 #else
0139         V y = C::P(0);
0140         Vc::Common::unrolled_loop<int, 1, 9>([&](int i) { y = y * x + C::P(i); });
0141         y *= x * x2;
0142 #endif
0143         switch (Base) {
0144         case BaseE:
0145             // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
0146             y += exponent * C::ln2_small();
0147             y -= x2 * C::_1_2(); // [0, 0.25[
0148             x += y;
0149             x += exponent * C::ln2_large();
0150             break;
0151         case Base10:
0152             y += exponent * C::ln2_small();
0153             y -= x2 * C::_1_2(); // [0, 0.25[
0154             x += y;
0155             x += exponent * C::ln2_large();
0156             x *= C::log10_e();
0157             break;
0158         case Base2:
0159             {
0160                 const V x_ = x;
0161                 x *= C::log2_e();
0162                 y *= C::log2_e();
0163                 y -= x_ * x * C::_1_2(); // [0, 0.25[
0164                 x += y;
0165                 x += exponent;
0166                 break;
0167             }
0168         }
0169     }
0170
0171 template <typename Abi>
0172 static Vc_ALWAYS_INLINE void log_series(Vector<double, Abi> &Vc_RESTRICT x,
0173                                         typename Vector<double, Abi>::AsArg exponent)
0174 {
0175     typedef Vector<double, Abi> V;
0176     typedef Detail::Const<double, Abi> C;
0177         const V x2 = x * x;
0178         V y = C::P(0);
0179         V y2 = C::Q(0) + x;
0180         Vc::Common::unrolled_loop<int, 1, 5>([&](int i) {
0181             y = y * x + C::P(i);
0182             y2 = y2 * x + C::Q(i);
0183         });
0184         y2 = x / y2;
0185         y = y * x + C::P(5);
0186         y = x2 * y * y2;
0187         // TODO: refactor the following with the float implementation:
0188         switch (Base) {
0189         case BaseE:
0190             // ln(2) is split in two parts to increase precision (i.e. ln2_small + ln2_large = ln(2))
0191             y += exponent * C::ln2_small();
0192             y -= x2 * C::_1_2(); // [0, 0.25[
0193             x += y;
0194             x += exponent * C::ln2_large();
0195             break;
0196         case Base10:
0197             y += exponent * C::ln2_small();
0198             y -= x2 * C::_1_2(); // [0, 0.25[
0199             x += y;
0200             x += exponent * C::ln2_large();
0201             x *= C::log10_e();
0202             break;
0203         case Base2:
0204             {
0205                 const V x_ = x;
0206                 x *= C::log2_e();
0207                 y *= C::log2_e();
0208                 y -= x_ * x * C::_1_2(); // [0, 0.25[
0209                 x += y;
0210                 x += exponent;
0211                 break;
0212             }
0213         }
0214     }
0215
0216 template <typename T, typename Abi, typename V = Vector<T, Abi>>
0217 static inline Vector<T, Abi> calc(V _x)
0218 {
0219         typedef typename V::Mask M;
0220     typedef Detail::Const<T, Abi> C;
0221
0222         V x(_x);
0223
0224         const M invalidMask = x < V::Zero();
0225         const M infinityMask = x == V::Zero();
0226         const M denormal = x <= C::min();
0227
0228         x(denormal) *= V(Vc::Detail::doubleConstant<1, 0, 54>()); // 2²⁵
0229         V exponent = Detail::exponent(x.data());                    // = ⎣log₂(x)⎦
0230         exponent(denormal) -= 54;
0231
0232         x.setZero(C::exponentMask()); // keep only the fractional part ⇒ x ∈ [1, 2[
0233         x = Detail::operator|(x,
0234                               C::_1_2());  // and set the exponent to 2⁻¹   ⇒ x ∈ [½, 1[
0235
0236         // split calculation in two cases:
0237         // A: x ∈ [½, √½[
0238         // B: x ∈ [√½, 1[
0239         // √½ defines the point where Δe(x) := log₂(x) - ⎣log₂(x)⎦ = ½, i.e.
0240         // log₂(√½) - ⎣log₂(√½)⎦ = ½ * -1 - ⎣½ * -1⎦ = -½ + 1 = ½
0241
0242         const M smallX = x < C::_1_sqrt2();
0243         x(smallX) += x; // => x ∈ [√½,     1[ ∪ [1.5, 1 + √½[
0244         x -= V::One();  // => x ∈ [√½ - 1, 0[ ∪ [0.5, √½[
0245         exponent(!smallX) += V::One();
0246
0247         log_series(x, exponent); // A: (ˣ⁄₂ᵉ - 1, e)  B: (ˣ⁄₂ᵉ⁺¹ - 1, e + 1)
0248
0249         x.setQnan(invalidMask);        // x < 0 → NaN
0250         x(infinityMask) = C::neginf(); // x = 0 → -∞
0251
0252         return x;
0253     }
0254 };
0255 }  // namespace Detail
0256
0257 template <typename T, typename Abi>
0258 Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log(
0259     const Vector<T, Abi> &x)
0260 {
0261     return Detail::LogImpl<BaseE>::calc<T, Abi>(x);
0262 }
0263 template <typename T, typename Abi>
0264 Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log10(
0265     const Vector<T, Abi> &x)
0266 {
0267     return Detail::LogImpl<Base10>::calc<T, Abi>(x);
0268 }
0269 template <typename T, typename Abi>
0270 Vc_INTRINSIC Vc_CONST Vector<T, detail::not_fixed_size_abi<Abi>> log2(
0271     const Vector<T, Abi> &x)
0272 {
0273     return Detail::LogImpl<Base2>::calc<T, Abi>(x);
0274 }
0275
0276 #endif // Vc_COMMON_MATH_H_INTERNAL