include/vdt/exp.h

0001 /*
0002  * exp.h
0003  * The basic idea is to exploit Pade polynomials.
0004  * A lot of ideas were inspired by the cephes math library (by Stephen L. Moshier
0005  * moshier@na-net.ornl.gov) as well as actual code.
0006  * The Cephes library can be found here:  http://www.netlib.org/cephes/
0007  *
0008  *  Created on: Jun 23, 2012
0009  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
0010  */
0011
0012 /*
0013  * VDT is free software: you can redistribute it and/or modify
0014  * it under the terms of the GNU Lesser Public License as published by
0015  * the Free Software Foundation, either version 3 of the License, or
0016  * (at your option) any later version.
0017  *
0018  * This program is distributed in the hope that it will be useful,
0019  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0020  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0021  * GNU Lesser Public License for more details.
0022  *
0023  * You should have received a copy of the GNU Lesser Public License
0024  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0025  */
0026
0027 #ifndef _VDT_EXP_
0028 #define _VDT_EXP_
0029
0030 #include "vdtcore_common.h"
0031 #include <limits>
0032
0033 namespace vdt{
0034
0035 namespace details{
0036
0037   const double EXP_LIMIT = 708;
0038
0039   const double PX1exp = 1.26177193074810590878E-4;
0040   const double PX2exp = 3.02994407707441961300E-2;
0041   const double PX3exp = 9.99999999999999999910E-1;
0042   const double QX1exp = 3.00198505138664455042E-6;
0043   const double QX2exp = 2.52448340349684104192E-3;
0044   const double QX3exp = 2.27265548208155028766E-1;
0045   const double QX4exp = 2.00000000000000000009E0;
0046
0047   const double LOG2E = 1.4426950408889634073599; // 1/log(2)
0048
0049   const float MAXLOGF = 88.72283905206835f;
0050   const float MINLOGF = -88.f;
0051
0052   const float C1F =   0.693359375f;
0053   const float C2F =  -2.12194440e-4f;
0054
0055   const float PX1expf = 1.9875691500E-4f;
0056   const float PX2expf =1.3981999507E-3f;
0057   const float PX3expf =8.3334519073E-3f;
0058   const float PX4expf =4.1665795894E-2f;
0059   const float PX5expf =1.6666665459E-1f;
0060   const float PX6expf =5.0000001201E-1f;
0061
0062   const float LOG2EF = 1.44269504088896341f;
0063
0064 }
0065
0066 // Exp double precision --------------------------------------------------------
0067
0068
0069 /// Exponential Function double precision
0070 inline double fast_exp(double initial_x){
0071
0072     double x = initial_x;
0073     double px=details::fpfloor(details::LOG2E * x +0.5);
0074
0075     const int32_t n = int32_t(px);
0076
0077     x -= px * 6.93145751953125E-1;
0078     x -= px * 1.42860682030941723212E-6;
0079
0080     const double xx = x * x;
0081
0082     // px = x * P(x**2).
0083     px = details::PX1exp;
0084     px *= xx;
0085     px += details::PX2exp;
0086     px *= xx;
0087     px += details::PX3exp;
0088     px *= x;
0089
0090     // Evaluate Q(x**2).
0091     double qx = details::QX1exp;
0092     qx *= xx;
0093     qx += details::QX2exp;
0094     qx *= xx;
0095     qx += details::QX3exp;
0096     qx *= xx;
0097     qx += details::QX4exp;
0098
0099     // e**x = 1 + 2x P(x**2)/( Q(x**2) - P(x**2) )
0100     x = px / (qx - px);
0101     x = 1.0 + 2.0 * x;
0102
0103     // Build 2^n in double.
0104     x *= details::uint642dp(( ((uint64_t)n) +1023)<<52);
0105
0106     if (initial_x > details::EXP_LIMIT)
0107             x = std::numeric_limits<double>::infinity();
0108     if (initial_x < -details::EXP_LIMIT)
0109             x = 0.;
0110
0111     return x;
0112
0113 }
0114
0115 // Exp single precision --------------------------------------------------------
0116
0117 /// Exponential Function single precision
0118 inline float fast_expf(float initial_x) {
0119
0120     float x = initial_x;
0121
0122     float z = details::fpfloor( details::LOG2EF * x +0.5f ); /* floor() truncates toward -infinity. */
0123
0124     x -= z * details::C1F;
0125     x -= z * details::C2F;
0126     const int32_t n = int32_t ( z );
0127
0128     const float x2 = x * x;
0129
0130     z = x*details::PX1expf;
0131     z += details::PX2expf;
0132     z *= x;
0133     z += details::PX3expf;
0134     z *= x;
0135     z += details::PX4expf;
0136     z *= x;
0137     z += details::PX5expf;
0138     z *= x;
0139     z += details::PX6expf;
0140     z *= x2;
0141     z += x + 1.0f;
0142
0143     /* multiply by power of 2 */
0144     z *=  details::uint322sp((n+0x7f)<<23);
0145
0146     if (initial_x > details::MAXLOGF) z=std::numeric_limits<float>::infinity();
0147     if (initial_x < details::MINLOGF) z=0.f;
0148
0149     return z;
0150
0151   }
0152
0153 //------------------------------------------------------------------------------
0154
0155 void expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0156 void fast_expv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0157 void expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0158 void fast_expfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0159
0160 } // end namespace vdt
0161
0162 #endif