math/detail/Sincospi.hh

0001 //------------------------------- -*- C++ -*- -------------------------------//
0002 // Copyright Celeritas contributors: see top-level COPYRIGHT file for details
0003 // SPDX-License-Identifier: (Apache-2.0 OR MIT)
0004 //---------------------------------------------------------------------------//
0005 //! \file corecel/math/detail/Sincospi.hh
0006 //---------------------------------------------------------------------------//
0007 #pragma once
0008
0009 #include <cmath>
0010 #include <cstdint>
0011
0012 namespace celeritas
0013 {
0014 namespace detail
0015 {
0016 //---------------------------------------------------------------------------//
0017 /*!
0018  * Calculate sin and cosine of a fraction of pi.
0019  * From
0020  * https://stackoverflow.com/questions/42792939/implementation-of-sinpi-and-cospi-using-standard-c-math-library
0021  * from njuffa, (c) CC BY-SA
0022  *
0023  */
0024 inline void sincospi_impl(double a, double* sptr, double* cptr)
0025 {
0026     double c, r, s, t, az;
0027     std::int64_t i;
0028
0029     az = a * 0.0;  // must be evaluated with IEEE-754 semantics
0030     // for |a| >= 2**53, cospi(a) = 1.0, but cospi(Inf) = NaN
0031     a = (std::fabs(a) < 9.0071992547409920e+15) ? a : az;  // 0x1.0p53
0032     // reduce argument to primary approximation interval (-0.25, 0.25)
0033     r = std::nearbyint(a + a);  // must use IEEE-754 "to nearest" rounding
0034     i = static_cast<std::int64_t>(r);
0035     t = std::fma(-0.5, r, a);
0036     // compute core approximations
0037     s = t * t;
0038     // Approximate cos(pi*x) for x in [-0.25,0.25]
0039     r = -1.0369917389758117e-4;
0040     r = std::fma(r, s, 1.9294935641298806e-3);
0041     r = std::fma(r, s, -2.5806887942825395e-2);
0042     r = std::fma(r, s, 2.3533063028328211e-1);
0043     r = std::fma(r, s, -1.3352627688538006e+0);
0044     r = std::fma(r, s, 4.0587121264167623e+0);
0045     r = std::fma(r, s, -4.9348022005446790e+0);
0046     c = std::fma(r, s, 1.0000000000000000e+0);
0047     // Approximate sin(pi*x) for x in [-0.25,0.25]
0048     r = 4.6151442520157035e-4;
0049     r = std::fma(r, s, -7.3700183130883555e-3);
0050     r = std::fma(r, s, 8.2145868949323936e-2);
0051     r = std::fma(r, s, -5.9926452893214921e-1);
0052     r = std::fma(r, s, 2.5501640398732688e+0);
0053     r = std::fma(r, s, -5.1677127800499516e+0);
0054     s = s * t;
0055     r = r * s;
0056     s = std::fma(t, 3.1415926535897931e+0, r);
0057     // Map results according to quadrant
0058     if (i & 2)
0059     {
0060         s = 0.0 - s;  // must be evaluated with IEEE-754 semantics
0061         c = 0.0 - c;  // must be evaluated with IEEE-754 semantics
0062     }
0063     if (i & 1)
0064     {
0065         t = 0.0 - s;  // must be evaluated with IEEE-754 semantics
0066         s = c;
0067         c = t;
0068     }
0069     // IEEE-754: sinPi(+n) is +0 and sinPi(-n) is -0 for positive integers n
0070     if (a == std::floor(a))
0071         s = az;
0072     *sptr = s;
0073     *cptr = c;
0074 }
0075
0076 //---------------------------------------------------------------------------//
0077 /*!
0078  * Calculate sin and cosine of a fraction of pi.
0079  * From
0080  * https://stackoverflow.com/questions/42792939/implementation-of-sinpi-and-cospi-using-standard-c-math-library
0081  * from njuffa, (c) CC BY-SA
0082  */
0083 inline void sincospi_impl(float a, float* sptr, float* cptr)
0084 {
0085     // NOTE: some C++ library implementations (GCC?) don't define
0086     // single-precision functions in std namespace
0087     using namespace std;
0088
0089     float az, t, c, r, s;
0090     int32_t i;
0091
0092     az = a * 0.0f;  // Must be evaluated with IEEE-754 semantics
0093     // For |a| > 2**24, cospi(a) = 1.0f, but cospi(Inf) = NaN
0094     a = (fabsf(a) < 0x1.0p24f) ? a : az;
0095     r = nearbyintf(a + a);  // Must use IEEE-754 "to nearest" rounding
0096     i = static_cast<int32_t>(r);
0097     t = fmaf(-0.5f, r, a);
0098     // Compute core approximations
0099     s = t * t;
0100     // Approximate cos(pi*x) for x in [-0.25,0.25] */
0101     r = 0x1.d9e000p-3f;
0102     r = fmaf(r, s, -0x1.55c400p+0f);
0103     r = fmaf(r, s, 0x1.03c1cep+2f);
0104     r = fmaf(r, s, -0x1.3bd3ccp+2f);
0105     c = fmaf(r, s, 0x1.000000p+0f);
0106     // Approximate sin(pi*x) for x in [-0.25,0.25] */
0107     r = -0x1.310000p-1f;
0108     r = fmaf(r, s, 0x1.46737ep+1f);
0109     r = fmaf(r, s, -0x1.4abbfep+2f);
0110     r = (t * s) * r;
0111     s = fmaf(t, 0x1.921fb6p+1f, r);
0112     if (i & 2)
0113     {
0114         s = 0.0f - s;  // Must be evaluated with IEEE-754 semantics
0115         c = 0.0f - c;  // Must be evaluated with IEEE-754 semantics
0116     }
0117     if (i & 1)
0118     {
0119         t = 0.0f - s;  // Must be evaluated with IEEE-754 semantics
0120         s = c;
0121         c = t;
0122     }
0123     // IEEE-754: sinPi(+n) is +0 and sinPi(-n) is -0 for positive integers n
0124     if (a == floorf(a))
0125         s = az;
0126     *sptr = s;
0127     *cptr = c;
0128 }
0129
0130 //---------------------------------------------------------------------------//
0131 //! Lazy implementation of sin(x * pi)
0132 template<class T>
0133 CELER_FORCEINLINE T sinpi_impl(T v)
0134 {
0135     T result;
0136     T unused;
0137     sincospi_impl(v, &result, &unused);
0138     return result;
0139 }
0140
0141 //---------------------------------------------------------------------------//
0142 //! Lazy implementation of cos(x * pi)
0143 template<class T>
0144 CELER_FORCEINLINE T cospi_impl(T v)
0145 {
0146     T unused;
0147     T result;
0148     sincospi_impl(v, &unused, &result);
0149     return result;
0150 }
0151
0152 //---------------------------------------------------------------------------//
0153 //! Lazy implementation of sincos
0154 template<class T>
0155 CELER_FORCEINLINE void sincos_impl(T x, T* sptr, T* cptr)
0156 {
0157     *sptr = std::sin(x);
0158     *cptr = std::cos(x);
0159 }
0160
0161 //---------------------------------------------------------------------------//
0162
0163 CELER_FORCEINLINE void sincospif(float x, float* sptr, float* cptr)
0164 {
0165     return sincospi_impl(x, sptr, cptr);
0166 }
0167
0168 CELER_FORCEINLINE void sincosf(float x, float* sptr, float* cptr)
0169 {
0170     return sincos_impl(x, sptr, cptr);
0171 }
0172
0173 CELER_FORCEINLINE float sinpif(float x)
0174 {
0175     return sinpi_impl(x);
0176 }
0177
0178 CELER_FORCEINLINE float cospif(float x)
0179 {
0180     return cospi_impl(x);
0181 }
0182
0183 CELER_FORCEINLINE void sincospi(double x, double* sptr, double* cptr)
0184 {
0185     return sincospi_impl(x, sptr, cptr);
0186 }
0187
0188 CELER_FORCEINLINE void sincos(double x, double* sptr, double* cptr)
0189 {
0190     return sincos_impl(x, sptr, cptr);
0191 }
0192
0193 CELER_FORCEINLINE double sinpi(double x)
0194 {
0195     return sinpi_impl(x);
0196 }
0197
0198 CELER_FORCEINLINE double cospi(double x)
0199 {
0200     return cospi_impl(x);
0201 }
0202
0203 //---------------------------------------------------------------------------//
0204 }  // namespace detail
0205 }  // namespace celeritas