|
|
|||
File indexing completed on 2025-12-16 10:33:16
0001 /* 0002 * tanh.h 0003 * The basic idea is to exploit Pade polynomials. 0004 * Implemented by Manuel Schiller for LHCb. 0005 * 0006 * Created on: Sep 23, 2017 0007 * Author: Paul Seyfert, Manuel Schiller 0008 */ 0009 0010 /* 0011 * VDT is free software: you can redistribute it and/or modify 0012 * it under the terms of the GNU Lesser Public License as published by 0013 * the Free Software Foundation, either version 3 of the License, or 0014 * (at your option) any later version. 0015 * 0016 * This program is distributed in the hope that it will be useful, 0017 * but WITHOUT ANY WARRANTY; without even the implied warranty of 0018 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 0019 * GNU Lesser Public License for more details. 0020 * 0021 * You should have received a copy of the GNU Lesser Public License 0022 * along with this program. If not, see <http://www.gnu.org/licenses/>. 0023 */ 0024 0025 #ifndef TANH_H_ 0026 #define TANH_H_ 0027 0028 #include "vdtcore_common.h" 0029 0030 namespace vdt{ 0031 0032 0033 0034 0035 /// Fast tanh implementation double precision 0036 inline double fast_tanh(double x){ 0037 // for very large |x| > 20, tanh(x) is x/|x| anyway (at least to double 0038 // precision) 0039 // 0040 // NB: branch-free code takes longer to execute 0041 if (std::abs(x) > 20.) return std::copysign(1., x); 0042 // strategy for large arguments: tanh(2x) = 2 tanh(x)/(1 + tanh^2(x)) 0043 // idea is to use this "argument halving" a couple of times, and use a 0044 // very short Padé approximation for the rest of the way 0045 const auto xx = x * 0.125; 0046 const auto xx2 = xx * xx; 0047 const auto numer = 135135 + xx2 * (17325 + xx2 * ( 378 + xx2 * 1)); 0048 const auto denom = 135135 + xx2 * (62370 + xx2 * (3150 + xx2 * 28)); 0049 0050 auto tanh = xx * numer / denom; 0051 tanh = 2 * tanh / (tanh * tanh + 1); 0052 tanh = 2 * tanh / (tanh * tanh + 1); 0053 return 2 * tanh / (tanh * tanh + 1); 0054 } 0055 0056 //------------------------------------------------------------------------------ 0057 /// Fast tanh implementation single precision 0058 inline float fast_tanhf( float x ) { 0059 // same strategy as double version above, but even shorter Padé 0060 // approximation is sufficient for float 0061 // 0062 // NB: branch-free code takes longer to execute 0063 if (std::abs(x) > 9.1f) return std::copysign(1.f, x); 0064 const auto xx = x * 0.125f; 0065 const auto xx2 = xx * xx; 0066 auto tanh = xx * (xx2 + 15) / (6 * xx2 + 15); 0067 tanh = 2 * tanh / (tanh * tanh + 1); 0068 tanh = 2 * tanh / (tanh * tanh + 1); 0069 return 2 * tanh / (tanh * tanh + 1); 0070 } 0071 0072 //------------------------------------------------------------------------------ 0073 // Vector signatures 0074 0075 void tanhv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); 0076 void fast_tanhv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray); 0077 void tanhfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); 0078 void fast_tanhfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray); 0079 0080 }// end of vdt 0081 0082 #endif // end of tanh
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|