Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-02-21 10:16:42

0001 /*
0002  * sqrt.h
0003  * Implementations born on the Quake 3 fast inverse square root 
0004  * function.
0005  * http://en.wikipedia.org/wiki/Fast_inverse_square_root
0006  * 
0007  *  Created on: Jun 24, 2012
0008  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
0009  */
0010 
0011 /* 
0012  * VDT is free software: you can redistribute it and/or modify
0013  * it under the terms of the GNU Lesser Public License as published by
0014  * the Free Software Foundation, either version 3 of the License, or
0015  * (at your option) any later version.
0016  * 
0017  * This program is distributed in the hope that it will be useful,
0018  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0019  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0020  * GNU Lesser Public License for more details.
0021  * 
0022  * You should have received a copy of the GNU Lesser Public License
0023  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0024  */
0025 
0026 #ifndef SQRT_H_
0027 #define SQRT_H_
0028 
0029 #include "vdtcore_common.h"
0030 
0031 namespace vdt{
0032 
0033 //------------------------------------------------------------------------------
0034 
0035 
0036 /// Sqrt implmentation from Quake3
0037 inline double fast_isqrt_general(double x, const uint32_t ISQRT_ITERATIONS) {
0038 
0039   const double threehalfs = 1.5;
0040   const double x2 = x * 0.5;
0041   double y  = x;
0042   uint64_t i  = details::dp2uint64(y);
0043   // Evil!
0044   i  = 0x5fe6eb50c7aa19f9ULL  - ( i >> 1 );
0045   y  = details::uint642dp(i);
0046   for (uint32_t j=0;j<ISQRT_ITERATIONS;++j)
0047       y *= threehalfs - ( x2 * y * y ) ;
0048 
0049   return y;
0050 }
0051 
0052 //------------------------------------------------------------------------------
0053 
0054 /// Four iterations
0055 inline double fast_isqrt(double x) {return fast_isqrt_general(x,4);}
0056 
0057 /// Three iterations
0058 inline double fast_approx_isqrt(double x) {return fast_isqrt_general(x,3);}
0059 
0060 //------------------------------------------------------------------------------
0061 
0062 /// For comparisons
0063 inline double isqrt (double x) {return 1./std::sqrt(x);}
0064 
0065 //------------------------------------------------------------------------------
0066 
0067 /// Sqrt implmentation from Quake3
0068 inline float fast_isqrtf_general(float x, const uint32_t ISQRT_ITERATIONS) {
0069 
0070     const float threehalfs = 1.5f;
0071     const float x2 = x * 0.5f;
0072     float y  = x;
0073     uint32_t i  = details::sp2uint32(y);
0074     i  = 0x5f3759df - ( i >> 1 );
0075     y  = details::uint322sp(i);
0076     for (uint32_t j=0;j<ISQRT_ITERATIONS;++j)
0077         y  *= ( threehalfs - ( x2 * y * y ) );
0078 
0079     return y;
0080 }
0081 
0082 //------------------------------------------------------------------------------
0083 
0084 /// Two iterations
0085 inline float fast_isqrtf(float x) {return fast_isqrtf_general(x,2);}
0086 
0087 /// One (!) iterations
0088 inline float fast_approx_isqrtf(float x) {return fast_isqrtf_general(x,1);}
0089 
0090 //------------------------------------------------------------------------------
0091 
0092 /// For comparisons
0093 inline float isqrtf (float x) {return 1.f/std::sqrt(x);}
0094 
0095 //------------------------------------------------------------------------------
0096 
0097 void isqrtv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0098 void fast_isqrtv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0099 void fast_approx_isqrtv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0100 void isqrtfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0101 void fast_isqrtfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0102 void fast_approx_isqrtfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0103 
0104 } // end namespace vdt
0105 
0106 #endif /* SQRT_H_ */