File indexing completed on 2025-02-21 10:16:42
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
0022
0023
0024
0025
0026 #ifndef SQRT_H_
0027 #define SQRT_H_
0028
0029 #include "vdtcore_common.h"
0030
0031 namespace vdt{
0032
0033
0034
0035
0036
0037 inline double fast_isqrt_general(double x, const uint32_t ISQRT_ITERATIONS) {
0038
0039 const double threehalfs = 1.5;
0040 const double x2 = x * 0.5;
0041 double y = x;
0042 uint64_t i = details::dp2uint64(y);
0043
0044 i = 0x5fe6eb50c7aa19f9ULL - ( i >> 1 );
0045 y = details::uint642dp(i);
0046 for (uint32_t j=0;j<ISQRT_ITERATIONS;++j)
0047 y *= threehalfs - ( x2 * y * y ) ;
0048
0049 return y;
0050 }
0051
0052
0053
0054
0055 inline double fast_isqrt(double x) {return fast_isqrt_general(x,4);}
0056
0057
0058 inline double fast_approx_isqrt(double x) {return fast_isqrt_general(x,3);}
0059
0060
0061
0062
0063 inline double isqrt (double x) {return 1./std::sqrt(x);}
0064
0065
0066
0067
0068 inline float fast_isqrtf_general(float x, const uint32_t ISQRT_ITERATIONS) {
0069
0070 const float threehalfs = 1.5f;
0071 const float x2 = x * 0.5f;
0072 float y = x;
0073 uint32_t i = details::sp2uint32(y);
0074 i = 0x5f3759df - ( i >> 1 );
0075 y = details::uint322sp(i);
0076 for (uint32_t j=0;j<ISQRT_ITERATIONS;++j)
0077 y *= ( threehalfs - ( x2 * y * y ) );
0078
0079 return y;
0080 }
0081
0082
0083
0084
0085 inline float fast_isqrtf(float x) {return fast_isqrtf_general(x,2);}
0086
0087
0088 inline float fast_approx_isqrtf(float x) {return fast_isqrtf_general(x,1);}
0089
0090
0091
0092
0093 inline float isqrtf (float x) {return 1.f/std::sqrt(x);}
0094
0095
0096
0097 void isqrtv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0098 void fast_isqrtv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0099 void fast_approx_isqrtv(const uint32_t size, double const * __restrict__ iarray, double* __restrict__ oarray);
0100 void isqrtfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0101 void fast_isqrtfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0102 void fast_approx_isqrtfv(const uint32_t size, float const * __restrict__ iarray, float* __restrict__ oarray);
0103
0104 }
0105
0106 #endif