Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-02-21 10:16:42

0001 /*
0002  * inv.h
0003  * An experiment: implement division with the square of the approximate 
0004  * inverse square root.
0005  * In other words one transforms a shift, multiplications and sums into a 
0006  * sqrt.
0007  * 
0008  *  Created on: Jun 24, 2012
0009  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
0010  * 
0011  * VDT is free software: you can redistribute it and/or modify
0012  * it under the terms of the GNU Lesser Public License as published by
0013  * the Free Software Foundation, either version 3 of the License, or
0014  * (at your option) any later version.
0015  * 
0016  * This program is distributed in the hope that it will be useful,
0017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0019  * GNU Lesser Public License for more details.
0020  * 
0021  * You should have received a copy of the GNU Lesser Public License
0022  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0023  */
0024 
0025 #ifndef INV_H_
0026 #define INV_H_
0027 
0028 #include "vdtcore_common.h"
0029 #include "sqrt.h"
0030 #include <cmath>
0031 #include <limits>
0032 
0033 namespace vdt{
0034 
0035 //------------------------------------------------------------------------------
0036 
0037 /// General implementation of the inversion
0038 inline double fast_inv_general(double x, const uint32_t isqrt_iterations) {  
0039   const uint64_t sign_mask = details::getSignMask(x);
0040   const double sqrt_one_over_x = fast_isqrt_general(std::fabs(x),
0041                                                    isqrt_iterations);
0042   return sqrt_one_over_x*(details::dpORuint64(sqrt_one_over_x , sign_mask ));
0043 }
0044 
0045 //------------------------------------------------------------------------------
0046 
0047 /// Four iterations inversion
0048 inline double fast_inv(double x) {return fast_inv_general(x,4);}
0049 
0050 //------------------------------------------------------------------------------
0051 
0052 /// Three iterations
0053 inline double fast_approx_inv(double x) {return fast_inv_general(x,3);}
0054 
0055 //------------------------------------------------------------------------------
0056 
/// Reference implementation for comparisons: plain floating-point division
/// @param x  value to invert
/// @return 1/x in double precision
inline double inv(double x) {
  return 1.0 / x;
}
0059 
0060 //------------------------------------------------------------------------------
0061 // Single precision          
0062 
0063 
0064 
0065 /// General implementation of the inversion
0066 inline float fast_invf_general(float x, const uint32_t isqrt_iterations) { 
0067   const uint32_t sign_mask = details::getSignMask(x);
0068   const float sqrt_one_over_x = fast_isqrtf_general(std::fabs(x),
0069                                                    isqrt_iterations);
0070   return sqrt_one_over_x*(details::spORuint32(sqrt_one_over_x , sign_mask ));
0071 }
0072 
0073 //------------------------------------------------------------------------------
0074 
0075 /// Two iterations
0076 inline float fast_invf(float x) {return fast_invf_general(x,2);}
0077 
0078 //------------------------------------------------------------------------------
0079 
0080 /// One iterations
0081 inline float fast_approx_invf(float x) {return fast_invf_general(x,1);}
0082 
0083 //------------------------------------------------------------------------------
0084 
/// Reference implementation for comparisons: plain floating-point division
/// @param x  value to invert
/// @return 1/x in single precision
inline float invf(float x) {
  return 1.0f / x;
}
0087 
0088 //------------------------------------------------------------------------------
0089 
// Array variants of the inversion functions. Only the declarations live in
// this header. Each takes the element count `size`, a read-only input array
// `iarray` and an output array `oarray`; the __restrict__ qualifiers promise
// the compiler that the two arrays do not alias.
// NOTE(review): presumably each applies the same-named scalar function
// element-wise -- confirm against the implementation file.

// Double precision
void invv(const uint32_t size,
          double const * __restrict__ iarray,
          double* __restrict__ oarray);
void fast_invv(const uint32_t size,
               double const * __restrict__ iarray,
               double* __restrict__ oarray);
void fast_approx_invv(const uint32_t size,
                      double const * __restrict__ iarray,
                      double* __restrict__ oarray);

// Single precision
void invfv(const uint32_t size,
           float const * __restrict__ iarray,
           float* __restrict__ oarray);
void fast_invfv(const uint32_t size,
                float const * __restrict__ iarray,
                float* __restrict__ oarray);
void fast_approx_invfv(const uint32_t size,
                       float const * __restrict__ iarray,
                       float* __restrict__ oarray);
0096 
0097 } // end namespace vdt
0098 
0099 #endif /* INV_H_ */