include/vdt/inv.h

0001 /*
0002  * inv.h
0003  * An experiment: implement division with the square of the approximate
0004  * inverse square root.
0005  * In other words one transforms a shift, multiplications and sums into a
0006  * sqrt.
0007  *
0008  *  Created on: Jun 24, 2012
0009  *      Author: Danilo Piparo, Thomas Hauth, Vincenzo Innocente
0010  *
0011  * VDT is free software: you can redistribute it and/or modify
0012  * it under the terms of the GNU Lesser Public License as published by
0013  * the Free Software Foundation, either version 3 of the License, or
0014  * (at your option) any later version.
0015  *
0016  * This program is distributed in the hope that it will be useful,
0017  * but WITHOUT ANY WARRANTY; without even the implied warranty of
0018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
0019  * GNU Lesser Public License for more details.
0020  *
0021  * You should have received a copy of the GNU Lesser Public License
0022  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
0023  */
0024
0025 #ifndef INV_H_
0026 #define INV_H_
0027
0028 #include "vdtcore_common.h"
0029 #include "sqrt.h"
0030 #include <cmath>
0031 #include <limits>
0032
0033 namespace vdt{
0034
0035 //------------------------------------------------------------------------------
0036
0037 /// General implementation of the inversion
0038 inline double fast_inv_general(double x, const uint32_t isqrt_iterations) {
0039   const uint64_t sign_mask = details::getSignMask(x);
0040   const double sqrt_one_over_x = fast_isqrt_general(std::fabs(x),
0041                                                    isqrt_iterations);
0042   return sqrt_one_over_x*(details::dpORuint64(sqrt_one_over_x , sign_mask ));
0043 }
0044
0045 //------------------------------------------------------------------------------
0046
0047 /// Four iterations inversion
0048 inline double fast_inv(double x) {return fast_inv_general(x,4);}
0049
0050 //------------------------------------------------------------------------------
0051
0052 /// Three iterations
0053 inline double fast_approx_inv(double x) {return fast_inv_general(x,3);}
0054
0055 //------------------------------------------------------------------------------
0056
/// Reference inversion through an ordinary double-precision division.
/// Provided for comparisons with the fast variants.
inline double inv(double x) {
  const double one = 1.;
  return one / x;
}
0059
0060 //------------------------------------------------------------------------------
0061 // Single precision
0062
0063
0064
0065 /// General implementation of the inversion
0066 inline float fast_invf_general(float x, const uint32_t isqrt_iterations) {
0067   const uint32_t sign_mask = details::getSignMask(x);
0068   const float sqrt_one_over_x = fast_isqrtf_general(std::fabs(x),
0069                                                    isqrt_iterations);
0070   return sqrt_one_over_x*(details::spORuint32(sqrt_one_over_x , sign_mask ));
0071 }
0072
0073 //------------------------------------------------------------------------------
0074
0075 /// Two iterations
0076 inline float fast_invf(float x) {return fast_invf_general(x,2);}
0077
0078 //------------------------------------------------------------------------------
0079
0080 /// One iterations
0081 inline float fast_approx_invf(float x) {return fast_invf_general(x,1);}
0082
0083 //------------------------------------------------------------------------------
0084
/// Reference inversion through an ordinary single-precision division.
/// Provided for comparisons with the fast variants.
inline float invf(float x) {
  const float one = 1.f;
  return one / x;
}
0087
0088 //------------------------------------------------------------------------------
0089
// Array signatures. Only declared here; the definitions live elsewhere.
// Every function takes an element count, a read-only input array and an
// output array. Both pointers are __restrict__-qualified, so callers must not
// pass overlapping buffers.
// NOTE(review): each presumably applies the like-named scalar function to
// every element of iarray and stores the result in oarray -- confirm against
// the implementation.

// Double precision
void invv(const uint32_t size,
          double const * __restrict__ iarray,
          double* __restrict__ oarray);
void fast_invv(const uint32_t size,
               double const * __restrict__ iarray,
               double* __restrict__ oarray);
void fast_approx_invv(const uint32_t size,
                      double const * __restrict__ iarray,
                      double* __restrict__ oarray);

// Single precision
void invfv(const uint32_t size,
           float const * __restrict__ iarray,
           float* __restrict__ oarray);
void fast_invfv(const uint32_t size,
                float const * __restrict__ iarray,
                float* __restrict__ oarray);
void fast_approx_invfv(const uint32_t size,
                       float const * __restrict__ iarray,
                       float* __restrict__ oarray);
0096
0097 } // end namespace vdt
0098
0099 #endif /* INV_H_ */