Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-09 07:49:13

0001 // ./logTest.sh
0002 
0003 #include <cstdlib>
0004 #include <array>
0005 #include "NP.hh"
0006 
0007 #define KLUDGE_FASTMATH_LOGF(u) (u < 0.998f ? __logf(u) : __logf(u) - 0.46735790f*1e-7f )
0008 
0009 const char* FOLD = getenv("FOLD") ? getenv("FOLD") : "/tmp" ; 
0010 
0011 
0012 __global__ void test_log_(double* dd, unsigned ni, unsigned nj)
0013 {
0014     unsigned ix = blockIdx.x * blockDim.x + threadIdx.x;
0015 
0016     double d = double(ix)/double(ni-1) ; 
0017     float  f = float(d) ;
0018 
0019     double d0 = -1.*logf( d );
0020     float  f0 = -1.f*logf( f );
0021     float  f1 = -1.f*__logf( f );   
0022     float  f1k = -1.f*KLUDGE_FASTMATH_LOGF(f) ; 
0023 
0024     dd[ix*nj+0] = d ; 
0025     dd[ix*nj+1] = d0 ; 
0026     dd[ix*nj+2] = f0 ; 
0027     dd[ix*nj+3] = f1 ; 
0028     dd[ix*nj+4] = f1k ; 
0029 
0030     //printf("//test_log  (ix,iy,ni) (%2d, %2d, %2d) \n", ix, iy, ni );
0031 }
0032 
0033 
0034 void ConfigureLaunch(dim3& numBlocks, dim3& threadsPerBlock, unsigned width )
0035 { 
0036     threadsPerBlock.x = 512 ; 
0037     threadsPerBlock.y = 1 ; 
0038     threadsPerBlock.z = 1 ; 
0039 
0040     numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ; 
0041     numBlocks.y = 1 ; 
0042     numBlocks.z = 1 ; 
0043 }
0044 
0045 
0046 
0047 void test_log_dev()
0048 {
0049     unsigned ni = 1000001 ; 
0050     unsigned nj = 5 ; 
0051 
0052     dim3 numBlocks ; 
0053     dim3 threadsPerBlock ; 
0054     ConfigureLaunch(numBlocks, threadsPerBlock, ni ); 
0055 
0056 
0057     NP* h = NP::Make<double>( ni, nj ) ; 
0058     unsigned arr_bytes = h->arr_bytes() ; 
0059     double* hh = h->values<double>(); 
0060 
0061     double* dd = nullptr ; 
0062     cudaMalloc(reinterpret_cast<void**>( &dd ), arr_bytes );     
0063 
0064     test_log_<<<numBlocks,threadsPerBlock>>>(dd, ni, nj );  
0065 
0066     cudaMemcpy( hh, dd, arr_bytes, cudaMemcpyDeviceToHost ) ; 
0067     cudaDeviceSynchronize();
0068 
0069     h->save(FOLD,"dev_scan.npy"); 
0070 }
0071 
0072 void test_log_host(double sc)
0073 {
0074      std::cout 
0075          << " sc " << std::setw(10) << std::fixed << std::setprecision(8) << sc
0076          << std::endl 
0077          ;
0078 
0079      std::array<double, 8> aa = {   1e-8,    1e-7,   1e-6,   1e-5,   1e-4,   1e-3,   1e-2,   1e-1 } ; 
0080      std::array<double, 8> uu = {1.-1e-8, 1.-1e-7,1.-1e-6,1.-1e-5,1.-1e-4,1.-1e-3,1.-1e-2,1.-1e-1 } ; 
0081      for(unsigned i=0 ; i < uu.size() ; i++)
0082      {
0083          double a  = aa[i] ; 
0084          double u  = uu[i] ;
0085          float  fu = uu[i] ; 
0086          //float  fa = 1.f - fu ; 
0087 
0088          double logu  = -log(u) ; 
0089          float  logfu = -log(fu) ; 
0090          float  alogfu  = a  ;     
0091          //float  alogfu  = fu > 0.999f ? a : logfu  ;     
0092 
0093          //  USING THIS APPROX LOOKS LIKE IT DOES BETTER
0094          //  BUT IT IS CHEATING AS IT USES DOUBLE PRECISION a 
0095          //  BUT IN REALITY NEED TO GET a  BY SUBTRACTING FROM 1.f   
0096          //
0097          //     -ln(1-x) is very close to x for small x
0098          // ie  -ln(u) is very close to 1-u for u close to 1.
0099          //
0100 
0101          double cf = logu - double(logfu); 
0102          double acf = logu - double(alogfu); 
0103 
0104          std::cout 
0105              << " a " << std::setw(10) << std::fixed << std::setprecision(8) << a 
0106              << " u " << std::setw(10) << std::fixed << std::setprecision(8) << u 
0107              << " fu " << std::setw(10) << std::fixed << std::setprecision(8) << fu 
0108              << " -log(u)*sc " << std::setw(10) << std::fixed << std::setprecision(8) << logu*sc
0109              << " -log(fu)*sc " << std::setw(10) << std::fixed << std::setprecision(8) << logfu*sc
0110              << " alogfu*sc " << std::setw(10) << std::fixed << std::setprecision(8) << alogfu*sc
0111              << " cf*sc " << std::setw(10) << std::fixed << std::setprecision(8) << cf*sc
0112              << " acf*sc " << std::setw(10) << std::fixed << std::setprecision(8) << acf*sc
0113              << std::endl
0114              ;  
0115      }
0116 }
0117 
0118 int main()
0119 {
0120     test_log_dev();
0121     //test_log_host(1.);
0122     //test_log_host(1e7);
0123     return 0 ; 
0124 }
0125 
0126