File indexing completed on 2026-04-09 07:49:13
0001
0002
0003 #include <cstdlib>
0004 #include <array>
0005 #include "NP.hh"
0006
0007 #define KLUDGE_FASTMATH_LOGF(u) (u < 0.998f ? __logf(u) : __logf(u) - 0.46735790f*1e-7f )
0008
0009 const char* FOLD = getenv("FOLD") ? getenv("FOLD") : "/tmp" ;
0010
0011
0012 __global__ void test_log_(double* dd, unsigned ni, unsigned nj)
0013 {
0014 unsigned ix = blockIdx.x * blockDim.x + threadIdx.x;
0015
0016 double d = double(ix)/double(ni-1) ;
0017 float f = float(d) ;
0018
0019 double d0 = -1.*logf( d );
0020 float f0 = -1.f*logf( f );
0021 float f1 = -1.f*__logf( f );
0022 float f1k = -1.f*KLUDGE_FASTMATH_LOGF(f) ;
0023
0024 dd[ix*nj+0] = d ;
0025 dd[ix*nj+1] = d0 ;
0026 dd[ix*nj+2] = f0 ;
0027 dd[ix*nj+3] = f1 ;
0028 dd[ix*nj+4] = f1k ;
0029
0030
0031 }
0032
0033
0034 void ConfigureLaunch(dim3& numBlocks, dim3& threadsPerBlock, unsigned width )
0035 {
0036 threadsPerBlock.x = 512 ;
0037 threadsPerBlock.y = 1 ;
0038 threadsPerBlock.z = 1 ;
0039
0040 numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ;
0041 numBlocks.y = 1 ;
0042 numBlocks.z = 1 ;
0043 }
0044
0045
0046
0047 void test_log_dev()
0048 {
0049 unsigned ni = 1000001 ;
0050 unsigned nj = 5 ;
0051
0052 dim3 numBlocks ;
0053 dim3 threadsPerBlock ;
0054 ConfigureLaunch(numBlocks, threadsPerBlock, ni );
0055
0056
0057 NP* h = NP::Make<double>( ni, nj ) ;
0058 unsigned arr_bytes = h->arr_bytes() ;
0059 double* hh = h->values<double>();
0060
0061 double* dd = nullptr ;
0062 cudaMalloc(reinterpret_cast<void**>( &dd ), arr_bytes );
0063
0064 test_log_<<<numBlocks,threadsPerBlock>>>(dd, ni, nj );
0065
0066 cudaMemcpy( hh, dd, arr_bytes, cudaMemcpyDeviceToHost ) ;
0067 cudaDeviceSynchronize();
0068
0069 h->save(FOLD,"dev_scan.npy");
0070 }
0071
0072 void test_log_host(double sc)
0073 {
0074 std::cout
0075 << " sc " << std::setw(10) << std::fixed << std::setprecision(8) << sc
0076 << std::endl
0077 ;
0078
0079 std::array<double, 8> aa = { 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1 } ;
0080 std::array<double, 8> uu = {1.-1e-8, 1.-1e-7,1.-1e-6,1.-1e-5,1.-1e-4,1.-1e-3,1.-1e-2,1.-1e-1 } ;
0081 for(unsigned i=0 ; i < uu.size() ; i++)
0082 {
0083 double a = aa[i] ;
0084 double u = uu[i] ;
0085 float fu = uu[i] ;
0086
0087
0088 double logu = -log(u) ;
0089 float logfu = -log(fu) ;
0090 float alogfu = a ;
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101 double cf = logu - double(logfu);
0102 double acf = logu - double(alogfu);
0103
0104 std::cout
0105 << " a " << std::setw(10) << std::fixed << std::setprecision(8) << a
0106 << " u " << std::setw(10) << std::fixed << std::setprecision(8) << u
0107 << " fu " << std::setw(10) << std::fixed << std::setprecision(8) << fu
0108 << " -log(u)*sc " << std::setw(10) << std::fixed << std::setprecision(8) << logu*sc
0109 << " -log(fu)*sc " << std::setw(10) << std::fixed << std::setprecision(8) << logfu*sc
0110 << " alogfu*sc " << std::setw(10) << std::fixed << std::setprecision(8) << alogfu*sc
0111 << " cf*sc " << std::setw(10) << std::fixed << std::setprecision(8) << cf*sc
0112 << " acf*sc " << std::setw(10) << std::fixed << std::setprecision(8) << acf*sc
0113 << std::endl
0114 ;
0115 }
0116 }
0117
0118 int main()
0119 {
0120 test_log_dev();
0121
0122
0123 return 0 ;
0124 }
0125
0126