Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-10 07:49:45

0001 /**
0002 
0003 ~/o/sysrap/tests/curanddr_uniform_test.sh
0004 
0005 **/
0006 
0007 
0008 
0009 #include <cstdlib>
0010 #include <array>
0011 #include "NP.hh"
0012 #include "scuda.h"
0013 
0014 #include "curand-done-right/curanddr.hxx"
0015 
0016 
0017 
/**
_test_curanddr_uniform
-----------------------

Kernel that fills a row-major (ni, nj) float buffer, one thread per row,
by calling curanddr::uniforms_into_buffer<4> once per group of four floats.

ff : device buffer with room for at least ni*nj floats
ni : number of rows, threads with a global index >= ni do nothing
nj : number of floats per row, only the first 4*(nj/4) floats of each row are written

Expects a 1D launch providing at least ni threads in total.

NOTE(review): uint4{k,0,ix,0} and the trailing 0 are presumably the counter and
seed/key handed to curanddr : confirm against curand-done-right/curanddr.hxx

**/
__global__ void _test_curanddr_uniform(float* ff, int ni, int nj)
{
    uint ix = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid is rounded up to whole blocks, so surplus threads must not write past the buffer
    if( ni <= 0 || ix >= uint(ni) ) return ;

    // Number of complete 4-float groups per row, a non-positive nj gives no work instead of wrapping to a huge unsigned
    uint nk = nj > 0 ? uint(nj)/4 : 0 ;

    for(uint k=0 ; k < nk ; k++)
    {
        // Index with the real row stride nj, this equals 4*(ix*nk + k) whenever nj is a multiple of 4
        float* ffk = ff + ( size_t(ix)*size_t(nj) + size_t(4)*k ) ;
        curanddr::uniforms_into_buffer<4>( ffk, uint4{k,0,ix,0}, 0 );
    }
}
0028 
/**
ConfigureLaunch
----------------

Sets up a 1D launch configuration : blocks of 512 threads and as many blocks
as are needed to provide at least *width* threads (ceiling division).
The y and z extents of both outputs are set to 1.

**/
void ConfigureLaunch(dim3& numBlocks, dim3& threadsPerBlock, unsigned width )
{
    const unsigned blockWidth = 512u ;
    const unsigned blockCount = (width + blockWidth - 1u) / blockWidth ;  // round up to whole blocks

    threadsPerBlock = dim3( blockWidth, 1u, 1u );
    numBlocks       = dim3( blockCount, 1u, 1u );
}
0039 
/**
curanddr_uniform_test_check
----------------------------

Private helper : when rc is not cudaSuccess, reports *what* failed together
with the CUDA error string on stderr and exits with EXIT_FAILURE.

**/
static void curanddr_uniform_test_check( cudaError_t rc, const char* what )
{
    if( rc == cudaSuccess ) return ;
    fprintf(stderr, "//test_curanddr_uniform : %s FAILED : %s\n", what, cudaGetErrorString(rc) );
    std::exit(EXIT_FAILURE);
}

/**
test_curanddr_uniform
----------------------

Creates a (1000, 16) float NP array on the host, allocates a device buffer of
the same byte size, launches _test_curanddr_uniform to fill it, copies the
result back into the host array and saves it to $FOLD/curanddr_uniform_test.npy

Every CUDA call is checked, and any failure aborts the test rather than
saving an array that the kernel did not populate.

**/
void test_curanddr_uniform()
{
    int ni = 1000 ;
    int nj = 16 ;

    dim3 numBlocks ;
    dim3 threadsPerBlock ;
    ConfigureLaunch(numBlocks, threadsPerBlock, ni );

    printf("//test_curanddr_uniform   \n" );
    NP* h = NP::Make<float>( ni, nj ) ;
    size_t arr_bytes = h->arr_bytes() ;   // size_t avoids narrowing the byte count to int
    float* hh = h->values<float>();

    float* dd = nullptr ;
    curanddr_uniform_test_check( cudaMalloc(reinterpret_cast<void**>( &dd ), arr_bytes ), "cudaMalloc" );

    _test_curanddr_uniform<<<numBlocks,threadsPerBlock>>>(dd, ni, nj );

    // Launches return no status : configuration errors show up via cudaGetLastError,
    // faults during execution show up at the next synchronizing call
    curanddr_uniform_test_check( cudaGetLastError(), "kernel launch" );
    curanddr_uniform_test_check( cudaDeviceSynchronize(), "kernel execution" );

    curanddr_uniform_test_check( cudaMemcpy( hh, dd, arr_bytes, cudaMemcpyDeviceToHost ), "cudaMemcpy" );

    // Release the device buffer once the results are on the host
    curanddr_uniform_test_check( cudaFree( dd ), "cudaFree" );

    h->save("$FOLD/curanddr_uniform_test.npy");
}
/**
main
-----

Runs the single test in this file and reports success to the caller.

**/
int main()
{
    test_curanddr_uniform();

    return EXIT_SUCCESS ;
}
0069 
0070