Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-04-10 07:49:40

0001 #include <cstdio>
0002 #include "curand_kernel.h"
0003 #include "QUDA_CHECK.h"
0004 #include "SLaunchSequence.h"
0005 #include "scurandref.h"
0006 #include "qrng.h"
0007 
0008 
/**
_QCurandState_curand_init_chunk
---------------------------------

Kernel that calls curand_init once per thread, each thread initializing
the single state at states_thread_offset + id.

threads_per_launch
   threads with id >= threads_per_launch return without doing anything,
   so the grid may be larger than the number of states to initialize

id
   [0:threads_per_launch]
   flat 1D index : blockIdx.x*blockDim.x + threadIdx.x

id_offset
   added to id to form the sequence argument of curand_init,
   held as unsigned long long so large offsets are not truncated to 32 bits

cr
   dereferenced in the kernel for seed and offset, so must be a device pointer

states_thread_offset
   enables multiple launches to write into the correct output slot

**/


__global__ void _QCurandState_curand_init_chunk(int threads_per_launch, unsigned long long id_offset, scurandref<XORWOW>* cr, XORWOW* states_thread_offset )
{
    int id = blockIdx.x*blockDim.x + threadIdx.x;
    if (id >= threads_per_launch) return;

    // sum in 64 bits : avoids signed int overflow of id+id_offset for large offsets
    unsigned long long sequence = id_offset + static_cast<unsigned long long>(id) ;

    curand_init(cr->seed, sequence, cr->offset, states_thread_offset + id );

    //if( id == 0 ) printf("// _QCurandState_curand_init_chunk id_offset %llu \n", id_offset );
}
0030 
0031 
/**
QCurandState_curand_init_chunk
--------------------------------

Host side launcher : loops over seq->launches, launching the
_QCurandState_curand_init_chunk kernel once per SLaunch and storing the
value returned by QUDA::after_kernel into l.kernel_time.

NB cr and d_cr hold the same values, however cr is host pointer and d_cr is device pointer
cr->states is device pointer, note that pointer arithmetic works on device pointer

Because the states are written just for one chunk there is no need for a chunk_offset
on the output side (cr->states + l.thread_offset), but chunk_offset is needed on the
input side (the id_offset passed to the kernel).

id_offset is summed in unsigned long long so the addition itself cannot wrap at 32 bits,
any narrowing after that is down to the parameter type of the kernel.

A CUDA error pending after a launch is reported to stderr. The error is only peeked at,
not cleared, and the loop carries on with the remaining launches.

**/

extern "C" void QCurandState_curand_init_chunk(SLaunchSequence* seq,  scurandref<XORWOW>* cr, scurandref<XORWOW>* d_cr) 
{
    // NB this is still on CPU, dereferencing d_cr here will BUS_ERROR 

    // %p requires void* arguments, hence the casts
    printf("//QCurandState_curand_init_chunk seq.items %d cr %p  d_cr %p cr.num %llu cr.chunk_offset %llu \n", seq->items, (void*)cr, (void*)d_cr, cr->num, cr->chunk_offset );  

    cudaEvent_t start, stop ;

    for(unsigned i=0 ; i < seq->launches.size() ; i++)
    {
        SLaunch& l = seq->launches[i] ; 

        if(0) printf("// l.sequence_index %d  l.blocks_per_launch %d l.threads_per_block %d  l.threads_per_launch %d l.thread_offset %d  \n", 
                         l.sequence_index,    l.blocks_per_launch,   l.threads_per_block,    l.threads_per_launch,   l.thread_offset  );  

        // input side offset : position of this launch within the chunk plus position of the chunk
        unsigned long long id_offset = static_cast<unsigned long long>(l.thread_offset) + static_cast<unsigned long long>(cr->chunk_offset) ;   

        // output side offset : only the position within the chunk, as cr->states holds just this chunk
        XORWOW* states_thread_offset = cr->states  + l.thread_offset ; 
        // THIS: IS NOT GENERAL : STATE LOADING IS ONLY RELEVANT FOR XORWOW     


        QUDA::before_kernel( start, stop );

        // d_cr (not cr) is passed as the kernel dereferences it on device
        _QCurandState_curand_init_chunk<<<l.blocks_per_launch,l.threads_per_block>>>( l.threads_per_launch, id_offset, d_cr, states_thread_offset  );  

        // kernel launches do not return an error code : peek (without clearing) to report bad launch configs
        cudaError_t launch_err = cudaPeekAtLastError();
        if( launch_err != cudaSuccess ) fprintf(stderr, "//QCurandState_curand_init_chunk launch %u CUDA error : %s \n", i, cudaGetErrorString(launch_err) );

        l.kernel_time = QUDA::after_kernel( start, stop ); 
    }
} 
0072 
0073