File indexing completed on 2026-04-10 07:49:40
0001 #include <cstdio>
0002 #include "curand_kernel.h"
0003 #include "QUDA_CHECK.h"
0004 #include "SLaunchSequence.h"
0005 #include "scurandref.h"
0006 #include "qrng.h"
0007
0008
0009
0010
0011
0012
0013
0014
0015
0016
0017
0018
0019
0020
0021
/**
_QCurandState_curand_init_chunk
--------------------------------

Kernel that initialises one generator state per thread with curand_init.

threads_per_launch
    number of states handled by this launch; threads whose flat index is at
    or beyond this value return without touching memory
id_offset
    value added to the flat thread index to form the subsequence argument
    of curand_init
cr
    descriptor dereferenced on the device for its seed and offset members,
    so it must point to device-accessible memory
states_thread_offset
    base of the output states for this launch; thread id writes element id

Expects a 1D launch: only the x components of blockIdx, blockDim and
threadIdx are used to form the flat index.
**/
__global__ void _QCurandState_curand_init_chunk(int threads_per_launch, int id_offset, scurandref<XORWOW>* cr, XORWOW* states_thread_offset )
{
    int id = blockIdx.x*blockDim.x + threadIdx.x;
    if (id >= threads_per_launch) return;

    // Form the subsequence in 64 bits : adding the two values as int would
    // overflow when id_offset is close to the top of the int range, whereas
    // curand_init accepts a 64 bit subsequence. For sums that fit in an int
    // the result is identical to the plain int addition.
    unsigned long long subsequence = static_cast<unsigned long long>(id_offset) + static_cast<unsigned long long>(id) ;

    curand_init(cr->seed, subsequence, cr->offset, states_thread_offset + id );
}
0030
0031
0032
0033
0034
0035
0036
0037
0038
0039
0040
0041
0042
0043
/**
QCurandState_curand_init_chunk
-------------------------------

Host-side driver : for every launch described by seq this starts the
_QCurandState_curand_init_chunk kernel and stores the value returned by
QUDA::after_kernel into that launch's kernel_time.

seq
    launch sequence; each entry supplies blocks_per_launch, threads_per_block,
    threads_per_launch and thread_offset, and receives kernel_time
cr
    descriptor read on the host for num, chunk_offset and states
    (states is offset and handed to the kernel, so it is presumably a
    device pointer - TODO confirm)
d_cr
    descriptor pointer passed unchanged to the kernel, which dereferences it
    on the device

Problems are reported on stderr without changing control flow :

* a launch whose combined id offset does not fit the int kernel argument
* a launch that the CUDA runtime reports as failed
**/
extern "C" void QCurandState_curand_init_chunk(SLaunchSequence* seq, scurandref<XORWOW>* cr, scurandref<XORWOW>* d_cr)
{
    // %p formally requires a void* argument, hence the explicit casts
    printf("//QCurandState_curand_init_chunk seq.items %d cr %p d_cr %p cr.num %llu cr.chunk_offset %llu \n",
           seq->items, static_cast<void*>(cr), static_cast<void*>(d_cr), cr->num, cr->chunk_offset );

    cudaEvent_t start, stop ;

    for(unsigned i=0 ; i < seq->launches.size() ; i++)
    {
        SLaunch& l = seq->launches[i] ;

        // debug dump, disabled by default
        if(0) printf("// l.sequence_index %d l.blocks_per_launch %d l.threads_per_block %d l.threads_per_launch %d l.thread_offset %d \n",
                     l.sequence_index, l.blocks_per_launch, l.threads_per_block, l.threads_per_launch, l.thread_offset );

        // Combine the offsets in 64 bits first so that a value too large for
        // the int kernel argument is detected rather than silently wrapped.
        const unsigned long long id_offset_wide = static_cast<unsigned long long>(l.thread_offset) + static_cast<unsigned long long>(cr->chunk_offset) ;
        int id_offset = static_cast<int>(id_offset_wide) ;

        if( static_cast<unsigned long long>(id_offset) != id_offset_wide )
        {
            fprintf(stderr, "//QCurandState_curand_init_chunk WARNING launch %u : id_offset %llu does not fit the int kernel argument, truncated to %d \n",
                    i, id_offset_wide, id_offset );
        }

        // cr->states is read on the host, the resulting pointer is only used by the kernel
        XORWOW* states_thread_offset = cr->states + l.thread_offset ;

        // presumably prepares the start/stop events used for timing - see QUDA::before_kernel
        QUDA::before_kernel( start, stop );

        _QCurandState_curand_init_chunk<<<l.blocks_per_launch,l.threads_per_block>>>( l.threads_per_launch, id_offset, d_cr, states_thread_offset );

        // Kernel launches do not return a status. Peek (not Get) leaves the
        // error in place so any later check still observes it.
        cudaError_t launch_err = cudaPeekAtLastError() ;
        if( launch_err != cudaSuccess )
        {
            fprintf(stderr, "//QCurandState_curand_init_chunk ERROR launch %u failed : %s \n",
                    i, cudaGetErrorString(launch_err) );
        }

        l.kernel_time = QUDA::after_kernel( start, stop );
    }
}
0072
0073