File indexing completed on 2026-04-09 07:49:09
0001
#include <cstdio>

#include "SLOG.hh"
#include "scuda.h"

#include "QUDA_CHECK.h"
#include "QPoly.hh"
#include <cuda_runtime.h>
0008
0009
0010 QPoly::QPoly()
0011 {
0012 }
0013
0014
0015 extern "C" void QPoly_demo(dim3 numBlocks, dim3 threadsPerBlock) ;
0016 extern "C" void QPoly_tmpl_demo(dim3 numBlocks, dim3 threadsPerBlock) ;
0017
0018
0019 void QPoly::configureLaunch( dim3& numBlocks, dim3& threadsPerBlock, unsigned width, unsigned height )
0020 {
0021 threadsPerBlock.x = height == 1 && width < 512 ? width : 512 ;
0022 threadsPerBlock.y = 1 ;
0023 threadsPerBlock.z = 1 ;
0024
0025 numBlocks.x = (width + threadsPerBlock.x - 1) / threadsPerBlock.x ;
0026 numBlocks.y = (height + threadsPerBlock.y - 1) / threadsPerBlock.y ;
0027 numBlocks.z = 1 ;
0028 }
0029
0030 void QPoly::demo()
0031 {
0032 dim3 numBlocks ;
0033 dim3 threadsPerBlock ;
0034 configureLaunch( numBlocks, threadsPerBlock, 1, 1 );
0035 QPoly_demo(numBlocks, threadsPerBlock);
0036
0037 cudaDeviceSynchronize();
0038 }
0039
0040 void QPoly::tmpl_demo()
0041 {
0042 dim3 numBlocks ;
0043 dim3 threadsPerBlock ;
0044 configureLaunch( numBlocks, threadsPerBlock, 1, 1 );
0045
0046 QPoly_tmpl_demo(numBlocks, threadsPerBlock);
0047
0048 cudaDeviceSynchronize();
0049 }
0050
0051
0052