Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:13:08

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008 
0009 #include <boost/test/unit_test.hpp>
0010 
0011 #include "Acts/Plugins/Cuda/Cuda.hpp"
0012 
0013 #include <Eigen/Dense>
0014 #include <cuda_profiler_api.h>
0015 
0016 namespace Acts::Test {
0017 
0018 template <typename AFloat, int row, int col>
0019 __global__ void MatrixLoadStore1(const Eigen::Matrix<AFloat, row, col>* input,
0020                                  Eigen::Matrix<AFloat, row, col>* output) {
0021   int globalId = blockIdx.x * blockDim.x + threadIdx.x;
0022   output[globalId] = input[globalId];
0023 }
0024 
0025 template <typename AFloat, int row, int col>
0026 __global__ void MatrixLoadStore2(const Eigen::Matrix<AFloat, row, col>* input,
0027                                  Eigen::Matrix<AFloat, row, col>* output) {
0028   for (int i = 0; i < col; i++) {
0029     output[blockIdx.x](threadIdx.x, i) = input[blockIdx.x](threadIdx.x, i);
0030   }
0031 }
0032 
0033 BOOST_AUTO_TEST_SUITE(Utilities)
0034 BOOST_AUTO_TEST_CASE(CUDAOBJ_TEST) {
0035   // Profiler
0036   cudaProfilerStart();
0037 
0038   using AFloat = float;
0039 
0040   const int vecDim = 16;  // Vector imension
0041   const int nVec = 128;   // Number of vectors
0042 
0043   dim3 gridSize;
0044   dim3 blockSize;
0045   int bufSize;
0046 
0047   // Case 1) For misaligned memory access
0048   // global memory load/store efficiency is 4Bytes(float size)/32Bytes(cache
0049   // line) ~ 12.5%
0050   gridSize = dim3(1, 1, 1);
0051   blockSize = dim3(nVec, 1, 1);
0052   bufSize = gridSize.x * blockSize.x;
0053   Eigen::Matrix<AFloat, vecDim, 1> inMat1_cpu[bufSize];
0054   for (int i = 0; i < bufSize; i++) {
0055     inMat1_cpu[i] = Eigen::Matrix<AFloat, vecDim, 1>::Random();
0056   }
0057 
0058   CudaVector<Eigen::Matrix<AFloat, vecDim, 1>> inMat1_cuda(bufSize, inMat1_cpu,
0059                                                            bufSize, 0);
0060   CudaVector<Eigen::Matrix<AFloat, vecDim, 1>> outMat1_cuda(bufSize);
0061 
0062   MatrixLoadStore1<AFloat, vecDim, 1>
0063       <<<gridSize, blockSize>>>(inMat1_cuda.get(), outMat1_cuda.get());
0064   ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0065 
0066   CpuVector<Eigen::Matrix<AFloat, vecDim, 1>> outMat1_cpu(bufSize,
0067                                                           &outMat1_cuda);
0068 
0069   // Case 2) For aligned memory access
0070   // global memory load/store efficiency ~ 100%
0071   gridSize = dim3(1, 1, 1);
0072   blockSize = dim3(vecDim, 1, 1);
0073   bufSize = gridSize.x * blockSize.x;
0074 
0075   Eigen::Matrix<AFloat, vecDim, nVec> inMat2_cpu[bufSize];
0076   for (int i = 0; i < bufSize; i++) {
0077     inMat2_cpu[i] = Eigen::Matrix<AFloat, vecDim, nVec>::Random();
0078   }
0079 
0080   CudaVector<Eigen::Matrix<AFloat, vecDim, nVec>> inMat2_cuda(
0081       bufSize, inMat2_cpu, bufSize, 0);
0082   CudaVector<Eigen::Matrix<AFloat, vecDim, nVec>> outMat2_cuda(bufSize);
0083 
0084   MatrixLoadStore2<AFloat, vecDim, nVec>
0085       <<<gridSize, blockSize>>>(inMat2_cuda.get(), outMat2_cuda.get());
0086 
0087   ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0088 
0089   CpuVector<Eigen::Matrix<AFloat, vecDim, nVec>> outMat2_cpu(bufSize,
0090                                                              &outMat2_cuda);
0091 
0092   cudaProfilerStop();
0093 
0094   for (int i = 0; i < nVec; i++) {
0095     BOOST_REQUIRE_EQUAL(inMat1_cpu[i], *outMat1_cpu.get(i));
0096   }
0097   BOOST_REQUIRE_EQUAL(inMat2_cpu[0], *outMat2_cpu.get(0));
0098 }
0099 BOOST_AUTO_TEST_SUITE_END()
0100 
0101 }  // namespace Acts::Test