Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:12:16

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008 
0009 #include "Acts/Plugins/Cuda/Cuda.hpp"
0010 #include "Acts/Plugins/Cuda/Seeding/Kernels.cuh"
0011 
0012 #include <iostream>
0013 
0014 #include <cuda.h>
0015 #include <cuda_runtime.h>
0016 
0017 __global__ void cuSearchDoublet(
0018     const int* nSpM, const float* spMmat, const int* nSpB, const float* spBmat,
0019     const int* nSpT, const float* spTmat, const float* deltaRMin,
0020     const float* deltaRMax, const float* cotThetaMax,
0021     const float* collisionRegionMin, const float* collisionRegionMax,
0022     int* nSpMcomp, int* nSpBcompPerSpM_Max, int* nSpTcompPerSpM_Max,
0023     int* nSpBcompPerSpM, int* nSpTcompPerSpM, int* McompIndex, int* BcompIndex,
0024     int* tmpBcompIndex, int* TcompIndex, int* tmpTcompIndex);
0025 
0026 __global__ void cuTransformCoordinate(
0027     const int* nSpM, const float* spMmat, const int* McompIndex,
0028     const int* nSpB, const float* spBmat, const int* nSpBcompPerSpM_Max,
0029     const int* BcompIndex, const int* nSpT, const float* spTmat,
0030     const int* nSpTcompPerSpM_Max, const int* TcompIndex, float* spMcompMat,
0031     float* spBcompMatPerSpM, float* circBcompMatPerSpM, float* spTcompMatPerSpM,
0032     float* circTcompMatPerSpM);
0033 
0034 __device__ void sp2circle(bool isBottom, const float* spM, const float* spB,
0035                           float* circB);
0036 
0037 __global__ void cuSearchTriplet(
0038     const int* nSpTcompPerSpM, const int* nSpMcomp, const float* spMcompMat,
0039     const int* nSpBcompPerSpM_Max, const int* BcompIndex,
0040     const float* circBcompMatPerSpM, const int* nSpTcompPerSpM_Max,
0041     const int* TcompIndex, const float* spTcompMatPerSpM,
0042     const float* circTcompMatPerSpM, const float* maxScatteringAngle2,
0043     const float* sigmaScattering, const float* minHelixDiameter2,
0044     const float* pT2perRadius, const float* impactMax,
0045     const int* nTrplPerSpMLimit, const int* nTrplPerSpBLimit,
0046     const float* deltaInvHelixDiameter, const float* impactWeightFactor,
0047     const float* deltaRMin, const float* compatSeedWeight,
0048     const std::size_t* compatSeedLimit, int* nTrplPerSpM,
0049     Triplet* TripletsPerSpM);
0050 
0051 namespace Acts {
0052 
0053 void searchDoublet(const dim3 grid, const dim3 block, const int* nSpM,
0054                    const float* spMmat, const int* nSpB, const float* spBmat,
0055                    const int* nSpT, const float* spTmat, const float* deltaRMin,
0056                    const float* deltaRMax, const float* cotThetaMax,
0057                    const float* collisionRegionMin,
0058                    const float* collisionRegionMax, int* nSpMcomp,
0059                    int* nSpBcompPerSpM_Max, int* nSpTcompPerSpM_Max,
0060                    int* nSpBcompPerSpM, int* nSpTcompPerSpM, int* McompIndex,
0061                    int* BcompIndex, int* tmpBcompIndex, int* TcompIndex,
0062                    int* tmpTcompIndex) {
0063   if (grid.x == 0) {
0064     return;
0065   }
0066   // int sharedMemSize = (2*sizeof(int))*block.x + 2*sizeof(int);
0067   int sharedMemSize = 2 * sizeof(int);
0068 
0069   cuSearchDoublet<<<grid, block, sharedMemSize>>>(
0070       nSpM, spMmat, nSpB, spBmat, nSpT, spTmat, deltaRMin, deltaRMax,
0071       cotThetaMax, collisionRegionMin, collisionRegionMax, nSpMcomp,
0072       nSpBcompPerSpM_Max, nSpTcompPerSpM_Max, nSpBcompPerSpM, nSpTcompPerSpM,
0073       McompIndex, BcompIndex, tmpBcompIndex, TcompIndex, tmpTcompIndex);
0074   ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0075 }
0076 
0077 void transformCoordinate(const dim3 grid, const dim3 block, const int* nSpM,
0078                          const float* spMmat, const int* McompIndex,
0079                          const int* nSpB, const float* spBmat,
0080                          const int* nSpBcompPerSpM_Max, const int* BcompIndex,
0081                          const int* nSpT, const float* spTmat,
0082                          const int* nSpTcompPerSpM_Max, const int* TcompIndex,
0083                          float* spMcompMat, float* spBcompMatPerSpM,
0084                          float* circBcompMatPerSpM, float* spTcompMatPerSpM,
0085                          float* circTcompMatPerSpM) {
0086   if (grid.x == 0) {
0087     return;
0088   }
0089   int sharedMemSize = sizeof(float) * 6;
0090   cuTransformCoordinate<<<grid, block, sharedMemSize>>>(
0091       nSpM, spMmat, McompIndex, nSpB, spBmat, nSpBcompPerSpM_Max, BcompIndex,
0092       nSpT, spTmat, nSpTcompPerSpM_Max, TcompIndex, spMcompMat,
0093       spBcompMatPerSpM, circBcompMatPerSpM, spTcompMatPerSpM,
0094       circTcompMatPerSpM);
0095   ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0096 }
0097 
0098 void searchTriplet(
0099     const dim3 grid, const dim3 block, const int* nSpTcompPerSpM_cpu,
0100     const int* nSpTcompPerSpM_cuda, const int* nSpMcomp,
0101     const float* spMcompMat, const int* nSpBcompPerSpM_Max,
0102     const int* BcompIndex, const float* circBcompMatPerSpM,
0103     const int* nSpTcompPerSpM_Max, const int* TcompIndex,
0104     const float* spTcompMatPerSpM, const float* circTcompMatPerSpM,
0105     const float* maxScatteringAngle2, const float* sigmaScattering,
0106     const float* minHelixDiameter2, const float* pT2perRadius,
0107     const float* impactMax, const int* nTrplPerSpMLimit,
0108     const int* nTrplPerSpBLimit_cpu, const int* nTrplPerSpBLimit_cuda,
0109     const float* deltaInvHelixDiameter, const float* impactWeightFactor,
0110     const float* deltaRMin, const float* compatSeedWeight,
0111     const std::size_t* compatSeedLimit_cpu,
0112     const std::size_t* compatSeedLimit_cuda, int* nTrplPerSpM,
0113     Triplet* TripletsPerSpM, cudaStream_t* stream) {
0114   if (grid.x == 0) {
0115     return;
0116   }
0117   int sharedMemSize = sizeof(Triplet) * (*nTrplPerSpBLimit_cpu);
0118   sharedMemSize += sizeof(float) * (*compatSeedLimit_cpu);
0119   sharedMemSize += sizeof(int);
0120 
0121   cuSearchTriplet<<<grid, block, sharedMemSize, *stream>>>(
0122       nSpTcompPerSpM_cuda, nSpMcomp, spMcompMat, nSpBcompPerSpM_Max, BcompIndex,
0123       circBcompMatPerSpM, nSpTcompPerSpM_Max, TcompIndex, spTcompMatPerSpM,
0124       circTcompMatPerSpM, maxScatteringAngle2, sigmaScattering,
0125       minHelixDiameter2, pT2perRadius, impactMax, nTrplPerSpMLimit,
0126       nTrplPerSpBLimit_cuda, deltaInvHelixDiameter, impactWeightFactor,
0127       deltaRMin, compatSeedWeight, compatSeedLimit_cuda, nTrplPerSpM,
0128       TripletsPerSpM);
0129   ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0130 }
0131 
0132 }  // namespace Acts
0133 
0134 __global__ void cuSearchDoublet(
0135     const int* nSpM, const float* spMmat, const int* nSpB, const float* spBmat,
0136     const int* nSpT, const float* spTmat, const float* deltaRMin,
0137     const float* deltaRMax, const float* cotThetaMax,
0138     const float* collisionRegionMin, const float* collisionRegionMax,
0139     int* nSpMcomp, int* nSpBcompPerSpM_Max, int* nSpTcompPerSpM_Max,
0140     int* nSpBcompPerSpM, int* nSpTcompPerSpM, int* McompIndex, int* BcompIndex,
0141     int* tmpBcompIndex, int* TcompIndex, int* tmpTcompIndex) {
0142   extern __shared__ float sharedMem[];
0143   int* mPos = (int*)sharedMem;
0144   int* isMcompat = (int*)&mPos[1];
0145 
0146   if (threadIdx.x == 0) {
0147     *isMcompat = false;
0148   }
0149   __syncthreads();
0150 
0151   float rM = spMmat[blockIdx.x + (*nSpM) * 3];
0152   float zM = spMmat[blockIdx.x + (*nSpM) * 2];
0153 
0154   bool isBcompat(true);
0155   bool isTcompat(true);
0156 
0157   int offset(0);
0158 
0159   while (offset < max(*nSpB, *nSpT)) {
0160     isBcompat = true;
0161 
0162     // Doublet search for bottom hits
0163     if (threadIdx.x + offset < *nSpB) {
0164       float rB = spBmat[threadIdx.x + offset + (*nSpB) * 3];
0165       float zB = spBmat[threadIdx.x + offset + (*nSpB) * 2];
0166 
0167       float deltaR = rM - rB;
0168       if (deltaR > *deltaRMax) {
0169         isBcompat = false;
0170       }
0171 
0172       if (deltaR < *deltaRMin) {
0173         isBcompat = false;
0174       }
0175 
0176       float cotTheta = (zM - zB) / deltaR;
0177       if (fabsf(cotTheta) > *cotThetaMax) {
0178         isBcompat = false;
0179       }
0180 
0181       float zOrigin = zM - rM * cotTheta;
0182       if (zOrigin < *collisionRegionMin || zOrigin > *collisionRegionMax) {
0183         isBcompat = false;
0184       }
0185 
0186       if (isBcompat == true) {
0187         int bPos = atomicAdd(&nSpBcompPerSpM[blockIdx.x], 1);
0188         tmpBcompIndex[bPos + (*nSpB) * blockIdx.x] = threadIdx.x + offset;
0189       }
0190     }
0191 
0192     isTcompat = true;
0193 
0194     // Doublet search for top hits
0195     if (threadIdx.x + offset < *nSpT) {
0196       float rT = spTmat[threadIdx.x + offset + (*nSpT) * 3];
0197       float zT = spTmat[threadIdx.x + offset + (*nSpT) * 2];
0198       float deltaR = rT - rM;
0199       if (deltaR < *deltaRMin) {
0200         isTcompat = false;
0201       }
0202 
0203       if (deltaR > *deltaRMax) {
0204         isTcompat = false;
0205       }
0206 
0207       if (isTcompat == true) {
0208         float cotTheta = (zT - zM) / deltaR;
0209         if (fabsf(cotTheta) > *cotThetaMax) {
0210           isTcompat = false;
0211         }
0212 
0213         float zOrigin = zM - rM * cotTheta;
0214         if (zOrigin < *collisionRegionMin || zOrigin > *collisionRegionMax) {
0215           isTcompat = false;
0216         }
0217       }
0218 
0219       if (isTcompat == true) {
0220         int tPos = atomicAdd(&nSpTcompPerSpM[blockIdx.x], 1);
0221         tmpTcompIndex[tPos + (*nSpT) * blockIdx.x] = threadIdx.x + offset;
0222       }
0223     }
0224 
0225     offset += blockDim.x;
0226   }
0227 
0228   __syncthreads();
0229 
0230   if (threadIdx.x == 0) {
0231     if (nSpBcompPerSpM[blockIdx.x] > 0 && nSpTcompPerSpM[blockIdx.x] > 0) {
0232       *mPos = atomicAdd(nSpMcomp, 1);
0233       *isMcompat = true;
0234       McompIndex[*mPos] = blockIdx.x;
0235 
0236       int bMax = atomicMax(nSpBcompPerSpM_Max, nSpBcompPerSpM[blockIdx.x]);
0237       int tMax = atomicMax(nSpTcompPerSpM_Max, nSpTcompPerSpM[blockIdx.x]);
0238     }
0239   }
0240 
0241   __syncthreads();
0242 
0243   if (*isMcompat == true) {
0244     offset = 0;
0245     while (offset <
0246            max(nSpBcompPerSpM[blockIdx.x], nSpTcompPerSpM[blockIdx.x])) {
0247       if (threadIdx.x + offset < nSpBcompPerSpM[blockIdx.x]) {
0248         BcompIndex[threadIdx.x + offset + (*nSpB) * (*mPos)] =
0249             tmpBcompIndex[threadIdx.x + offset + (*nSpB) * blockIdx.x];
0250       }
0251 
0252       if (threadIdx.x + offset < nSpTcompPerSpM[blockIdx.x]) {
0253         TcompIndex[threadIdx.x + offset + (*nSpT) * (*mPos)] =
0254             tmpTcompIndex[threadIdx.x + offset + (*nSpT) * blockIdx.x];
0255       }
0256       offset += blockDim.x;
0257     }
0258   }
0259 }
0260 
0261 __global__ void cuTransformCoordinate(
0262     const int* nSpM, const float* spMmat, const int* McompIndex,
0263     const int* nSpB, const float* spBmat, const int* nSpBcompPerSpM_Max,
0264     const int* BcompIndex, const int* nSpT, const float* spTmat,
0265     const int* nSpTcompPerSpM_Max, const int* TcompIndex, float* spMcompMat,
0266     float* spBcompMatPerSpM, float* circBcompMatPerSpM, float* spTcompMatPerSpM,
0267     float* circTcompMatPerSpM) {
0268   extern __shared__ float spM[];
0269 
0270   if (threadIdx.x == 0) {
0271     int mIndex = McompIndex[blockIdx.x];
0272     for (int i = 0; i < 6; i++) {
0273       spM[i] = spMcompMat[blockIdx.x + gridDim.x * i] =
0274           spMmat[mIndex + (*nSpM) * i];
0275     }
0276   }
0277 
0278   __syncthreads();
0279 
0280   int offset(0);
0281   while (offset < max(*nSpBcompPerSpM_Max, *nSpTcompPerSpM_Max)) {
0282     if (threadIdx.x + offset < *nSpBcompPerSpM_Max) {
0283       float spB[6];
0284       float circB[6];
0285       int bIndex = BcompIndex[threadIdx.x + offset + (*nSpB) * blockIdx.x];
0286 
0287       // matrix reduction
0288       for (int i = 0; i < 6; i++) {
0289         spB[i] =
0290             spBcompMatPerSpM[threadIdx.x + offset +
0291                              (*nSpBcompPerSpM_Max) * (6 * blockIdx.x + i)] =
0292                 spBmat[bIndex + (*nSpB) * i];
0293       }
0294 
0295       // do transform coordinate (xy->uv)
0296       sp2circle(true, spM, spB, circB);
0297       for (int i = 0; i < 6; i++) {
0298         circBcompMatPerSpM[threadIdx.x + offset +
0299                            (*nSpBcompPerSpM_Max) * (6 * blockIdx.x + i)] =
0300             circB[i];
0301       }
0302     }
0303 
0304     if (threadIdx.x + offset < *nSpTcompPerSpM_Max) {
0305       float spT[6];
0306       float circT[6];
0307       int tIndex = TcompIndex[threadIdx.x + offset + (*nSpT) * blockIdx.x];
0308 
0309       // matrix reduction
0310       for (int i = 0; i < 6; i++) {
0311         spT[i] =
0312             spTcompMatPerSpM[threadIdx.x + offset +
0313                              (*nSpTcompPerSpM_Max) * (6 * blockIdx.x + i)] =
0314                 spTmat[tIndex + (*nSpT) * i];
0315       }
0316 
0317       // do transform coordinate (xy->uv)
0318       sp2circle(false, spM, spT, circT);
0319       for (int i = 0; i < 6; i++) {
0320         circTcompMatPerSpM[threadIdx.x + offset +
0321                            (*nSpTcompPerSpM_Max) * (6 * blockIdx.x + i)] =
0322             circT[i];
0323       }
0324     }
0325 
0326     offset += blockDim.x;
0327   }
0328 }
0329 
0330 __device__ void sp2circle(bool isBottom, const float* spM, const float* spB,
0331                           float* circB) {
0332   float xM = spM[0];
0333   float yM = spM[1];
0334   float zM = spM[2];
0335   float rM = spM[3];
0336   float varianceRM = spM[4];
0337   float varianceZM = spM[5];
0338 
0339   float cosPhiM = xM / rM;
0340   float sinPhiM = yM / rM;
0341 
0342   float xB = spB[0];
0343   float yB = spB[1];
0344   float zB = spB[2];
0345   // float rB = spB[3];
0346   float varianceRB = spB[4];
0347   float varianceZB = spB[5];
0348 
0349   float deltaX = xB - xM;
0350   float deltaY = yB - yM;
0351   float deltaZ = zB - zM;
0352 
0353   // calculate projection fraction of spM->sp vector pointing in same
0354   // direction as
0355   // vector origin->spM (x) and projection fraction of spM->sp vector pointing
0356   // orthogonal to origin->spM (y)
0357   float x = deltaX * cosPhiM + deltaY * sinPhiM;
0358   float y = deltaY * cosPhiM - deltaX * sinPhiM;
0359   // 1/(length of M -> SP)
0360   float iDeltaR2 = 1. / (deltaX * deltaX + deltaY * deltaY);
0361   float iDeltaR = sqrtf(iDeltaR2);
0362 
0363   int bottomFactor = 1 * (int(!(isBottom))) - 1 * (int(isBottom));
0364 
0365   // cot_theta = (deltaZ/deltaR)
0366   float cot_theta = deltaZ * iDeltaR * bottomFactor;
0367   // VERY frequent (SP^3) access
0368 
0369   // location on z-axis of this SP-duplet
0370   float Zo = zM - rM * cot_theta;
0371 
0372   // transformation of circle equation (x,y) into linear equation (u,v)
0373   // x^2 + y^2 - 2x_0*x - 2y_0*y = 0
0374   // is transformed into
0375   // 1 - 2x_0*u - 2y_0*v = 0
0376   // using the following m_U and m_V
0377   // (u = A + B*v); A and B are created later on
0378   float U = x * iDeltaR2;
0379   float V = y * iDeltaR2;
0380   // error term for sp-pair without correlation of middle space point
0381   float Er = ((varianceZM + varianceZB) +
0382               (cot_theta * cot_theta) * (varianceRM + varianceRB)) *
0383              iDeltaR2;
0384 
0385   circB[0] = Zo;
0386   circB[1] = cot_theta;
0387   circB[2] = iDeltaR;
0388   circB[3] = Er;
0389   circB[4] = U;
0390   circB[5] = V;
0391 }
0392 
0393 __global__ void cuSearchTriplet(
0394     const int* nSpTcompPerSpM, const int* nSpMcomp, const float* spMcompMat,
0395     const int* nSpBcompPerSpM_Max, const int* BcompIndex,
0396     const float* circBcompMatPerSpM, const int* nSpTcompPerSpM_Max,
0397     const int* TcompIndex, const float* spTcompMatPerSpM,
0398     const float* circTcompMatPerSpM, const float* maxScatteringAngle2,
0399     const float* sigmaScattering, const float* minHelixDiameter2,
0400     const float* pT2perRadius, const float* impactMax,
0401     const int* nTrplPerSpMLimit, const int* nTrplPerSpBLimit,
0402     const float* deltaInvHelixDiameter, const float* impactWeightFactor,
0403     const float* deltaRMin, const float* compatSeedWeight,
0404     const std::size_t* compatSeedLimit, int* nTrplPerSpM,
0405     Triplet* TripletsPerSpM) {
0406   extern __shared__ Triplet sh[];
0407   Triplet* triplets = (Triplet*)sh;
0408   float* compatibleSeedR = (float*)&triplets[*nTrplPerSpBLimit];
0409   int* nTrplPerSpB = (int*)&compatibleSeedR[*compatSeedLimit];
0410 
0411   if (threadIdx.x == 0) {
0412     *nTrplPerSpB = 0;
0413   }
0414 
0415   float rM = spMcompMat[(*nSpMcomp) * 3];
0416   float varianceRM = spMcompMat[(*nSpMcomp) * 4];
0417   float varianceZM = spMcompMat[(*nSpMcomp) * 5];
0418   // Zob values from CPU and CUDA are slightly different
0419   // float Zob        = circBcompMatPerSpM[blockId+(*nSpBcompPerSpM_Max)*0];
0420   float cotThetaB = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 1];
0421   float iDeltaRB = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 2];
0422   float ErB = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 3];
0423   float Ub = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 4];
0424   float Vb = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 5];
0425   float iSinTheta2 = (1. + cotThetaB * cotThetaB);
0426   float scatteringInRegion2 = (*maxScatteringAngle2) * iSinTheta2;
0427   scatteringInRegion2 *= (*sigmaScattering) * (*sigmaScattering);
0428 
0429   int offset(0);
0430 
0431   while (offset < *nSpTcompPerSpM) {
0432     bool isPassed(1);
0433     float impact;
0434     float invHelix;
0435     if (threadIdx.x + offset < *nSpTcompPerSpM) {
0436       // float Zot        = circTcompMatPerSpM[threadId+(*nSpTcompPerSpM)*0];
0437       float cotThetaT =
0438           circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 1];
0439       float iDeltaRT =
0440           circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 2];
0441       float ErT =
0442           circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 3];
0443       float Ut =
0444           circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 4];
0445       float Vt =
0446           circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 5];
0447 
0448       // add errors of spB-spM and spM-spT pairs and add the correlation term
0449       // for errors on spM
0450       float error2 = ErT + ErB +
0451                      2 * (cotThetaB * cotThetaT * varianceRM + varianceZM) *
0452                          iDeltaRB * iDeltaRT;
0453 
0454       float deltaCotTheta = cotThetaB - cotThetaT;
0455       float deltaCotTheta2 = deltaCotTheta * deltaCotTheta;
0456       float error;
0457       float dCotThetaMinusError2;
0458 
0459       // if the error is larger than the difference in theta, no need to
0460       // compare with scattering
0461       if (deltaCotTheta2 - error2 > 0) {
0462         deltaCotTheta = fabsf(deltaCotTheta);
0463         // if deltaTheta larger than the scattering for the lower pT cut, skip
0464         error = sqrtf(error2);
0465         dCotThetaMinusError2 =
0466             deltaCotTheta2 + error2 - 2 * deltaCotTheta * error;
0467         // avoid taking root of scatteringInRegion
0468         // if left side of ">" is positive, both sides of inequality can be
0469         // squared
0470         // (scattering is always positive)
0471 
0472         if (dCotThetaMinusError2 > scatteringInRegion2) {
0473           isPassed = 0;
0474         }
0475       }
0476 
0477       // protects against division by 0
0478       float dU = Ut - Ub;
0479       if (dU == 0.) {
0480         isPassed = 0;
0481       }
0482 
0483       // A and B are evaluated as a function of the circumference parameters
0484       // x_0 and y_0
0485       float A = (Vt - Vb) / dU;
0486       float S2 = 1. + A * A;
0487       float B = Vb - A * Ub;
0488       float B2 = B * B;
0489       // sqrtf(S2)/B = 2 * helixradius
0490       // calculated radius must not be smaller than minimum radius
0491       if (S2 < B2 * (*minHelixDiameter2)) {
0492         isPassed = 0;
0493       }
0494 
0495       // 1/helixradius: (B/sqrtf(S2))/2 (we leave everything squared)
0496       float iHelixDiameter2 = B2 / S2;
0497       // calculate scattering for p(T) calculated from seed curvature
0498       float pT2scatter = 4 * iHelixDiameter2 * (*pT2perRadius);
0499       // TODO: include upper pT limit for scatter calc
0500       // convert p(T) to p scaling by sin^2(theta) AND scale by 1/sin^4(theta)
0501       // from rad to deltaCotTheta
0502       float p2scatter = pT2scatter * iSinTheta2;
0503       // if deltaTheta larger than allowed scattering for calculated pT, skip
0504       if ((deltaCotTheta2 - error2 > 0) &&
0505           (dCotThetaMinusError2 >
0506            p2scatter * (*sigmaScattering) * (*sigmaScattering))) {
0507         isPassed = 0;
0508       }
0509       // A and B allow calculation of impact params in U/V plane with linear
0510       // function
0511       // (in contrast to having to solve a quadratic function in x/y plane)
0512       impact = fabsf((A - B * rM) * rM);
0513       invHelix = B / sqrtf(S2);
0514       if (impact > (*impactMax)) {
0515         isPassed = 0;
0516       }
0517     }
0518 
0519     __syncthreads();
0520 
0521     if (threadIdx.x + offset < *nSpTcompPerSpM) {
0522       // The index will be different (and not deterministic) because of atomic
0523       // operation It will be resorted after kernel call
0524       if (isPassed == 1) {
0525         int tPos = atomicAdd(nTrplPerSpB, 1);
0526 
0527         if (tPos < *nTrplPerSpBLimit) {
0528           triplets[tPos].weight = 0;
0529           triplets[tPos].bIndex = BcompIndex[blockIdx.x];
0530           triplets[tPos].tIndex = TcompIndex[threadIdx.x + offset];
0531           triplets[tPos].topRadius =
0532               spTcompMatPerSpM[threadIdx.x + offset +
0533                                (*nSpTcompPerSpM_Max) * 3];
0534           triplets[tPos].impactParameter = impact;
0535           triplets[tPos].invHelixDiameter = invHelix;
0536         }
0537       }
0538     }
0539     offset += blockDim.x;
0540   }
0541   __syncthreads();
0542 
0543   if (threadIdx.x == 0 && *nTrplPerSpB > *nTrplPerSpBLimit) {
0544     *nTrplPerSpB = *nTrplPerSpBLimit;
0545   }
0546 
0547   __syncthreads();
0548   int jj = threadIdx.x;
0549 
0550   // bubble sort tIndex
0551   for (int i = 0; i < *nTrplPerSpB / 2 + 1; i++) {
0552     if (threadIdx.x < *nTrplPerSpB) {
0553       if (jj % 2 == 0 && jj < *nTrplPerSpB - 1) {
0554         if (triplets[jj + 1].tIndex < triplets[jj].tIndex) {
0555           Triplet tempVal = triplets[jj];
0556           triplets[jj] = triplets[jj + 1];
0557           triplets[jj + 1] = tempVal;
0558         }
0559       }
0560     }
0561     __syncthreads();
0562     if (threadIdx.x < *nTrplPerSpB) {
0563       if (jj % 2 == 1 && jj < *nTrplPerSpB - 1) {
0564         if (triplets[jj + 1].tIndex < triplets[jj].tIndex) {
0565           Triplet tempVal = triplets[jj];
0566           triplets[jj] = triplets[jj + 1];
0567           triplets[jj + 1] = tempVal;
0568         }
0569       }
0570     }
0571     __syncthreads();
0572   }
0573   __syncthreads();
0574 
0575   // serial algorithm for seed filtering
0576   // Need to optimize later
0577   if (threadIdx.x == 0) {
0578     int nCompatibleSeedR;
0579     float lowerLimitCurv;
0580     float upperLimitCurv;
0581     float deltaR;
0582     bool newCompSeed;
0583 
0584     for (int i = 0; i < *nTrplPerSpB; i++) {
0585       nCompatibleSeedR = 0;
0586       lowerLimitCurv = triplets[i].invHelixDiameter - *deltaInvHelixDiameter;
0587       upperLimitCurv = triplets[i].invHelixDiameter + *deltaInvHelixDiameter;
0588       float& currentTop_r = triplets[i].topRadius;
0589       float& weight = triplets[i].weight;
0590       weight = -(triplets[i].impactParameter * (*impactWeightFactor));
0591 
0592       for (int j = 0; j < *nTrplPerSpB; j++) {
0593         if (i == j) {
0594           continue;
0595         }
0596         float& otherTop_r = triplets[j].topRadius;
0597         deltaR = currentTop_r - otherTop_r;
0598 
0599         if (fabsf(deltaR) < *deltaRMin) {
0600           continue;
0601         }
0602         // curvature difference within limits?
0603         // TODO: how much slower than sorting all vectors by curvature
0604         // and breaking out of loop? i.e. is vector size large (e.g. in jets?)
0605         if (triplets[j].invHelixDiameter < lowerLimitCurv) {
0606           continue;
0607         }
0608         if (triplets[j].invHelixDiameter > upperLimitCurv) {
0609           continue;
0610         }
0611         newCompSeed = true;
0612 
0613         for (int k = 0; k < nCompatibleSeedR; k++) {
0614           // original ATLAS code uses higher min distance for 2nd found
0615           // compatible seed (20mm instead of 5mm) add new compatible seed only
0616           // if distance larger than rmin to all other compatible seeds
0617           float& previousDiameter = compatibleSeedR[k];
0618           if (fabsf(previousDiameter - otherTop_r) < *deltaRMin) {
0619             newCompSeed = false;
0620             break;
0621           }
0622         }
0623 
0624         if (newCompSeed) {
0625           compatibleSeedR[nCompatibleSeedR] = otherTop_r;
0626           nCompatibleSeedR++;
0627           weight += *compatSeedWeight;
0628         }
0629         if (nCompatibleSeedR >= *compatSeedLimit) {
0630           break;
0631         }
0632       }
0633 
0634       int pos = atomicAdd(nTrplPerSpM, 1);
0635 
0636       if (pos < *nTrplPerSpMLimit) {
0637         TripletsPerSpM[pos] = triplets[i];
0638       }
0639     }
0640   }
0641 
0642   if (threadIdx.x == 0 && *nTrplPerSpM > *nTrplPerSpMLimit) {
0643     *nTrplPerSpM = *nTrplPerSpMLimit;
0644   }
0645 }