File indexing completed on 2025-01-18 09:12:16
0001
0002
0003
0004
0005
0006
0007
0008
0009 #include "Acts/Plugins/Cuda/Cuda.hpp"
0010 #include "Acts/Plugins/Cuda/Seeding/Kernels.cuh"
0011
0012 #include <iostream>
0013
0014 #include <cuda.h>
0015 #include <cuda_runtime.h>
0016
0017 __global__ void cuSearchDoublet(
0018 const int* nSpM, const float* spMmat, const int* nSpB, const float* spBmat,
0019 const int* nSpT, const float* spTmat, const float* deltaRMin,
0020 const float* deltaRMax, const float* cotThetaMax,
0021 const float* collisionRegionMin, const float* collisionRegionMax,
0022 int* nSpMcomp, int* nSpBcompPerSpM_Max, int* nSpTcompPerSpM_Max,
0023 int* nSpBcompPerSpM, int* nSpTcompPerSpM, int* McompIndex, int* BcompIndex,
0024 int* tmpBcompIndex, int* TcompIndex, int* tmpTcompIndex);
0025
0026 __global__ void cuTransformCoordinate(
0027 const int* nSpM, const float* spMmat, const int* McompIndex,
0028 const int* nSpB, const float* spBmat, const int* nSpBcompPerSpM_Max,
0029 const int* BcompIndex, const int* nSpT, const float* spTmat,
0030 const int* nSpTcompPerSpM_Max, const int* TcompIndex, float* spMcompMat,
0031 float* spBcompMatPerSpM, float* circBcompMatPerSpM, float* spTcompMatPerSpM,
0032 float* circTcompMatPerSpM);
0033
0034 __device__ void sp2circle(bool isBottom, const float* spM, const float* spB,
0035 float* circB);
0036
0037 __global__ void cuSearchTriplet(
0038 const int* nSpTcompPerSpM, const int* nSpMcomp, const float* spMcompMat,
0039 const int* nSpBcompPerSpM_Max, const int* BcompIndex,
0040 const float* circBcompMatPerSpM, const int* nSpTcompPerSpM_Max,
0041 const int* TcompIndex, const float* spTcompMatPerSpM,
0042 const float* circTcompMatPerSpM, const float* maxScatteringAngle2,
0043 const float* sigmaScattering, const float* minHelixDiameter2,
0044 const float* pT2perRadius, const float* impactMax,
0045 const int* nTrplPerSpMLimit, const int* nTrplPerSpBLimit,
0046 const float* deltaInvHelixDiameter, const float* impactWeightFactor,
0047 const float* deltaRMin, const float* compatSeedWeight,
0048 const std::size_t* compatSeedLimit, int* nTrplPerSpM,
0049 Triplet* TripletsPerSpM);
0050
0051 namespace Acts {
0052
0053 void searchDoublet(const dim3 grid, const dim3 block, const int* nSpM,
0054 const float* spMmat, const int* nSpB, const float* spBmat,
0055 const int* nSpT, const float* spTmat, const float* deltaRMin,
0056 const float* deltaRMax, const float* cotThetaMax,
0057 const float* collisionRegionMin,
0058 const float* collisionRegionMax, int* nSpMcomp,
0059 int* nSpBcompPerSpM_Max, int* nSpTcompPerSpM_Max,
0060 int* nSpBcompPerSpM, int* nSpTcompPerSpM, int* McompIndex,
0061 int* BcompIndex, int* tmpBcompIndex, int* TcompIndex,
0062 int* tmpTcompIndex) {
0063 if (grid.x == 0) {
0064 return;
0065 }
0066
0067 int sharedMemSize = 2 * sizeof(int);
0068
0069 cuSearchDoublet<<<grid, block, sharedMemSize>>>(
0070 nSpM, spMmat, nSpB, spBmat, nSpT, spTmat, deltaRMin, deltaRMax,
0071 cotThetaMax, collisionRegionMin, collisionRegionMax, nSpMcomp,
0072 nSpBcompPerSpM_Max, nSpTcompPerSpM_Max, nSpBcompPerSpM, nSpTcompPerSpM,
0073 McompIndex, BcompIndex, tmpBcompIndex, TcompIndex, tmpTcompIndex);
0074 ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0075 }
0076
0077 void transformCoordinate(const dim3 grid, const dim3 block, const int* nSpM,
0078 const float* spMmat, const int* McompIndex,
0079 const int* nSpB, const float* spBmat,
0080 const int* nSpBcompPerSpM_Max, const int* BcompIndex,
0081 const int* nSpT, const float* spTmat,
0082 const int* nSpTcompPerSpM_Max, const int* TcompIndex,
0083 float* spMcompMat, float* spBcompMatPerSpM,
0084 float* circBcompMatPerSpM, float* spTcompMatPerSpM,
0085 float* circTcompMatPerSpM) {
0086 if (grid.x == 0) {
0087 return;
0088 }
0089 int sharedMemSize = sizeof(float) * 6;
0090 cuTransformCoordinate<<<grid, block, sharedMemSize>>>(
0091 nSpM, spMmat, McompIndex, nSpB, spBmat, nSpBcompPerSpM_Max, BcompIndex,
0092 nSpT, spTmat, nSpTcompPerSpM_Max, TcompIndex, spMcompMat,
0093 spBcompMatPerSpM, circBcompMatPerSpM, spTcompMatPerSpM,
0094 circTcompMatPerSpM);
0095 ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0096 }
0097
0098 void searchTriplet(
0099 const dim3 grid, const dim3 block, const int* nSpTcompPerSpM_cpu,
0100 const int* nSpTcompPerSpM_cuda, const int* nSpMcomp,
0101 const float* spMcompMat, const int* nSpBcompPerSpM_Max,
0102 const int* BcompIndex, const float* circBcompMatPerSpM,
0103 const int* nSpTcompPerSpM_Max, const int* TcompIndex,
0104 const float* spTcompMatPerSpM, const float* circTcompMatPerSpM,
0105 const float* maxScatteringAngle2, const float* sigmaScattering,
0106 const float* minHelixDiameter2, const float* pT2perRadius,
0107 const float* impactMax, const int* nTrplPerSpMLimit,
0108 const int* nTrplPerSpBLimit_cpu, const int* nTrplPerSpBLimit_cuda,
0109 const float* deltaInvHelixDiameter, const float* impactWeightFactor,
0110 const float* deltaRMin, const float* compatSeedWeight,
0111 const std::size_t* compatSeedLimit_cpu,
0112 const std::size_t* compatSeedLimit_cuda, int* nTrplPerSpM,
0113 Triplet* TripletsPerSpM, cudaStream_t* stream) {
0114 if (grid.x == 0) {
0115 return;
0116 }
0117 int sharedMemSize = sizeof(Triplet) * (*nTrplPerSpBLimit_cpu);
0118 sharedMemSize += sizeof(float) * (*compatSeedLimit_cpu);
0119 sharedMemSize += sizeof(int);
0120
0121 cuSearchTriplet<<<grid, block, sharedMemSize, *stream>>>(
0122 nSpTcompPerSpM_cuda, nSpMcomp, spMcompMat, nSpBcompPerSpM_Max, BcompIndex,
0123 circBcompMatPerSpM, nSpTcompPerSpM_Max, TcompIndex, spTcompMatPerSpM,
0124 circTcompMatPerSpM, maxScatteringAngle2, sigmaScattering,
0125 minHelixDiameter2, pT2perRadius, impactMax, nTrplPerSpMLimit,
0126 nTrplPerSpBLimit_cuda, deltaInvHelixDiameter, impactWeightFactor,
0127 deltaRMin, compatSeedWeight, compatSeedLimit_cuda, nTrplPerSpM,
0128 TripletsPerSpM);
0129 ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0130 }
0131
0132 }
0133
0134 __global__ void cuSearchDoublet(
0135 const int* nSpM, const float* spMmat, const int* nSpB, const float* spBmat,
0136 const int* nSpT, const float* spTmat, const float* deltaRMin,
0137 const float* deltaRMax, const float* cotThetaMax,
0138 const float* collisionRegionMin, const float* collisionRegionMax,
0139 int* nSpMcomp, int* nSpBcompPerSpM_Max, int* nSpTcompPerSpM_Max,
0140 int* nSpBcompPerSpM, int* nSpTcompPerSpM, int* McompIndex, int* BcompIndex,
0141 int* tmpBcompIndex, int* TcompIndex, int* tmpTcompIndex) {
0142 extern __shared__ float sharedMem[];
0143 int* mPos = (int*)sharedMem;
0144 int* isMcompat = (int*)&mPos[1];
0145
0146 if (threadIdx.x == 0) {
0147 *isMcompat = false;
0148 }
0149 __syncthreads();
0150
0151 float rM = spMmat[blockIdx.x + (*nSpM) * 3];
0152 float zM = spMmat[blockIdx.x + (*nSpM) * 2];
0153
0154 bool isBcompat(true);
0155 bool isTcompat(true);
0156
0157 int offset(0);
0158
0159 while (offset < max(*nSpB, *nSpT)) {
0160 isBcompat = true;
0161
0162
0163 if (threadIdx.x + offset < *nSpB) {
0164 float rB = spBmat[threadIdx.x + offset + (*nSpB) * 3];
0165 float zB = spBmat[threadIdx.x + offset + (*nSpB) * 2];
0166
0167 float deltaR = rM - rB;
0168 if (deltaR > *deltaRMax) {
0169 isBcompat = false;
0170 }
0171
0172 if (deltaR < *deltaRMin) {
0173 isBcompat = false;
0174 }
0175
0176 float cotTheta = (zM - zB) / deltaR;
0177 if (fabsf(cotTheta) > *cotThetaMax) {
0178 isBcompat = false;
0179 }
0180
0181 float zOrigin = zM - rM * cotTheta;
0182 if (zOrigin < *collisionRegionMin || zOrigin > *collisionRegionMax) {
0183 isBcompat = false;
0184 }
0185
0186 if (isBcompat == true) {
0187 int bPos = atomicAdd(&nSpBcompPerSpM[blockIdx.x], 1);
0188 tmpBcompIndex[bPos + (*nSpB) * blockIdx.x] = threadIdx.x + offset;
0189 }
0190 }
0191
0192 isTcompat = true;
0193
0194
0195 if (threadIdx.x + offset < *nSpT) {
0196 float rT = spTmat[threadIdx.x + offset + (*nSpT) * 3];
0197 float zT = spTmat[threadIdx.x + offset + (*nSpT) * 2];
0198 float deltaR = rT - rM;
0199 if (deltaR < *deltaRMin) {
0200 isTcompat = false;
0201 }
0202
0203 if (deltaR > *deltaRMax) {
0204 isTcompat = false;
0205 }
0206
0207 if (isTcompat == true) {
0208 float cotTheta = (zT - zM) / deltaR;
0209 if (fabsf(cotTheta) > *cotThetaMax) {
0210 isTcompat = false;
0211 }
0212
0213 float zOrigin = zM - rM * cotTheta;
0214 if (zOrigin < *collisionRegionMin || zOrigin > *collisionRegionMax) {
0215 isTcompat = false;
0216 }
0217 }
0218
0219 if (isTcompat == true) {
0220 int tPos = atomicAdd(&nSpTcompPerSpM[blockIdx.x], 1);
0221 tmpTcompIndex[tPos + (*nSpT) * blockIdx.x] = threadIdx.x + offset;
0222 }
0223 }
0224
0225 offset += blockDim.x;
0226 }
0227
0228 __syncthreads();
0229
0230 if (threadIdx.x == 0) {
0231 if (nSpBcompPerSpM[blockIdx.x] > 0 && nSpTcompPerSpM[blockIdx.x] > 0) {
0232 *mPos = atomicAdd(nSpMcomp, 1);
0233 *isMcompat = true;
0234 McompIndex[*mPos] = blockIdx.x;
0235
0236 int bMax = atomicMax(nSpBcompPerSpM_Max, nSpBcompPerSpM[blockIdx.x]);
0237 int tMax = atomicMax(nSpTcompPerSpM_Max, nSpTcompPerSpM[blockIdx.x]);
0238 }
0239 }
0240
0241 __syncthreads();
0242
0243 if (*isMcompat == true) {
0244 offset = 0;
0245 while (offset <
0246 max(nSpBcompPerSpM[blockIdx.x], nSpTcompPerSpM[blockIdx.x])) {
0247 if (threadIdx.x + offset < nSpBcompPerSpM[blockIdx.x]) {
0248 BcompIndex[threadIdx.x + offset + (*nSpB) * (*mPos)] =
0249 tmpBcompIndex[threadIdx.x + offset + (*nSpB) * blockIdx.x];
0250 }
0251
0252 if (threadIdx.x + offset < nSpTcompPerSpM[blockIdx.x]) {
0253 TcompIndex[threadIdx.x + offset + (*nSpT) * (*mPos)] =
0254 tmpTcompIndex[threadIdx.x + offset + (*nSpT) * blockIdx.x];
0255 }
0256 offset += blockDim.x;
0257 }
0258 }
0259 }
0260
0261 __global__ void cuTransformCoordinate(
0262 const int* nSpM, const float* spMmat, const int* McompIndex,
0263 const int* nSpB, const float* spBmat, const int* nSpBcompPerSpM_Max,
0264 const int* BcompIndex, const int* nSpT, const float* spTmat,
0265 const int* nSpTcompPerSpM_Max, const int* TcompIndex, float* spMcompMat,
0266 float* spBcompMatPerSpM, float* circBcompMatPerSpM, float* spTcompMatPerSpM,
0267 float* circTcompMatPerSpM) {
0268 extern __shared__ float spM[];
0269
0270 if (threadIdx.x == 0) {
0271 int mIndex = McompIndex[blockIdx.x];
0272 for (int i = 0; i < 6; i++) {
0273 spM[i] = spMcompMat[blockIdx.x + gridDim.x * i] =
0274 spMmat[mIndex + (*nSpM) * i];
0275 }
0276 }
0277
0278 __syncthreads();
0279
0280 int offset(0);
0281 while (offset < max(*nSpBcompPerSpM_Max, *nSpTcompPerSpM_Max)) {
0282 if (threadIdx.x + offset < *nSpBcompPerSpM_Max) {
0283 float spB[6];
0284 float circB[6];
0285 int bIndex = BcompIndex[threadIdx.x + offset + (*nSpB) * blockIdx.x];
0286
0287
0288 for (int i = 0; i < 6; i++) {
0289 spB[i] =
0290 spBcompMatPerSpM[threadIdx.x + offset +
0291 (*nSpBcompPerSpM_Max) * (6 * blockIdx.x + i)] =
0292 spBmat[bIndex + (*nSpB) * i];
0293 }
0294
0295
0296 sp2circle(true, spM, spB, circB);
0297 for (int i = 0; i < 6; i++) {
0298 circBcompMatPerSpM[threadIdx.x + offset +
0299 (*nSpBcompPerSpM_Max) * (6 * blockIdx.x + i)] =
0300 circB[i];
0301 }
0302 }
0303
0304 if (threadIdx.x + offset < *nSpTcompPerSpM_Max) {
0305 float spT[6];
0306 float circT[6];
0307 int tIndex = TcompIndex[threadIdx.x + offset + (*nSpT) * blockIdx.x];
0308
0309
0310 for (int i = 0; i < 6; i++) {
0311 spT[i] =
0312 spTcompMatPerSpM[threadIdx.x + offset +
0313 (*nSpTcompPerSpM_Max) * (6 * blockIdx.x + i)] =
0314 spTmat[tIndex + (*nSpT) * i];
0315 }
0316
0317
0318 sp2circle(false, spM, spT, circT);
0319 for (int i = 0; i < 6; i++) {
0320 circTcompMatPerSpM[threadIdx.x + offset +
0321 (*nSpTcompPerSpM_Max) * (6 * blockIdx.x + i)] =
0322 circT[i];
0323 }
0324 }
0325
0326 offset += blockDim.x;
0327 }
0328 }
0329
0330 __device__ void sp2circle(bool isBottom, const float* spM, const float* spB,
0331 float* circB) {
0332 float xM = spM[0];
0333 float yM = spM[1];
0334 float zM = spM[2];
0335 float rM = spM[3];
0336 float varianceRM = spM[4];
0337 float varianceZM = spM[5];
0338
0339 float cosPhiM = xM / rM;
0340 float sinPhiM = yM / rM;
0341
0342 float xB = spB[0];
0343 float yB = spB[1];
0344 float zB = spB[2];
0345
0346 float varianceRB = spB[4];
0347 float varianceZB = spB[5];
0348
0349 float deltaX = xB - xM;
0350 float deltaY = yB - yM;
0351 float deltaZ = zB - zM;
0352
0353
0354
0355
0356
0357 float x = deltaX * cosPhiM + deltaY * sinPhiM;
0358 float y = deltaY * cosPhiM - deltaX * sinPhiM;
0359
0360 float iDeltaR2 = 1. / (deltaX * deltaX + deltaY * deltaY);
0361 float iDeltaR = sqrtf(iDeltaR2);
0362
0363 int bottomFactor = 1 * (int(!(isBottom))) - 1 * (int(isBottom));
0364
0365
0366 float cot_theta = deltaZ * iDeltaR * bottomFactor;
0367
0368
0369
0370 float Zo = zM - rM * cot_theta;
0371
0372
0373
0374
0375
0376
0377
0378 float U = x * iDeltaR2;
0379 float V = y * iDeltaR2;
0380
0381 float Er = ((varianceZM + varianceZB) +
0382 (cot_theta * cot_theta) * (varianceRM + varianceRB)) *
0383 iDeltaR2;
0384
0385 circB[0] = Zo;
0386 circB[1] = cot_theta;
0387 circB[2] = iDeltaR;
0388 circB[3] = Er;
0389 circB[4] = U;
0390 circB[5] = V;
0391 }
0392
0393 __global__ void cuSearchTriplet(
0394 const int* nSpTcompPerSpM, const int* nSpMcomp, const float* spMcompMat,
0395 const int* nSpBcompPerSpM_Max, const int* BcompIndex,
0396 const float* circBcompMatPerSpM, const int* nSpTcompPerSpM_Max,
0397 const int* TcompIndex, const float* spTcompMatPerSpM,
0398 const float* circTcompMatPerSpM, const float* maxScatteringAngle2,
0399 const float* sigmaScattering, const float* minHelixDiameter2,
0400 const float* pT2perRadius, const float* impactMax,
0401 const int* nTrplPerSpMLimit, const int* nTrplPerSpBLimit,
0402 const float* deltaInvHelixDiameter, const float* impactWeightFactor,
0403 const float* deltaRMin, const float* compatSeedWeight,
0404 const std::size_t* compatSeedLimit, int* nTrplPerSpM,
0405 Triplet* TripletsPerSpM) {
0406 extern __shared__ Triplet sh[];
0407 Triplet* triplets = (Triplet*)sh;
0408 float* compatibleSeedR = (float*)&triplets[*nTrplPerSpBLimit];
0409 int* nTrplPerSpB = (int*)&compatibleSeedR[*compatSeedLimit];
0410
0411 if (threadIdx.x == 0) {
0412 *nTrplPerSpB = 0;
0413 }
0414
0415 float rM = spMcompMat[(*nSpMcomp) * 3];
0416 float varianceRM = spMcompMat[(*nSpMcomp) * 4];
0417 float varianceZM = spMcompMat[(*nSpMcomp) * 5];
0418
0419
0420 float cotThetaB = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 1];
0421 float iDeltaRB = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 2];
0422 float ErB = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 3];
0423 float Ub = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 4];
0424 float Vb = circBcompMatPerSpM[blockIdx.x + (*nSpBcompPerSpM_Max) * 5];
0425 float iSinTheta2 = (1. + cotThetaB * cotThetaB);
0426 float scatteringInRegion2 = (*maxScatteringAngle2) * iSinTheta2;
0427 scatteringInRegion2 *= (*sigmaScattering) * (*sigmaScattering);
0428
0429 int offset(0);
0430
0431 while (offset < *nSpTcompPerSpM) {
0432 bool isPassed(1);
0433 float impact;
0434 float invHelix;
0435 if (threadIdx.x + offset < *nSpTcompPerSpM) {
0436
0437 float cotThetaT =
0438 circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 1];
0439 float iDeltaRT =
0440 circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 2];
0441 float ErT =
0442 circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 3];
0443 float Ut =
0444 circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 4];
0445 float Vt =
0446 circTcompMatPerSpM[threadIdx.x + offset + (*nSpTcompPerSpM_Max) * 5];
0447
0448
0449
0450 float error2 = ErT + ErB +
0451 2 * (cotThetaB * cotThetaT * varianceRM + varianceZM) *
0452 iDeltaRB * iDeltaRT;
0453
0454 float deltaCotTheta = cotThetaB - cotThetaT;
0455 float deltaCotTheta2 = deltaCotTheta * deltaCotTheta;
0456 float error;
0457 float dCotThetaMinusError2;
0458
0459
0460
0461 if (deltaCotTheta2 - error2 > 0) {
0462 deltaCotTheta = fabsf(deltaCotTheta);
0463
0464 error = sqrtf(error2);
0465 dCotThetaMinusError2 =
0466 deltaCotTheta2 + error2 - 2 * deltaCotTheta * error;
0467
0468
0469
0470
0471
0472 if (dCotThetaMinusError2 > scatteringInRegion2) {
0473 isPassed = 0;
0474 }
0475 }
0476
0477
0478 float dU = Ut - Ub;
0479 if (dU == 0.) {
0480 isPassed = 0;
0481 }
0482
0483
0484
0485 float A = (Vt - Vb) / dU;
0486 float S2 = 1. + A * A;
0487 float B = Vb - A * Ub;
0488 float B2 = B * B;
0489
0490
0491 if (S2 < B2 * (*minHelixDiameter2)) {
0492 isPassed = 0;
0493 }
0494
0495
0496 float iHelixDiameter2 = B2 / S2;
0497
0498 float pT2scatter = 4 * iHelixDiameter2 * (*pT2perRadius);
0499
0500
0501
0502 float p2scatter = pT2scatter * iSinTheta2;
0503
0504 if ((deltaCotTheta2 - error2 > 0) &&
0505 (dCotThetaMinusError2 >
0506 p2scatter * (*sigmaScattering) * (*sigmaScattering))) {
0507 isPassed = 0;
0508 }
0509
0510
0511
0512 impact = fabsf((A - B * rM) * rM);
0513 invHelix = B / sqrtf(S2);
0514 if (impact > (*impactMax)) {
0515 isPassed = 0;
0516 }
0517 }
0518
0519 __syncthreads();
0520
0521 if (threadIdx.x + offset < *nSpTcompPerSpM) {
0522
0523
0524 if (isPassed == 1) {
0525 int tPos = atomicAdd(nTrplPerSpB, 1);
0526
0527 if (tPos < *nTrplPerSpBLimit) {
0528 triplets[tPos].weight = 0;
0529 triplets[tPos].bIndex = BcompIndex[blockIdx.x];
0530 triplets[tPos].tIndex = TcompIndex[threadIdx.x + offset];
0531 triplets[tPos].topRadius =
0532 spTcompMatPerSpM[threadIdx.x + offset +
0533 (*nSpTcompPerSpM_Max) * 3];
0534 triplets[tPos].impactParameter = impact;
0535 triplets[tPos].invHelixDiameter = invHelix;
0536 }
0537 }
0538 }
0539 offset += blockDim.x;
0540 }
0541 __syncthreads();
0542
0543 if (threadIdx.x == 0 && *nTrplPerSpB > *nTrplPerSpBLimit) {
0544 *nTrplPerSpB = *nTrplPerSpBLimit;
0545 }
0546
0547 __syncthreads();
0548 int jj = threadIdx.x;
0549
0550
0551 for (int i = 0; i < *nTrplPerSpB / 2 + 1; i++) {
0552 if (threadIdx.x < *nTrplPerSpB) {
0553 if (jj % 2 == 0 && jj < *nTrplPerSpB - 1) {
0554 if (triplets[jj + 1].tIndex < triplets[jj].tIndex) {
0555 Triplet tempVal = triplets[jj];
0556 triplets[jj] = triplets[jj + 1];
0557 triplets[jj + 1] = tempVal;
0558 }
0559 }
0560 }
0561 __syncthreads();
0562 if (threadIdx.x < *nTrplPerSpB) {
0563 if (jj % 2 == 1 && jj < *nTrplPerSpB - 1) {
0564 if (triplets[jj + 1].tIndex < triplets[jj].tIndex) {
0565 Triplet tempVal = triplets[jj];
0566 triplets[jj] = triplets[jj + 1];
0567 triplets[jj + 1] = tempVal;
0568 }
0569 }
0570 }
0571 __syncthreads();
0572 }
0573 __syncthreads();
0574
0575
0576
0577 if (threadIdx.x == 0) {
0578 int nCompatibleSeedR;
0579 float lowerLimitCurv;
0580 float upperLimitCurv;
0581 float deltaR;
0582 bool newCompSeed;
0583
0584 for (int i = 0; i < *nTrplPerSpB; i++) {
0585 nCompatibleSeedR = 0;
0586 lowerLimitCurv = triplets[i].invHelixDiameter - *deltaInvHelixDiameter;
0587 upperLimitCurv = triplets[i].invHelixDiameter + *deltaInvHelixDiameter;
0588 float& currentTop_r = triplets[i].topRadius;
0589 float& weight = triplets[i].weight;
0590 weight = -(triplets[i].impactParameter * (*impactWeightFactor));
0591
0592 for (int j = 0; j < *nTrplPerSpB; j++) {
0593 if (i == j) {
0594 continue;
0595 }
0596 float& otherTop_r = triplets[j].topRadius;
0597 deltaR = currentTop_r - otherTop_r;
0598
0599 if (fabsf(deltaR) < *deltaRMin) {
0600 continue;
0601 }
0602
0603
0604
0605 if (triplets[j].invHelixDiameter < lowerLimitCurv) {
0606 continue;
0607 }
0608 if (triplets[j].invHelixDiameter > upperLimitCurv) {
0609 continue;
0610 }
0611 newCompSeed = true;
0612
0613 for (int k = 0; k < nCompatibleSeedR; k++) {
0614
0615
0616
0617 float& previousDiameter = compatibleSeedR[k];
0618 if (fabsf(previousDiameter - otherTop_r) < *deltaRMin) {
0619 newCompSeed = false;
0620 break;
0621 }
0622 }
0623
0624 if (newCompSeed) {
0625 compatibleSeedR[nCompatibleSeedR] = otherTop_r;
0626 nCompatibleSeedR++;
0627 weight += *compatSeedWeight;
0628 }
0629 if (nCompatibleSeedR >= *compatSeedLimit) {
0630 break;
0631 }
0632 }
0633
0634 int pos = atomicAdd(nTrplPerSpM, 1);
0635
0636 if (pos < *nTrplPerSpMLimit) {
0637 TripletsPerSpM[pos] = triplets[i];
0638 }
0639 }
0640 }
0641
0642 if (threadIdx.x == 0 && *nTrplPerSpM > *nTrplPerSpMLimit) {
0643 *nTrplPerSpM = *nTrplPerSpMLimit;
0644 }
0645 }