File indexing completed on 2025-01-18 09:12:16
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010 #include "Acts/Plugins/Cuda/Seeding2/Details/FindTriplets.hpp"
0011 #include "Acts/Plugins/Cuda/Seeding2/Details/Types.hpp"
0012 #include "Acts/Plugins/Cuda/Seeding2/TripletFilterConfig.hpp"
0013 #include "Acts/Plugins/Cuda/Utilities/MemoryManager.hpp"
0014
0015 #include "../Utilities/ErrorCheck.cuh"
0016 #include "../Utilities/MatrixMacros.hpp"
0017
0018
0019 #include "Acts/Seeding/SeedFilterConfig.hpp"
0020
0021
0022 #include <cuda_runtime.h>
0023
0024
0025 #include <cassert>
0026 #include <cmath>
0027 #include <cstring>
0028
0029 namespace Acts {
0030 namespace Cuda {
0031 namespace Kernels {
0032
0033
0034
0035
0036
0037
0038
0039 __device__ Details::LinCircle transformCoordinates(
0040 const Details::SpacePoint& spM, const Details::SpacePoint& sp,
0041 bool bottom) {
0042
0043 Details::LinCircle result;
0044
0045
0046 const float cosPhiM = spM.x / spM.radius;
0047 const float sinPhiM = spM.y / spM.radius;
0048
0049
0050 const float deltaX = sp.x - spM.x;
0051 const float deltaY = sp.y - spM.y;
0052 const float deltaZ = sp.z - spM.z;
0053
0054
0055
0056
0057
0058 const float x = deltaX * cosPhiM + deltaY * sinPhiM;
0059 const float y = deltaY * cosPhiM - deltaX * sinPhiM;
0060
0061 const float iDeltaR2 = 1. / (deltaX * deltaX + deltaY * deltaY);
0062 const float iDeltaR = sqrtf(iDeltaR2);
0063
0064 const int bottomFactor = 1 * (int(!bottom)) - 1 * (int(bottom));
0065
0066 const float cot_theta = deltaZ * iDeltaR * bottomFactor;
0067
0068 result.cotTheta = cot_theta;
0069
0070 result.Zo = spM.z - spM.radius * cot_theta;
0071 result.iDeltaR = iDeltaR;
0072
0073
0074
0075
0076
0077
0078 result.U = x * iDeltaR2;
0079 result.V = y * iDeltaR2;
0080
0081 result.Er = ((spM.varianceZ + sp.varianceZ) +
0082 (cot_theta * cot_theta) * (spM.varianceR + sp.varianceR)) *
0083 iDeltaR2;
0084 return result;
0085 }
0086
0087
0088
0089
0090
0091
0092
0093
0094
0095
0096
0097
0098
0099
0100
0101
0102
0103
0104
0105
0106
0107
0108
0109
0110
0111
0112
0113
0114
0115 __global__ void transformCoordinates(
0116 unsigned int nDublets, unsigned int maxMBDublets, unsigned int maxMTDublets,
0117 std::size_t nBottomSPs, const Details::SpacePoint* bottomSPs,
0118 std::size_t nMiddleSPs, const Details::SpacePoint* middleSPs,
0119 std::size_t nTopSPs, const Details::SpacePoint* topSPs,
0120 const unsigned int* middleBottomCounts,
0121 const std::size_t* middleBottomDublets, const unsigned int* middleTopCounts,
0122 const std::size_t* middleTopDublets,
0123 Details::LinCircle* bottomSPLinTransArray,
0124 Details::LinCircle* topSPLinTransArray) {
0125
0126 const int dubletIndex = blockIdx.x * blockDim.x + threadIdx.x;
0127
0128
0129 if (dubletIndex >= nDublets) {
0130 return;
0131 }
0132
0133
0134 std::size_t middleIndex = 0;
0135 int runningIndex = dubletIndex;
0136 int tmpValue = 0;
0137 while (runningIndex >= (tmpValue = (middleBottomCounts[middleIndex] +
0138 middleTopCounts[middleIndex]))) {
0139 middleIndex += 1;
0140 assert(middleIndex < nMiddleSPs);
0141 runningIndex -= tmpValue;
0142 }
0143 const bool transformBottom =
0144 ((runningIndex < middleBottomCounts[middleIndex]) ? true : false);
0145 const std::size_t bottomMatrixIndex = (transformBottom ? runningIndex : 0);
0146 const std::size_t topMatrixIndex =
0147 (transformBottom ? 0 : runningIndex - middleBottomCounts[middleIndex]);
0148
0149
0150 if (transformBottom) {
0151 const std::size_t bottomIndex =
0152 ACTS_CUDA_MATRIX2D_ELEMENT(middleBottomDublets, nMiddleSPs, nBottomSPs,
0153 middleIndex, bottomMatrixIndex);
0154 assert(bottomIndex < nBottomSPs);
0155 ACTS_CUDA_MATRIX2D_ELEMENT(bottomSPLinTransArray, nMiddleSPs, maxMBDublets,
0156 middleIndex, bottomMatrixIndex) =
0157 transformCoordinates(middleSPs[middleIndex], bottomSPs[bottomIndex],
0158 true);
0159 } else {
0160 const std::size_t topIndex = ACTS_CUDA_MATRIX2D_ELEMENT(
0161 middleTopDublets, nMiddleSPs, nTopSPs, middleIndex, topMatrixIndex);
0162 assert(topIndex < nTopSPs);
0163 ACTS_CUDA_MATRIX2D_ELEMENT(topSPLinTransArray, nMiddleSPs, maxMTDublets,
0164 middleIndex, topMatrixIndex) =
0165 transformCoordinates(middleSPs[middleIndex], topSPs[topIndex], false);
0166 }
0167
0168 return;
0169 }
0170
0171
0172
0173
0174
0175
0176
0177
0178
0179
0180
0181
0182
0183
0184
0185
0186
0187
0188
0189
0190
0191
0192
0193
0194
0195
0196
0197
0198
0199
0200
0201
0202
0203
0204
0205
0206
0207
0208
0209
0210
0211
0212
0213
0214
0215
0216
0217
0218
0219
0220
0221
0222 __global__ void findTriplets(
0223 std::size_t middleIndexStart, unsigned int maxMBDublets,
0224 unsigned int maxMTDublets, unsigned int maxTriplets,
0225 std::size_t nParallelMiddleSPs, std::size_t nMiddleSPsProcessed,
0226 std::size_t nBottomSPs, const Details::SpacePoint* bottomSPs,
0227 std::size_t nMiddleSPs, const Details::SpacePoint* middleSPs,
0228 std::size_t nTopSPs, const Details::SpacePoint* topSPs,
0229 const unsigned int* middleBottomCounts,
0230 const std::size_t* middleBottomDublets, const unsigned int* middleTopCounts,
0231 const std::size_t* middleTopDublets,
0232 const Details::LinCircle* bottomSPLinTransArray,
0233 const Details::LinCircle* topSPLinTransArray, float maxScatteringAngle2,
0234 float sigmaScattering, float minHelixDiameter2, float pT2perRadius,
0235 float impactMax, float impactWeightFactor,
0236 unsigned int* tripletsPerBottomDublet, std::size_t* tripletIndices,
0237 unsigned int* maxTripletsPerSpB, unsigned int* tripletCount,
0238 Details::Triplet* triplets) {
0239
0240 assert(middleIndexStart + nMiddleSPsProcessed <= nMiddleSPs);
0241
0242
0243 const unsigned int middleIndexOffset = blockIdx.x * blockDim.x + threadIdx.x;
0244 if (middleIndexOffset >= nMiddleSPsProcessed) {
0245 return;
0246 }
0247 const unsigned int middleIndex = middleIndexStart + middleIndexOffset;
0248 assert(middleIndex < nMiddleSPs);
0249
0250
0251 const unsigned int middleBottomPairCount = middleBottomCounts[middleIndex];
0252 const unsigned int middleTopPairCount = middleTopCounts[middleIndex];
0253
0254
0255 const unsigned int tripletCandidateIndex =
0256 blockIdx.y * blockDim.y + threadIdx.y;
0257 if (tripletCandidateIndex >= middleBottomPairCount * middleTopPairCount) {
0258 return;
0259 }
0260 const unsigned int bottomDubletIndex =
0261 tripletCandidateIndex / middleTopPairCount;
0262 assert(bottomDubletIndex < middleBottomPairCount);
0263 const unsigned int topDubletIndex =
0264 tripletCandidateIndex - bottomDubletIndex * middleTopPairCount;
0265 assert(topDubletIndex < middleTopPairCount);
0266
0267
0268 const unsigned int bottomIndex =
0269 ACTS_CUDA_MATRIX2D_ELEMENT(middleBottomDublets, nMiddleSPs, nBottomSPs,
0270 middleIndex, bottomDubletIndex);
0271 assert(bottomIndex < nBottomSPs);
0272 const unsigned int topIndex = ACTS_CUDA_MATRIX2D_ELEMENT(
0273 middleTopDublets, nMiddleSPs, nTopSPs, middleIndex, topDubletIndex);
0274 assert(topIndex < nTopSPs);
0275
0276
0277 const Details::LinCircle lb =
0278 ACTS_CUDA_MATRIX2D_ELEMENT(bottomSPLinTransArray, nMiddleSPs,
0279 maxMBDublets, middleIndex, bottomDubletIndex);
0280
0281
0282 float iSinTheta2 = (1. + lb.cotTheta * lb.cotTheta);
0283
0284
0285
0286
0287
0288
0289
0290
0291
0292 float scatteringInRegion2 = maxScatteringAngle2 * iSinTheta2;
0293
0294 scatteringInRegion2 *= sigmaScattering * sigmaScattering;
0295
0296
0297 const Details::LinCircle lt =
0298 ACTS_CUDA_MATRIX2D_ELEMENT(topSPLinTransArray, nMiddleSPs, maxMTDublets,
0299 middleIndex, topDubletIndex);
0300
0301
0302 const Details::SpacePoint spM = middleSPs[middleIndex];
0303
0304
0305
0306 float error2 =
0307 lt.Er + lb.Er +
0308 2 * (lb.cotTheta * lt.cotTheta * spM.varianceR + spM.varianceZ) *
0309 lb.iDeltaR * lt.iDeltaR;
0310
0311 float deltaCotTheta = lb.cotTheta - lt.cotTheta;
0312 float deltaCotTheta2 = deltaCotTheta * deltaCotTheta;
0313 float dCotThetaMinusError2 = 0.0f;
0314
0315
0316
0317 if (deltaCotTheta2 - error2 > 0) {
0318 deltaCotTheta = fabs(deltaCotTheta);
0319
0320 float error = sqrtf(error2);
0321 dCotThetaMinusError2 = deltaCotTheta2 + error2 - 2 * deltaCotTheta * error;
0322
0323
0324
0325
0326 if (dCotThetaMinusError2 > scatteringInRegion2) {
0327 return;
0328 }
0329 }
0330
0331
0332 float dU = lt.U - lb.U;
0333 if (dU == 0.) {
0334 return;
0335 }
0336
0337
0338 float A = (lt.V - lb.V) / dU;
0339 float S2 = 1. + A * A;
0340 float B = lb.V - A * lb.U;
0341 float B2 = B * B;
0342
0343
0344 if (S2 < B2 * minHelixDiameter2) {
0345 return;
0346 }
0347
0348 float iHelixDiameter2 = B2 / S2;
0349
0350 float pT2scatter = 4 * iHelixDiameter2 * pT2perRadius;
0351
0352
0353
0354 float p2scatter = pT2scatter * iSinTheta2;
0355
0356 if ((deltaCotTheta2 - error2 > 0) &&
0357 (dCotThetaMinusError2 > p2scatter * sigmaScattering * sigmaScattering)) {
0358 return;
0359 }
0360
0361
0362
0363 float Im = fabs((A - B * spM.radius) * spM.radius);
0364
0365
0366 if (Im > impactMax) {
0367 return;
0368 }
0369
0370
0371 unsigned int* tripletIndexRowPtr = &(ACTS_CUDA_MATRIX2D_ELEMENT(
0372 tripletsPerBottomDublet, nParallelMiddleSPs, maxMBDublets,
0373 middleIndexOffset, bottomDubletIndex));
0374 const unsigned int tripletIndexRow = atomicAdd(tripletIndexRowPtr, 1);
0375 assert(tripletIndexRow < maxMTDublets);
0376 const unsigned int tripletIndex = atomicAdd(tripletCount, 1);
0377 assert(tripletIndex < maxTriplets);
0378
0379
0380
0381 atomicMax(maxTripletsPerSpB, tripletIndexRow + 1);
0382
0383
0384 ACTS_CUDA_MATRIX3D_ELEMENT(tripletIndices, nParallelMiddleSPs, maxMBDublets,
0385 maxMTDublets, middleIndexOffset, bottomDubletIndex,
0386 tripletIndexRow) = tripletIndex;
0387
0388
0389 Details::Triplet triplet = {bottomIndex, middleIndex,
0390 topIndex, Im,
0391 B / sqrtf(S2), -(Im * impactWeightFactor)};
0392 triplets[tripletIndex] = triplet;
0393
0394 return;
0395 }
0396
0397
0398
0399
0400
0401
0402
0403
0404
0405
0406
0407
0408
0409
0410
0411
0412
0413
0414
0415
0416
0417
0418
0419
0420
0421
0422
0423
0424
0425
0426
0427
0428
0429
0430
0431
0432
0433
0434
0435
0436
0437
0438
0439 __global__ void filterTriplets2Sp(
0440 TripletFilterConfig::seedWeightFunc_t seedWeight,
0441 TripletFilterConfig::singleSeedCutFunc_t singleSeedCut,
0442 std::size_t middleIndexStart, unsigned int maxMBDublets,
0443 unsigned int maxMTDublets, unsigned int maxTriplets,
0444 unsigned int nAllTriplets, std::size_t nParallelMiddleSPs,
0445 std::size_t nMiddleSPsProcessed, unsigned int* middleBottomCounts,
0446 std::size_t nBottomSPs, const Details::SpacePoint* bottomSPs,
0447 std::size_t nMiddleSPs, const Details::SpacePoint* middleSPs,
0448 std::size_t nTopSPs, const Details::SpacePoint* topSPs,
0449 const unsigned int* tripletsPerBottomDublet,
0450 const std::size_t* tripletIndices, const Details::Triplet* allTriplets,
0451 float deltaInvHelixDiameter, float deltaRMin, float compatSeedWeight,
0452 std::size_t compatSeedLimit, unsigned int* nFilteredTriplets,
0453 Details::Triplet* filteredTriplets) {
0454
0455 assert(seedWeight != nullptr);
0456 assert(singleSeedCut != nullptr);
0457 assert(middleIndexStart + nMiddleSPsProcessed <= nMiddleSPs);
0458
0459
0460 const unsigned int middleIndexOffset = blockIdx.x * blockDim.x + threadIdx.x;
0461 if (middleIndexOffset >= nMiddleSPsProcessed) {
0462 return;
0463 }
0464 const unsigned int middleIndex = middleIndexStart + middleIndexOffset;
0465 assert(middleIndex < nMiddleSPs);
0466
0467
0468 const unsigned int middleBottomPairCount = middleBottomCounts[middleIndex];
0469 const unsigned int bottomDubletIndex = blockIdx.y * blockDim.y + threadIdx.y;
0470 if (bottomDubletIndex >= middleBottomPairCount) {
0471 return;
0472 }
0473
0474
0475 const unsigned int nTripletsForMiddleBottom = ACTS_CUDA_MATRIX2D_ELEMENT(
0476 tripletsPerBottomDublet, nParallelMiddleSPs, maxMBDublets,
0477 middleIndexOffset, bottomDubletIndex);
0478 const unsigned int tripletCandidateIndex =
0479 blockIdx.z * blockDim.z + threadIdx.z;
0480 if (tripletCandidateIndex >= nTripletsForMiddleBottom) {
0481 return;
0482 }
0483
0484
0485 const std::size_t triplet1Index = ACTS_CUDA_MATRIX3D_ELEMENT(
0486 tripletIndices, nParallelMiddleSPs, maxMBDublets, maxMTDublets,
0487 middleIndexOffset, bottomDubletIndex, tripletCandidateIndex);
0488 assert(triplet1Index < nAllTriplets);
0489
0490
0491 Details::Triplet triplet1 = allTriplets[triplet1Index];
0492
0493
0494 float lowerLimitCurv = triplet1.invHelixDiameter - deltaInvHelixDiameter;
0495 float upperLimitCurv = triplet1.invHelixDiameter + deltaInvHelixDiameter;
0496 float currentTop_r = topSPs[triplet1.topIndex].radius;
0497
0498
0499
0500
0501
0502 static constexpr std::size_t MAX_TOP_SP = 10;
0503 assert(compatSeedLimit < MAX_TOP_SP);
0504 float compatibleSeedR[MAX_TOP_SP];
0505 std::size_t nCompatibleSeedR = 0;
0506
0507
0508 for (std::size_t i = 0; i < nTripletsForMiddleBottom; ++i) {
0509
0510
0511 if (i == tripletCandidateIndex) {
0512 continue;
0513 }
0514
0515 const std::size_t triplet2Index = ACTS_CUDA_MATRIX3D_ELEMENT(
0516 tripletIndices, nParallelMiddleSPs, maxMBDublets, maxMTDublets,
0517 middleIndexOffset, bottomDubletIndex, i);
0518 assert(triplet2Index < nAllTriplets);
0519 assert(triplet2Index != triplet1Index);
0520
0521
0522 const Details::Triplet triplet2 = allTriplets[triplet2Index];
0523 assert(triplet1.bottomIndex == triplet2.bottomIndex);
0524
0525
0526 float otherTop_r = topSPs[triplet2.topIndex].radius;
0527 float deltaR = currentTop_r - otherTop_r;
0528 if (fabs(deltaR) < deltaRMin) {
0529 continue;
0530 }
0531
0532
0533
0534
0535 if (triplet2.invHelixDiameter < lowerLimitCurv) {
0536 continue;
0537 }
0538 if (triplet2.invHelixDiameter > upperLimitCurv) {
0539 continue;
0540 }
0541
0542 bool newCompSeed = true;
0543 for (std::size_t k = 0; k < nCompatibleSeedR; ++k) {
0544
0545
0546
0547
0548 if (fabs(compatibleSeedR[k] - otherTop_r) < deltaRMin) {
0549 newCompSeed = false;
0550 break;
0551 }
0552 }
0553 if (newCompSeed) {
0554 compatibleSeedR[nCompatibleSeedR++] = otherTop_r;
0555 assert(nCompatibleSeedR < MAX_TOP_SP);
0556 triplet1.weight += compatSeedWeight;
0557 }
0558 if (nCompatibleSeedR >= compatSeedLimit) {
0559 break;
0560 }
0561 }
0562
0563
0564 triplet1.weight +=
0565 seedWeight(bottomSPs[triplet1.bottomIndex], middleSPs[middleIndex],
0566 topSPs[triplet1.topIndex]);
0567 if (!singleSeedCut(triplet1.weight, bottomSPs[triplet1.bottomIndex],
0568 middleSPs[middleIndex], topSPs[triplet1.topIndex])) {
0569 return;
0570 }
0571
0572
0573 const unsigned int tripletRow = atomicAdd(nFilteredTriplets, 1);
0574 assert(tripletRow < nAllTriplets);
0575 filteredTriplets[tripletRow] = triplet1;
0576 return;
0577 }
0578
0579 }
0580
0581 namespace Details {
0582
0583 std::vector<std::vector<Triplet>> findTriplets(
0584 const Info::Device& device, std::size_t maxBlockSize,
0585 const DubletCounts& dubletCounts, const SeedFilterConfig& seedConfig,
0586 const TripletFilterConfig& filterConfig, std::size_t nBottomSPs,
0587 const device_array<SpacePoint>& bottomSPs, std::size_t nMiddleSPs,
0588 const device_array<SpacePoint>& middleSPs, std::size_t nTopSPs,
0589 const device_array<SpacePoint>& topSPs,
0590 const device_array<unsigned int>& middleBottomCounts,
0591 const device_array<std::size_t>& middleBottomDublets,
0592 const device_array<unsigned int>& middleTopCounts,
0593 const device_array<std::size_t>& middleTopDublets,
0594 float maxScatteringAngle2, float sigmaScattering, float minHelixDiameter2,
0595 float pT2perRadius, float impactMax) {
0596
0597 const int numBlocksLT =
0598 (dubletCounts.nDublets + maxBlockSize - 1) / maxBlockSize;
0599
0600
0601 auto bottomSPLinTransArray =
0602 make_device_array<LinCircle>(nMiddleSPs * dubletCounts.maxMBDublets);
0603 auto topSPLinTransArray =
0604 make_device_array<LinCircle>(nMiddleSPs * dubletCounts.maxMTDublets);
0605
0606
0607 Kernels::transformCoordinates<<<numBlocksLT, maxBlockSize>>>(
0608 dubletCounts.nDublets, dubletCounts.maxMBDublets,
0609 dubletCounts.maxMTDublets, nBottomSPs, bottomSPs.get(), nMiddleSPs,
0610 middleSPs.get(), nTopSPs, topSPs.get(), middleBottomCounts.get(),
0611 middleBottomDublets.get(), middleTopCounts.get(), middleTopDublets.get(),
0612 bottomSPLinTransArray.get(), topSPLinTransArray.get());
0613 ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0614 ACTS_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0615
0616
0617
0618
0619
0620
0621 const std::size_t memorySizePerMiddleSP =
0622
0623 2 * dubletCounts.maxTriplets * sizeof(Triplet) +
0624
0625
0626 dubletCounts.maxMBDublets * sizeof(unsigned int) +
0627 dubletCounts.maxMBDublets * dubletCounts.maxMTDublets *
0628 sizeof(std::size_t) +
0629
0630
0631 sizeof(unsigned int);
0632
0633
0634 const std::size_t nParallelMiddleSPs =
0635 std::min(MemoryManager::instance().availableMemory(device.id) /
0636 memorySizePerMiddleSP,
0637 nMiddleSPs);
0638 assert(nParallelMiddleSPs > 0);
0639
0640
0641 enum ObjectCountType : int {
0642 AllTriplets = 0,
0643 FilteredTriplets = 1,
0644 MaxTripletsPerSpB = 2,
0645 NObjectCountTypes = 3
0646 };
0647
0648
0649
0650 auto objectCountsHostNull = make_host_array<unsigned int>(NObjectCountTypes);
0651 memset(objectCountsHostNull.get(), 0,
0652 NObjectCountTypes * sizeof(unsigned int));
0653 auto objectCountsHost = make_host_array<unsigned int>(NObjectCountTypes);
0654 auto objectCounts = make_device_array<unsigned int>(NObjectCountTypes);
0655
0656
0657
0658 auto allTriplets =
0659 make_device_array<Triplet>(nParallelMiddleSPs * dubletCounts.maxTriplets);
0660 auto filteredTriplets =
0661 make_device_array<Triplet>(nParallelMiddleSPs * dubletCounts.maxTriplets);
0662 auto filteredTripletsHost =
0663 make_host_array<Triplet>(nParallelMiddleSPs * dubletCounts.maxTriplets);
0664
0665
0666
0667 auto tripletsPerBottomDubletHost = make_host_array<unsigned int>(
0668 nParallelMiddleSPs * dubletCounts.maxMBDublets);
0669 memset(tripletsPerBottomDubletHost.get(), 0,
0670 nParallelMiddleSPs * dubletCounts.maxMBDublets * sizeof(unsigned int));
0671 auto tripletsPerBottomDublet = make_device_array<unsigned int>(
0672 nParallelMiddleSPs * dubletCounts.maxMBDublets);
0673
0674
0675
0676 auto tripletIndices = make_device_array<std::size_t>(
0677 nParallelMiddleSPs * dubletCounts.maxMBDublets *
0678 dubletCounts.maxMTDublets);
0679
0680
0681
0682 auto filteredTripletCountsHostNull =
0683 make_host_array<unsigned int>(nParallelMiddleSPs);
0684 memset(filteredTripletCountsHostNull.get(), 0,
0685 nParallelMiddleSPs * sizeof(unsigned int));
0686 auto filteredTripletCountsHost =
0687 make_host_array<unsigned int>(nParallelMiddleSPs);
0688 auto filteredTripletCounts =
0689 make_device_array<unsigned int>(nParallelMiddleSPs);
0690
0691
0692 const std::size_t blockSize = std::sqrt(maxBlockSize);
0693
0694
0695 std::vector<std::vector<Triplet>> result(nMiddleSPs);
0696
0697
0698 auto middleBottomCountsHost = make_host_array<unsigned int>(nMiddleSPs);
0699 copyToHost(middleBottomCountsHost, middleBottomCounts, nMiddleSPs);
0700 auto middleTopCountsHost = make_host_array<unsigned int>(nMiddleSPs);
0701 copyToHost(middleTopCountsHost, middleTopCounts, nMiddleSPs);
0702
0703
0704
0705 for (std::size_t middleIndex = 0; middleIndex < nMiddleSPs;
0706 middleIndex += nParallelMiddleSPs) {
0707
0708 copyToDevice(objectCounts, objectCountsHostNull, NObjectCountTypes);
0709 copyToDevice(tripletsPerBottomDublet, tripletsPerBottomDubletHost,
0710 nParallelMiddleSPs * dubletCounts.maxMBDublets);
0711
0712
0713 const std::size_t nMiddleSPsProcessed =
0714 std::min(nParallelMiddleSPs, nMiddleSPs - middleIndex);
0715
0716
0717
0718 const dim3 blockSizeFT(1, maxBlockSize);
0719 const dim3 numBlocksFT(
0720 (nMiddleSPsProcessed + blockSizeFT.x - 1) / blockSizeFT.x,
0721 (dubletCounts.maxTriplets + blockSizeFT.y - 1) / blockSizeFT.y);
0722 assert(dubletCounts.maxTriplets > 0);
0723
0724
0725 Kernels::findTriplets<<<numBlocksFT, blockSizeFT>>>(
0726
0727 middleIndex, dubletCounts.maxMBDublets, dubletCounts.maxMTDublets,
0728 dubletCounts.maxTriplets, nParallelMiddleSPs, nMiddleSPsProcessed,
0729
0730 nBottomSPs, bottomSPs.get(), nMiddleSPs, middleSPs.get(), nTopSPs,
0731 topSPs.get(),
0732
0733 middleBottomCounts.get(), middleBottomDublets.get(),
0734 middleTopCounts.get(), middleTopDublets.get(),
0735
0736
0737 bottomSPLinTransArray.get(), topSPLinTransArray.get(),
0738
0739 maxScatteringAngle2, sigmaScattering, minHelixDiameter2, pT2perRadius,
0740 impactMax, seedConfig.impactWeightFactor,
0741
0742 tripletsPerBottomDublet.get(), tripletIndices.get(),
0743 objectCounts.get() + MaxTripletsPerSpB,
0744 objectCounts.get() + AllTriplets, allTriplets.get());
0745 ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0746 ACTS_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0747
0748
0749 copyToHost(objectCountsHost, objectCounts, NObjectCountTypes);
0750 const unsigned int nAllTriplets = objectCountsHost.get()[AllTriplets];
0751 const unsigned int nMaxTripletsPerSpB =
0752 objectCountsHost.get()[MaxTripletsPerSpB];
0753
0754
0755 if (nAllTriplets == 0) {
0756 continue;
0757 }
0758
0759
0760
0761 const dim3 blockSizeF2SP(1, blockSize, blockSize);
0762 const dim3 numBlocksF2SP(
0763 (nMiddleSPsProcessed + blockSizeF2SP.x - 1) / blockSizeF2SP.x,
0764 (dubletCounts.maxMBDublets + blockSizeF2SP.y - 1) / blockSizeF2SP.y,
0765 (nMaxTripletsPerSpB + blockSizeF2SP.z - 1) / blockSizeF2SP.z);
0766 assert(dubletCounts.maxMBDublets > 0);
0767 assert(nMaxTripletsPerSpB > 0);
0768
0769
0770 assert(filterConfig.seedWeight != nullptr);
0771 assert(filterConfig.singleSeedCut != nullptr);
0772 Kernels::filterTriplets2Sp<<<numBlocksF2SP, blockSizeF2SP>>>(
0773
0774 filterConfig.seedWeight, filterConfig.singleSeedCut,
0775
0776 middleIndex, dubletCounts.maxMBDublets, dubletCounts.maxMTDublets,
0777 dubletCounts.maxTriplets, nAllTriplets, nParallelMiddleSPs,
0778 nMiddleSPsProcessed, middleBottomCounts.get(),
0779
0780 nBottomSPs, bottomSPs.get(), nMiddleSPs, middleSPs.get(), nTopSPs,
0781 topSPs.get(),
0782
0783 tripletsPerBottomDublet.get(), tripletIndices.get(), allTriplets.get(),
0784
0785 seedConfig.deltaInvHelixDiameter, seedConfig.deltaRMin,
0786 seedConfig.compatSeedWeight, seedConfig.compatSeedLimit,
0787
0788 objectCounts.get() + FilteredTriplets, filteredTriplets.get());
0789 ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0790 ACTS_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0791
0792
0793 copyToHost(objectCountsHost, objectCounts, NObjectCountTypes);
0794
0795
0796 const unsigned int nFilteredTriplets =
0797 objectCountsHost.get()[FilteredTriplets];
0798 if (nFilteredTriplets == 0) {
0799 continue;
0800 }
0801
0802
0803 ACTS_CUDA_ERROR_CHECK(cudaMemcpy(
0804 filteredTripletsHost.get(), filteredTriplets.get(),
0805 nFilteredTriplets * sizeof(Triplet), cudaMemcpyDeviceToHost));
0806
0807
0808 for (std::size_t i = 0; i < nFilteredTriplets; ++i) {
0809
0810 const Triplet& triplet = filteredTripletsHost.get()[i];
0811
0812 result[triplet.middleIndex].push_back(triplet);
0813 }
0814 }
0815
0816
0817 assert(result.size() == nMiddleSPs);
0818 return result;
0819 }
0820
0821 }
0822 }
0823 }