Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2025-01-18 09:12:16

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008 
0009 // CUDA plugin include(s).
0010 #include "Acts/Plugins/Cuda/Seeding2/Details/FindTriplets.hpp"
0011 #include "Acts/Plugins/Cuda/Seeding2/Details/Types.hpp"
0012 #include "Acts/Plugins/Cuda/Seeding2/TripletFilterConfig.hpp"
0013 #include "Acts/Plugins/Cuda/Utilities/MemoryManager.hpp"
0014 
0015 #include "../Utilities/ErrorCheck.cuh"
0016 #include "../Utilities/MatrixMacros.hpp"
0017 
0018 // Acts include(s).
0019 #include "Acts/Seeding/SeedFilterConfig.hpp"
0020 
0021 // CUDA include(s).
0022 #include <cuda_runtime.h>
0023 
0024 // System include(s).
0025 #include <cassert>
0026 #include <cmath>
0027 #include <cstring>
0028 
0029 namespace Acts {
0030 namespace Cuda {
0031 namespace Kernels {
0032 
0033 /// Function performing coordinate transformation for one spacepoint pair
0034 ///
0035 /// @param spM    The middle spacepoint to use
0036 /// @param sp     The "other" spacepoint to use
0037 /// @param bottom @c true If the "other" spacepoint is a bottom one, @c false
0038 ///               otherwise
0039 __device__ Details::LinCircle transformCoordinates(
0040     const Details::SpacePoint& spM, const Details::SpacePoint& sp,
0041     bool bottom) {
0042   // Create the result object.
0043   Details::LinCircle result;
0044 
0045   // Parameters of the middle spacepoint.
0046   const float cosPhiM = spM.x / spM.radius;
0047   const float sinPhiM = spM.y / spM.radius;
0048 
0049   // (Relative) Parameters of the spacepoint being transformed.
0050   const float deltaX = sp.x - spM.x;
0051   const float deltaY = sp.y - spM.y;
0052   const float deltaZ = sp.z - spM.z;
0053 
0054   // calculate projection fraction of spM->sp vector pointing in same
0055   // direction as
0056   // vector origin->spM (x) and projection fraction of spM->sp vector pointing
0057   // orthogonal to origin->spM (y)
0058   const float x = deltaX * cosPhiM + deltaY * sinPhiM;
0059   const float y = deltaY * cosPhiM - deltaX * sinPhiM;
0060   // 1/(length of M -> SP)
0061   const float iDeltaR2 = 1. / (deltaX * deltaX + deltaY * deltaY);
0062   const float iDeltaR = sqrtf(iDeltaR2);
0063   //
0064   const int bottomFactor = 1 * (int(!bottom)) - 1 * (int(bottom));
0065   // cot_theta = (deltaZ/deltaR)
0066   const float cot_theta = deltaZ * iDeltaR * bottomFactor;
0067   // VERY frequent (SP^3) access
0068   result.cotTheta = cot_theta;
0069   // location on z-axis of this SP-duplet
0070   result.Zo = spM.z - spM.radius * cot_theta;
0071   result.iDeltaR = iDeltaR;
0072   // transformation of circle equation (x,y) into linear equation (u,v)
0073   // x^2 + y^2 - 2x_0*x - 2y_0*y = 0
0074   // is transformed into
0075   // 1 - 2x_0*u - 2y_0*v = 0
0076   // using the following m_U and m_V
0077   // (u = A + B*v); A and B are created later on
0078   result.U = x * iDeltaR2;
0079   result.V = y * iDeltaR2;
0080   // error term for sp-pair without correlation of middle space point
0081   result.Er = ((spM.varianceZ + sp.varianceZ) +
0082                (cot_theta * cot_theta) * (spM.varianceR + sp.varianceR)) *
0083               iDeltaR2;
0084   return result;
0085 }
0086 
0087 /// Kernel performing coordinate transformation on all created dublets
0088 ///
0089 /// @param[in] nDublets The total number of dublets found
0090 /// @param[in] maxMBDublets The maximal number of middle-bottom dublets found
0091 ///            for any middle spacepoint
0092 /// @param[in] maxMTDublets The maximal number of middle-top dublets found for
0093 ///            any middle spacepoint
0094 /// @param[in] nBottomSPs The number of bottom spacepoints in @c bottomSPs
0095 /// @param[in] bottomSPs Properties of all of the bottom spacepoints
0096 /// @param[in] nMiddleSPs The number of middle spacepoints in @c middleSPs
0097 /// @param[in] middleSPs Properties of all of the middle spacepoints
0098 /// @param[in] nTopSPs The number of top spacepoints in @c topSPs
0099 /// @param[in] topSPs Properties of all of the top spacepoints
0100 /// @param[in] middleBottomCounts 1-D array of the number of middle-bottom
0101 ///            dublets found for each middle spacepoint
0102 /// @param[in] middleBottomDublets 2-D matrix of size
0103 ///            @c nMiddleSPs x @c nBottomSPs, holding the bottom spacepoint
0104 ///            indices for the identified middle-bottom dublets
0105 /// @param[in] middleTopCounts 1-D array of the number of middle-top dublets
0106 ///            found for each middle spacepoint
0107 /// @param[in] middleTopDublets 2-D matrix of size
0108 ///            @c nMiddleSPs x @c nTopSPs, holding the top spacepoint
0109 ///            indices for the identified middle-top dublets
0110 /// @param[out] bottomSPLinTransArray 2-dimensional matrix indexed the same way
0111 ///             as @c middleBottomDublets
0112 /// @param[out] topSPLinTransArray 2-dimensional matrix indexed the same way as
0113 ///             @c middleTopDublets
0114 ///
0115 __global__ void transformCoordinates(
0116     unsigned int nDublets, unsigned int maxMBDublets, unsigned int maxMTDublets,
0117     std::size_t nBottomSPs, const Details::SpacePoint* bottomSPs,
0118     std::size_t nMiddleSPs, const Details::SpacePoint* middleSPs,
0119     std::size_t nTopSPs, const Details::SpacePoint* topSPs,
0120     const unsigned int* middleBottomCounts,
0121     const std::size_t* middleBottomDublets, const unsigned int* middleTopCounts,
0122     const std::size_t* middleTopDublets,
0123     Details::LinCircle* bottomSPLinTransArray,
0124     Details::LinCircle* topSPLinTransArray) {
0125   // Get the global index.
0126   const int dubletIndex = blockIdx.x * blockDim.x + threadIdx.x;
0127 
0128   // If we're out of bounds, finish right away.
0129   if (dubletIndex >= nDublets) {
0130     return;
0131   }
0132 
0133   // Find the dublet to transform.
0134   std::size_t middleIndex = 0;
0135   int runningIndex = dubletIndex;
0136   int tmpValue = 0;
0137   while (runningIndex >= (tmpValue = (middleBottomCounts[middleIndex] +
0138                                       middleTopCounts[middleIndex]))) {
0139     middleIndex += 1;
0140     assert(middleIndex < nMiddleSPs);
0141     runningIndex -= tmpValue;
0142   }
0143   const bool transformBottom =
0144       ((runningIndex < middleBottomCounts[middleIndex]) ? true : false);
0145   const std::size_t bottomMatrixIndex = (transformBottom ? runningIndex : 0);
0146   const std::size_t topMatrixIndex =
0147       (transformBottom ? 0 : runningIndex - middleBottomCounts[middleIndex]);
0148 
0149   // Perform the transformation.
0150   if (transformBottom) {
0151     const std::size_t bottomIndex =
0152         ACTS_CUDA_MATRIX2D_ELEMENT(middleBottomDublets, nMiddleSPs, nBottomSPs,
0153                                    middleIndex, bottomMatrixIndex);
0154     assert(bottomIndex < nBottomSPs);
0155     ACTS_CUDA_MATRIX2D_ELEMENT(bottomSPLinTransArray, nMiddleSPs, maxMBDublets,
0156                                middleIndex, bottomMatrixIndex) =
0157         transformCoordinates(middleSPs[middleIndex], bottomSPs[bottomIndex],
0158                              true);
0159   } else {
0160     const std::size_t topIndex = ACTS_CUDA_MATRIX2D_ELEMENT(
0161         middleTopDublets, nMiddleSPs, nTopSPs, middleIndex, topMatrixIndex);
0162     assert(topIndex < nTopSPs);
0163     ACTS_CUDA_MATRIX2D_ELEMENT(topSPLinTransArray, nMiddleSPs, maxMTDublets,
0164                                middleIndex, topMatrixIndex) =
0165         transformCoordinates(middleSPs[middleIndex], topSPs[topIndex], false);
0166   }
0167 
0168   return;
0169 }
0170 
0171 /// Kernel used for finding all the triplet candidates
0172 ///
0173 /// @param[in] middleIndexStart The middle spacepoint index that the kernel was
0174 ///            "started from"
0175 /// @param[in] maxMBDublets The maximal number of middle-bottom dublets found
0176 ///            for any middle spacepoint
0177 /// @param[in] maxMTDublets The maximal number of middle-top dublets found for
0178 ///            any middle spacepoint
0179 /// @param[in] maxTriplets The maximum number of triplets for which memory is
0180 ///            booked
0181 /// @param[in] nParallelMiddleSPs The number of middle spacepoints that the
0182 ///            "largest" kernels may be started on in parallel
0183 /// @param[in] nMiddleSPsProcessed The number of middle spacepoints that the
0184 ///            kernel was started on in parallel
0185 /// @param[in] nBottomSPs The number of bottom spacepoints in @c bottomSPs
0186 /// @param[in] bottomSPs Properties of all of the bottom spacepoints
0187 /// @param[in] nMiddleSPs The number of middle spacepoints in @c middleSPs
0188 /// @param[in] middleSPs Properties of all of the middle spacepoints
0189 /// @param[in] nTopSPs The number of top spacepoints in @c topSPs
0190 /// @param[in] topSPs Properties of all of the top spacepoints
0191 /// @param[in] middleBottomCounts 1-D array of the number of middle-bottom
0192 ///            dublets found for each middle spacepoint
0193 /// @param[in] middleBottomDublets 2-D matrix of size
0194 ///            @c nMiddleSPs x @c nBottomSPs, holding the bottom spacepoint
0195 ///            indices for the identified middle-bottom dublets
0196 /// @param[in] middleTopCounts 1-D array of the number of middle-top dublets
0197 ///            found for each middle spacepoint
0198 /// @param[in] middleTopDublets 2-D matrix of size
0199 ///            @c nMiddleSPs x @c nTopSPs, holding the top spacepoint
0200 ///            indices for the identified middle-top dublets
0201 /// @param[in] bottomSPLinTransArray 2-dimensional matrix indexed the same way
0202 ///            as @c middleBottomArray
0203 /// @param[in] topSPLinTransArray 2-dimensional matrix indexed the same way as
0204 ///            @c middleTopArray
0205 /// @param[in] maxScatteringAngle2 Parameter from @c Acts::SeedFinderConfig
0206 /// @param[in] sigmaScattering Parameter from @c Acts::SeedFinderConfig
0207 /// @param[in] minHelixDiameter2 Parameter from @c Acts::SeedFinderConfig
0208 /// @param[in] pT2perRadius Parameter from @c Acts::SeedFinderConfig
0209 /// @param[in] impactMax Parameter from @c Acts::SeedFinderConfig
0210 /// @param[in] impactWeightFactor Parameter from @c Acts::SeedFinderConfig
0211 /// @param[out] tripletsPerBottomDublet 1-dimensional array of the triplet
0212 ///             counts for each bottom spacepoint
0213 /// @param[out] tripletIndices 2-dimensional matrix of the indices of the
0214 ///             triplets created for each middle-bottom spacepoint dublet
0215 /// @param[out] maxTripletsPerSpB Pointer to the scalar outputting the maximum
0216 ///             number of triplets found for any bottom spacepoint dublet
0217 /// @param[out] tripletCount Pointer to the scalar counting the total number of
0218 ///             triplets created by the kernel
0219 /// @param[out] triplets 1-dimensional array of all reconstructed triplet
0220 ///             candidates
0221 ///
0222 __global__ void findTriplets(
0223     std::size_t middleIndexStart, unsigned int maxMBDublets,
0224     unsigned int maxMTDublets, unsigned int maxTriplets,
0225     std::size_t nParallelMiddleSPs, std::size_t nMiddleSPsProcessed,
0226     std::size_t nBottomSPs, const Details::SpacePoint* bottomSPs,
0227     std::size_t nMiddleSPs, const Details::SpacePoint* middleSPs,
0228     std::size_t nTopSPs, const Details::SpacePoint* topSPs,
0229     const unsigned int* middleBottomCounts,
0230     const std::size_t* middleBottomDublets, const unsigned int* middleTopCounts,
0231     const std::size_t* middleTopDublets,
0232     const Details::LinCircle* bottomSPLinTransArray,
0233     const Details::LinCircle* topSPLinTransArray, float maxScatteringAngle2,
0234     float sigmaScattering, float minHelixDiameter2, float pT2perRadius,
0235     float impactMax, float impactWeightFactor,
0236     unsigned int* tripletsPerBottomDublet, std::size_t* tripletIndices,
0237     unsigned int* maxTripletsPerSpB, unsigned int* tripletCount,
0238     Details::Triplet* triplets) {
0239   // A sanity check.
0240   assert(middleIndexStart + nMiddleSPsProcessed <= nMiddleSPs);
0241 
0242   // Find the middle spacepoint index to operate on.
0243   const unsigned int middleIndexOffset = blockIdx.x * blockDim.x + threadIdx.x;
0244   if (middleIndexOffset >= nMiddleSPsProcessed) {
0245     return;
0246   }
0247   const unsigned int middleIndex = middleIndexStart + middleIndexOffset;
0248   assert(middleIndex < nMiddleSPs);
0249 
0250   // Counts of middle-bottom and middle-top pairs for this middle spacepoint.
0251   const unsigned int middleBottomPairCount = middleBottomCounts[middleIndex];
0252   const unsigned int middleTopPairCount = middleTopCounts[middleIndex];
0253 
0254   // Find the indices of the middle-bottom and middle-top pairs to operate on.
0255   const unsigned int tripletCandidateIndex =
0256       blockIdx.y * blockDim.y + threadIdx.y;
0257   if (tripletCandidateIndex >= middleBottomPairCount * middleTopPairCount) {
0258     return;
0259   }
0260   const unsigned int bottomDubletIndex =
0261       tripletCandidateIndex / middleTopPairCount;
0262   assert(bottomDubletIndex < middleBottomPairCount);
0263   const unsigned int topDubletIndex =
0264       tripletCandidateIndex - bottomDubletIndex * middleTopPairCount;
0265   assert(topDubletIndex < middleTopPairCount);
0266 
0267   // Get the indices of the spacepoints to operate on.
0268   const unsigned int bottomIndex =
0269       ACTS_CUDA_MATRIX2D_ELEMENT(middleBottomDublets, nMiddleSPs, nBottomSPs,
0270                                  middleIndex, bottomDubletIndex);
0271   assert(bottomIndex < nBottomSPs);
0272   const unsigned int topIndex = ACTS_CUDA_MATRIX2D_ELEMENT(
0273       middleTopDublets, nMiddleSPs, nTopSPs, middleIndex, topDubletIndex);
0274   assert(topIndex < nTopSPs);
0275 
0276   // Load the transformed coordinates of the bottom spacepoint into the thread.
0277   const Details::LinCircle lb =
0278       ACTS_CUDA_MATRIX2D_ELEMENT(bottomSPLinTransArray, nMiddleSPs,
0279                                  maxMBDublets, middleIndex, bottomDubletIndex);
0280 
0281   // 1+(cot^2(theta)) = 1/sin^2(theta)
0282   float iSinTheta2 = (1. + lb.cotTheta * lb.cotTheta);
0283   // calculate max scattering for min momentum at the seed's theta angle
0284   // scaling scatteringAngle^2 by sin^2(theta) to convert pT^2 to p^2
0285   // accurate would be taking 1/atan(thetaBottom)-1/atan(thetaTop) <
0286   // scattering
0287   // but to avoid trig functions we approximate cot by scaling by
0288   // 1/sin^4(theta)
0289   // resolving with pT to p scaling --> only divide by sin^2(theta)
0290   // max approximation error for allowed scattering angles of 0.04 rad at
0291   // eta=infinity: ~8.5%
0292   float scatteringInRegion2 = maxScatteringAngle2 * iSinTheta2;
0293   // multiply the squared sigma onto the squared scattering
0294   scatteringInRegion2 *= sigmaScattering * sigmaScattering;
0295 
0296   // Load the transformed coordinates of the top spacepoint into the thread.
0297   const Details::LinCircle lt =
0298       ACTS_CUDA_MATRIX2D_ELEMENT(topSPLinTransArray, nMiddleSPs, maxMTDublets,
0299                                  middleIndex, topDubletIndex);
0300 
0301   // Load the parameters of the middle spacepoint into the thread.
0302   const Details::SpacePoint spM = middleSPs[middleIndex];
0303 
0304   // add errors of spB-spM and spM-spT pairs and add the correlation term
0305   // for errors on spM
0306   float error2 =
0307       lt.Er + lb.Er +
0308       2 * (lb.cotTheta * lt.cotTheta * spM.varianceR + spM.varianceZ) *
0309           lb.iDeltaR * lt.iDeltaR;
0310 
0311   float deltaCotTheta = lb.cotTheta - lt.cotTheta;
0312   float deltaCotTheta2 = deltaCotTheta * deltaCotTheta;
0313   float dCotThetaMinusError2 = 0.0f;
0314 
0315   // if the error is larger than the difference in theta, no need to
0316   // compare with scattering
0317   if (deltaCotTheta2 - error2 > 0) {
0318     deltaCotTheta = fabs(deltaCotTheta);
0319     // if deltaTheta larger than the scattering for the lower pT cut, skip
0320     float error = sqrtf(error2);
0321     dCotThetaMinusError2 = deltaCotTheta2 + error2 - 2 * deltaCotTheta * error;
0322     // avoid taking root of scatteringInRegion
0323     // if left side of ">" is positive, both sides of inequality can be
0324     // squared
0325     // (scattering is always positive)
0326     if (dCotThetaMinusError2 > scatteringInRegion2) {
0327       return;
0328     }
0329   }
0330 
0331   // protects against division by 0
0332   float dU = lt.U - lb.U;
0333   if (dU == 0.) {
0334     return;
0335   }
0336   // A and B are evaluated as a function of the circumference parameters
0337   // x_0 and y_0
0338   float A = (lt.V - lb.V) / dU;
0339   float S2 = 1. + A * A;
0340   float B = lb.V - A * lb.U;
0341   float B2 = B * B;
0342   // sqrt(S2)/B = 2 * helixradius
0343   // calculated radius must not be smaller than minimum radius
0344   if (S2 < B2 * minHelixDiameter2) {
0345     return;
0346   }
0347   // 1/helixradius: (B/sqrt(S2))/2 (we leave everything squared)
0348   float iHelixDiameter2 = B2 / S2;
0349   // calculate scattering for p(T) calculated from seed curvature
0350   float pT2scatter = 4 * iHelixDiameter2 * pT2perRadius;
0351   // TODO: include upper pT limit for scatter calc
0352   // convert p(T) to p scaling by sin^2(theta) AND scale by 1/sin^4(theta)
0353   // from rad to deltaCotTheta
0354   float p2scatter = pT2scatter * iSinTheta2;
0355   // if deltaTheta larger than allowed scattering for calculated pT, skip
0356   if ((deltaCotTheta2 - error2 > 0) &&
0357       (dCotThetaMinusError2 > p2scatter * sigmaScattering * sigmaScattering)) {
0358     return;
0359   }
0360   // A and B allow calculation of impact params in U/V plane with linear
0361   // function
0362   // (in contrast to having to solve a quadratic function in x/y plane)
0363   float Im = fabs((A - B * spM.radius) * spM.radius);
0364 
0365   // Check if the triplet candidate should be accepted.
0366   if (Im > impactMax) {
0367     return;
0368   }
0369 
0370   // Reserve elements (positions) in the global matrices/arrays.
0371   unsigned int* tripletIndexRowPtr = &(ACTS_CUDA_MATRIX2D_ELEMENT(
0372       tripletsPerBottomDublet, nParallelMiddleSPs, maxMBDublets,
0373       middleIndexOffset, bottomDubletIndex));
0374   const unsigned int tripletIndexRow = atomicAdd(tripletIndexRowPtr, 1);
0375   assert(tripletIndexRow < maxMTDublets);
0376   const unsigned int tripletIndex = atomicAdd(tripletCount, 1);
0377   assert(tripletIndex < maxTriplets);
0378 
0379   // Collect the maximal value of tripletIndexRow + 1 (since we want the
0380   // count, not the index values) for the next kernel.
0381   atomicMax(maxTripletsPerSpB, tripletIndexRow + 1);
0382 
0383   // Save the index of the triplet candidate, which will be created now.
0384   ACTS_CUDA_MATRIX3D_ELEMENT(tripletIndices, nParallelMiddleSPs, maxMBDublets,
0385                              maxMTDublets, middleIndexOffset, bottomDubletIndex,
0386                              tripletIndexRow) = tripletIndex;
0387 
0388   // Now store the triplet in the above mentioned location.
0389   Details::Triplet triplet = {bottomIndex,   middleIndex,
0390                               topIndex,      Im,
0391                               B / sqrtf(S2), -(Im * impactWeightFactor)};
0392   triplets[tripletIndex] = triplet;
0393 
0394   return;
0395 }
0396 
0397 /// Kernel performing the "2 fixed spacepoint filtering" of the triplets
0398 ///
0399 /// @param[in] seedWeight Pointer to the user-provided seed weight calculating
0400 ///            function
0401 /// @param[in] singleSeedCut Pointer to the user-provided seed filtering
0402 ///            function
0403 /// @param[in] middleIndexStart The middle spacepoint index that the kernel was
0404 ///            "started from"
0405 /// @param[in] maxMBDublets The maximal number of middle-bottom dublets found
0406 ///            for any middle spacepoint
0407 /// @param[in] maxMTDublets The maximal number of middle-top dublets found for
0408 ///            any middle spacepoint
0409 /// @param[in] maxTriplets The maximum number of triplets for which memory is
0410 ///            booked
0411 /// @param[in] nAllTriplets The number of triplets that were reconstructed for
0412 ///            this middle spacepoint group
0413 /// @param[in] nParallelMiddleSPs The number of middle spacepoints that the
0414 ///            "largest" kernels may be started on in parallel
0415 /// @param[in] nMiddleSPsProcessed The number of middle spacepoints that the
0416 ///            kernel was started on in parallel
0417 /// @param[in] middleBottomCounts 1-D array of the number of middle-bottom
0418 ///            dublets found for each middle spacepoint
0419 /// @param[in] nBottomSPs The number of bottom spacepoints in @c bottomSPs
0420 /// @param[in] bottomSPs Properties of all of the bottom spacepoints
0421 /// @param[in] nMiddleSPs The number of middle spacepoints in @c middleSPs
0422 /// @param[in] middleSPs Properties of all of the middle spacepoints
0423 /// @param[in] nTopSPs The number of top spacepoints in @c topSPs
0424 /// @param[in] topSPs Properties of all of the top spacepoints
0425 /// @param[in] tripletsPerBottomDublet 1-dimensional array of the triplet
0426 ///            counts for each bottom spacepoint
0427 /// @param[in] tripletIndices 2-dimensional matrix of the indices of the
0428 ///            triplets created for each middle-bottom spacepoint dublet
0429 /// @param[in] allTriplets 1-dimensional array of all the found triplets
0430 /// @param[in] deltaInvHelixDiameter Parameter from @c Acts::SeedFilterConfig
0431 /// @param[in] deltaRMin Parameter from @c Acts::SeedFilterConfig
0432 /// @param[in] compatSeedWeight Parameter from @c Acts::SeedFilterConfig
0433 /// @param[in] compatSeedLimit Parameter from @c Acts::SeedFilterConfig
0434 /// @param[out] nFilteredTriplets Pointer to the scalar counting all triplets
0435 ///             that survive this filter
0436 /// @param[out] filteredTriplets 1-dimensional array of triplets that survive
0437 ///             this filter
0438 ///
0439 __global__ void filterTriplets2Sp(
0440     TripletFilterConfig::seedWeightFunc_t seedWeight,
0441     TripletFilterConfig::singleSeedCutFunc_t singleSeedCut,
0442     std::size_t middleIndexStart, unsigned int maxMBDublets,
0443     unsigned int maxMTDublets, unsigned int maxTriplets,
0444     unsigned int nAllTriplets, std::size_t nParallelMiddleSPs,
0445     std::size_t nMiddleSPsProcessed, unsigned int* middleBottomCounts,
0446     std::size_t nBottomSPs, const Details::SpacePoint* bottomSPs,
0447     std::size_t nMiddleSPs, const Details::SpacePoint* middleSPs,
0448     std::size_t nTopSPs, const Details::SpacePoint* topSPs,
0449     const unsigned int* tripletsPerBottomDublet,
0450     const std::size_t* tripletIndices, const Details::Triplet* allTriplets,
0451     float deltaInvHelixDiameter, float deltaRMin, float compatSeedWeight,
0452     std::size_t compatSeedLimit, unsigned int* nFilteredTriplets,
0453     Details::Triplet* filteredTriplets) {
0454   // Sanity checks.
0455   assert(seedWeight != nullptr);
0456   assert(singleSeedCut != nullptr);
0457   assert(middleIndexStart + nMiddleSPsProcessed <= nMiddleSPs);
0458 
0459   // Find the middle spacepoint index to operate on.
0460   const unsigned int middleIndexOffset = blockIdx.x * blockDim.x + threadIdx.x;
0461   if (middleIndexOffset >= nMiddleSPsProcessed) {
0462     return;
0463   }
0464   const unsigned int middleIndex = middleIndexStart + middleIndexOffset;
0465   assert(middleIndex < nMiddleSPs);
0466 
0467   // Find the middle-bottom dublet to operate on.
0468   const unsigned int middleBottomPairCount = middleBottomCounts[middleIndex];
0469   const unsigned int bottomDubletIndex = blockIdx.y * blockDim.y + threadIdx.y;
0470   if (bottomDubletIndex >= middleBottomPairCount) {
0471     return;
0472   }
0473 
0474   // Find the triplet to operate on.
0475   const unsigned int nTripletsForMiddleBottom = ACTS_CUDA_MATRIX2D_ELEMENT(
0476       tripletsPerBottomDublet, nParallelMiddleSPs, maxMBDublets,
0477       middleIndexOffset, bottomDubletIndex);
0478   const unsigned int tripletCandidateIndex =
0479       blockIdx.z * blockDim.z + threadIdx.z;
0480   if (tripletCandidateIndex >= nTripletsForMiddleBottom) {
0481     return;
0482   }
0483 
0484   // Get the index of this triplet.
0485   const std::size_t triplet1Index = ACTS_CUDA_MATRIX3D_ELEMENT(
0486       tripletIndices, nParallelMiddleSPs, maxMBDublets, maxMTDublets,
0487       middleIndexOffset, bottomDubletIndex, tripletCandidateIndex);
0488   assert(triplet1Index < nAllTriplets);
0489 
0490   // Load this triplet into the thread.
0491   Details::Triplet triplet1 = allTriplets[triplet1Index];
0492 
0493   // Pre-compute some variables.
0494   float lowerLimitCurv = triplet1.invHelixDiameter - deltaInvHelixDiameter;
0495   float upperLimitCurv = triplet1.invHelixDiameter + deltaInvHelixDiameter;
0496   float currentTop_r = topSPs[triplet1.topIndex].radius;
0497 
0498   // Allow only a maximum number of top spacepoints in the filtering. Since a
0499   // limit is coming from @c compatSeedLimit anyway, this could potentially be
0500   // re-written with an array allocation, instead of statically defining the
0501   // array's size.
0502   static constexpr std::size_t MAX_TOP_SP = 10;
0503   assert(compatSeedLimit < MAX_TOP_SP);
0504   float compatibleSeedR[MAX_TOP_SP];
0505   std::size_t nCompatibleSeedR = 0;
0506 
0507   // Loop over all the other triplets found for this bottom-middle dublet.
0508   for (std::size_t i = 0; i < nTripletsForMiddleBottom; ++i) {
0509     // Don't consider the same triplet that the thread is evaluating in the
0510     // first place.
0511     if (i == tripletCandidateIndex) {
0512       continue;
0513     }
0514     // Get the index of the second triplet.
0515     const std::size_t triplet2Index = ACTS_CUDA_MATRIX3D_ELEMENT(
0516         tripletIndices, nParallelMiddleSPs, maxMBDublets, maxMTDublets,
0517         middleIndexOffset, bottomDubletIndex, i);
0518     assert(triplet2Index < nAllTriplets);
0519     assert(triplet2Index != triplet1Index);
0520 
0521     // Load the second triplet into the thread.
0522     const Details::Triplet triplet2 = allTriplets[triplet2Index];
0523     assert(triplet1.bottomIndex == triplet2.bottomIndex);
0524 
0525     // compared top SP should have at least deltaRMin distance
0526     float otherTop_r = topSPs[triplet2.topIndex].radius;
0527     float deltaR = currentTop_r - otherTop_r;
0528     if (fabs(deltaR) < deltaRMin) {
0529       continue;
0530     }
0531 
0532     // curvature difference within limits?
0533     // TODO: how much slower than sorting all vectors by curvature
0534     // and breaking out of loop? i.e. is vector size large (e.g. in jets?)
0535     if (triplet2.invHelixDiameter < lowerLimitCurv) {
0536       continue;
0537     }
0538     if (triplet2.invHelixDiameter > upperLimitCurv) {
0539       continue;
0540     }
0541 
0542     bool newCompSeed = true;
0543     for (std::size_t k = 0; k < nCompatibleSeedR; ++k) {
0544       // original ATLAS code uses higher min distance for 2nd found compatible
0545       // seed (20mm instead of 5mm)
0546       // add new compatible seed only if distance larger than rmin to all
0547       // other compatible seeds
0548       if (fabs(compatibleSeedR[k] - otherTop_r) < deltaRMin) {
0549         newCompSeed = false;
0550         break;
0551       }
0552     }
0553     if (newCompSeed) {
0554       compatibleSeedR[nCompatibleSeedR++] = otherTop_r;
0555       assert(nCompatibleSeedR < MAX_TOP_SP);
0556       triplet1.weight += compatSeedWeight;
0557     }
0558     if (nCompatibleSeedR >= compatSeedLimit) {
0559       break;
0560     }
0561   }
0562 
0563   // Decide whether to keep the triplet or not.
0564   triplet1.weight +=
0565       seedWeight(bottomSPs[triplet1.bottomIndex], middleSPs[middleIndex],
0566                  topSPs[triplet1.topIndex]);
0567   if (!singleSeedCut(triplet1.weight, bottomSPs[triplet1.bottomIndex],
0568                      middleSPs[middleIndex], topSPs[triplet1.topIndex])) {
0569     return;
0570   }
0571 
0572   // Put the triplet into the "filtered list".
0573   const unsigned int tripletRow = atomicAdd(nFilteredTriplets, 1);
0574   assert(tripletRow < nAllTriplets);
0575   filteredTriplets[tripletRow] = triplet1;
0576   return;
0577 }
0578 
0579 }  // namespace Kernels
0580 
0581 namespace Details {
0582 
0583 std::vector<std::vector<Triplet>> findTriplets(
0584     const Info::Device& device, std::size_t maxBlockSize,
0585     const DubletCounts& dubletCounts, const SeedFilterConfig& seedConfig,
0586     const TripletFilterConfig& filterConfig, std::size_t nBottomSPs,
0587     const device_array<SpacePoint>& bottomSPs, std::size_t nMiddleSPs,
0588     const device_array<SpacePoint>& middleSPs, std::size_t nTopSPs,
0589     const device_array<SpacePoint>& topSPs,
0590     const device_array<unsigned int>& middleBottomCounts,
0591     const device_array<std::size_t>& middleBottomDublets,
0592     const device_array<unsigned int>& middleTopCounts,
0593     const device_array<std::size_t>& middleTopDublets,
0594     float maxScatteringAngle2, float sigmaScattering, float minHelixDiameter2,
0595     float pT2perRadius, float impactMax) {
0596   // Calculate the parallelisation for the parameter transformation.
0597   const int numBlocksLT =
0598       (dubletCounts.nDublets + maxBlockSize - 1) / maxBlockSize;
0599 
0600   // Create the arrays holding the linear transformed spacepoint parameters.
0601   auto bottomSPLinTransArray =
0602       make_device_array<LinCircle>(nMiddleSPs * dubletCounts.maxMBDublets);
0603   auto topSPLinTransArray =
0604       make_device_array<LinCircle>(nMiddleSPs * dubletCounts.maxMTDublets);
0605 
0606   // Launch the coordinate transformations.
0607   Kernels::transformCoordinates<<<numBlocksLT, maxBlockSize>>>(
0608       dubletCounts.nDublets, dubletCounts.maxMBDublets,
0609       dubletCounts.maxMTDublets, nBottomSPs, bottomSPs.get(), nMiddleSPs,
0610       middleSPs.get(), nTopSPs, topSPs.get(), middleBottomCounts.get(),
0611       middleBottomDublets.get(), middleTopCounts.get(), middleTopDublets.get(),
0612       bottomSPLinTransArray.get(), topSPLinTransArray.get());
0613   ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0614   ACTS_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0615 
0616   // With the information from @c Acts::Cuda::Details::DubletCounts, figure out
0617   // how many middle spacepoints we could handle at the same time in the triplet
0618   // finding/filtering.
0619 
0620   // For one middle spacepoint we need the following amount:
0621   const std::size_t memorySizePerMiddleSP =
0622       // First let's consider the storage of the triplet objects themselves.
0623       2 * dubletCounts.maxTriplets * sizeof(Triplet) +
0624       // Then the objects holding indices to the triplets per middle-bottom
0625       // dublet.
0626       dubletCounts.maxMBDublets * sizeof(unsigned int) +
0627       dubletCounts.maxMBDublets * dubletCounts.maxMTDublets *
0628           sizeof(std::size_t) +
0629       // Finally the array holding the filtered triplet counts per middle
0630       // spacepoint.
0631       sizeof(unsigned int);
0632 
0633   // See how many we can fit into the (still) available memory.
0634   const std::size_t nParallelMiddleSPs =
0635       std::min(MemoryManager::instance().availableMemory(device.id) /
0636                    memorySizePerMiddleSP,
0637                nMiddleSPs);
0638   assert(nParallelMiddleSPs > 0);
0639 
0640   // Helper variables for handling the various object counts in device memory.
0641   enum ObjectCountType : int {
0642     AllTriplets = 0,        ///< All viable triplets
0643     FilteredTriplets = 1,   ///< Triplets after the "2SpFixed" filtering
0644     MaxTripletsPerSpB = 2,  ///< Maximal number of triplets found per SpB
0645     NObjectCountTypes = 3   ///< The number of different object/counter types
0646   };
0647 
0648   // Set up the object counters in device memory. The host array is only used to
0649   // reset the device memory before every iteration.
0650   auto objectCountsHostNull = make_host_array<unsigned int>(NObjectCountTypes);
0651   memset(objectCountsHostNull.get(), 0,
0652          NObjectCountTypes * sizeof(unsigned int));
0653   auto objectCountsHost = make_host_array<unsigned int>(NObjectCountTypes);
0654   auto objectCounts = make_device_array<unsigned int>(NObjectCountTypes);
0655 
0656   // Allocate enough memory for triplet candidates that would suffice for every
0657   // middle spacepoint.
0658   auto allTriplets =
0659       make_device_array<Triplet>(nParallelMiddleSPs * dubletCounts.maxTriplets);
0660   auto filteredTriplets =
0661       make_device_array<Triplet>(nParallelMiddleSPs * dubletCounts.maxTriplets);
0662   auto filteredTripletsHost =
0663       make_host_array<Triplet>(nParallelMiddleSPs * dubletCounts.maxTriplets);
0664 
0665   // Allocate and initialise the array holding the per bottom dublet triplet
0666   // numbers.
0667   auto tripletsPerBottomDubletHost = make_host_array<unsigned int>(
0668       nParallelMiddleSPs * dubletCounts.maxMBDublets);
0669   memset(tripletsPerBottomDubletHost.get(), 0,
0670          nParallelMiddleSPs * dubletCounts.maxMBDublets * sizeof(unsigned int));
0671   auto tripletsPerBottomDublet = make_device_array<unsigned int>(
0672       nParallelMiddleSPs * dubletCounts.maxMBDublets);
0673 
0674   // Allocate the array holding the indices of the triplets found for a given
0675   // bottom-middle spacepoint combination.
0676   auto tripletIndices = make_device_array<std::size_t>(
0677       nParallelMiddleSPs * dubletCounts.maxMBDublets *
0678       dubletCounts.maxMTDublets);
0679 
0680   // Allocate and initialise the arrays holding the per-middle-spacepoint
0681   // filtered triplet counts.
0682   auto filteredTripletCountsHostNull =
0683       make_host_array<unsigned int>(nParallelMiddleSPs);
0684   memset(filteredTripletCountsHostNull.get(), 0,
0685          nParallelMiddleSPs * sizeof(unsigned int));
0686   auto filteredTripletCountsHost =
0687       make_host_array<unsigned int>(nParallelMiddleSPs);
0688   auto filteredTripletCounts =
0689       make_device_array<unsigned int>(nParallelMiddleSPs);
0690 
0691   // Block size used in the triplet finding.
0692   const std::size_t blockSize = std::sqrt(maxBlockSize);
0693 
0694   // Create the result object.
0695   std::vector<std::vector<Triplet>> result(nMiddleSPs);
0696 
0697   // Copy the dublet counts back to the host.
0698   auto middleBottomCountsHost = make_host_array<unsigned int>(nMiddleSPs);
0699   copyToHost(middleBottomCountsHost, middleBottomCounts, nMiddleSPs);
0700   auto middleTopCountsHost = make_host_array<unsigned int>(nMiddleSPs);
0701   copyToHost(middleTopCountsHost, middleTopCounts, nMiddleSPs);
0702 
0703   // Execute the triplet finding and filtering in the maximal allowed groups of
0704   // middle spacepoints.
0705   for (std::size_t middleIndex = 0; middleIndex < nMiddleSPs;
0706        middleIndex += nParallelMiddleSPs) {
0707     // Reset the device arrays.
0708     copyToDevice(objectCounts, objectCountsHostNull, NObjectCountTypes);
0709     copyToDevice(tripletsPerBottomDublet, tripletsPerBottomDubletHost,
0710                  nParallelMiddleSPs * dubletCounts.maxMBDublets);
0711 
0712     // The number of middle spacepoints to process in this iteration.
0713     const std::size_t nMiddleSPsProcessed =
0714         std::min(nParallelMiddleSPs, nMiddleSPs - middleIndex);
0715 
0716     // Calculate the parallelisation for the triplet finding for this collection
0717     // of middle spacepoints.
0718     const dim3 blockSizeFT(1, maxBlockSize);
0719     const dim3 numBlocksFT(
0720         (nMiddleSPsProcessed + blockSizeFT.x - 1) / blockSizeFT.x,
0721         (dubletCounts.maxTriplets + blockSizeFT.y - 1) / blockSizeFT.y);
0722     assert(dubletCounts.maxTriplets > 0);
0723 
0724     // Launch the triplet finding for this middle spacepoint.
0725     Kernels::findTriplets<<<numBlocksFT, blockSizeFT>>>(
0726         // Parameters needed to use all the arrays.
0727         middleIndex, dubletCounts.maxMBDublets, dubletCounts.maxMTDublets,
0728         dubletCounts.maxTriplets, nParallelMiddleSPs, nMiddleSPsProcessed,
0729         // Parameters of all of the spacepoints.
0730         nBottomSPs, bottomSPs.get(), nMiddleSPs, middleSPs.get(), nTopSPs,
0731         topSPs.get(),
0732         // Arrays describing the identified dublets.
0733         middleBottomCounts.get(), middleBottomDublets.get(),
0734         middleTopCounts.get(), middleTopDublets.get(),
0735         // The transformed parameters of the bottom and top spacepoints for
0736         // spacepoints taking part in dublets.
0737         bottomSPLinTransArray.get(), topSPLinTransArray.get(),
0738         // Configuration constants.
0739         maxScatteringAngle2, sigmaScattering, minHelixDiameter2, pT2perRadius,
0740         impactMax, seedConfig.impactWeightFactor,
0741         // Variables storing the results of the triplet finding.
0742         tripletsPerBottomDublet.get(), tripletIndices.get(),
0743         objectCounts.get() + MaxTripletsPerSpB,
0744         objectCounts.get() + AllTriplets, allTriplets.get());
0745     ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0746     ACTS_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0747 
0748     // Retrieve the object counts.
0749     copyToHost(objectCountsHost, objectCounts, NObjectCountTypes);
0750     const unsigned int nAllTriplets = objectCountsHost.get()[AllTriplets];
0751     const unsigned int nMaxTripletsPerSpB =
0752         objectCountsHost.get()[MaxTripletsPerSpB];
0753 
0754     // If no triplet has been found, stop here for this middle spacepoint range.
0755     if (nAllTriplets == 0) {
0756       continue;
0757     }
0758 
0759     // Calculate the parallelisation for the "2SpFixed" filtering of the
0760     // triplets.
0761     const dim3 blockSizeF2SP(1, blockSize, blockSize);
0762     const dim3 numBlocksF2SP(
0763         (nMiddleSPsProcessed + blockSizeF2SP.x - 1) / blockSizeF2SP.x,
0764         (dubletCounts.maxMBDublets + blockSizeF2SP.y - 1) / blockSizeF2SP.y,
0765         (nMaxTripletsPerSpB + blockSizeF2SP.z - 1) / blockSizeF2SP.z);
0766     assert(dubletCounts.maxMBDublets > 0);
0767     assert(nMaxTripletsPerSpB > 0);
0768 
0769     // Launch the "2SpFixed" filtering of the triplets.
0770     assert(filterConfig.seedWeight != nullptr);
0771     assert(filterConfig.singleSeedCut != nullptr);
0772     Kernels::filterTriplets2Sp<<<numBlocksF2SP, blockSizeF2SP>>>(
0773         // Pointers to the user provided filter functions.
0774         filterConfig.seedWeight, filterConfig.singleSeedCut,
0775         // Parameters needed to use all the arrays.
0776         middleIndex, dubletCounts.maxMBDublets, dubletCounts.maxMTDublets,
0777         dubletCounts.maxTriplets, nAllTriplets, nParallelMiddleSPs,
0778         nMiddleSPsProcessed, middleBottomCounts.get(),
0779         // Parameters of all of the spacepoints.
0780         nBottomSPs, bottomSPs.get(), nMiddleSPs, middleSPs.get(), nTopSPs,
0781         topSPs.get(),
0782         // Variables holding the results of the triplet finding.
0783         tripletsPerBottomDublet.get(), tripletIndices.get(), allTriplets.get(),
0784         // Configuration constants.
0785         seedConfig.deltaInvHelixDiameter, seedConfig.deltaRMin,
0786         seedConfig.compatSeedWeight, seedConfig.compatSeedLimit,
0787         // Variables storing the results of the filtering.
0788         objectCounts.get() + FilteredTriplets, filteredTriplets.get());
0789     ACTS_CUDA_ERROR_CHECK(cudaGetLastError());
0790     ACTS_CUDA_ERROR_CHECK(cudaDeviceSynchronize());
0791 
0792     // Retrieve the result counts of the filtering.
0793     copyToHost(objectCountsHost, objectCounts, NObjectCountTypes);
0794 
0795     // The number of triplets that survived the 2Sp filtering.
0796     const unsigned int nFilteredTriplets =
0797         objectCountsHost.get()[FilteredTriplets];
0798     if (nFilteredTriplets == 0) {
0799       continue;
0800     }
0801 
0802     // Move the filtered triplets back to the host for the final selection.
0803     ACTS_CUDA_ERROR_CHECK(cudaMemcpy(
0804         filteredTripletsHost.get(), filteredTriplets.get(),
0805         nFilteredTriplets * sizeof(Triplet), cudaMemcpyDeviceToHost));
0806 
0807     // Fill the output variable.
0808     for (std::size_t i = 0; i < nFilteredTriplets; ++i) {
0809       // Access the triplet.
0810       const Triplet& triplet = filteredTripletsHost.get()[i];
0811       // Put it into the output object.
0812       result[triplet.middleIndex].push_back(triplet);
0813     }
0814   }
0815 
0816   // Return the indices of all identified triplets.
0817   assert(result.size() == nMiddleSPs);
0818   return result;
0819 }
0820 
0821 }  // namespace Details
0822 }  // namespace Cuda
0823 }  // namespace Acts