Plugins/Gnn/ConnectedComponentCudaTests.cu

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008
0009 #include <boost/test/unit_test.hpp>
0010
0011 #include "ActsPlugins/Gnn/detail/ConnectedComponents.cuh"
0012
0013 #include <filesystem>
0014 #include <fstream>
0015 #include <random>
0016 #include <ranges>
0017 #include <set>
0018 #include <vector>
0019
0020 #include <boost/graph/adjacency_list.hpp>
0021 #include <boost/graph/connected_components.hpp>
0022
0023 using namespace ActsPlugins::detail;
0024
0025 using BoostGraph =
0026     boost::adjacency_list<boost::vecS, boost::vecS, boost::undirectedS>;
0027
0028 using Vi = std::vector<int>;
0029
0030 Vi checkLabeling(const std::vector<int> &src, const std::vector<int> &tgt) {
0031   std::size_t numNodes = std::max(*std::max_element(src.begin(), src.end()),
0032                                   *std::max_element(tgt.begin(), tgt.end())) +
0033                          1;
0034
0035   int *cudaSrc, *cudaTgt;
0036   BOOST_REQUIRE_EQUAL(cudaMalloc(&cudaSrc, src.size() * sizeof(int)),
0037                       cudaSuccess);
0038   BOOST_REQUIRE_EQUAL(cudaMalloc(&cudaTgt, tgt.size() * sizeof(int)),
0039                       cudaSuccess);
0040   BOOST_REQUIRE_EQUAL(cudaMemcpy(cudaSrc, src.data(), src.size() * sizeof(int),
0041                                  cudaMemcpyHostToDevice),
0042                       cudaSuccess);
0043   BOOST_REQUIRE_EQUAL(cudaMemcpy(cudaTgt, tgt.data(), src.size() * sizeof(int),
0044                                  cudaMemcpyHostToDevice),
0045                       cudaSuccess);
0046
0047   int *cudaLabels;
0048   BOOST_REQUIRE_EQUAL(cudaMalloc(&cudaLabels, numNodes * sizeof(int)),
0049                       cudaSuccess);
0050   int *cudaLabelsNext;
0051   BOOST_REQUIRE_EQUAL(cudaMalloc(&cudaLabelsNext, numNodes * sizeof(int)),
0052                       cudaSuccess);
0053
0054   labelConnectedComponents<<<1, 1024>>>(src.size(), cudaSrc, cudaTgt, numNodes,
0055                                         cudaLabels, cudaLabelsNext);
0056
0057   std::vector<int> labelsFromCuda(numNodes);
0058   BOOST_REQUIRE_EQUAL(
0059       cudaMemcpy(labelsFromCuda.data(), cudaLabels, numNodes * sizeof(int),
0060                  cudaMemcpyDeviceToHost),
0061       cudaSuccess);
0062
0063   BoostGraph G(numNodes);
0064
0065   for (auto i = 0ul; i < src.size(); ++i) {
0066     boost::add_edge(src[i], tgt[i], G);
0067   }
0068
0069   std::vector<std::size_t> cpuLabels(numNodes);
0070   boost::connected_components(G, &cpuLabels[0]);
0071
0072   // print
0073   std::cout << "cpu labels:     ";
0074   for (auto i = 0ul; i < numNodes; ++i) {
0075     std::cout << cpuLabels[i] << " ";
0076   }
0077   std::cout << std::endl;
0078
0079   std::cout << "my CUDA labels: ";
0080   for (auto i = 0ul; i < numNodes; ++i) {
0081     std::cout << labelsFromCuda[i] << " ";
0082   }
0083   std::cout << std::endl;
0084
0085   // check systematically
0086   std::map<int, int> boostToCuda;
0087   for (auto i = 0ul; i < numNodes; ++i) {
0088     if (boostToCuda.contains(cpuLabels[i])) {
0089       BOOST_CHECK_EQUAL(labelsFromCuda[i], boostToCuda.at(cpuLabels[i]));
0090     } else {
0091       auto [it, success] =
0092           boostToCuda.insert({cpuLabels[i], labelsFromCuda[i]});
0093       BOOST_CHECK(success);
0094     }
0095   }
0096
0097   return labelsFromCuda;
0098 }
0099
0100 BOOST_AUTO_TEST_CASE(simple_test_1) {
0101   Vi src{0, 1, 2, 3};
0102   Vi tgt{1, 2, 3, 4};
0103   checkLabeling(src, tgt);
0104 }
0105
0106 BOOST_AUTO_TEST_CASE(simple_test_2) {
0107   Vi src{0, 1, 2, 4, 5, 6};
0108   Vi tgt{1, 2, 3, 5, 6, 7};
0109   checkLabeling(src, tgt);
0110 }
0111
0112 BOOST_AUTO_TEST_CASE(simple_test_3) {
0113   Vi src{4, 3, 2, 1};
0114   Vi tgt{3, 2, 1, 0};
0115   checkLabeling(src, tgt);
0116 }
0117
0118 void testRelabeling(const Vi &labels, const Vi &refLabelMask,
0119                     const Vi &refPrefixSum, const Vi &refLabels) {
0120   dim3 blockDim = 32;
0121   dim3 gridDim = (labels.size() + blockDim.x - 1) / blockDim.x;
0122
0123   // Copy labels to device
0124   int *cudaLabels;
0125   BOOST_REQUIRE_EQUAL(cudaMalloc(&cudaLabels, labels.size() * sizeof(int)),
0126                       cudaSuccess);
0127   BOOST_REQUIRE_EQUAL(
0128       cudaMemcpy(cudaLabels, labels.data(), labels.size() * sizeof(int),
0129                  cudaMemcpyHostToDevice),
0130       cudaSuccess);
0131
0132   // Init label mask
0133   int *cudaLabelMask;
0134   BOOST_REQUIRE_EQUAL(cudaMalloc(&cudaLabelMask, labels.size() * sizeof(int)),
0135                       cudaSuccess);
0136   BOOST_REQUIRE_EQUAL(cudaMemset(cudaLabelMask, 0, labels.size() * sizeof(int)),
0137                       cudaSuccess);
0138
0139   makeLabelMask<<<1, 256>>>(labels.size(), cudaLabels, cudaLabelMask);
0140   BOOST_REQUIRE_EQUAL(cudaDeviceSynchronize(), cudaSuccess);
0141
0142   std::vector<int> labelMask(labels.size());
0143   BOOST_REQUIRE_EQUAL(
0144       cudaMemcpy(labelMask.data(), cudaLabelMask,
0145                  labelMask.size() * sizeof(int), cudaMemcpyDeviceToHost),
0146       cudaSuccess);
0147
0148   BOOST_CHECK_EQUAL_COLLECTIONS(labelMask.begin(), labelMask.end(),
0149                                 refLabelMask.begin(), refLabelMask.end());
0150
0151   // Prefix sum
0152   int *cudaPrefixSum;
0153   BOOST_REQUIRE_EQUAL(cudaMalloc(&cudaPrefixSum, labels.size() * sizeof(int)),
0154                       cudaSuccess);
0155   thrust::exclusive_scan(thrust::device.on(0), cudaLabelMask,
0156                          cudaLabelMask + labels.size(), cudaPrefixSum);
0157
0158   Vi prefixSum(labels.size());
0159   BOOST_REQUIRE_EQUAL(
0160       cudaMemcpy(prefixSum.data(), cudaPrefixSum, labels.size() * sizeof(int),
0161                  cudaMemcpyDeviceToHost),
0162       cudaSuccess);
0163   BOOST_CHECK_EQUAL_COLLECTIONS(prefixSum.begin(), prefixSum.end(),
0164                                 refPrefixSum.begin(), refPrefixSum.end());
0165
0166   // Relabel
0167   mapEdgeLabels<<<1, 256>>>(labels.size(), cudaLabels, cudaPrefixSum);
0168   BOOST_REQUIRE_EQUAL(cudaDeviceSynchronize(), cudaSuccess);
0169
0170   std::vector<int> labelsFromCuda(labels.size());
0171   BOOST_REQUIRE_EQUAL(
0172       cudaMemcpy(labelsFromCuda.data(), cudaLabels, labels.size() * sizeof(int),
0173                  cudaMemcpyDeviceToHost),
0174       cudaSuccess);
0175
0176   BOOST_CHECK_EQUAL_COLLECTIONS(labelsFromCuda.begin(), labelsFromCuda.end(),
0177                                 refLabels.begin(), refLabels.end());
0178 }
0179
0180 BOOST_AUTO_TEST_CASE(test_relabeling) {
0181   // clang-format off
0182   Vi labels      {0, 3, 5, 3, 0, 0};
0183   Vi refLabelMask{1, 0, 0, 1, 0, 1};
0184   Vi refPrefixSum{0, 1, 1, 1, 2, 2};
0185   Vi refLabels   {0, 1, 2, 1, 0, 0};
0186   // clang-format on
0187
0188   testRelabeling(labels, refLabelMask, refPrefixSum, refLabels);
0189 }
0190
0191 BOOST_AUTO_TEST_CASE(test_relabeling_2) {
0192   // clang-format off
0193   Vi labels      {1, 3, 5, 3, 1, 1};
0194   Vi refLabelMask{0, 1, 0, 1, 0, 1};
0195   Vi refPrefixSum{0, 0, 1, 1, 2, 2};
0196   Vi refLabels   {0, 1, 2, 1, 0, 0};
0197   // clang-format on
0198
0199   testRelabeling(labels, refLabelMask, refPrefixSum, refLabels);
0200 }
0201
0202 auto makeRandomGraph(std::size_t nodes, std::size_t edges) {
0203   std::default_random_engine rng(2345);
0204   std::uniform_int_distribution<> dist(0, nodes);
0205   std::set<std::pair<int, int>> set;
0206   Vi src(edges), tgt(edges);
0207   for (auto n = 0ul; n < edges; ++n) {
0208     auto a = dist(rng);
0209     auto b = dist(rng);
0210     if (a == b) {
0211       continue;
0212     }
0213     auto s = std::min(a, b);
0214     auto t = std::max(a, b);
0215     auto [it, success] = set.insert({s, t});
0216     if (success) {
0217       src.at(n) = s;
0218       tgt.at(n) = t;
0219     }
0220   }
0221
0222   return std::make_pair(src, tgt);
0223 }
0224
0225 BOOST_AUTO_TEST_CASE(test_random_graph) {
0226   auto [src, tgt] = makeRandomGraph(5, 10);
0227   checkLabeling(src, tgt);
0228 }
0229
0230 void testFullConnectedComponents(const Vi &src, const Vi &tgt) {
0231   const auto nNodes = std::max(*std::max_element(src.begin(), src.end()),
0232                                *std::max_element(tgt.begin(), tgt.end())) +
0233                       1;
0234
0235   // print src and tgt
0236   /*
0237     std::cout << "src: ";
0238     for (int i = 0; i < src.size(); ++i) {
0239       std::cout << src[i] << " ";
0240     }
0241     std::cout << std::endl;
0242     std::cout << "tgt: ";
0243     for (int i = 0; i < tgt.size(); ++i) {
0244       std::cout << tgt[i] << " ";
0245     }
0246     std::cout << std::endl;
0247   */
0248   cudaStream_t stream;
0249   BOOST_REQUIRE_EQUAL(cudaStreamCreate(&stream), cudaSuccess);
0250
0251   // copy src and tgt to device
0252   int *cudaSrc, *cudaTgt;
0253   BOOST_REQUIRE_EQUAL(
0254       cudaMallocAsync(&cudaSrc, src.size() * sizeof(int), stream), cudaSuccess);
0255   BOOST_REQUIRE_EQUAL(
0256       cudaMallocAsync(&cudaTgt, tgt.size() * sizeof(int), stream), cudaSuccess);
0257   BOOST_REQUIRE_EQUAL(
0258       cudaMemcpyAsync(cudaSrc, src.data(), src.size() * sizeof(int),
0259                       cudaMemcpyHostToDevice, stream),
0260       cudaSuccess);
0261   BOOST_REQUIRE_EQUAL(
0262       cudaMemcpyAsync(cudaTgt, tgt.data(), src.size() * sizeof(int),
0263                       cudaMemcpyHostToDevice, stream),
0264       cudaSuccess);
0265
0266   // init label array
0267   int *cudaLabels;
0268   BOOST_REQUIRE_EQUAL(
0269       cudaMallocAsync(&cudaLabels, nNodes * sizeof(int), stream), cudaSuccess);
0270
0271   // run connected components
0272   int cudaNumLabels = connectedComponentsCuda(src.size(), cudaSrc, cudaTgt,
0273                                               nNodes, cudaLabels, stream);
0274   BOOST_REQUIRE_EQUAL(cudaStreamSynchronize(stream), cudaSuccess);
0275
0276   // print message from last cuda error code
0277   std::cout << "CUDA Error msg: " << cudaGetErrorString(cudaPeekAtLastError())
0278             << std::endl;
0279   BOOST_REQUIRE_EQUAL(cudaGetLastError(), cudaSuccess);
0280
0281   // copy labels back
0282   std::vector<int> labelsFromCuda(nNodes);
0283   BOOST_REQUIRE_EQUAL(
0284       cudaMemcpyAsync(labelsFromCuda.data(), cudaLabels, nNodes * sizeof(int),
0285                       cudaMemcpyDeviceToHost, stream),
0286       cudaSuccess);
0287
0288   BOOST_REQUIRE_EQUAL(cudaFreeAsync(cudaSrc, stream), cudaSuccess);
0289   BOOST_REQUIRE_EQUAL(cudaFreeAsync(cudaTgt, stream), cudaSuccess);
0290   BOOST_REQUIRE_EQUAL(cudaFreeAsync(cudaLabels, stream), cudaSuccess);
0291
0292   // sync
0293   BOOST_REQUIRE_EQUAL(cudaStreamSynchronize(stream), cudaSuccess);
0294   BOOST_REQUIRE_EQUAL(cudaStreamDestroy(stream), cudaSuccess);
0295
0296   // print labelsFromCuda
0297   /*
0298       std::cout << "CUDA labels: ";
0299       for (int i = 0; i < nNodes; ++i) {
0300         std::cout << labelsFromCuda[i] << " ";
0301       }
0302       std::cout << std::endl;
0303   */
0304   // run boost graph for comparison
0305
0306   BoostGraph G(nNodes);
0307
0308   for (auto i = 0ul; i < src.size(); ++i) {
0309     boost::add_edge(src[i], tgt[i], G);
0310   }
0311
0312   std::vector<std::size_t> cpuLabels(boost::num_vertices(G));
0313   int cpuNumLabels = boost::connected_components(G, &cpuLabels[0]);
0314
0315   // check
0316   BOOST_CHECK_EQUAL(cudaNumLabels, cpuNumLabels);
0317   BOOST_CHECK_EQUAL_COLLECTIONS(labelsFromCuda.begin(), labelsFromCuda.end(),
0318                                 cpuLabels.begin(), cpuLabels.end());
0319 }
0320
0321 BOOST_AUTO_TEST_CASE(full_test_tiny_graph) {
0322   auto [src, tgt] = makeRandomGraph(5, 10);
0323   testFullConnectedComponents(src, tgt);
0324 }
0325
0326 BOOST_AUTO_TEST_CASE(full_test_small_graph) {
0327   auto [src, tgt] = makeRandomGraph(100, 500);
0328   testFullConnectedComponents(src, tgt);
0329 }
0330
0331 BOOST_AUTO_TEST_CASE(full_test_big_graph) {
0332   for (int i = 0; i < 3; ++i) {
0333     std::cout << "Test graph " << i << std::endl;
0334     auto [src, tgt] = makeRandomGraph(100'000, 500'000);
0335     testFullConnectedComponents(src, tgt);
0336   }
0337 }
0338
0339 BOOST_AUTO_TEST_CASE(test_from_file) {
0340   if (!std::filesystem::exists("edges_cuda_trackbuilding.txt")) {
0341     std::cout << "File edges_cuda_trackbuilding.txt not found" << std::endl;
0342     return;
0343   }
0344
0345   std::ifstream file("edges_cuda_trackbuilding.txt");
0346   std::vector<int> src, tgt;
0347   int a, b;
0348   while (file >> a >> b) {
0349     src.push_back(a);
0350     tgt.push_back(b);
0351   }
0352
0353   testFullConnectedComponents(src, tgt);
0354 }
0355
0356 // try this pathologic case
0357 BOOST_AUTO_TEST_CASE(special_1) {
0358   testFullConnectedComponents({1, 2}, {4, 7});
0359 }
0360
0361 BOOST_AUTO_TEST_CASE(special_2) {
0362   Vi src{1, 2};
0363   Vi tgt{4, 7};
0364   checkLabeling(src, tgt);
0365 }
0366
0367 BOOST_AUTO_TEST_CASE(special_3) {
0368   // clang-format off
0369   Vi labels      {0, 1, 2, 3, 1, 5, 6, 2};
0370   Vi refLabelMask{1, 1, 1, 1, 0, 1, 1, 0};
0371   Vi refPrefixSum{0, 1, 2, 3, 4, 4, 5, 6};
0372   Vi refLabels   {0, 1, 2, 3, 1, 4, 5, 2};
0373   // clang-format on
0374
0375   testRelabeling(labels, refLabelMask, refPrefixSum, refLabels);
0376 }
0377
0378 BOOST_AUTO_TEST_CASE(special_4) {
0379   Vi src{1, 2};
0380   Vi tgt{4, 7};
0381
0382   auto labelsFromCuda = checkLabeling(src, tgt);
0383
0384   Vi refLabelMask{1, 1, 1, 1, 0, 1, 1, 0};
0385   Vi refPrefixSum{0, 1, 2, 3, 4, 4, 5, 6};
0386   Vi refLabels{0, 1, 2, 3, 1, 4, 5, 2};
0387
0388   testRelabeling(labelsFromCuda, refLabelMask, refPrefixSum, refLabels);
0389 }
0390
0391 BOOST_AUTO_TEST_CASE(find_bounds) {
0392   Vi labels{0, 1, 2, 1, 2, 3, 0, 3, 0};
0393   Vi spids{0, 1, 2, 3, 4, 5, 6, 7, 8};
0394   int numberLabels = 4;
0395
0396   const std::vector<Vi> expectedTrackCandidates{
0397       {0, 6, 8},  // label 0
0398       {1, 3},     // label 1
0399       {2, 4},     // label 2
0400       {5, 7}      // label 3
0401   };
0402
0403   const Vi expectedBounds{0, 3, 5, 7, 9};  // bounds for labels 0, 1, 2, 3
0404
0405   cudaStream_t stream{};
0406   BOOST_REQUIRE_EQUAL(cudaStreamCreate(&stream), cudaSuccess);
0407
0408   // Copy spacepoint IDs to device
0409   int *cudaSpacepointIDs{};
0410   BOOST_REQUIRE_EQUAL(
0411       cudaMallocAsync(&cudaSpacepointIDs, spids.size() * sizeof(int), stream),
0412       cudaSuccess);
0413   BOOST_REQUIRE_EQUAL(cudaMemcpyAsync(cudaSpacepointIDs, spids.data(),
0414                                       spids.size() * sizeof(int),
0415                                       cudaMemcpyHostToDevice, stream),
0416                       cudaSuccess);
0417
0418   // Copy labels to device
0419   int *cudaLabels{};
0420   BOOST_REQUIRE_EQUAL(
0421       cudaMallocAsync(&cudaLabels, labels.size() * sizeof(int), stream),
0422       cudaSuccess);
0423   BOOST_REQUIRE_EQUAL(
0424       cudaMemcpyAsync(cudaLabels, labels.data(), labels.size() * sizeof(int),
0425                       cudaMemcpyHostToDevice, stream),
0426       cudaSuccess);
0427
0428   // Allocate bounds array
0429   int *cudaBounds{};
0430   BOOST_REQUIRE_EQUAL(
0431       cudaMallocAsync(&cudaBounds, numberLabels * sizeof(int), stream),
0432       cudaSuccess);
0433
0434   ActsPlugins::detail::findTrackCandidateBounds(cudaLabels, cudaSpacepointIDs,
0435                                                 cudaBounds, spids.size(),
0436                                                 numberLabels, stream);
0437   BOOST_REQUIRE_EQUAL(cudaGetLastError(), cudaSuccess);
0438
0439   // Copy bounds back to host
0440   std::vector<int> bounds(numberLabels);
0441   BOOST_REQUIRE_EQUAL(
0442       cudaMemcpyAsync(bounds.data(), cudaBounds, numberLabels * sizeof(int),
0443                       cudaMemcpyDeviceToHost, stream),
0444       cudaSuccess);
0445
0446   // Copy back sorted spacepoint IDs
0447   BOOST_REQUIRE_EQUAL(cudaMemcpyAsync(spids.data(), cudaSpacepointIDs,
0448                                       spids.size() * sizeof(int),
0449                                       cudaMemcpyDeviceToHost, stream),
0450                       cudaSuccess);
0451
0452   // Synchronize the stream
0453   BOOST_REQUIRE_EQUAL(cudaStreamSynchronize(stream), cudaSuccess);
0454
0455   // Free resources
0456   BOOST_REQUIRE_EQUAL(cudaStreamDestroy(stream), cudaSuccess);
0457   BOOST_REQUIRE_EQUAL(cudaFree(cudaSpacepointIDs), cudaSuccess);
0458   BOOST_REQUIRE_EQUAL(cudaFree(cudaLabels), cudaSuccess);
0459   BOOST_REQUIRE_EQUAL(cudaFree(cudaBounds), cudaSuccess);
0460
0461   bounds.push_back(spids.size());  // Add the last bound
0462   BOOST_REQUIRE_EQUAL(bounds.size(), expectedBounds.size());
0463   BOOST_CHECK_EQUAL_COLLECTIONS(bounds.begin(), bounds.end(),
0464                                 expectedBounds.begin(), expectedBounds.end());
0465
0466   std::vector<Vi> trackCandidates;
0467   for (int i = 0; i < numberLabels; ++i) {
0468     std::vector<int> trackCandidate;
0469     for (int j = bounds.at(i); j < bounds.at(i + 1); ++j) {
0470       trackCandidate.push_back(spids.at(j));
0471     }
0472     std::sort(trackCandidate.begin(), trackCandidate.end());
0473     trackCandidates.push_back(trackCandidate);
0474   }
0475
0476   BOOST_REQUIRE_EQUAL(trackCandidates.size(), expectedTrackCandidates.size());
0477   for (int i = 0; i < numberLabels; ++i) {
0478     BOOST_REQUIRE_EQUAL(trackCandidates.at(i).size(),
0479                         expectedTrackCandidates.at(i).size());
0480     BOOST_CHECK_EQUAL_COLLECTIONS(trackCandidates.at(i).begin(),
0481                                   trackCandidates.at(i).end(),
0482                                   expectedTrackCandidates.at(i).begin(),
0483                                   expectedTrackCandidates.at(i).end());
0484   }
0485 }