cpu/soa/intersectors.cpp

0001 // This file is part of the ACTS project.
0002 //
0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project
0004 //
0005 // This Source Code Form is subject to the terms of the Mozilla Public
0006 // License, v. 2.0. If a copy of the MPL was not distributed with this
0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/.
0008
0009 // Algebra include(s).
0010 // #include "detray/plugins/algebra/array_definitions.hpp"
0011 // #include "detray/plugins/algebra/eigen_definitions.hpp"
0012 #include "algebra/vc_aos.hpp"
0013 #include "algebra/vc_soa.hpp"
0014
0015 // Detray core include(s).
0016 #include "detray/definitions/containers.hpp"
0017 #include "detray/definitions/indexing.hpp"
0018 #include "detray/geometry/mask.hpp"
0019 #include "detray/geometry/shapes.hpp"
0020 #include "detray/geometry/surface_descriptor.hpp"
0021 #include "detray/navigation/intersection/ray_intersector.hpp"
0022 #include "detray/tracks/ray.hpp"
0023 #include "detray/utils/logging.hpp"
0024
0025 // Detray benchmark include(s)
0026 #include "detray/benchmarks/types.hpp"
0027
0028 // Detray test include(s)
0029 #include "detray/test/common/track_generators.hpp"
0030 #include "detray/test/utils/planes_along_direction.hpp"
0031
0032 // Google Benchmark include(s)
0033 #include <benchmark/benchmark.h>
0034
0035 // System include(s)
0036 #include <algorithm>
0037
0038 using namespace detray;
0039
0040 static constexpr unsigned int theta_steps{2000u};
0041 static constexpr unsigned int phi_steps{2000u};
0042 static constexpr unsigned int n_surfaces{16u};
0043
0044 /// Linear algebra implementation using SoA memory layout
0045 using algebra_v = detray::vc_soa<benchmarks::scalar>;
0046
0047 /// Linear algebra implementation using AoS memory layout
0048 // using algebra_array = detray::array<benchmarks::scalar>;
0049 using algebra_vc_aos = detray::vc_aos<benchmarks::scalar>;
0050 // using algebra_eigen = detray::eigen<benchmarks::scalar>;
0051
0052 using algebra_s = algebra_vc_aos;
0053
0054 // Size of an SoA batch
0055 constexpr std::size_t simd_size{dscalar<algebra_v>::size()};
0056
0057 using ray_t = detail::ray<algebra_s>;
0058
0059 namespace {
0060
0061 enum class mask_id : unsigned int {
0062   e_rectangle2D = 0,
0063   e_cylinder2D = 1,
0064   e_conc_cylinder3 = 2,
0065 };
0066
0067 enum class material_id : unsigned int {
0068   e_material_slab = 0,
0069 };
0070
0071 // Helper type definitions.
0072 using mask_link_t = dtyped_index<mask_id, dindex>;
0073 using material_link_t = dtyped_index<material_id, dindex>;
0074
0075 using surface_desc_t = surface_descriptor<mask_link_t, material_link_t>;
0076
0077 /// Generate a number of test rays
0078 std::vector<ray_t> generate_rays() {
0079   using ray_generator_t = uniform_track_generator<ray_t>;
0080
0081   // Iterate through uniformly distributed momentum directions
0082   auto ray_generator = ray_generator_t{};
0083   ray_generator.config().theta_steps(theta_steps).phi_steps(phi_steps);
0084
0085   std::vector<ray_t> rays;
0086   std::ranges::copy(ray_generator, std::back_inserter(rays));
0087
0088   return rays;
0089 }
0090
0091 /// Generate the translation distances to place the surfaces
0092 template <concepts::algebra algebra_t>
0093 dvector<dscalar<algebra_t>> get_dists(std::size_t n) {
0094   using scalar_t = dscalar<algebra_t>;
0095
0096   dvector<benchmarks::scalar> dists;
0097
0098   for (std::size_t i = 1u; i <= n; ++i) {
0099     dists.push_back(static_cast<scalar_t>(i));
0100   }
0101
0102   return dists;
0103 }
0104
0105 /// Specialization for hthe SOA memory layout (need n/simd_size samples)
0106 template <>
0107 dvector<dscalar<algebra_v>> get_dists<algebra_v>(std::size_t n) {
0108   using scalar_t = dscalar<algebra_v>;
0109   using value_t = typename algebra_v::value_type;
0110
0111   dvector<scalar_t> dists;
0112   dists.resize(static_cast<std::size_t>(std::ceil(n / simd_size)));
0113   for (std::size_t i = 0u; i < dists.size(); ++i) {
0114     dists[i] = scalar_t::IndexesFromZero() +
0115                scalar_t(static_cast<value_t>(i)) * simd_size + scalar_t(1.f);
0116   }
0117
0118   return dists;
0119 }
0120
0121 }  // namespace
0122
0123 /// This benchmark runs intersection with the planar intersector
0124 void BM_INTERSECT_PLANES_AOS(benchmark::State& state) {
0125   using mask_t = mask<rectangle2D, algebra_s, std::uint_least16_t>;
0126
0127   auto dists = get_dists<algebra_s>(n_surfaces);
0128   auto [plane_descs, transforms] = test::planes_along_direction<algebra_s>(
0129       dists, dvector3D<algebra_s>{1.f, 1.f, 1.f});
0130
0131   constexpr mask_t rect{0u, 100.f, 200.f};
0132   std::vector<mask_t> masks(plane_descs.size(), rect);
0133
0134   const auto rays = generate_rays();
0135   const auto pi = ray_intersector<rectangle2D, algebra_s>{};
0136
0137 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0138   std::size_t hit{0u};
0139   std::size_t miss{0u};
0140 #endif
0141
0142   for (auto _ : state) {
0143 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0144     hit = 0u;
0145     miss = 0u;
0146 #endif
0147
0148     // Iterate through uniformly distributed momentum directions
0149     for (const auto& ray : rays) {
0150       for (std::size_t i = 0u; i < plane_descs.size(); ++i) {
0151         auto is = pi(ray, plane_descs[i], masks[i], transforms[i]);
0152
0153 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0154         if (is.is_inside()) {
0155           ++hit;
0156         } else {
0157           ++miss;
0158         }
0159 #endif
0160
0161         benchmark::DoNotOptimize(is);
0162       }
0163     }
0164   }
0165 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0166   DETRAY_INFO_HOST(mask_t::shape::name
0167                    << " AoS: hit/miss ... " << hit << " / " << miss
0168                    << " (total: " << rays.size() * masks.size() << ")");
0169 #endif  // DETRAY_BENCHMARK_PRINTOUTS
0170 }
0171
0172 BENCHMARK(BM_INTERSECT_PLANES_AOS)
0173 #ifdef DETRAY_BENCHMARK_MULTITHREAD
0174     ->ThreadRange(1, benchmark::CPUInfo::Get().num_cpus)
0175 #endif
0176     ->Unit(benchmark::kMillisecond);
0177
0178 /// This benchmark runs intersection with the planar intersector
0179 void BM_INTERSECT_PLANES_SOA(benchmark::State& state) {
0180   using mask_t = mask<rectangle2D, algebra_v, std::uint_least16_t>;
0181   using vector3_t = dvector3D<algebra_v>;
0182
0183   auto dists = get_dists<algebra_v>(n_surfaces);
0184   auto [plane_descs, transforms] =
0185       test::planes_along_direction<algebra_v>(dists, vector3_t{1.f, 1.f, 1.f});
0186
0187   std::vector<mask_t> masks{};
0188   for (std::size_t i = 0u; i < plane_descs.size(); ++i) {
0189     masks.emplace_back(0u, 100.f, 200.f);
0190   }
0191
0192   const auto rays = generate_rays();
0193   const auto pi = ray_intersector<rectangle2D, algebra_v>{};
0194
0195 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0196   std::size_t hit{0u};
0197   std::size_t miss{0u};
0198 #endif
0199
0200   for (auto _ : state) {
0201 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0202     hit = 0u;
0203     miss = 0u;
0204 #endif
0205
0206     // Iterate through uniformly distributed momentum directions
0207     for (const auto& ray : rays) {
0208       for (std::size_t i = 0u; i < plane_descs.size(); ++i) {
0209         auto is = pi(ray, plane_descs[i], masks[i], transforms[i]);
0210
0211         benchmark::DoNotOptimize(is);
0212
0213 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0214         hit += is.is_inside().count();
0215         miss += simd_size - is.is_inside().count();
0216 #endif
0217       }
0218     }
0219   }
0220
0221 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0222   DETRAY_INFO_HOST(mask_t::shape::name
0223                    << " SoA: hit/miss ... " << hit << " / " << miss
0224                    << " (total: " << rays.size() * masks.size() * simd_size
0225                    << ")");
0226 #endif  // DETRAY_BENCHMARK_PRINTOUTS
0227 }
0228
0229 BENCHMARK(BM_INTERSECT_PLANES_SOA)
0230 #ifdef DETRAY_BENCHMARK_MULTITHREAD
0231     ->ThreadRange(1, benchmark::CPUInfo::Get().num_cpus)
0232 #endif
0233     ->Unit(benchmark::kMillisecond);
0234
0235 /// This benchmark runs intersection with the cylinder intersector
0236 void BM_INTERSECT_CYLINDERS_AOS(benchmark::State& state) {
0237   using transform3_t = dtransform3D<algebra_s>;
0238   using scalar_t = dscalar<algebra_s>;
0239
0240   using mask_t = mask<cylinder2D, algebra_s, std::uint_least16_t>;
0241
0242   std::vector<mask_t> masks;
0243   for (const scalar_t r : get_dists<algebra_s>(n_surfaces)) {
0244     masks.emplace_back(0u, r, -100.f, 100.f);
0245   }
0246
0247   transform3_t trf{};
0248
0249   mask_link_t mask_link{mask_id::e_conc_cylinder3, 0u};
0250   material_link_t material_link{material_id::e_material_slab, 0u};
0251   surface_desc_t cyl_desc(0u, mask_link, material_link, 0u,
0252                           surface_id::e_sensitive);
0253
0254   // Iterate through uniformly distributed momentum directions
0255   const auto rays = generate_rays();
0256   const auto cci = ray_intersector<cylinder2D, algebra_s>{};
0257 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0258   std::size_t hit{0u};
0259   std::size_t miss{0u};
0260 #endif
0261   for (auto _ : state) {
0262 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0263     hit = 0u;
0264     miss = 0u;
0265 #endif
0266
0267     // Iterate through uniformly distributed momentum directions
0268     for (const auto& ray : rays) {
0269       for (const auto& cylinder : masks) {
0270         auto is = cci(ray, cyl_desc, cylinder, trf);
0271
0272         static_assert(is.size() == 2u, "Wrong number of solutions");
0273 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0274         for (const auto& i : is) {
0275           if (i.is_inside()) {
0276             ++hit;
0277           } else {
0278             ++miss;
0279           }
0280         }
0281 #endif
0282         benchmark::DoNotOptimize(is);
0283       }
0284     }
0285   }
0286 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0287   DETRAY_INFO_HOST(mask_t::shape::name
0288                    << " AoS: hit/miss ... " << hit << " / " << miss
0289                    << " (total: " << rays.size() * masks.size() << ")");
0290 #endif  // DETRAY_BENCHMARK_PRINTOUTS
0291 }
0292
0293 BENCHMARK(BM_INTERSECT_CYLINDERS_AOS)
0294 #ifdef DETRAY_BENCHMARK_MULTITHREAD
0295     ->ThreadRange(1, benchmark::CPUInfo::Get().num_cpus)
0296 #endif
0297     ->Unit(benchmark::kMillisecond);
0298
0299 /// This benchmark runs intersection with the cylinder intersector
0300 void BM_INTERSECT_CYLINDERS_SOA(benchmark::State& state) {
0301   using transform3_t = dtransform3D<algebra_v>;
0302   using scalar_t = dscalar<algebra_v>;
0303
0304   using mask_t = mask<cylinder2D, algebra_v, std::uint_least16_t>;
0305
0306   std::vector<mask_t> masks;
0307   for (const scalar_t r : get_dists<algebra_v>(n_surfaces)) {
0308     masks.emplace_back(0u, r, -100.f, 100.f);
0309   }
0310
0311   transform3_t trf{};
0312
0313   mask_link_t mask_link{mask_id::e_conc_cylinder3, 0u};
0314   material_link_t material_link{material_id::e_material_slab, 0u};
0315   surface_desc_t cyl_desc(0u, mask_link, material_link, 0u,
0316                           surface_id::e_sensitive);
0317
0318   // Iterate through uniformly distributed momentum directions
0319   const auto rays = generate_rays();
0320   const auto cci = ray_intersector<cylinder2D, algebra_v>{};
0321 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0322   std::size_t hit{0u};
0323   std::size_t miss{0u};
0324 #endif
0325   for (auto _ : state) {
0326 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0327     hit = 0u;
0328     miss = 0u;
0329 #endif
0330
0331     // Iterate through uniformly distributed momentum directions
0332     for (const auto& ray : rays) {
0333       for (const auto& cylinder : masks) {
0334         auto is = cci(ray, cyl_desc, cylinder, trf);
0335
0336         static_assert(is.size() == 2u, "Wrong number of solutions");
0337 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0338         for (const auto& i : is) {
0339           hit += i.is_inside().count();
0340           miss += simd_size - i.is_inside().count();
0341         }
0342 #endif
0343         benchmark::DoNotOptimize(is);
0344       }
0345     }
0346   }
0347 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0348   DETRAY_INFO_HOST(mask_t::shape::name
0349                    << " SoA: hit/miss ... " << hit << " / " << miss
0350                    << " (total: " << rays.size() * masks.size() * simd_size
0351                    << ")");
0352 #endif  // DETRAY_BENCHMARK_PRINTOUTS
0353 }
0354
0355 BENCHMARK(BM_INTERSECT_CYLINDERS_SOA)
0356 #ifdef DETRAY_BENCHMARK_MULTITHREAD
0357     ->ThreadRange(1, benchmark::CPUInfo::Get().num_cpus)
0358 #endif
0359     ->Unit(benchmark::kMillisecond);
0360
0361 /// This benchmark runs intersection with the concentric cylinder intersector
0362 void BM_INTERSECT_CONCENTRIC_CYLINDERS_AOS(benchmark::State& state) {
0363   using transform3_t = dtransform3D<algebra_s>;
0364   using scalar_t = dscalar<algebra_s>;
0365
0366   using mask_t = mask<concentric_cylinder2D, algebra_s, std::uint_least16_t>;
0367
0368   std::vector<mask_t> masks;
0369   for (const scalar_t r : get_dists<algebra_s>(n_surfaces)) {
0370     masks.emplace_back(0u, r, -100.f, 100.f);
0371   }
0372
0373   transform3_t trf{};
0374
0375   mask_link_t mask_link{mask_id::e_conc_cylinder3, 0u};
0376   material_link_t material_link{material_id::e_material_slab, 0u};
0377   surface_desc_t cyl_desc(0u, mask_link, material_link, 0u,
0378                           surface_id::e_sensitive);
0379
0380   // Iterate through uniformly distributed momentum directions
0381   const auto rays = generate_rays();
0382   const auto cci = ray_intersector<concentric_cylinder2D, algebra_s>{};
0383 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0384   std::size_t hit{0u};
0385   std::size_t miss{0u};
0386 #endif
0387
0388   for (auto _ : state) {
0389 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0390     hit = 0u;
0391     miss = 0u;
0392 #endif
0393     // Iterate through uniformly distributed momentum directions
0394     for (const auto& ray : rays) {
0395       for (const auto& cylinder : masks) {
0396         auto is = cci(ray, cyl_desc, cylinder, trf);
0397 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0398         if (is.is_inside()) {
0399           ++hit;
0400         } else {
0401           ++miss;
0402         }
0403 #endif
0404         benchmark::DoNotOptimize(is);
0405       }
0406     }
0407   }
0408 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0409   DETRAY_INFO_HOST(mask_t::shape::name
0410                    << " AoS: hit/miss ... " << hit << " / " << miss
0411                    << " (total: " << rays.size() * masks.size() << ")");
0412 #endif  // DETRAY_BENCHMARK_PRINTOUTS
0413 }
0414
0415 BENCHMARK(BM_INTERSECT_CONCENTRIC_CYLINDERS_AOS)
0416 #ifdef DETRAY_BENCHMARK_MULTITHREAD
0417     ->ThreadRange(1, benchmark::CPUInfo::Get().num_cpus)
0418 #endif
0419     ->Unit(benchmark::kMillisecond);
0420
0421 /// This benchmark runs intersection with the concentric cylinder intersector
0422 void BM_INTERSECT_CONCENTRIC_CYLINDERS_SOA(benchmark::State& state) {
0423   using transform3_t = dtransform3D<algebra_v>;
0424   using scalar_t = dscalar<algebra_v>;
0425
0426   using mask_t = mask<concentric_cylinder2D, algebra_v, std::uint_least16_t>;
0427
0428   std::vector<mask_t> masks;
0429   for (const scalar_t r : get_dists<algebra_v>(n_surfaces)) {
0430     masks.emplace_back(0u, r, -100.f, 100.f);
0431   }
0432
0433   transform3_t trf{};
0434
0435   mask_link_t mask_link{mask_id::e_conc_cylinder3, 0u};
0436   material_link_t material_link{material_id::e_material_slab, 0u};
0437   surface_desc_t cyl_desc(0u, mask_link, material_link, 0u,
0438                           surface_id::e_sensitive);
0439
0440   // Iterate through uniformly distributed momentum directions
0441   const auto rays = generate_rays();
0442   const auto cci = ray_intersector<concentric_cylinder2D, algebra_v>{};
0443 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0444   std::size_t hit{0u};
0445   std::size_t miss{0u};
0446 #endif
0447   for (auto _ : state) {
0448 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0449     hit = 0u;
0450     miss = 0u;
0451 #endif
0452     // Iterate through uniformly distributed momentum directions
0453     for (const auto& ray : rays) {
0454       for (const auto& cylinder : masks) {
0455         auto is = cci(ray, cyl_desc, cylinder, trf);
0456 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0457         hit += is.is_inside().count();
0458         miss += simd_size - is.is_inside().count();
0459 #endif
0460         benchmark::DoNotOptimize(is);
0461       }
0462     }
0463   }
0464 #ifdef DETRAY_BENCHMARK_PRINTOUTS
0465   DETRAY_INFO_HOST(mask_t::shape::name
0466                    << " SoA: hit/miss ... " << hit << " / " << miss
0467                    << " (total: " << rays.size() * masks.size() * simd_size
0468                    << ")");
0469 #endif
0470 }
0471
0472 BENCHMARK(BM_INTERSECT_CONCENTRIC_CYLINDERS_SOA)
0473 #ifdef DETRAY_BENCHMARK_MULTITHREAD
0474     ->ThreadRange(1, benchmark::CPUInfo::Get().num_cpus)
0475 #endif
0476     ->Unit(benchmark::kMillisecond);