|
||||
File indexing completed on 2025-01-18 09:11:09
0001 // This file is part of the ACTS project. 0002 // 0003 // Copyright (C) 2016 CERN for the benefit of the ACTS project 0004 // 0005 // This Source Code Form is subject to the terms of the Mozilla Public 0006 // License, v. 2.0. If a copy of the MPL was not distributed with this 0007 // file, You can obtain one at https://mozilla.org/MPL/2.0/. 0008 0009 #pragma once 0010 0011 #include "Acts/Utilities/KDTree.hpp" 0012 0013 #include <algorithm> 0014 #include <array> 0015 #include <cmath> 0016 #include <functional> 0017 #include <iostream> 0018 #include <memory> 0019 #include <string> 0020 #include <vector> 0021 0022 namespace Acts { 0023 /// @brief A general implementation of an N dimensional DBScan clustering algorithm. 0024 /// 0025 /// This is a general implementation of an N dimensional DBScan clustering 0026 /// algorithm. The DBScan algorithm uses density information to cluster together 0027 /// points that are close to each other. 0028 /// 0029 /// For each point, we will look for the neighbours that are within the epsilon 0030 /// radius. If the number of neighbours is greater than the minimum number of 0031 /// points, we will start a new cluster and assign the current point to it. We 0032 /// will then look for the neighbours of the neighbours and assign them to the 0033 /// current cluster if they are not already assigned to a cluster. If the 0034 /// neighbours have itself more than the minimum number of points as neighbours, 0035 /// we will repeat the process on those neighbours. 0036 /// 0037 /// To speed up the search for the neighbours, we use the KDTree implemented in 0038 /// ACTS. It performs a range search in the orthogonal hypercube with a length 0039 /// of 2 epsilon. An extra cut is used to only keep the neighbours that are 0040 /// within the epsilon radius. 0041 /// 0042 /// @tparam kDims The number of dimensions. 0043 /// @tparam scalar_t The scalar type used to construct position vectors. 0044 /// @tparam kLeafSize The maximum number of points in a leaf node of the KDTree. 0045 template <std::size_t kDims, typename scalar_t = double, 0046 std::size_t kLeafSize = 4> 0047 class DBScan { 0048 public: 0049 // The type of coordinates for points. 0050 using Point = std::array<scalar_t, kDims>; 0051 0052 // The type of a vector of coordinate. 0053 using VectorPoints = std::vector<Point>; 0054 0055 // The type to pair the points with an ID. 0056 using Pair = std::pair<Point, std::size_t>; 0057 0058 // The type of a vector of coordinate-ID pairs. 0059 using VectorPairs = std::vector<Pair>; 0060 0061 // KDTree used before the DBScan algorithm to find the neighbours. 0062 using Tree = KDTree<kDims, std::size_t, scalar_t, std::array, kLeafSize>; 0063 0064 // Remove the default constructor. 0065 DBScan() = delete; 0066 0067 /// @brief Construct the DBScan algorithm with a given epsilon and minPoints. 0068 /// 0069 /// @param epsilon The epsilon radius used to find the neighbours. 0070 /// @param minPoints The minimum number of points to form a cluster. 0071 /// @param onePointCluster If true, all the noise points are considered as 0072 /// individual one point clusters. 0073 DBScan(scalar_t epsilon = 1.0, std::size_t minPoints = 1, 0074 bool onePointCluster = false) 0075 : m_eps(epsilon), 0076 m_minPoints(minPoints), 0077 m_onePointCluster(onePointCluster) {} 0078 0079 /// @brief Cluster the input points. 0080 /// 0081 /// This function implements the main loop of the DBScan algorithm. 0082 /// It loops over all the point and will try to start new cluster 0083 /// if it finds points that have yet to be clustered. 0084 /// 0085 /// @param inputPoints The input points to cluster. 0086 /// @param clusteredPoints Vector containing the cluster ID of each point. 0087 /// @return The number of clusters (excluding noise if onePointCluster==False). 0088 /// 0089 int cluster(const VectorPoints& inputPoints, 0090 std::vector<int>& clusteredPoints) { 0091 // Transform the initial vector of input point to a vector of pairs 0092 // with the index of the point in the initial vector. 0093 VectorPairs inputPointsWithIndex; 0094 for (std::size_t id = 0; id < inputPoints.size(); id++) { 0095 inputPointsWithIndex.push_back(std::make_pair(inputPoints[id], id)); 0096 } 0097 // Build the KDTree with the input points. 0098 Tree tree = Tree(std::move(inputPointsWithIndex)); 0099 0100 // Initialize the cluster ID to 0. 0101 int clusterID = 0; 0102 // By default all the points are considered as noise. 0103 clusteredPoints = std::vector<int>(inputPoints.size(), -1); 0104 0105 // Loop over all the points 0106 for (std::size_t id = 0; id < inputPoints.size(); id++) { 0107 // If the point is already assigned to a cluster, skip it. 0108 if (clusteredPoints[id] != -1) { 0109 continue; 0110 } 0111 // If not we try to build a new cluster 0112 std::vector<std::size_t> pointToProcess{id}; 0113 expandCluster(tree, inputPoints, pointToProcess, clusteredPoints, 0114 clusterID); 0115 // If the cluster has been created, increment the cluster ID. 0116 if (clusteredPoints[id] != -1) { 0117 clusterID++; 0118 } 0119 } 0120 if (m_onePointCluster) { 0121 // If noise is present and onePointCluster is true, all the noise points 0122 // are considered as individual one point clusters. Loop over all the 0123 // points in the KDTree. 0124 for (auto& cluster : clusteredPoints) { 0125 // If the point is assigned to noise, assign it to a new cluster. 0126 if (cluster == -1) { 0127 cluster = clusterID; 0128 clusterID++; 0129 } 0130 } 0131 } 0132 return clusterID; 0133 } 0134 0135 private: 0136 /// @brief Extend the cluster. 0137 /// 0138 /// This function will extend the cluster by finding all the neighbours of the 0139 /// current point and assign them to the current cluster. 0140 /// The KDTree is used to find the neighbours and an extra cut is used to only 0141 /// keep the neighbours that are within the epsilon radius. 0142 /// 0143 /// @param tree The KDTree containing all the points. 0144 /// @param inputPoints The vector containing the input points. 0145 /// @param pointsToProcess The vector containing the ids of the points that need to be 0146 /// processed. 0147 /// @param clusteredPoints Vector containing the cluster ID of each point. 0148 /// @param clusterID The ID of the current cluster. 0149 /// 0150 void expandCluster(const Tree& tree, const VectorPoints& inputPoints, 0151 const std::vector<std::size_t>& pointsToProcess, 0152 std::vector<int>& clusteredPoints, const int clusterID) { 0153 // Loop over all the points that need to be process. 0154 for (const auto id : pointsToProcess) { 0155 // Lets look for the neighbours of the current point. 0156 const Point currentPoint = inputPoints[id]; 0157 std::vector<std::size_t> neighbours; 0158 // We create the range in which we will look for the neighbours (an 0159 // hypercube with a length of 2 epsilon). 0160 typename Tree::range_t range; 0161 for (std::size_t dim = 0; dim < kDims; dim++) { 0162 range[dim] = Range1D<scalar_t>(currentPoint[dim] - m_eps, 0163 currentPoint[dim] + m_eps); 0164 } 0165 // We use the KDTree to find the neighbours. 0166 // An extra cut needs to be applied to only keep the neighbours that 0167 // are within the epsilon radius. 0168 tree.rangeSearchMapDiscard( 0169 range, [this, &neighbours, currentPoint]( 0170 const typename Tree::coordinate_t& pos, 0171 const typename Tree::value_t& val) { 0172 scalar_t distance = 0; 0173 for (std::size_t dim = 0; dim < kDims; dim++) { 0174 distance += (pos[dim] - currentPoint[dim]) * 0175 (pos[dim] - currentPoint[dim]); 0176 } 0177 if (distance <= m_eps * m_eps) { 0178 neighbours.push_back(val); 0179 } 0180 }); 0181 std::size_t nNeighbours = neighbours.size(); 0182 // If a cluster has already been started we add the neighbours to it 0183 if (clusteredPoints[id] != -1) { 0184 updateNeighbours(neighbours, clusteredPoints, clusterID); 0185 } 0186 if (nNeighbours >= m_minPoints) { 0187 // If the cluster has not been started yet and we have enough 0188 // neighbours, we start the cluster and assign the current point and its 0189 // neighbours to it. 0190 if (clusteredPoints[id] == -1) { 0191 clusteredPoints[id] = clusterID; 0192 updateNeighbours(neighbours, clusteredPoints, clusterID); 0193 } 0194 // Try to extend the cluster with the neighbours. 0195 expandCluster(tree, inputPoints, neighbours, clusteredPoints, 0196 clusterID); 0197 } 0198 } 0199 } 0200 0201 /// @brief Update the neighbours. 0202 /// 0203 /// This function will remove the neighbours that are already assigned to a 0204 /// cluster and assign the remaining ones to the current cluster. 0205 /// 0206 /// @param neighbours The vector containing the ids of the neighbours. 0207 /// @param clusteredPoints Vector containing the cluster ID of each point. 0208 /// @param clusterID The ID of the current cluster. 0209 /// 0210 void updateNeighbours(std::vector<std::size_t>& neighbours, 0211 std::vector<int>& clusteredPoints, 0212 const int clusterID) { 0213 neighbours.erase(std::remove_if(neighbours.begin(), neighbours.end(), 0214 [&clusteredPoints](int i) { 0215 return clusteredPoints[i] != -1; 0216 }), 0217 neighbours.end()); 0218 0219 for (const auto& neighbour : neighbours) { 0220 clusteredPoints[neighbour] = clusterID; 0221 } 0222 } 0223 0224 // The epsilon radius used to find the neighbours. 0225 scalar_t m_eps; 0226 // The minimum number of points to form a cluster. 0227 std::size_t m_minPoints = 1; 0228 // If true, all the noise points are considered as individual one point 0229 // clusters. 0230 bool m_onePointCluster = false; 0231 }; 0232 0233 } // namespace Acts
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |