|
|
|||
File indexing completed on 2026-05-10 08:43:04
0001 //===-- llvm/ADT/edit_distance.h - Array edit distance function --- C++ -*-===// 0002 // 0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 0004 // See https://llvm.org/LICENSE.txt for license information. 0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 0006 // 0007 //===----------------------------------------------------------------------===// 0008 /// 0009 /// \file 0010 /// This file defines a Levenshtein distance function that works for any two 0011 /// sequences, with each element of each sequence being analogous to a character 0012 /// in a string. 0013 /// 0014 //===----------------------------------------------------------------------===// 0015 0016 #ifndef LLVM_ADT_EDIT_DISTANCE_H 0017 #define LLVM_ADT_EDIT_DISTANCE_H 0018 0019 #include "llvm/ADT/ArrayRef.h" 0020 #include <algorithm> 0021 0022 namespace llvm { 0023 0024 /// Determine the edit distance between two sequences. 0025 /// 0026 /// \param FromArray the first sequence to compare. 0027 /// 0028 /// \param ToArray the second sequence to compare. 0029 /// 0030 /// \param Map A Functor to apply to each item of the sequences before 0031 /// comparison. 0032 /// 0033 /// \param AllowReplacements whether to allow element replacements (change one 0034 /// element into another) as a single operation, rather than as two operations 0035 /// (an insertion and a removal). 0036 /// 0037 /// \param MaxEditDistance If non-zero, the maximum edit distance that this 0038 /// routine is allowed to compute. If the edit distance will exceed that 0039 /// maximum, returns \c MaxEditDistance+1. 0040 /// 0041 /// \returns the minimum number of element insertions, removals, or (if 0042 /// \p AllowReplacements is \c true) replacements needed to transform one of 0043 /// the given sequences into the other. If zero, the sequences are identical. 0044 template <typename T, typename Functor> 0045 unsigned ComputeMappedEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray, 0046 Functor Map, bool AllowReplacements = true, 0047 unsigned MaxEditDistance = 0) { 0048 // The algorithm implemented below is the "classic" 0049 // dynamic-programming algorithm for computing the Levenshtein 0050 // distance, which is described here: 0051 // 0052 // http://en.wikipedia.org/wiki/Levenshtein_distance 0053 // 0054 // Although the algorithm is typically described using an m x n 0055 // array, only one row plus one element are used at a time, so this 0056 // implementation just keeps one vector for the row. To update one entry, 0057 // only the entries to the left, top, and top-left are needed. The left 0058 // entry is in Row[x-1], the top entry is what's in Row[x] from the last 0059 // iteration, and the top-left entry is stored in Previous. 0060 typename ArrayRef<T>::size_type m = FromArray.size(); 0061 typename ArrayRef<T>::size_type n = ToArray.size(); 0062 0063 if (MaxEditDistance) { 0064 // If the difference in size between the 2 arrays is larger than the max 0065 // distance allowed, we can bail out as we will always need at least 0066 // MaxEditDistance insertions or removals. 0067 typename ArrayRef<T>::size_type AbsDiff = m > n ? m - n : n - m; 0068 if (AbsDiff > MaxEditDistance) 0069 return MaxEditDistance + 1; 0070 } 0071 0072 SmallVector<unsigned, 64> Row(n + 1); 0073 for (unsigned i = 1; i < Row.size(); ++i) 0074 Row[i] = i; 0075 0076 for (typename ArrayRef<T>::size_type y = 1; y <= m; ++y) { 0077 Row[0] = y; 0078 unsigned BestThisRow = Row[0]; 0079 0080 unsigned Previous = y - 1; 0081 const auto &CurItem = Map(FromArray[y - 1]); 0082 for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) { 0083 int OldRow = Row[x]; 0084 if (AllowReplacements) { 0085 Row[x] = std::min(Previous + (CurItem == Map(ToArray[x - 1]) ? 0u : 1u), 0086 std::min(Row[x - 1], Row[x]) + 1); 0087 } 0088 else { 0089 if (CurItem == Map(ToArray[x - 1])) 0090 Row[x] = Previous; 0091 else Row[x] = std::min(Row[x-1], Row[x]) + 1; 0092 } 0093 Previous = OldRow; 0094 BestThisRow = std::min(BestThisRow, Row[x]); 0095 } 0096 0097 if (MaxEditDistance && BestThisRow > MaxEditDistance) 0098 return MaxEditDistance + 1; 0099 } 0100 0101 unsigned Result = Row[n]; 0102 return Result; 0103 } 0104 0105 template <typename T> 0106 unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray, 0107 bool AllowReplacements = true, 0108 unsigned MaxEditDistance = 0) { 0109 return ComputeMappedEditDistance( 0110 FromArray, ToArray, [](const T &X) -> const T & { return X; }, 0111 AllowReplacements, MaxEditDistance); 0112 } 0113 0114 } // End llvm namespace 0115 0116 #endif
| [ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
|
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
|