Back to home page

EIC code displayed by LXR

 
 

    


File indexing completed on 2026-05-10 08:43:04

0001 //===-- llvm/ADT/edit_distance.h - Array edit distance function --- C++ -*-===//
0002 //
0003 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
0004 // See https://llvm.org/LICENSE.txt for license information.
0005 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
0006 //
0007 //===----------------------------------------------------------------------===//
0008 ///
0009 /// \file
0010 /// This file defines a Levenshtein distance function that works for any two
0011 /// sequences, with each element of each sequence being analogous to a character
0012 /// in a string.
0013 ///
0014 //===----------------------------------------------------------------------===//
0015 
0016 #ifndef LLVM_ADT_EDIT_DISTANCE_H
0017 #define LLVM_ADT_EDIT_DISTANCE_H
0018 
0019 #include "llvm/ADT/ArrayRef.h"
0020 #include <algorithm>
0021 
0022 namespace llvm {
0023 
0024 /// Determine the edit distance between two sequences.
0025 ///
0026 /// \param FromArray the first sequence to compare.
0027 ///
0028 /// \param ToArray the second sequence to compare.
0029 ///
0030 /// \param Map A Functor to apply to each item of the sequences before
0031 /// comparison.
0032 ///
0033 /// \param AllowReplacements whether to allow element replacements (change one
0034 /// element into another) as a single operation, rather than as two operations
0035 /// (an insertion and a removal).
0036 ///
0037 /// \param MaxEditDistance If non-zero, the maximum edit distance that this
0038 /// routine is allowed to compute. If the edit distance will exceed that
0039 /// maximum, returns \c MaxEditDistance+1.
0040 ///
0041 /// \returns the minimum number of element insertions, removals, or (if
0042 /// \p AllowReplacements is \c true) replacements needed to transform one of
0043 /// the given sequences into the other. If zero, the sequences are identical.
0044 template <typename T, typename Functor>
0045 unsigned ComputeMappedEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
0046                                    Functor Map, bool AllowReplacements = true,
0047                                    unsigned MaxEditDistance = 0) {
0048   // The algorithm implemented below is the "classic"
0049   // dynamic-programming algorithm for computing the Levenshtein
0050   // distance, which is described here:
0051   //
0052   //   http://en.wikipedia.org/wiki/Levenshtein_distance
0053   //
0054   // Although the algorithm is typically described using an m x n
0055   // array, only one row plus one element are used at a time, so this
0056   // implementation just keeps one vector for the row.  To update one entry,
0057   // only the entries to the left, top, and top-left are needed.  The left
0058   // entry is in Row[x-1], the top entry is what's in Row[x] from the last
0059   // iteration, and the top-left entry is stored in Previous.
0060   typename ArrayRef<T>::size_type m = FromArray.size();
0061   typename ArrayRef<T>::size_type n = ToArray.size();
0062 
0063   if (MaxEditDistance) {
0064     // If the difference in size between the 2 arrays is larger than the max
0065     // distance allowed, we can bail out as we will always need at least
0066     // MaxEditDistance insertions or removals.
0067     typename ArrayRef<T>::size_type AbsDiff = m > n ? m - n : n - m;
0068     if (AbsDiff > MaxEditDistance)
0069       return MaxEditDistance + 1;
0070   }
0071 
0072   SmallVector<unsigned, 64> Row(n + 1);
0073   for (unsigned i = 1; i < Row.size(); ++i)
0074     Row[i] = i;
0075 
0076   for (typename ArrayRef<T>::size_type y = 1; y <= m; ++y) {
0077     Row[0] = y;
0078     unsigned BestThisRow = Row[0];
0079 
0080     unsigned Previous = y - 1;
0081     const auto &CurItem = Map(FromArray[y - 1]);
0082     for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) {
0083       int OldRow = Row[x];
0084       if (AllowReplacements) {
0085         Row[x] = std::min(Previous + (CurItem == Map(ToArray[x - 1]) ? 0u : 1u),
0086                           std::min(Row[x - 1], Row[x]) + 1);
0087       }
0088       else {
0089         if (CurItem == Map(ToArray[x - 1]))
0090           Row[x] = Previous;
0091         else Row[x] = std::min(Row[x-1], Row[x]) + 1;
0092       }
0093       Previous = OldRow;
0094       BestThisRow = std::min(BestThisRow, Row[x]);
0095     }
0096 
0097     if (MaxEditDistance && BestThisRow > MaxEditDistance)
0098       return MaxEditDistance + 1;
0099   }
0100 
0101   unsigned Result = Row[n];
0102   return Result;
0103 }
0104 
0105 template <typename T>
0106 unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
0107                              bool AllowReplacements = true,
0108                              unsigned MaxEditDistance = 0) {
0109   return ComputeMappedEditDistance(
0110       FromArray, ToArray, [](const T &X) -> const T & { return X; },
0111       AllowReplacements, MaxEditDistance);
0112 }
0113 
0114 } // End llvm namespace
0115 
0116 #endif