Levenshtein Distance
Definition​
- Definition
- Explanation
- Guidance
- Tips
The Levenshtein Distance Algorithm is a dynamic programming technique used to measure the similarity between two strings by calculating the minimum number of single-character edits (insertions, deletions, or substitutions) required to transform one string into another
The Levenshtein Distance Algorithm works by constructing a matrix where each cell represents the cost of transforming a substring of one string into a substring of another. It iterates through each cell in the matrix, calculating the minimum cost based on the previous cell values and the current characters being compared. The final value in the bottom-right cell of the matrix represents the Levenshtein distance between the two strings
- Create a grid (matrix)
M
with dimensions ([m+1][n+1]
), wherem
andn
are the lengths of the 2 string - Fill the first row and column of the matrix with values from
0
tom
and0
ton
respectively, representing transformation costs - Iterate through each cell
[i, j]
of the matrix, starting from[1, 1]
- Calculate costs for 3 operations, where
cost
is0
if characters at[i, j]
match,1
otherwise- insertion:
M[i][j-1] + 1
- deletion:
M[i-1][j] + 1
- substitution:
M[i-1][j-1] + cost
- insertion:
- Set
M[i][j]
to the minimum of the calculated costs - The value at
M[m][n]
represents the Levenshtein distance, indicating the minimum number of edits required to transform one string into the other
- utilize memoization or tabulation to optimize the algorithm's performance, especially when dealing with large strings
- consider space optimization techniques such as only storing the last two rows of the matrix to reduce memory usage
- leverage bitwise operations or other optimizations when calculating the costs to further enhance efficiency
Practice​
- Practice
- Solution
levenshteinDistance(str1, str2):
m = length(str1)
n = length(str2)
matrix = new matrix[m+1][n+1]
// Initialize first row and column
for i from 0 to m:
matrix[i][0] = i
for j from 0 to n:
matrix[0][j] = j
// Fill the matrix
for i from 1 to m:
for j from 1 to n:
cost = 0 if str1[i-1] == str2[j-1] else 1
insertion = matrix[i][j-1] + 1
deletion = matrix[i-1][j] + 1
substitution = matrix[i-1][j-1] + cost
matrix[i][j] = min(insertion, deletion, substitution)
return matrix[m][n]
package main
func min(a, b, c int) int {
if a <= b && a <= c {
return a
} else if b <= a && b <= c {
return b
} else {
return c
}
}
func levenshteinDistance(s1, s2 string) int {
m, n := len(s1), len(s2)
dp := make([][]int, m+1)
for i := range dp {
dp[i] = make([]int, n+1)
}
for i := 0; i <= m; i++ {
for j := 0; j <= n; j++ {
if i == 0 {
dp[i][j] = j
} else if j == 0 {
dp[i][j] = i
} else if s1[i-1] == s2[j-1] {
dp[i][j] = dp[i-1][j-1]
} else {
dp[i][j] = 1 + min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1])
}
}
}
return dp[m][n]
}
public class LevenshteinDistance {
public static int min(int x, int y, int z) {
if (x <= y && x <= z) {
return x;
}
if (y <= x && y <= z) {
return y;
}
return z;
}
public static int levenshteinDistance(String s1, String s2) {
int m = s1.length();
int n = s2.length();
int[][] dp = new int[m + 1][n + 1];
for (int i = 0; i <= m; i++) {
for (int j = 0; j <= n; j++) {
if (i == 0) {
dp[i][j] = j;
} else if (j == 0) {
dp[i][j] = i;
} else if (s1.charAt(i - 1) == s2.charAt(j - 1)) {
dp[i][j] = dp[i - 1][j - 1];
} else {
dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]);
}
}
}
return dp[m][n];
}
}
function min(x, y, z) {
if (x <= y && x <= z) {
return x;
}
if (y <= x && y <= z) {
return y;
}
return z;
}
function levenshteinDistance(s1, s2) {
const m = s1.length;
const n = s2.length;
const dp = [];
for (let i = 0; i <= m; i++) {
dp[i] = [];
for (let j = 0; j <= n; j++) {
if (i === 0) {
dp[i][j] = j;
} else if (j === 0) {
dp[i][j] = i;
} else if (s1[i - 1] === s2[j - 1]) {
dp[i][j] = dp[i - 1][j - 1];
} else {
dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]);
}
}
}
return dp[m][n];
}
fun min(x: Int, y: Int, z: Int) = if (x <= y && x <= z) x else if (y <= x && y <= z) y else z
fun levenshteinDistance(s1: String, s2: String): Int {
val m = s1.length
val n = s2.length
val dp = Array(m + 1) { IntArray(n + 1) }
for (i in 0..m) {
for (j in 0..n) {
when {
i == 0 -> dp[i][j] = j
j == 0 -> dp[i][j] = i
s1[i - 1] == s2[j - 1] -> dp[i][j] = dp[i - 1][j - 1]
else -> dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])
}
}
}
return dp[m][n]
}
def levenshtein_distance(s1, s2):
m, n = len(s1), len(s2)
dp = [[0] * (n + 1) for _ in range(m + 1)]
for i in range(m + 1):
for j in range(n + 1):
if i == 0:
dp[i][j] = j
elif j == 0:
dp[i][j] = i
elif s1[i - 1] == s2[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
else:
dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])
return dp[m][n]
fn min(x: usize, y: usize, z: usize) -> usize {
if x <= y && x <= z {
x
} else if y <= x && y <= z {
y
} else {
z
}
}
fn levenshtein_distance(s1: &str, s2: &str) -> usize {
let m = s1.chars().count();
let n = s2.chars().count();
let mut dp = vec![vec![0; n + 1]; m + 1];
for i in 0..=m {
for j in 0..=n {
dp[i][j] = if i == 0 {
j
} else if j == 0 {
i
} else if s1.chars().nth(i - 1) == s2.chars().nth(j - 1) {
dp[i - 1][j - 1]
} else {
1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])
};
}
}
dp[m][n]
}
function min(x: number, y: number, z: number): number {
if (x <= y && x <= z) return x;
if (y <= x && y <= z) return y;
return z;
}
function levenshteinDistance(s1: string, s2: string): number {
const m = s1.length;
const n = s2.length;
const dp: number[][] = [];
for (let i = 0; i <= m; i++) {
dp[i] = [];
for (let j = 0; j <= n; j++) {
if (i === 0) {
dp[i][j] = j;
} else if (j === 0) {
dp[i][j] = i;
} else if (s1.charAt(i - 1) === s2.charAt(j - 1)) {
dp[i][j] = dp[i - 1][j - 1];
} else {
dp[i][j] = 1 + min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]);
}
}
}
return dp[m][n];
}