-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathLevenshtein_distance.py
73 lines (63 loc) · 2.71 KB
/
Levenshtein_distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"""
In information theory, linguistics and computer science, the Levenshtein distance is a string metric for measuring the
difference between two sequences. Informally, the Levenshtein distance between two words is the minimum number of
single-character edits (insertions, deletions or substitutions) required to change one word into the other.
It is named after the Soviet mathematician Vladimir Levenshtein, who considered this distance in 1965.[1]
Levenshtein distance may also be referred to as edit distance, although that term may also denote a larger family of
distance metrics known collectively as edit distance.[2]:32 It is closely related to pairwise string alignments.
(source: wikipedia)
And this webpage helped me a lot: https://medium.com/@ethannam/understanding-the-levenshtein-distance-equation-for-beginners-c4285a5604f0
"""
import pprint
def lev_distance(i, j, matrix, a, b, noted):
if [i, j] in noted:
return matrix[i][j]
else:
noted.append([i, j])
if min(i, j) != 0:
x = lev_distance(i - 1, j, matrix, a, b, noted) + 1
y = lev_distance(i, j - 1, matrix, a, b, noted) + 1
conditional_addition = (1 if a[i - 1] != b[j - 1] else 0)
z = lev_distance(i - 1, j - 1, matrix, a, b, noted) + conditional_addition
value = min(x, y, z)
matrix[i][j] = value
return value
else:
value = max(i, j)
matrix[i][j] = value
return value
def LevenshteinDistance(m, n, a, b, matrix, noted, print_perm):
for i in range(1, m + 1):
for j in range(1, n + 1):
value = lev_distance(i, j, matrix, a, b, noted)
if print_perm:
print(f"value for lev({i},{j})={value}.")
matrix[i][j] = value
if print_perm:
print('Final Matrix:')
pprint.PrettyPrinter(indent=5).pprint(matrix)
return matrix[m][n]
def generate_matrix(m, n,print_perm):
matrix = []
for i in range(m):
temp = []
for j in range(n):
temp.append(0)
matrix.append(temp)
if print_perm:
print("Matrix generated...")
pprint.PrettyPrinter(indent=5).pprint(matrix)
return matrix
if __name__ == '__main__':
a = input('Word-1:')
b = input('Word-2:')
m, n = len(a), len(b)
# res = input("Want to see the steps:(y/n)")
res = 'n'
if res.lower().startswith('y'):
print_perm = True
else:
print_perm = False
matrix = generate_matrix(m + 1, n + 1,print_perm)
distance = LevenshteinDistance(m, n, a, b, matrix, [], print_perm=print_perm)
print(f"Word's Levenshtein Distance={distance}")