-
Notifications
You must be signed in to change notification settings - Fork 400
/
Levenshtein_distance.py
41 lines (32 loc) · 1.48 KB
/
Levenshtein_distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# The Levenshtein distance (Edit distance) Problem
# Informally, the Levenshtein distance between two words is
# the minimum number of single-character edits (insertions, deletions or substitutions)
# required to change one word into the other.
# For example, the Levenshtein distance between kitten and sitting is 3.
# The minimal edit script that transforms the former into the latter is:
# kitten —> sitten (substitution of s for k)
# sitten —> sittin (substitution of i for e)
# sittin —> sitting (insertion of g at the end)
def levenshtein_distance(word_1, chars_1, word_2, chars_2):
# base case if the strings are empty
if chars_1 == 0:
return chars_2
if chars_2 == 0:
return chars_1
# if last characters of the string match, the cost of
# operations is 0, i.e. no changes are made
if word_1[chars_1 - 1] == word_2[chars_2 - 1]:
cost = 0
else:
cost = 1
# calculating the numbers of operations recursively
deletion = levenshtein_distance(word_1, chars_1 - 1, word_2, chars_2) + 1
insertion = levenshtein_distance(word_1, chars_1, word_2, chars_2 - 1) + 1
substitution = levenshtein_distance(word_1, chars_1 - 1, word_2, chars_2 - 1) + cost
return min(deletion, insertion, substitution)
# driving script
if __name__ == '__main__':
word_1 = input("Enter Word 1 :")
word_2 = input("Enter Word 2 :")
print('The Levenshtein distance is:')
print(levenshtein_distance(word_1, len(word_1), word_2, len(word_2)))