# Fuzzy String Matching

## Levenshtein Distance
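Also known as edit distance: the minimum number of single-character insertions, deletions, or substitutions needed to turn one string into the other.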

In [1]:
import Levenshtein as lev

In [3]:
# Compare two spellings of the same company name, ignoring letter case.
str1 = "Apple Inc."
str2 = "apple Inc"
str1_lower, str2_lower = str1.lower(), str2.lower()

# Keep the distance as a plain number: a trailing comma after the call would
# wrap it in a 1-tuple and print "(1,)" instead of "1".
distance = lev.distance(str1_lower, str2_lower)
print(f"distance - {distance}")

# Similarity score reported by the library for the same lowercased pair.
ratio = lev.ratio(str1_lower, str2_lower)
print(f"ratio - {ratio}")

distance - (1,)
ratio - 0.9473684210526315


## FuzzyWuzzy
* [Source](https://www.youtube.com/watch?v=4L0Py4GkmPU)

In [20]:
from fuzzywuzzy import fuzz, process

### fuzz.ratio - Compare entire string in order

In [21]:
# Whole-string comparison of two words that differ in a single letter.
first_word, second_word = "meat", "meet"
fuzz.ratio(first_word, second_word)

75

In [22]:
# Both phrases start with "this is" and only their endings differ.
longer_phrase, shorter_phrase = "this is a coding", "this is code"
fuzz.ratio(longer_phrase, shorter_phrase)

79

In [23]:
# Similar wording, but the second phrase puts the words in a different order.
phrase, reordered_phrase = "this is a coding", "code this is"
fuzz.ratio(phrase, reordered_phrase)

50

### fuzz.partial_ratio - Compare subsection of string in order

In [24]:
# Partial matching on two phrases that share the "this is" prefix.
longer_phrase, shorter_phrase = "this is a coding", "this is code"
fuzz.partial_ratio(longer_phrase, shorter_phrase)

83

In [25]:
# Partial matching when the second phrase has its words in a different order.
phrase, reordered_phrase = "this is a coding", "code this is"
fuzz.partial_ratio(phrase, reordered_phrase)

58

In [26]:
# Partial matching on two equal-length words that differ in a single letter.
first_word, second_word = "meat", "meet"
fuzz.partial_ratio(first_word, second_word)

75

### fuzz.token_sort_ratio - ignores word order, uses tokens
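* does not ignore duplicate words (`fuzz.token_set_ratio` is the variant that does)
* the example below prints each score together with the two strings that were compared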

In [27]:
# Phrase lists compared pairwise by position (ex1[k] against ex2[k]).
# The trailing space in "apple he eats " is intentional and preserved.
ex1 = [
    "he eats apple",
    "did he eat apple",
]
ex2 = [
    "apple he eats ",
    "did he eat apple and mango",
]

In [28]:
# Score each ex1 phrase against the ex2 phrase at the same position and
# print the score next to the pair that produced it.
for left_phrase, right_phrase in zip(ex1, ex2):
    score = fuzz.token_sort_ratio(left_phrase, right_phrase)
    print(f"Score: {score} - {left_phrase} -- {right_phrase}")

Score: 100 - he eats apple -- apple he eats 
Score: 76 - did he eat apple -- did he eat apple and mango


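### process.extract
* Set `limit` to cap how many matches are returned (default is 5)
    * process.extract("apple", ex1, limit=1)
* Specify which ratio function is used for scoring via `scorer` (default is `fuzz.WRatio`)
    * process.extract("apple", ex1, scorer=fuzz.ratio)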

In [29]:
# Return only the single best-scoring candidate from ex1 for the query.
query = "apple"
process.extract(query, ex1, limit=1)

[('he eats apple', 90)]

In [30]:
# Rank the ex1 candidates using the whole-string fuzz.ratio scorer.
query = "apple"
process.extract(query, ex1, scorer=fuzz.ratio)

[('he eats apple', 56), ('did he eat apple', 48)]

In [31]:
# Rank the ex1 candidates using the substring-oriented fuzz.partial_ratio scorer.
query = "apple"
process.extract(query, ex1, scorer=fuzz.partial_ratio)

[('he eats apple', 100), ('did he eat apple', 100)]