Skip to content

Commit

Permalink
remove unicode normalization for non-phonetic similarity algorithms, update test cases
Browse files Browse the repository at this point in the history
  • Loading branch information
GreatYYX committed Mar 23, 2018
1 parent 09c9b2a commit 4fefec8
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 9 deletions.
4 changes: 2 additions & 2 deletions rltk/similarity/jaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def _jaro_distance(s1, s2):
utils.check_for_none(s1, s2)
utils.check_for_type(str, s1, s2)

s1 = utils.unicode_normalize(s1)
s2 = utils.unicode_normalize(s2)
# s1 = utils.unicode_normalize(s1)
# s2 = utils.unicode_normalize(s2)

shorter, longer = s1.lower(), s2.lower()

Expand Down
8 changes: 4 additions & 4 deletions rltk/similarity/levenshtein.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def levenshtein_distance(s1, s2, insert={}, delete={}, substitute={},
utils.check_for_none(s1, s2)
utils.check_for_type(str, s1, s2)

s1 = utils.unicode_normalize(s1)
s2 = utils.unicode_normalize(s2)
# s1 = utils.unicode_normalize(s1)
# s2 = utils.unicode_normalize(s2)

n1, n2 = len(s1), len(s2)
if n1 == 0 and n2 == 0:
Expand Down Expand Up @@ -127,8 +127,8 @@ def damerau_levenshtein_distance(s1, s2):
utils.check_for_none(s1, s2)
utils.check_for_type(str, s1, s2)

s1 = utils.unicode_normalize(s1)
s2 = utils.unicode_normalize(s2)
# s1 = utils.unicode_normalize(s1)
# s2 = utils.unicode_normalize(s2)

n1, n2 = len(s1), len(s2)
infinite = n1 + n2
Expand Down
4 changes: 2 additions & 2 deletions rltk/similarity/needleman.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ def needleman_wunsch_score(s1, s2, match=2, mismatch=-1, gap=-0.5, score_table={
utils.check_for_none(s1, s2)
utils.check_for_type(str, s1, s2)

s1 = utils.unicode_normalize(s1)
s2 = utils.unicode_normalize(s2)
# s1 = utils.unicode_normalize(s1)
# s2 = utils.unicode_normalize(s2)

n1, n2 = len(s1), len(s2)
if n1 == 0 and n2 == 0:
Expand Down
2 changes: 1 addition & 1 deletion rltk/tests/test_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def test_weighted_levenshtein(s1, s2, insert, delete, substitute,
('cape sand recycling', 'edith ann graham', 16),
('jellyifhs', 'jellyfish', 2),
('ifhs', 'fish', 2),
('Hello, world!', 'Hello, world!', 2),
('Hello, world!', 'Hello, world!', 1),
])
def test_damerau_levenshtein(s1, s2, distance):
if s1 is None or s2 is None:
Expand Down

0 comments on commit 4fefec8

Please sign in to comment.