Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added a feature to return span_info when using extract_keywords.
- Loading branch information
Showing
4 changed files
with
105 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
from flashtext import KeywordProcessor | ||
import logging | ||
import unittest | ||
import json | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class TestKPExtractorSpan(unittest.TestCase):
    """Span checks for ``KeywordProcessor.extract_keywords(..., span_info=True)``.

    Every extracted item is indexed as ``(keyword, start, end)``; the tests
    verify that ``sentence[start:end]`` is the text of the reported keyword.
    """

    def setUp(self):
        """Load the JSON test cases into ``self.test_cases``."""
        logger.info("Starting...")
        with open('test/keyword_extractor_test_cases.json') as f:
            self.test_cases = json.load(f)

    def tearDown(self):
        """Log the end of the test."""
        logger.info("Ending.")

    def _extract_with_spans(self, test_case, **processor_options):
        """Build a fresh KeywordProcessor for *test_case* and extract with spans.

        All keyword lists found in ``test_case['keyword_dict']`` are added to
        the processor before it is run over ``test_case['sentence']``.
        """
        processor = KeywordProcessor(**processor_options)
        for keyword_list in test_case['keyword_dict'].values():
            processor.add_keywords_from_list(keyword_list)
        return processor.extract_keywords(test_case['sentence'], span_info=True)

    def test_extract_keywords(self):
        """Case-insensitive run: the lowered keyword must equal the lowered span.

        A new default KeywordProcessor is created for every test case, loaded
        with that case's keywords, and each reported span is compared against
        the lower-cased sentence.
        """
        for test_id, test_case in enumerate(self.test_cases):
            failure_message = (
                "keywords span don't match the expected results for test case: {}".format(test_id))
            matches = self._extract_with_spans(test_case)
            lowered_sentence = test_case['sentence'].lower()
            for match in matches:
                keyword, start, end = match[0], match[1], match[2]
                # the returned keyword, lowered, should match the span from the sentence
                self.assertEqual(keyword.lower(), lowered_sentence[start:end], failure_message)

    def test_extract_keywords_case_sensitive(self):
        """Case-sensitive run: the keyword must equal the span exactly.

        A new ``KeywordProcessor(case_sensitive=True)`` is created for every
        test case, loaded with that case's keywords, and each reported span is
        compared against the unmodified sentence.
        """
        for test_id, test_case in enumerate(self.test_cases):
            failure_message = (
                "keywords span don't match the expected results for test case: {}".format(test_id))
            matches = self._extract_with_spans(test_case, case_sensitive=True)
            original_sentence = test_case['sentence']
            for match in matches:
                keyword, start, end = match[0], match[1], match[2]
                # the returned keyword should match the span from the sentence as-is
                self.assertEqual(keyword, original_sentence[start:end], failure_message)
|
||
if __name__ == '__main__': | ||
unittest.main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters