# test_performance.py
# -*- coding: utf-8 -*-
from unittest import TestCase, skip
from os import listdir
from os.path import dirname, join
import io
from tests.word_sent.test_config import EXPECTED_SPEED
from underthesea import word_sent
import time
from underthesea.word_sent.tokenize import tokenize
class TestPerformance(TestCase):
    """Throughput check for ``word_sent`` over the bundled sample documents."""

    def setUp(self):
        """Load the sample corpus into ``self.texts``.

        Every file in the ``samples/4_documents`` folder next to this module
        is read as UTF-8 and split on newlines; the lines of all files are
        collected into one flat list.
        """
        input_folder = join(dirname(__file__), "samples", "4_documents")
        texts = []
        # listdir() returns entries in arbitrary order; sort so the corpus
        # is assembled in the same order on every run.
        for name in sorted(listdir(input_folder)):
            with io.open(join(input_folder, name), "r", encoding="utf-8") as f:
                texts.extend(f.read().split("\n"))
        self.texts = texts

    @staticmethod
    def _tokens_per_second(n_tokens, duration):
        """Return ``n_tokens / duration`` without dividing by zero.

        A non-positive ``duration`` (the timer did not advance) yields
        ``float("inf")`` when at least one token was counted, else ``0.0``.
        """
        if duration <= 0:
            return float("inf") if n_tokens else 0.0
        return n_tokens / duration

    # Skipped unconditionally; the original records no reason.
    @skip("")
    def test_1(self):
        """Assert that ``word_sent`` throughput exceeds ``EXPECTED_SPEED``.

        Tokens are counted up front with ``tokenize`` (outside the timed
        region); only the ``word_sent`` calls are timed. The resulting
        tokens-per-second figure is printed and compared to
        ``EXPECTED_SPEED``.
        """
        # Local import: default_timer is the best available benchmark clock
        # on both Python 2 and 3, unlike the wall-clock time.time().
        from timeit import default_timer

        n_tokens = sum(len(tokenize(text).split(" ")) for text in self.texts)

        start = default_timer()
        for text in self.texts:
            word_sent(text)
        duration = default_timer() - start  # in seconds

        speed = self._tokens_per_second(n_tokens, duration)
        print("Speed: ", speed)
        self.assertGreater(speed, EXPECTED_SPEED)