-
-
Notifications
You must be signed in to change notification settings - Fork 270
/
__init__.py
38 lines (33 loc) · 888 Bytes
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# -*- coding: utf-8 -*-
from underthesea import word_tokenize
from .model_crf import CRFPOSTagPredictor
def pos_tag(sentence, format=None):
    """
    Part-of-speech tagging for a Vietnamese sentence.

    The raw sentence is first segmented with ``word_tokenize``; the
    segmented output is then handed, together with ``format``, to the
    ``predict`` method of the object returned by
    ``CRFPOSTagPredictor.Instance()``.

    Parameters
    ==========
    sentence: {unicode, str}
        Raw (unsegmented) sentence
    format: optional, default None
        Passed through unchanged as the second argument of the
        predictor's ``predict`` call. Its accepted values are defined by
        the predictor and are not visible in this module.

    Returns
    =======
    tokens: list of tuple with word, pos tag
        Tagged sentence, exactly as returned by the predictor

    Examples
    --------
    >>> # -*- coding: utf-8 -*-
    >>> from underthesea import pos_tag
    >>> sentence = "Chợ thịt chó nổi tiếng ở TPHCM bị truy quét"
    >>> pos_tag(sentence)
    [('Chợ', 'N'),
    ('thịt', 'N'),
    ('chó', 'N'),
    ('nổi tiếng', 'A'),
    ('ở', 'E'),
    ('TPHCM', 'Np'),
    ('bị', 'V'),
    ('truy quét', 'V')]
    """
    # Segment before looking up the predictor, matching the original call order.
    segmented = word_tokenize(sentence)
    tagger = CRFPOSTagPredictor.Instance()
    return tagger.predict(segmented, format)