In [580]:
from collections import OrderedDict
from conllu import parse
from enum import Enum
import numpy as np
import json
from sklearn.metrics import classification_report
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import tokenize_uk
import pymorphy2

In [424]:
PATH = '../../../../UD_Ukrainian-IU'

with open(PATH + '/uk_iu-ud-train.conllu') as f:
    train_data = f.read()
    
with open(PATH + '/uk_iu-ud-dev.conllu') as f:
    test_data = f.read()

train_trees = parse(train_data)
test_trees = parse(test_data)

In [449]:
tree = train_trees[3]
print(tree)
print('=====')
for node in tree:
    head = node['head']
    print(f"{node['id']}: {node['form']} <-- {node['deprel']} -- {tree[head - 1]['form'] if head > 0 else 'root'}")

TokenList<Це, одна, з, надзвичайно, важливих, сцен, у, драмі, Лесі, Українки, «, Руфін, і, Прісцілла, », .>
=====
1: Це <-- nsubj -- одна
2: одна <-- root -- root
3: з <-- case -- сцен
4: надзвичайно <-- advmod -- важливих
5: важливих <-- amod -- сцен
6: сцен <-- nmod -- одна
7: у <-- case -- драмі
8: драмі <-- nmod -- сцен
9: Лесі <-- nmod -- драмі
10: Українки <-- flat:name -- Лесі
11: « <-- punct -- Руфін
12: Руфін <-- flat:title -- драмі
13: і <-- cc -- Прісцілла
14: Прісцілла <-- conj -- Руфін
15: » <-- punct -- Руфін
16: . <-- punct -- одна


In [448]:
def get_path_to_root(node, tree):
    h = node['head']
#     print('>>> h', tree[h - 1]['form'], h)
    if h:
        return f'{h}_{get_path_to_root(tree[h - 1], tree)}'
    return str(h)

def get_path_to_root_tree(tree):
    res = []
    for node in tree:
        print('>> node', node['form'])
        print('>> path', get_path_to_root(node, tree))
        res.append(get_path_to_root(node, tree))
    return res

get_path_to_root_tree(train_trees[3])
# print(train_trees[0][2])
# print(train_trees[0][4])

>> node Це
>> path 2_0
>> node одна
>> path 0
>> node з
>> path 6_2_0
>> node надзвичайно
>> path 5_6_2_0
>> node важливих
>> path 6_2_0
>> node сцен
>> path 2_0
>> node у
>> path 8_6_2_0
>> node драмі
>> path 6_2_0
>> node Лесі
>> path 8_6_2_0
>> node Українки
>> path 9_8_6_2_0
>> node «
>> path 12_8_6_2_0
>> node Руфін
>> path 8_6_2_0
>> node і
>> path 14_12_8_6_2_0
>> node Прісцілла
>> path 12_8_6_2_0
>> node »
>> path 12_8_6_2_0
>> node .
>> path 2_0


['2_0',
 '0',
 '6_2_0',
 '5_6_2_0',
 '6_2_0',
 '2_0',
 '8_6_2_0',
 '6_2_0',
 '8_6_2_0',
 '9_8_6_2_0',
 '12_8_6_2_0',
 '8_6_2_0',
 '14_12_8_6_2_0',
 '12_8_6_2_0',
 '12_8_6_2_0',
 '2_0']

In [3]:
class Actions(str, Enum):
    SHIFT = "shift"
    REDUCE = "reduce"
    RIGHT = "right"
    LEFT = "left"
    
ROOT = OrderedDict([('id', 0), ('form', 'ROOT'), ('lemma', 'ROOT'), ('upostag', 'ROOT'),
                    ('xpostag', None), ('feats', None), ('head', None), ('deprel', None),
                    ('deps', None), ('misc', None)])

In [645]:
def compose(*funcs):
    def inner(*arg):
        res = {}
        for f in funcs:
            res.update(f(*arg))
        return res
    return inner


def get_ldep_rdep(id, relations):
    left = 100500
    right = -1
    ldep = 'NONE'
    rdep = 'NONE'
    for (ch, head, rel) in relations:
        if head == id:
            if (ch < head) and (ch < left):
                left = ch
                ldep = rel
            if (ch > head) and (ch > right):
                right = ch
                rdep = rel
    return ldep, rdep


def get_path_to_root(id, relations):
    curr_ch = id
    steps = 0
    rels_sorted = sorted(relations, key=lambda x: x[0] == id, reverse=True)
    for (ch, head, rel) in rels_sorted:
        if curr_ch == ch:
            steps =+ 1
            curr_ch = head
    return steps


def feature_extractor_base(stack, queue, _):
    print('%%%%%%%%%%%%%%%')
    feat = {}
    
    if stack:
        top_stack = stack[-1]
        feat['s0-word'] = top_stack['form']
        feat['s0-lemma'] = top_stack['lemma']
        feat['s0-pos'] = top_stack['upostag']
    if (len(stack)) > 1:
        feat['s1-pos'] = stack[-2]['upostag']
    if queue:
        print('---', queue)
        top_queue = queue[0]
        feat['q0-word'] = top_queue['form']
        feat['q0-lemma'] = top_queue['lemma']
        feat['q0-pos'] = top_queue['upostag']
    if (len(queue)) > 1:
        q_next = queue[1]
        feat['q1-word'] = q_next['form']
        feat['q1-pos'] = q_next['upostag']
    if (len(queue)) > 2:
        feat['q2-pos'] = queue[2]['upostag']
    if (len(queue)) > 3:
        feat['q3-pos'] = queue[3]['upostag']
        
    return feat


def feature_extractor_feats(stack, queue, _):
    def get_feats(token):
        token_feats = token['feats']
        return ';'.join([f'{k}={v}' for k, v in token_feats.items()]) if token_feats else 'NONE'
        
    feat = {}
    
    if stack:
        feat['s0-feats'] = get_feats(stack[-1])
    if (len(stack)) > 1:
        feat['s1-feats'] = get_feats(stack[-2])
    if queue:
        feat['q0-feats'] = get_feats(queue[0])
    if (len(queue)) > 1:
        feat['q1-feats'] = get_feats(queue[1])
        
    return feat


def feature_extractor_deprels(stack, queue, relations):
    feat = {}
    if stack:
        top_stack = stack[-1]
        feat['s0-deprel'] = top_stack['deprel'] or 'NONE'
        ldep, rdep = get_ldep_rdep(top_stack['id'], relations)
        feat['s0-ldep'] = ldep
        feat['s0-rdep'] = rdep
    if queue:
        top_queue = queue[0]
        feat['q0-deprel'] = top_queue['deprel'] or 'NONE'
        ldep, rdep = get_ldep_rdep(top_queue['id'], relations)
        feat['q0-ldep'] = ldep
        feat['q0-rdep'] = rdep

    return feat


def feature_extractor_path_to_root(stack, queue, relations):
    feat = {}
    if stack:
        top_stack = stack[-1]
        feat['s0-path-root'] = get_path_to_root(top_stack['id'], relations)
    if queue:
        top_queue = queue[0]
        feat['q0-path-root'] = get_path_to_root(top_queue['id'], relations)

    return feat


def oracle(stack, top_queue, relations):
    """
    Make a decision on the right action to do.
    """
    top_stack = stack[-1]
    # check if both stack and queue are non-empty
    if top_stack and not top_queue:
        return Actions.REDUCE
    # check if there are any clear dependencies
    elif top_queue["head"] == top_stack["id"]:
        return Actions.RIGHT
    elif top_stack["head"] == top_queue["id"]:
        return Actions.LEFT
    # check if we can reduce the top of the stack
    elif top_stack["id"] in [i[0] for i in relations] and \
         (top_queue["head"] < top_stack["id"] or \
          [s for s in stack if s["head"] == top_queue["id"]]):
        return Actions.REDUCE
    # default option
    else:
        return Actions.SHIFT



def get_data(tree, feature_extractor):
    features, labels = [], []
    stack, queue, relations = [ROOT], tree[:], []
    
    while queue or stack:
#         if stack and not queue:
#             stack.pop()
#         else:
        action = oracle(stack if len(stack) > 0 else None,
                            queue[0] if len(queue) > 0 else None,
                            relations)
        features.append(feature_extractor(stack, queue, relations))
        labels.append(action.value)
        if action == Actions.SHIFT:
            stack.append(queue.pop(0))
        elif action == Actions.REDUCE:
            stack.pop()
        elif action == Actions.LEFT:
            rel = (stack[-1]["id"], queue[0]["id"], stack[-1]["deprel"])
            relations.append(rel)
            stack.pop()
        elif action == Actions.RIGHT:
            rel = (queue[0]['id'], stack[-1]["id"], queue[0]["deprel"])
            relations.append(rel)
            stack.append(queue.pop(0))

    return features, labels


def dep_parse(tree, clf, vectorizer, feature_extractor):
    print('**')
    stack, queue, relations = [ROOT], tree[:], []
    
    while queue or stack:
        if stack and not queue:
            stack.pop()
        else:
            features = feature_extractor(stack, queue, relations)
#             print('^^ feat', len(features))
            action = clf.predict(vectorizer.transform([features]))[0]
#             action = clf.predict(features)
    
#             print('^^^ act', action)
            
            if action == Actions.SHIFT:
                stack.append(queue.pop(0))
            elif action == Actions.REDUCE:
                stack.pop()
            elif action == Actions.LEFT:
                rel = (stack[-1]["id"], queue[0]["id"], stack[-1]["deprel"])
                relations.append(rel)
                stack.pop()
            elif action == Actions.RIGHT:
                rel = (queue[0]['id'], stack[-1]["id"], queue[0]["deprel"])
                relations.append(rel)
                stack.append(queue.pop(0))

    return sorted(relations)

In [391]:
def get_classifier():
    pipe = Pipeline([
        ('dict_vect', DictVectorizer()),
        ('lrc', LogisticRegression(random_state=42, multi_class='multinomial',
                                   max_iter=100, solver='sag', n_jobs=20))])

    return pipe


# TODO: unify funcs
def get_train_data(trees, feature_extractor):
    train_feat, train_lab = [], []
    for tree in trees:
        t_f, t_l = get_data([t for t in tree if type(t['id']) == int], feature_extractor)
        train_feat += t_f
        train_lab += t_l
    return train_feat, train_lab


def get_test_data(trees, feature_extractor):
    test_feat, test_lab = [], []
    for tree in trees:
        t_f, t_l = get_data([t for t in tree if type(t['id']) == int], feature_extractor)
        test_feat += t_f
        test_lab += t_l
    return test_feat, test_lab


def calculate_as(trees, clf, vect, feature_extractor):
    total, tpu, tpl = 0, 0, 0
    golden_u, golden_l = None, None
    for tree in trees:
        tree = [t for t in tree if type(t['id']) == int]
        golden_all = [(node['id'], node['head'], node['deprel']) for node in tree]
        golden_u = [(x, y) for x, y, _ in golden_all]

        predicted_all = dep_parse(tree, clf, vect, feature_extractor)
        predicted_u = [(x, y) for x, y, _ in predicted_all]
        
        total += len(tree)
        tpu += len(set(golden_u).intersection(set(predicted_u)))
        tpl += len(set(golden_all).intersection(set(predicted_all)))

    print('Total: ', total)
    print('Match unlabeled: ', tpu)
#     print('Match labeled: ', tpl)
    print('UAS: ', round(tpu/total, 2))
#     print('LAS: ', round(tpl/total, 2))

In [None]:
tree = test_trees[1]
print(tree)
feats, labels = get_data([t for t in tree if type(t['id']) == int], feature_extractor_deps)
vect.fit(feats)
clf.fit(vect.transform(feats), labels)
dep_parse(test_trees[1], clf, vect, feature_extractor_deps)

In [16]:
# vect = DictVectorizer()

In [370]:
# clf = LogisticRegression(random_state=42, multi_class='multinomial',
#                         max_iter=100, solver='sag', n_jobs=20, verbose=1)

clf = get_classifier()

### Baseline

In [332]:
feature_extractor = feature_extractor_base

train_feat, train_lab = get_train_data(train_trees, feature_extractor)
test_feat, test_lab = get_train_data(test_trees, feature_extractor)

In [333]:
vect.fit(train_feat)
clf.fit(vect.transform(train_feat), train_lab)

[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 21 seconds


[Parallel(n_jobs=20)]: Done   1 out of   1 | elapsed:   20.8s finished


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=20, penalty='l2',
                   random_state=42, solver='sag', tol=0.0001, verbose=1,
                   warm_start=False)

In [334]:
print(classification_report(test_lab, clf.predict(vect.transform(test_feat))))

              precision    recall  f1-score   support

        left       0.86      0.87      0.86      6371
      reduce       0.85      0.78      0.81      6875
       right       0.75      0.79      0.77      5996
       shift       0.85      0.87      0.86      6578

    accuracy                           0.83     25820
   macro avg       0.83      0.83      0.83     25820
weighted avg       0.83      0.83      0.83     25820



In [335]:
calculate_as(test_trees, clf, vect, feature_extractor)

Total:  12574
Match unlabeled:  8717
UAS:  0.69


### With token features

In [358]:
feature_extractor = compose(feature_extractor_base, feature_extractor_feats)

train_feat, train_lab = get_train_data(train_trees, feature_extractor)
test_feat, test_lab = get_train_data(test_trees, feature_extractor)

In [359]:
vect.fit(train_feat)
train_feat_vectorized = vect.transform(train_feat)
clf.fit(train_feat_vectorized, train_lab)

[Parallel(n_jobs=20)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 24 seconds


[Parallel(n_jobs=20)]: Done   1 out of   1 | elapsed:   23.2s finished


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=20, penalty='l2',
                   random_state=42, solver='sag', tol=0.0001, verbose=1,
                   warm_start=False)

In [360]:
print(classification_report(test_lab, clf.predict(vect.transform(test_feat))))

              precision    recall  f1-score   support

        left       0.87      0.89      0.88      6371
      reduce       0.86      0.81      0.83      6875
       right       0.78      0.80      0.79      5996
       shift       0.87      0.88      0.87      6578

    accuracy                           0.85     25820
   macro avg       0.85      0.85      0.85     25820
weighted avg       0.85      0.85      0.85     25820



In [361]:
calculate_as(test_trees, clf, vect, feature_extractor)

Total:  12574
Match unlabeled:  9089
UAS:  0.72


### With deprels

In [519]:
feature_extractor = compose(feature_extractor_base, feature_extractor_feats, feature_extractor_deprels)

train_feat, train_lab = get_train_data(train_trees, feature_extractor)
test_feat, test_lab = get_test_data(test_trees, feature_extractor)

In [520]:
clf.fit(train_feat, train_lab)



Pipeline(memory=None,
         steps=[('dict_vect',
                 DictVectorizer(dtype=<class 'numpy.float64'>, separator='=',
                                sort=True, sparse=True)),
                ('lrc',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='multinomial', n_jobs=20,
                                    penalty='l2', random_state=42, solver='sag',
                                    tol=0.0001, verbose=0, warm_start=False))],
         verbose=False)

In [521]:
print(classification_report(test_lab, clf.predict(test_feat)))

              precision    recall  f1-score   support

        left       0.94      0.96      0.95      6371
      reduce       0.92      0.87      0.90      6875
       right       0.89      0.91      0.90      5996
       shift       0.93      0.94      0.94      6578

    accuracy                           0.92     25820
   macro avg       0.92      0.92      0.92     25820
weighted avg       0.92      0.92      0.92     25820



In [522]:
calculate_as(test_trees, clf['lrc'], clf['dict_vect'], feature_extractor)

Total:  12574
Match unlabeled:  10495
UAS:  0.83


### With path to root

In [576]:
feature_extractor = compose(feature_extractor_base, feature_extractor_feats, feature_extractor_deprels, feature_extractor_path_to_root)

train_feat, train_lab = get_train_data(train_trees, feature_extractor)
test_feat, test_lab = get_test_data(test_trees, feature_extractor)

In [577]:
clf.fit(train_feat, train_lab)



Pipeline(memory=None,
         steps=[('dict_vect',
                 DictVectorizer(dtype=<class 'numpy.float64'>, separator='=',
                                sort=True, sparse=True)),
                ('lrc',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='multinomial', n_jobs=20,
                                    penalty='l2', random_state=42, solver='sag',
                                    tol=0.0001, verbose=0, warm_start=False))],
         verbose=False)

In [578]:
print(classification_report(test_lab, clf.predict(test_feat)))

              precision    recall  f1-score   support

        left       0.97      0.98      0.97      6371
      reduce       0.94      0.91      0.92      6875
       right       0.89      0.91      0.90      5996
       shift       0.94      0.95      0.94      6578

    accuracy                           0.94     25820
   macro avg       0.94      0.94      0.94     25820
weighted avg       0.94      0.94      0.94     25820



In [579]:
calculate_as(test_trees, clf['lrc'], clf['dict_vect'], feature_extractor)

Total:  12574
Match unlabeled:  10578
UAS:  0.84


In [None]:
with open('./train_f.json', 'w') as f:
    json.dump(train_feat, f)
print(train_feat)

In [None]:
with open('./test_f.json', 'w') as f:
    json.dump(test_feat, f)
print(test_feat)

In [572]:
arcs = [(1, 6, 'nsubj'), (2, 3, 'cc'), (3, 1, 'nsubj'), (4, 5, 'det'),
        (5, 3, 'conj'), (6, 0, None), (7, 6, 'root'), (8, 7, 'obj'),
        (9, 11, 'punct'), (10, 11, 'mark'), (11, 6, 'root'), (12, 11, 'acl:relcl'),
        (13, 12, 'xcomp'), (14, 15, 'case'), (15, 13, 'xcomp:sp'), (16, 12, 'xcomp'),
        (17, 16, 'nmod'), (18, 20, 'punct'), (19, 20, 'amod'), (20, 12, 'xcomp'),
        (21, 22, 'cc'), (22, 20, 'conj'), (23, 6, 'root')]

rels_sorted = sorted(arcs, key=lambda x: x[0] == 6, reverse=True)
print(rels_sorted)
# get_path_to_root_1(6, arcs)
# print(find_left_right_dependencies(6, arcs))

[(6, 0, None), (1, 6, 'nsubj'), (2, 3, 'cc'), (3, 1, 'nsubj'), (4, 5, 'det'), (5, 3, 'conj'), (7, 6, 'root'), (8, 7, 'obj'), (9, 11, 'punct'), (10, 11, 'mark'), (11, 6, 'root'), (12, 11, 'acl:relcl'), (13, 12, 'xcomp'), (14, 15, 'case'), (15, 13, 'xcomp:sp'), (16, 12, 'xcomp'), (17, 16, 'nmod'), (18, 20, 'punct'), (19, 20, 'amod'), (20, 12, 'xcomp'), (21, 22, 'cc'), (22, 20, 'conj'), (23, 6, 'root')]


In [481]:
left_most = 1000000
right_most = -1
dep_right_most = 'N'
dep_left_most = 'N'
wi, r, wj = (6, 'nsubj', 1)
if wi == 6:
    if (wj > wi) and (wj > right_most):
        right_most = wj
        dep_right_most = r
    if (wj < wi) and (wj < left_most):
        left_most = wj
        dep_left_most = r
print(dep_right_most, dep_left_most)

N nsubj


# II. Use parser

In [583]:
def tokenize_text(text):
    return tokenize_uk.tokenize_uk.tokenize_words(text)

DET = ['будь-який', 'ваш', 'ввесь', 'весь', 'все', 'всенький', 'всякий',
       'всілякий', 'деякий', 'другий', 'жадний', 'жодний', 'ин.', 'ін.',
       'інакший', 'інш.', 'інший', 'їх', 'їхній', 'її', 'його', 'кожний',
       'кожній', 'котрий', 'котрийсь', 'кілька', 'мій', 'наш', 'небагато',
       'ніякий', 'отакий', 'отой', 'оцей', 'сам', 'самий', 'свій', 'сей',
       'скільки', 'такий', 'тамтой', 'твій', 'те', 'той', 'увесь', 'усякий',
       'усілякий', 'це', 'цей', 'чий', 'чийсь', 'який', 'якийсь']

PREP = ["до", "на"]

mapping = {"ADJF": "ADJ", "ADJS": "ADJ", "COMP": "ADJ", "PRTF": "ADJ",
           "PRTS": "ADJ", "GRND": "VERB", "NUMR": "NUM", "ADVB": "ADV",
           "NPRO": "PRON", "PRED": "ADV", "PREP": "ADP", "PRCL": "PART"}

def normalize_pos(word):
    if word.tag.POS == "CONJ":
        if "coord" in word.tag:
            return "CCONJ"
        else:
            return "SCONJ"
    elif "PNCT" in word.tag:
        return "PUNCT"
    elif word.normal_form in PREP:
        return "PREP"
    elif word.normal_form in DET:
        return "DET"
    else:
        return mapping.get(word.tag.POS, word.tag.POS)

In [582]:
morph = pymorphy2.MorphAnalyzer(lang='uk')

In [585]:
sent_1 = 'Отож ми з ним пiймали в лiсi пугутькало i випустили в клубi пiд час лекцiї на тему "Виховання дiтей у сiм\'ї". \
    Лектор упав з трибуни i вилив собi на голову графин з водою.'

In [651]:
# def pym2_to_conllu(tokens):
#     id = 0
#     res = {}
#     for token in tokens:
# #         print('**', token)
#         res['id'] = id
#         res['form'] = token.word
#         res['lemma'] = token.normal_form
#         res['upostag'] = token.tag
#         res['feats'] = ''
#         res['deprels'] = ''
#         id += 1
#     return res

def pym2_to_conllu(i, token):
    res = {}
    res['id'] = i
    res['form'] = token.word
    res['lemma'] = token.normal_form
    res['upostag'] = token.tag
    res['feats'] = ''
    res['deprel'] = ''
    return res

# ROOT = OrderedDict([('id', 0), ('form', 'ROOT'), ('lemma', 'ROOT'), ('upostag', 'ROOT'),
#                     ('xpostag', None), ('feats', None), ('head', None), ('deprel', None),
#                     ('deps', None), ('misc', None)])
text_tokenized = tokenize_text(sent_1)
# print(text_tokenized)
# for w in text_tokenized:
#     print(morph.parse(w))
#     print('===')
    
    
def dep_parse_text(text, clf, vect, feature_extractor):
    res = []
    for sent in text:
        res.append(dep_parse(sent, clf, vect, feature_extractor))
    return res

In [653]:
text_converted = [pym2_to_conllu(i, morph.parse(x)[0]) for i, x in enumerate(text_tokenized)]
print(text_converted)
predicted_all = dep_parse(text_converted, clf, vect, feature_extractor)

[{'id': 0, 'form': 'отож', 'lemma': 'отож', 'upostag': OpencorporaTag('PRCL'), 'feats': '', 'deprel': ''}, {'id': 1, 'form': 'ми', 'lemma': 'ми', 'upostag': OpencorporaTag('NPRO,pers,plur,anim nomn'), 'feats': '', 'deprel': ''}, {'id': 2, 'form': 'з', 'lemma': 'з', 'upostag': OpencorporaTag('PREP,rv_ablt,rv_gent,rv_accs'), 'feats': '', 'deprel': ''}, {'id': 3, 'form': 'ним', 'lemma': 'він', 'upostag': OpencorporaTag('NPRO,pers,masc ablt'), 'feats': '', 'deprel': ''}, {'id': 4, 'form': 'пiймали', 'lemma': 'пiймати', 'upostag': OpencorporaTag('VERB,impf plur,past'), 'feats': '', 'deprel': ''}, {'id': 5, 'form': 'в', 'lemma': 'в', 'upostag': OpencorporaTag('PREP,v-u,rv_loct,rv_gent,rv_accs'), 'feats': '', 'deprel': ''}, {'id': 6, 'form': 'лiсi', 'lemma': 'лiсi', 'upostag': OpencorporaTag('UNKN'), 'feats': '', 'deprel': ''}, {'id': 7, 'form': 'пугутькало', 'lemma': 'пугутькати', 'upostag': OpencorporaTag('VERB,perf neut,past'), 'feats': '', 'deprel': ''}, {'id': 8, 'form': 'i', 'lemma': 'i

AttributeError: items not found