In [1]:
import spacy
from spacy.tokens import DocBin
from spacy.util import minibatch
import random
from spacy import displacy


In [2]:
def load_data(file_path):
    doc_bin = DocBin().from_disk(file_path)
    return list(doc_bin.get_docs(nlp.vocab))

In [3]:
def evaluate(nlp, docs):
    examples = []
    for doc in docs:
        headss = [token.head.i for token in doc]
        depss = [token.dep_ for token in doc]
        dic = {"heads": headss, "deps": depss}
        examples.append(spacy.training.Example.from_dict(doc, dic))
    return nlp.evaluate(examples)

In [4]:
nlp = spacy.blank('tr')

In [5]:

parser = nlp.add_pipe('parser')

In [6]:

train_data = load_data('HW2_Data/Task1/tr_imst-ud-train.spacy')
dev_data = load_data('HW2_Data/Task1/tr_imst-ud-dev.spacy')
test_data = load_data('HW2_Data/Task1/tr_imst-ud-test.spacy')

In [7]:
for doc in train_data:
    for token in doc:
        parser.add_label(token.dep_)

In [8]:

optimizer = nlp.initialize()

In [9]:
for i in range(5): 
    random.shuffle(train_data)
    batches = minibatch(train_data, size=8)  
    losses = {}
    
    for batch in batches:
        for doc in batch:
            headss = [token.head.i for token in doc]
            depss = [token.dep_ for token in doc]
            dic = {"heads": headss, "deps": depss}
            example = spacy.training.Example.from_dict(doc, dic)
            nlp.update([example], drop=0.5, losses=losses, sgd=optimizer)
            
    print(f"Losses at iteration {i}: {losses}")
    
    
    dev_scores = evaluate(nlp, dev_data)
    print(f"Scores on development set: {dev_scores}")

Losses at iteration 0: {'parser': 18269.60273465954}
Scores on development set: {'token_acc': 1.0, 'token_p': 1.0, 'token_r': 1.0, 'token_f': 1.0, 'sents_p': 1.0, 'sents_r': 1.0, 'sents_f': 1.0, 'dep_uas': 0.5298606904906117, 'dep_las': 0.2996971532404603, 'dep_las_per_type': {'root': {'p': 1.0, 'r': 0.5354251012145749, 'f': 0.6974291364535267}, 'nmod:poss': {'p': 0.17135862913096694, 'r': 0.24054982817869416, 'f': 0.20014295925661185}, 'nsubj': {'p': 0.3, 'r': 0.1852409638554217, 'f': 0.22905027932960895}, 'conj': {'p': 0.48655913978494625, 'r': 0.2657856093979442, 'f': 0.34377967711301044}, 'obj': {'p': 0.1392355694227769, 'r': 0.4728476821192053, 'f': 0.21512503766194638}, 'amod': {'p': 0.11059479553903345, 'r': 0.21794871794871795, 'f': 0.1467324290998767}, 'csubj': {'p': 1.0, 'r': 0.1111111111111111, 'f': 0.19999999999999998}, 'cop': {'p': 0.6, 'r': 0.6382978723404256, 'f': 0.6185567010309279}, 'nmod': {'p': 0.0891089108910891, 'r': 0.09388971684053651, 'f': 0.09143686502177069}, 

KeyboardInterrupt: 

In [None]:

test_scores = evaluate(nlp, test_data)
print(f"Scores on test set: {test_scores}")


las = test_scores['dep_las']
uas = test_scores['dep_uas']
print(f"Labeled Attachment Score (LAS) on test set: {las:.2f}")
print(f"Unlabeled Attachment Score (UAS) on test set: {uas:.2f}")

In [None]:
sentences = ["Güneş doğudan yükselir ve batıda batar.",
    "Akıllı telefonlar, günümüzün vazgeçilmez teknolojik aletleridir.",
    "Yarınki toplantıda, müdürün asistanı tarafından sunulan, şirketin kazanç raporunu gözden geçireceğiz.",
    "Çocuğun parkta, köpeğiyle oynarken gördüğümüz adam, eski bir arkadaşım çıktı."]

In [None]:

for sentence in sentences:
    doc = nlp(sentence)
    displacy.render(doc, style="dep", jupyter=True, options={'distance': 90})