In [44]:
from sklearn.metrics import cohen_kappa_score
import json, csv

# read the tagged verbs
def read_data(filename):
    """Load one rater's tagged sentences and extract the codes given to verbs.

    The file is expected to hold a JSON list of sentences, each sentence
    being a list of word entries. For every entry, element 0 is collected
    as the word text, element 1 is compared against 'VERB', and element 2
    (when present) is taken as the rater's code.

    Args:
        filename: path to the JSON file to read.

    Returns:
        A tuple (data, sentences):
          * data: a list of [sent_id, word_id, code] for every 'VERB' entry
            that has exactly three elements; ids are 0-based positions.
          * sentences: for each sentence, the list of element-0 values.

    Verbs without a code are not added to data; a 'Missing tag' message
    with their position is printed instead.
    """
    # Use a context manager so the handle is closed deterministically, and
    # decode explicitly as UTF-8 (the JSON interchange encoding) instead of
    # relying on the platform's locale default.
    with open(filename, encoding='utf-8') as f:
        tagged_sents = json.load(f)

    data = []
    sentences = []

    # keep track of the sentence and word ids for traceability
    for sent_id, sent in enumerate(tagged_sents):
        sentences.append([w[0] for w in sent])
        for word_id, word in enumerate(sent):
            if word[1] != 'VERB':
                continue
            if len(word) == 3:
                data.append([sent_id, word_id, word[2]])
            else:
                print('Missing tag at sentence %i, word %i' % (
                    sent_id, word_id))
    return data, sentences


In [45]:
# Load the first rater's tags; the sentences are kept for later display.
data1, sentences = read_data('sampled_tags_tb.json')
# Report the count of the list that was just read (previously this printed
# len(data), a name that is never defined in this notebook).
print('Read %i tagged items.' % len(data1))

# change the filename below to match your file
data2, _ = read_data('sampled_tags_vk.json')
print('Read %i tagged items.' % len(data2))

Read 501 tagged items.
Read 501 tagged items.


In [46]:
# compute Cohen's Kappa for two raters

# Pair the codes by (sent_id, word_id) rather than by list position:
# read_data leaves out verbs whose tag is missing, so the two lists may
# differ in length or be shifted against each other, and a positional
# comparison would then compare codes for different words.
codes2 = {(x[0], x[1]): x[2] for x in data2}
paired = [
    (x[2], codes2[(x[0], x[1])])
    for x in data1
    if (x[0], x[1]) in codes2
]
if len(paired) != len(data1) or len(paired) != len(data2):
    print('Warning: only %i items were tagged by both raters '
          '(rater 1: %i, rater 2: %i).' % (
              len(paired), len(data1), len(data2)))

y1 = [pair[0] for pair in paired]
y2 = [pair[1] for pair in paired]
kappa = cohen_kappa_score(y1, y2)
print('Cohen\'s Kappa: %0.3f' % kappa)

Cohen's Kappa: 0.987


In [47]:
# write the disagreements out to a file for inspection
disagreed = []

# format of data1/data2: sent_id, word_id, code
# Look up the second rater's code by (sent_id, word_id) instead of zipping
# the two lists: zip pairs items by position and silently truncates, so a
# single item missing from either list would misalign every later row.
rater2_codes = {(x2[0], x2[1]): x2[2] for x2 in data2}
for x1 in data1:
    key = (x1[0], x1[1])
    if key not in rater2_codes:
        # No code from the second rater at this position, so there is
        # nothing to compare; such items are not listed as disagreements.
        continue
    code2 = rater2_codes[key]
    if x1[2] != code2:
        # record the sent_id, word_id, codes, plus the sentence with the
        # word in question wrapped in brackets (on a copy of the sentence)
        sent = list(sentences[x1[0]])
        sent[x1[1]] = '[' + sent[x1[1]] + ']'
        disagreed.append([
            x1[0], x1[1], x1[2], code2, ' '.join(sent)
        ])

# newline='' is what the csv module requires of files it writes to (without
# it, blank rows appear on Windows); UTF-8 keeps non-ASCII words writable
# regardless of the platform's default encoding.
with open('disagreements.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['sent_id', 'word_id', 'code1', 'code2', 'sentence'])
    writer.writerows(disagreed)
