In [1]:
import re
import numpy as np

from collections import Counter

In [2]:
# Side length of the square annotator-by-annotator matrices built in
# annotation_stat. Annotator ids parsed from the data are used directly as
# row/column indices, so every id must be smaller than this value.
max_annotators = 5

In [3]:
# Read the whole M2 file into memory as a list of raw lines (newlines kept);
# the handle is closed as soon as the `with` block exits.
with open('official-2014.combined-withalt.m2') as fr:
    lines = list(fr)

In [4]:
# Blocks are separated by a blank line; each block becomes a list of its own
# lines (first entry is the block's leading line, the rest follow it in order).
data = list(map(lambda block: block.split("\n"), "".join(lines).split("\n\n")))

In [5]:
def annotation_stat(correction_type=None, data=None, n_annotators=None):
    """Compute pairwise inter-annotator agreement over M2-style annotations.

    Each element of ``data`` is one sentence block: a list of strings whose
    first entry is the sentence line and whose remaining entries are
    annotation lines of the form::

        A -1 -1|||noop|||-NONE-|||REQUIRED|||-NONE-|||0

    i.e. six ``|||``-separated fields where field 1 is the correction type
    and field 5 is the integer annotator id.

    Two annotations agree when they are textually identical once the leading
    ``"A "`` marker and the trailing annotator-id field are removed.  For each
    sentence and each pair of annotators that both produced at least one
    (matching-type) annotation, ``min(count_a, count_b)`` is added to the
    pair's total; the result is the sum of agreements over all distinct
    annotator pairs divided by the sum of those totals.

    Parameters
    ----------
    correction_type : str or None
        If given (and non-empty), only annotations whose type field equals
        this value are considered; otherwise all annotations are used.
    data : list of list of str, optional
        Sentence blocks as described above.  When omitted, the notebook-level
        ``data`` variable is looked up at call time (not frozen at definition
        time), so re-running the loading cell is picked up automatically.
    n_annotators : int, optional
        Size of the annotator axis; defaults to the notebook-level
        ``max_annotators``.  Every annotator id must be smaller than this.

    Returns
    -------
    float
        Agreed annotations divided by comparable annotations over all
        distinct annotator pairs; ``0.0`` when no pair has anything to compare.

    Raises
    ------
    IndexError
        If a non-blank annotation line has fewer than six fields, or an
        annotator id does not fit into the ``n_annotators`` matrix.
    ValueError
        If the annotator-id field is not an integer.
    """
    if data is None:
        # The parameter shadows the notebook global of the same name, so the
        # global has to be fetched explicitly.
        data = globals()["data"]
    if n_annotators is None:
        n_annotators = max_annotators

    agreed_annotations = np.zeros((n_annotators, n_annotators))
    total_annotations = np.zeros((n_annotators, n_annotators))

    for sentence in data:
        # Gather (annotator, annotation) pairs for this sentence; slicing an
        # empty or single-line block simply yields no annotation lines.
        sentence_annotations = []
        for answer in sentence[1:]:
            if not answer.strip():
                # Blank lines (e.g. from a trailing newline) carry no annotation.
                continue

            fields = answer.split("|||")
            corr_type = fields[1]
            annotator = int(fields[5])
            # Drop the "A " marker and the annotator id so that identical
            # edits by different annotators compare equal, whatever the
            # number of digits in the id.
            annotation = "|||".join(fields[:5])[2:]

            if not correction_type or correction_type == corr_type:
                sentence_annotations.append((annotator, annotation))

        # Count every annotation per annotator and every identical annotation
        # per ordered annotator pair (self-pairs land on the diagonal, which
        # is excluded from the final score).
        annotators_counter = Counter()
        for a_id, a_annotation in sentence_annotations:
            annotators_counter[a_id] += 1
            for b_id, b_annotation in sentence_annotations:
                if a_annotation == b_annotation:
                    agreed_annotations[a_id, b_id] += 1

        # A pair can agree on at most as many annotations as the less
        # productive of the two annotators made for this sentence.
        for a_id, a_count in annotators_counter.items():
            for b_id, b_count in annotators_counter.items():
                total_annotations[a_id, b_id] += min(a_count, b_count)

    # Both matrices are symmetric: the strictly lower triangle counts each
    # distinct annotator pair exactly once and skips the diagonal.
    agreed_pairs = np.tril(agreed_annotations, k=-1).sum()
    total_pairs = np.tril(total_annotations, k=-1).sum()

    if total_pairs == 0:
        # No annotations of the requested type for any pair: report zero
        # agreement instead of dividing by zero.
        return np.float64(0.0)

    return agreed_pairs / total_pairs

In [6]:
%%time 

# No correction_type filter: agreement is computed over every annotation line.
annotation_stat()

CPU times: user 113 ms, sys: 23.1 ms, total: 136 ms
Wall time: 106 ms


0.45197044334975367

In [7]:
%%time 

# Agreement restricted to annotations whose type field is exactly "Vform".
annotation_stat("Vform")

CPU times: user 12 ms, sys: 1.37 ms, total: 13.3 ms
Wall time: 11.4 ms


0.8189655172413793

In [8]:
# Correction-type tag -> human-readable description.
# The keys are passed to annotation_stat as `correction_type`, where they are
# compared for exact equality with the type field of each annotation line, so
# they must match the tags in the data character for character.
# NOTE(review): "Rloc-" carries a trailing hyphen — presumably that is how the
# tag is spelled in the data file; confirm before "cleaning" it up.
# NOTE(review): "possesive" below is a typo for "possessive"; left unchanged
# here because the string is printed verbatim in the report.
corrections = {
    "ArtOrDet": "Article or Determiner",
    "Cit": "Citation",
    "Mec": "Punctuation, capitalization, spelling, typos",
    "Nn": "Noun number",
    "Npos": "Noun possesive",
    "Others": "Other errors",
    "Pform": "Pronoun form",
    "Pref": "Pronoun reference",
    "Prep": "Preposition",
    "Rloc-": "Local redundancy",
    "SVA": "Subject-verb-agreement",
    "Sfrag": "Fragment",
    "Smod": "Dangling modifier",
    "Spar": "Parallelism",
    "Srun": "Runons, comma splice",
    "Ssub": "Subordinate clause",
    "Trans": "Link word/phrases",
    "Um": "Unclear meaning (cannot be corrected)",
    "V0": "Missing verb",
    "Vform": "Verb form",
    "Vm": "Verb modal",
    "Vt": "Verb tense",
    "WOadv": "Adverb/adjective position",
    "WOinc": "Incorrect sentence form",
    "Wa": "Acronyms",
    "Wci": "Wrong collocation/idiom",
    "Wform": "Word form",
    "Wtone": "Tone"
}

In [9]:
# One line per correction type: agreement score (two significant digits),
# a tab, then the type's description.
for corr_type, desc in corrections.items():
    print(f'{annotation_stat(corr_type):.2}\t{desc}')

0.74	Article or Determiner
0.0	Citation
0.77	Punctuation, capitalization, spelling, typos
0.91	Noun number
0.84	Noun possesive
0.69	Other errors
0.9	Pronoun form
0.76	Pronoun reference
0.8	Preposition
0.77	Local redundancy
0.9	Subject-verb-agreement
1.0	Fragment
1.0	Dangling modifier
1.0	Parallelism
0.9	Runons, comma splice
0.77	Subordinate clause
0.63	Link word/phrases
0.67	Unclear meaning (cannot be corrected)
0.86	Missing verb
0.82	Verb form
0.61	Verb modal
0.81	Verb tense
0.89	Adverb/adjective position
0.8	Incorrect sentence form
1.0	Acronyms
0.54	Wrong collocation/idiom
0.79	Word form
0.83	Tone
