In [1]:
import os
import sys
import json
import numpy as np
from nltk.metrics.agreement import AnnotationTask

sys.path.append(os.path.join(os.path.pardir, "src"))
from interannotator_agreement import get_aligned_docs_list

In [2]:
# Root directory of the annotation exports. Jupyter does not define __file__,
# so the location is resolved relative to the kernel's working directory
# (the original `dirname(abspath("__file__"))` evaluated to the same place).
annotation_path = os.path.join(
    os.getcwd(),
    os.path.pardir,
    "src",
    "data",
    "annotations",
)

# Canonical annotator order; it fixes the key order of every mapping below.
ANNOTATOR_NAMES = ("oksana", "sneha", "utkarsh", "badr")


def _annotation_files(owner):
    """Map each annotator to the file holding their labels for `owner`'s documents.

    Layout on disk: ``<annotation_path>/<annotator>/<file>.json`` where the
    file is ``<owner>.json`` for the owner's own (reference) annotations and
    ``<owner>_<reviewer>.json`` for another annotator's review of them.

    The owner's entry comes first, followed by the remaining annotators in
    ``ANNOTATOR_NAMES`` order.
    """
    reviewers = [owner] + [name for name in ANNOTATOR_NAMES if name != owner]
    return {
        reviewer: os.path.join(
            annotation_path,
            reviewer,
            f"{owner}.json" if reviewer == owner else f"{owner}_{reviewer}.json",
        )
        for reviewer in reviewers
    }


# Kept as individual names so later cells can still refer to them directly.
oksana_docs = _annotation_files("oksana")
sneha_docs = _annotation_files("sneha")
utkarsh_docs = _annotation_files("utkarsh")
badr_docs = _annotation_files("badr")

# Document owner -> {annotator -> path of that annotator's labels}.
annotators = {
    "oksana": oksana_docs,
    "sneha": sneha_docs,
    "utkarsh": utkarsh_docs,
    "badr": badr_docs,
}



In [3]:
def doc_to_triples(doc, name="name"):
    """Convert one annotator's entities into (coder, item, label) triples.

    Args:
        doc: iterable of mappings, each providing a "span" and a "label" entry.
        name: identifier of the annotator, used as the first triple element.

    Returns:
        A list of ``(name, span, label)`` tuples, one per entity, in input order.
    """
    return [(name, entity["span"], entity["label"]) for entity in doc]



In [4]:
def _read_json(path):
    """Return the parsed content of the UTF-8 encoded JSON file at `path`."""
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# For every document owner: load each annotator's file, then let
# get_aligned_docs_list align them with the owner as the reference.
aligned_docs_dict = {
    owner: get_aligned_docs_list(
        {reviewer: _read_json(path) for reviewer, path in paths.items()},
        owner,
    )
    for owner, paths in annotators.items()
}


In [5]:
# One agreement coefficient per aligned document, accumulated over all owners.
S_score, pi_score, K_score, alpha_score = [], [], [], []

for annotator, docs in aligned_docs_dict.items():
    for doc in docs:
        # Reference annotator's labels first, then every reviewer's labels.
        triples = doc_to_triples(doc["ref_entities"], annotator)
        for reviewer, review_doc in doc["reviewed_entities"].items():
            triples.extend(doc_to_triples(review_doc, reviewer))

        spans = {span for _, span, _ in triples}
        names = {name for name, _, _ in triples}

        # Every coder must have a label for every span: spans a coder did not
        # annotate are filled in with an explicit "UNTAGGED" label.
        for anot_name in names:
            spans_name = {span for name, span, _ in triples if name == anot_name}
            for span in spans - spans_name:
                triples.append((anot_name, span, "UNTAGGED"))

        task = AnnotationTask(triples)

        S_score.append(task.S())
        pi_score.append(task.pi())
        K_score.append(task.kappa())
        alpha_score.append(task.alpha())

In [6]:
# Average each coefficient over all documents before reporting it.
mean_S = np.mean(S_score)
mean_pi = np.mean(pi_score)
mean_K = np.mean(K_score)
mean_alpha = np.mean(alpha_score)

print(f"S score : {mean_S}")
print(f"pi score : {mean_pi}")
print(f"K score : {mean_K}")
print(f"Alpha score : {mean_alpha}")

S score : 0.9470038574679591
pi score : 0.9366619900370948
K score : 0.9371439741077994
Alpha score : 0.9360954383660536
