In [1]:
import pandas as pd
import jsonlines
from collections import Counter, defaultdict
import nltk
import numpy as np
from statsmodels.stats.inter_rater import fleiss_kappa
from nltk.metrics.agreement import AnnotationTask
from nltk.metrics.distance import masi_distance


# Token-based (NER/Span) - Two annotators (F1)

In [2]:
from prodigy_kappa import compute_prodigy_ner_F1

In [3]:
gold_f = "/home/vs428/project/UTI_Noise_data/UTI_symptoms_GOLD_Adj_resident_train.jsonl"
perkins_f = "/home/vs428/project/UTI_Noise_data/UTI_symptoms_RESIDENT_KAPPA-perkins.jsonl"
# kearns_f = "/home/vs428/project/UTI_Noise_data/UTI_symptoms_RESIDENT_KAPPA-kearns.jsonl"


In [6]:
# Read every record of the gold file and of the comparison annotator's file.
with jsonlines.open(gold_f) as gold_reader:
    gold_anns = list(gold_reader)

with jsonlines.open(perkins_f) as cmp_reader:
    cmp_anns = list(cmp_reader)

In [8]:
# Span labels that are scored below.
labels = [
    "Dysuria",
    "Hematuria",
    "Urinary_frequency",
    "Urinary_urgency",
    "Urinary_incontinence",
    "Urinary_retention",
    "Abdominal_pain",
    "Flank_pain",
    "Back_pain",
    "Low_back_pain",
    "Pelvic_pain",
    "Fever",
    "Fatigue",
    "Altered_mental_status",
    "Suprapubic_tenderness",
    "CVA_tenderness",
    "Abdominal_tenderness",
]

# Call with the helper's default settings; the result is kept in `x`.
x = compute_prodigy_ner_F1(labels, gold_f, perkins_f)

# Same files with overlap="type" and weighted=True. This call returns a pair,
# unpacked as `y` (indexed by label later on) and `supports` (label -> support).
# NOTE(review): the exact meaning of `overlap` / `weighted` is defined in
# prodigy_kappa, which is not visible here -- confirm there.
y, supports = compute_prodigy_ner_F1(
    labels, gold_f, perkins_f, overlap="type", weighted=True
)

In [9]:
supports['Urinary_incontinence'] = 0

In [10]:
# Each label's share of the total support; labels with zero support get weight 0.
total_support = sum(supports.values())
weights = {
    label: (support / total_support if support != 0 else 0)
    for label, support in supports.items()
}

In [11]:
weights

{'Abdominal_pain': 0.3978494623655914,
 'Abdominal_tenderness': 0.051075268817204304,
 'Suprapubic_tenderness': 0.026881720430107527,
 'CVA_tenderness': 0.021505376344086023,
 'Hematuria': 0.07258064516129033,
 'Flank_pain': 0.1478494623655914,
 'Urinary_retention': 0.01881720430107527,
 'Low_back_pain': 0.04838709677419355,
 'Dysuria': 0.06720430107526881,
 'Fatigue': 0.016129032258064516,
 'Altered_mental_status': 0.008064516129032258,
 'Back_pain': 0.021505376344086023,
 'Pelvic_pain': 0.03763440860215054,
 'Urinary_frequency': 0.03225806451612903,
 'Urinary_urgency': 0.008064516129032258,
 'Fever': 0.024193548387096774,
 'Urinary_incontinence': 0}

In [12]:
# Scale each label's score by its weight. Zero-weight labels contribute 0
# and their score is never looked up.
z = {
    label: (y[label] * weights[label] if weights[label] != 0 else 0)
    for label in labels
}

In [13]:
sum(z.values())

0.8214800598735046

In [14]:
pd.DataFrame.from_records([x, y, supports, z]).to_csv("perkins_before_after_overlap_kappa.csv", index=False)

### Check why Urinary_incontinence is 0.0

Sanity check

In [8]:
# Per-document span lists for each side. Records without a "spans" key are
# skipped on BOTH sides, so a gold record that carries no spans cannot raise
# KeyError (the comparison side already had this guard).
gold_spans = [ann['spans'] for ann in gold_anns if "spans" in ann]
cmp_spans = [ann['spans'] for ann in cmp_anns if "spans" in ann]

In [9]:
gold_spans = [item for sublist in gold_spans for item in sublist]
cmp_spans = [item for sublist in cmp_spans for item in sublist]

In [10]:
def _spans_with_label(all_spans, wanted_label):
    """Return the spans whose 'label' field equals ``wanted_label``."""
    return [candidate for candidate in all_spans if candidate['label'] == wanted_label]


# NOTE: despite the "_dysuria" suffix, these hold the Urinary_incontinence spans.
gold_spans_dysuria = _spans_with_label(gold_spans, "Urinary_incontinence")
cmp_spans_dysuria = _spans_with_label(cmp_spans, "Urinary_incontinence")

In [11]:
gold_spans_dysuria

[]

In [12]:
cmp_spans_dysuria

[]

In [13]:
# Remove the source / input_hash / text fields from every span in place;
# a field that is absent is simply ignored.
for span in gold_spans_dysuria:
    for unwanted_key in ("source", "input_hash", "text"):
        span.pop(unwanted_key, None)

In [14]:
gold_spans_dysuria

[]

In [44]:
# Gather the distinct keys used by any span in either file, to check that
# spans only carry the expected 5 keys.
gold_spans = [span for ann in gold_anns if "spans" in ann.keys() for span in ann['spans']]
anns_spans = [span for ann in cmp_anns if "spans" in ann.keys() for span in ann['spans']]

spans = gold_spans + anns_spans

span_keys = {key for span in spans for key in span.keys()}


# Multi-label Text Classification By Label (Fleiss' Kappa)

In [17]:
# Load every annotation record from the initial incarceration-status file.
with jsonlines.open("/home/vs428/project/Incarceration_Data/incarceration_status_initial.jsonl") as reader:
    anns = list(reader)



In [22]:
anns[0].keys()

dict_keys(['text', '_input_hash', '_task_hash', 'options', '_view_id', 'config', 'accept', 'answer', '_timestamp', '_annotator_id', '_session_id'])

In [24]:
anns[0]['accept']

['Arrested', 'On_Probation']

array([[0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0.]])

In [2]:
def compute_prodigy_fleissk_textcat(labels, *ann_files, overlap="full"):
    '''Compute a per-label Fleiss' kappa from Prodigy JSONL annotation files.

    Fleiss' kappa is a generalization of Scott's Pi (similar to Cohen's Kappa)
    for >2 annotators. Each label is scored on its own as a binary task: for
    every document we count how many annotations accepted the label and how
    many did not.

    We assume that there are no missing ratings from any annotator. A document
    with fewer annotations than annotation files is dropped.

    Requires statsmodels

    Args:
        labels: iterable of label names to score.
        *ann_files: paths of the Prodigy JSONL annotation files. Every record
            is read for its `_input_hash` and `accept` fields.
        overlap: not used by the computation; kept so existing calls still work.

    Returns:
        dict mapping each label to its Fleiss' kappa. A label maps to NaN when
        no document has a complete set of annotations.
    '''
    # read in all annotation files
    all_anns = []
    for ann_file in ann_files:
        with jsonlines.open(ann_file) as reader:
            all_anns.extend(reader)

    # combine all annotations of the same text by input hash
    anns_by_input_hash = defaultdict(list)
    for ann in all_anns:
        anns_by_input_hash[ann['_input_hash']].append(ann)

    # Keep only documents without missing ratings. Incomplete documents must
    # not reach fleiss_kappa at all: leaving them in as all-zero rows would
    # give rows with differing rating counts.
    n_raters = len(ann_files)
    complete_docs = [
        doc_anns
        for doc_anns in anns_by_input_hash.values()
        if len(doc_anns) >= n_raters
    ]

    # compute per-label kappa
    kappas = {}
    for label in labels:
        # one row per document: (# annotations without label, # with label)
        rows = []
        for doc_anns in complete_docs:
            n_with_label = sum(1 for ann in doc_anns if label in ann['accept'])
            rows.append((len(doc_anns) - n_with_label, n_with_label))

        # reshape keeps the (n_docs, 2) layout even when there are no rows
        rater_mat = np.array(rows, dtype=float).reshape(-1, 2)
        if rater_mat.shape[0] == 0:
            # nothing to score; avoid calling fleiss_kappa on an empty table
            kappas[label] = float("nan")
        else:
            kappas[label] = fleiss_kappa(rater_mat)
        print(f"Calculated kappa for {label}")

    return kappas

In [3]:
incar = "/home/vs428/project/Incarceration_Data/incarceration_status_initial.jsonl"
incar2 = "/home/vs428/project/Incarceration_Data/incarceration_status_initial_v2.jsonl"
incar3 = "/home/vs428/project/Incarceration_Data/incarceration_status_initial_v3.jsonl"

In [4]:
# Label options: every entry uses the same string for its id and its text.
_option_names = [
    "Prior_History_Incarceration",
    "Current_Incarceration",
    "Recent_Incarceration",
    "Family_History_Incarceration",
    "Arrested",
    "On_Probation",
    "In_Police_Custody",
    "Brought_in_by_Police",
]
z = [{"id": name, "text": name} for name in _option_names]

In [5]:
labels = [x['id'] for x in z]

In [6]:

# kappas = compute_prodigy_fleissk_textcat(labels, incar, incar2, incar3)
kappas2 = compute_prodigy_fleissk_textcat(labels, incar, incar3)

Calculated kappa for Prior_History_Incarceration
Calculated kappa for Current_Incarceration
Calculated kappa for Recent_Incarceration
Calculated kappa for Family_History_Incarceration
Calculated kappa for Arrested
Calculated kappa for On_Probation
Calculated kappa for In_Police_Custody
Calculated kappa for Brought_in_by_Police


In [7]:
# kappas2

# Multi-label Text Classification (Krippendorff's alpha)

In [216]:
def compute_prodigy_KrippendorffA_textcat(labels, *ann_files, verbose=False):
    '''Compute one overall Krippendorff's alpha across Prodigy JSONL annotation files.

    The labels accepted in each annotation are treated as a set, and nltk's
    AnnotationTask compares those sets with the MASI distance, so the
    multi-label, multi-annotator setting yields a single score.

    A document is scored only when it has as many annotations as there are
    annotation files AND every one of those annotations accepted at least one
    label. Any other document is dropped.

    Requires nltk

    Args:
        labels: not used by the computation.
        *ann_files: paths of the Prodigy JSONL annotation files.
        verbose: when true, print the input hash of every dropped document.

    Returns:
        The result of ``AnnotationTask.alpha()`` over the kept annotations.
    '''
    # gather every record from every file
    records = []
    for path in ann_files:
        with jsonlines.open(path) as reader:
            for record in reader:
                records.append(record)

    # bucket the records by the hash of the text they annotate
    by_hash = defaultdict(list)
    for record in records:
        by_hash[record['_input_hash']].append(record)

    # AnnotationTask is loaded with (coder, item, label) triples
    n_files = len(ann_files)
    triples = []
    for input_hash, doc_anns in by_hash.items():
        # keep a document only if it is complete and no annotation is empty
        keep = len(doc_anns) == n_files and all([bool(doc_ann['accept']) for doc_ann in doc_anns])
        if not keep:
            if verbose:
                print(input_hash)
            continue
        for doc_ann in doc_anns:
            coder = doc_ann['_annotator_id']
            item = str(doc_ann['_input_hash'])
            triples.append((coder, item, frozenset(doc_ann['accept'])))

    # build the nltk task and score it
    task = AnnotationTask(distance=masi_distance)
    task.load_array(triples)
    return task.alpha()

In [217]:
incar = "/home/vs428/project/Incarceration_Data/incarceration_status_initial.jsonl"
incar2 = "/home/vs428/project/Incarceration_Data/incarceration_status_initial_v2.jsonl"
incar3 = "/home/vs428/project/Incarceration_Data/incarceration_status_initial_v3.jsonl"

In [218]:
# Label options: every entry uses the same string for its id and its text.
_option_names = [
    "Prior_History_Incarceration",
    "Current_Incarceration",
    "Recent_Incarceration",
    "Family_History_Incarceration",
    "Arrested",
    "On_Probation",
    "In_Police_Custody",
    "Brought_in_by_Police",
]
z = [{"id": name, "text": name} for name in _option_names]

In [219]:
labels = [x['id'] for x in z]

In [221]:
krippendorffA = compute_prodigy_KrippendorffA_textcat(labels, incar, incar3, verbose=False)

In [222]:
krippendorffA

0.9967416087311426

# Create Review + NER.Manual Data

In [1]:
import jsonlines

In [15]:
# Read every record of the gold file and of the comparison annotator's file.
with jsonlines.open("/home/vs428/project/UTI_Noise_data/UTI_symptoms_GOLD_Adj_resident_train.jsonl") as reader:
    gold = list(reader)

with jsonlines.open("/home/vs428/project/UTI_Noise_data/UTI_symptoms_RESIDENT_KAPPA-perkins.jsonl") as reader:
    cmp = list(reader)

In [16]:
len(gold), len(cmp)

(50, 50)

In [None]:
# Turn every gold record into a plain ner_manual record (mutates `gold` in
# place): drop the "versions" field and tag the record with the ner_manual view.
for ann in gold:
    if "versions" in ann:
        ann.pop("versions")
    else:
        # surface any record that had no "versions" field to remove
        print(ann)
    ann['view_id'] = "ner_manual"
    # The Prodigy records loaded in this notebook carry the view under the
    # underscore-prefixed "_view_id" key, so the plain "view_id" key alone
    # leaves the record's real view unchanged. Set "_view_id" as well;
    # "view_id" is kept so anything already reading it keeps working.
    # NOTE(review): confirm no consumer relies on the old "_view_id" value.
    ann['_view_id'] = "ner_manual"


In [18]:

with jsonlines.open("/home/vs428/project/UTI_Noise_data/UTI_symptoms_GOLD_Adj_resident_train_nermanual.jsonl", "w") as writer:
    writer.write_all(gold)
