Calculate inter-rater reliability for glosses.

In [1]:
import pandas as pd
import re
from itertools import combinations
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr
from sklearn.metrics import cohen_kappa_score

% matplotlib inline

## Functions

In [2]:
def get_subset(df1, df2, col_ls):

    """
    Select the rows of df2 that belong to the participant + generation
    combinations present in df1.

    Parameters
    ----------
    df1 : DataFrame with 'participant' and 'generation' columns; only its
        unique (participant, generation) pairs are used.
    df2 : DataFrame with 'participant' and 'generation' columns plus
        every column named in col_ls.
    col_ls : list of column names to keep. It must include 'participant'
        and 'target', because sorting happens after the column selection.

    Returns
    -------
    DataFrame restricted to col_ls, sorted by participant then target,
    with a fresh 0..n-1 index. If df1 has no rows, an empty frame with
    the col_ls columns is returned.

    Note: the result lines up row-for-row with df1 only when df1 is itself
    sorted by participant and target and df2 holds exactly one matching row
    for each df1 row; this function does not verify that.
    """

    # unique participant and generation combinations, in order of first appearance
    unique_ps = df1[['participant', 'generation']].drop_duplicates()

    # for each combination, take the matching rows from df2
    pieces = [
        df2[(df2.participant == p) & (df2.generation == g)]
        for p, g in zip(unique_ps['participant'], unique_ps['generation'])
    ]

    if not pieces:
        # pd.concat raises ValueError on an empty list, so build the
        # empty result directly
        return df2.iloc[0:0][col_ls].reset_index(drop=True)

    # concatenate all subsets
    full_subset = pd.concat(pieces, ignore_index=True)

    # keep only the requested columns and sort
    full_subset = full_subset[col_ls].sort_values(by=['participant', 'target'])

    return full_subset.reset_index(drop=True)

In [3]:
def compare_marker_pres(df1, df2):

    """
    Get % agreement for marker presence.

    Compares the 'markerPres' value of each row of df1 with the row of
    df2 that carries the same index label, and returns the percentage of
    rows whose values are equal.

    Parameters
    ----------
    df1, df2 : DataFrames with a 'markerPres' column. They must have the
        same number of rows, and every index label of df1 must exist in df2.

    Returns
    -------
    float in [0, 100]; NaN when both frames are empty (agreement is
    undefined, mirroring what jaccard_index does for an empty union).

    Raises
    ------
    ValueError if the two frames differ in length. Previously a longer
    df2 was silently truncated, which hides a misaligned comparison.
    """

    if len(df1) != len(df2):
        raise ValueError(
            "Cannot compare marker presence: df1 has %d rows but df2 has %d"
            % (len(df1), len(df2)))

    if len(df1) == 0:
        # nothing to compare; avoid dividing by zero
        return float('nan')

    mPres1 = df1['markerPres']
    # look rows up by df1's index labels, as the row-by-row version did
    mPres2 = df2.loc[df1.index, 'markerPres']

    count = sum(1 for a, b in zip(mPres1, mPres2) if a == b)

    return (float(count) / len(df1)) * 100

In [4]:
# same shape list function as used for entropy
# used to get list of gesture shapes for a given meaning

def shape_list(alist, reg):
    """
    Collect every match of `reg` across a list of code strings.

    alist : list of strings; they are joined with ',' before searching.
    reg   : regular-expression pattern (handed to re.compile).

    Returns a tuple (matches, unique_matches): the list of all matches in
    order of appearance, and the set of distinct matches.
    """
    joined_code = ','.join(alist)
    matches = re.compile(reg).findall(joined_code)
    return matches, set(matches)


In [5]:
# find jaccard similarity for two sets

def jaccard_index(setA, setB):
    """
    Jaccard similarity of two sets: |A & B| / (|A| + |B| - |A & B|).

    Returns a float, or NaN when the denominator is zero (both sets empty).
    """
    size_a = len(list(setA))
    size_b = len(list(setB))
    shared = len(list(setA.intersection(setB)))

    union_size = float(size_a + size_b - shared)

    # an empty union leaves the index undefined
    return shared / union_size if union_size != 0 else float('nan')

In [6]:
def compare_targets(df):

    """
    Pair up every two rows that share a target.

    Parameters
    ----------
    df : DataFrame with 'target', 'participant' and 'code_string' columns.

    Returns
    -------
    DataFrame with columns ['target', 'genA', 'codeA', 'genB', 'codeB'],
    one row per unordered pair of rows within the same target. Despite
    their names, 'genA' and 'genB' hold the 'participant' value of each
    row in the pair; 'codeA' and 'codeB' hold its 'code_string'.
    Targets with fewer than two rows contribute no rows.
    """

    cols = ['target', 'genA', 'codeA', 'genB', 'codeB']

    # Collect plain records and build the frame once: appending to a
    # DataFrame inside the loop is quadratic, and DataFrame.append no
    # longer exists in pandas 2.x.
    records = []

    for t in df.target.unique():

        # positional 0..n-1 index so the combination indices can be used with .loc
        t_subset = df[df.target == t].reset_index(drop=True)

        for a, b in combinations(range(len(t_subset)), 2):

            records.append((t,
                            t_subset.loc[a, 'participant'],
                            t_subset.loc[a, 'code_string'],
                            t_subset.loc[b, 'participant'],
                            t_subset.loc[b, 'code_string']))

    return pd.DataFrame(records, columns=cols)
        

In [7]:
def add_jaccard(df, pattern=None):

    """
    Add a 'jaccardIndex' column to df, in place.

    For every row, the shape codes matching `pattern` are extracted from
    'codeA' and 'codeB', and the Jaccard index of the two resulting sets
    is stored in 'jaccardIndex'.

    Parameters
    ----------
    df : DataFrame with 'codeA' and 'codeB' columns; it is modified in place.
    pattern : regular expression used to extract shape codes. Defaults to
        the notebook-level `reg`, which must be defined before the call.

    Returns
    -------
    None.
    """

    if pattern is None:
        # looked up at call time; a missing `reg` now raises NameError
        # instead of being swallowed and turned into all-NaN results
        pattern = reg

    def _shape_set(code):
        # A non-string code (e.g. NaN for a missing gloss) makes re.findall
        # raise TypeError; such a cell is treated as having no shapes.
        try:
            return set(re.findall(pattern, code))
        except TypeError:
            return set()

    j_inds = [jaccard_index(_shape_set(code_a), _shape_set(code_b))
              for code_a, code_b in zip(df['codeA'], df['codeB'])]

    # assigning the whole column also creates it when df has no rows
    df['jaccardIndex'] = np.array(j_inds, dtype=float)

In [8]:
def comparison_df(df_list):

    """
    Build a per-target table of mean Jaccard indices, one column per frame.

    Parameters
    ----------
    df_list : list of DataFrames, each with 'target' and 'jaccardIndex'
        columns.

    Returns
    -------
    DataFrame with a single 'target' column followed by 'jaccard1',
    'jaccard2', ... (numbered by position in df_list), sorted by target.
    Rows are aligned on the target value, not on row position, so a
    target missing from one frame yields NaN in that frame's column
    instead of shifting the other values onto the wrong target.
    """

    pieces = []

    for position, df in enumerate(df_list, 1):

        # mean Jaccard index per target, indexed by target
        target_means = df.groupby('target')['jaccardIndex'].mean()
        target_means.name = 'jaccard' + str(position)

        pieces.append(target_means)

    # concatenating Series side by side aligns them on their (target) index
    comp_df = pd.concat(pieces, axis=1).sort_index()
    comp_df.index.name = 'target'

    return comp_df.reset_index()


In [9]:
def create_jaccard_comparison(df1, df2):

    """
    Run the Jaccard pipeline on two frames and combine the results.

    Each frame is passed through compare_targets, given a 'jaccardIndex'
    column by add_jaccard, and the two results are then handed to
    comparison_df (df1 first, df2 second), whose return value is returned.
    """

    per_coder = []

    for coder_df in (df1, df2):

        pair_df = compare_targets(coder_df)

        # add_jaccard works in place and returns nothing
        add_jaccard(pair_df)

        per_coder.append(pair_df)

    return comparison_df(per_coder)

In [10]:
# regular expression to extract shape codes:
# matches '1h' or '2h' followed lazily by any characters up to the next
# word boundary. add_jaccard reads this notebook-level name when it is called.
reg = r'1h.*?\b|2h.*?\b'

## Files

**Second coder files**

In [12]:
# seed data, second coder's file
# (relative path: resolved against the kernel's working directory)
c2seed = pd.read_csv('../data_files/seed_second_coder.csv')

In [16]:
# experiment 1, second coder's file
c2ex1 = pd.read_csv('../data_files/ex1_second_coder.csv')

In [17]:
# get columns to use throughout analysis:
# the column names of the second coder's experiment 1 file, in file order.
# The other frames are later restricted to these columns via [cols].
cols = c2ex1.columns.tolist()

In [19]:
# experiment 2, second coder: two separate files, read into separate frames
c2ex2int = pd.read_csv('../data_files/ex2_int_only_second_coder.csv')
c2ex2trans = pd.read_csv('../data_files/ex2_trans_only_second_coder.csv')

In [20]:
# experiment 3, second coder: two separate files, read into separate frames

c2ex3int = pd.read_csv('../data_files/ex3_int_only_second_coder.csv')
c2ex3transint = pd.read_csv('../data_files/ex3_transint_second_coder.csv')

**First coder files**

In [22]:
# First coder: one file per experiment.
# experiment 1
c1ex1 = pd.read_csv('../data_files/ex1.csv')
# experiment 2
c1ex2 = pd.read_csv('../data_files/ex2.csv')
# experiment 3
c1ex3 = pd.read_csv('../data_files/ex3.csv')

## Experiment 1

In [23]:
# Set 'generation' (0) and 'trial' (NaN) on the second coder's seed frame
# before selecting cols below.
# NOTE(review): presumably the seed file lacks these two columns; confirm.
c2seed['generation'] = 0
c2seed['trial'] = np.nan

# restrict to the shared columns and order by participant
c2seed = c2seed[cols].sort_values(by=['participant']).reset_index(drop=True)

In [24]:
# First coder's counterpart of the seed rows: from the experiment 1 data,
# keep the participants that appear in the second coder's seed frame, one
# row per participant (the first one encountered), restricted to the shared
# columns and ordered by participant.
# NOTE(review): assumes the first row per participant in c1ex1 is the seed
# item that the second coder coded; confirm.
c1seed = (
    c1ex1[c1ex1.participant.isin(c2seed.participant.unique())]
    .drop_duplicates(subset=['participant'])
    [cols]
    .sort_values(by='participant')
    .reset_index(drop=True)
)

In [25]:
# format 2nd coder's df: shared columns only, ordered by participant and
# target, with a fresh 0..n-1 index
c2ex1 = (
    c2ex1[cols]
    .sort_values(by=['participant', 'target'])
    .reset_index(drop=True)
)

In [26]:
# get subset for c1: the first coder's rows for the participant/generation
# combinations present in the second coder's experiment 1 frame
c1ex1_subset = get_subset(c2ex1, c1ex1, cols)

In [27]:
# number of first-coder rows selected (seed rows are not added yet at this point)
len(c1ex1_subset)

120

In [28]:
# add seeds: put each coder's seed rows in front of their experiment 1 rows.
# Both names are rebound to the concatenated result, so running this cell a
# second time would add the seed rows again.
c2ex1 = pd.concat([c2seed, c2ex1], ignore_index=True)
c1ex1_subset = pd.concat([c1seed, c1ex1_subset], ignore_index=True)

In [29]:
# percent agreement for marker presence
mp_ex1 = compare_marker_pres(c1ex1_subset, c2ex1)

# print() with one parenthesised argument gives the same output on
# Python 2 and Python 3; the bare print statement is Python 2 only
print("Marker presence agreement: %f %%" % round(mp_ex1, 1))

Marker presence agreement: 93.700000 %


In [30]:
# Cohen's kappa between the two coders' marker presence values (experiment 1 plus seeds)
cohen_kappa_score(c1ex1_subset.markerPres, c2ex1.markerPres)

0.86825251601097897

In [31]:
# per-target mean Jaccard index: jaccard1 from the first coder's frame,
# jaccard2 from the second coder's
ex1_comp = create_jaccard_comparison(c1ex1_subset, c2ex1)

In [32]:
# Spearman rank correlation between the two coders' per-target mean Jaccard indices
spearmanr(ex1_comp.jaccard1, ex1_comp.jaccard2)

SpearmanrResult(correlation=0.72584856396866848, pvalue=5.9542263133494567e-05)

## Experiment 2

In [33]:
# Second coder, experiment 2: stack the two files, keep the shared columns,
# and order by participant and target with a fresh 0..n-1 index.
c2ex2 = pd.concat([c2ex2int, c2ex2trans], ignore_index=True)
c2ex2 = (
    c2ex2[cols]
    .sort_values(by=['participant', 'target'])
    .reset_index(drop=True)
)

In [34]:
# first coder's rows for the participant/generation combinations present in
# the second coder's experiment 2 frame
c1ex2_subset = get_subset(c2ex2, c1ex2, cols)

In [35]:
# percent agreement for marker presence
mp_ex2 = compare_marker_pres(c1ex2_subset, c2ex2)

# print() with one parenthesised argument gives the same output on
# Python 2 and Python 3; the bare print statement is Python 2 only
print("Marker presence agreement: %f %%" % round(mp_ex2, 1))

Marker presence agreement: 92.900000 %


In [36]:
# Cohen's kappa between the two coders' marker presence values (experiment 2)
cohen_kappa_score(c1ex2_subset.markerPres, c2ex2.markerPres)

0.84643179765130983

In [37]:
# per-target mean Jaccard index: jaccard1 from the first coder's frame,
# jaccard2 from the second coder's
ex2_comp = create_jaccard_comparison(c1ex2_subset, c2ex2)

In [38]:
# Spearman rank correlation between the two coders' per-target mean Jaccard indices
spearmanr(ex2_comp.jaccard1, ex2_comp.jaccard2)

SpearmanrResult(correlation=0.83478260869565213, pvalue=3.9327462000564876e-07)

## Experiment 3

In [39]:
# Second coder, experiment 3: stack the two files, keep the shared columns,
# and order by participant and target with a fresh 0..n-1 index.
c2ex3 = pd.concat([c2ex3int, c2ex3transint], ignore_index=True)

c2ex3 = (
    c2ex3[cols]
    .sort_values(by=['participant', 'target'])
    .reset_index(drop=True)
)

In [40]:
# first coder's rows for the participant/generation combinations present in
# the second coder's experiment 3 frame
c1ex3_subset = get_subset(c2ex3, c1ex3, cols)

In [41]:
# percent agreement for marker presence (shown as the cell's output value)
compare_marker_pres(c1ex3_subset, c2ex3)

91.25

In [42]:
# Cohen's kappa between the two coders' marker presence values (experiment 3)
cohen_kappa_score(c1ex3_subset.markerPres, c2ex3.markerPres)

0.81878325902488136

In [43]:
# per-target mean Jaccard index: jaccard1 from the first coder's frame,
# jaccard2 from the second coder's
ex3_comp = create_jaccard_comparison(c1ex3_subset, c2ex3)

In [44]:
# Spearman rank correlation between the two coders' per-target mean Jaccard indices
spearmanr(ex3_comp.jaccard1, ex3_comp.jaccard2)

SpearmanrResult(correlation=0.9086956521739129, pvalue=8.263058014053956e-10)