Raters dataset for analysis

In [32]:
import pandas as pd
import numpy as np
from nltk import agreement

In [33]:
# 1. Load every rater's workbook and align the three raters side by side.

# Instrument categories; one workbook per category is read for each rater.
filenames = [
    "Credit",
    "Direct_payment",
    "Fine",
    "Supplies",
    "Tax_deduction",
    "Technical_assistance"
]

raters = [
    "Rater_1",
    "Rater_2",
    "Rater_3"
]

# Workbook column header -> suffix appended to the rater name.
# The space in "Similarity _score" is reproduced as-is; presumably it matches
# the spreadsheet header -- TODO confirm against the workbooks.
column_suffixes = {
    "Is_policy": "_policy_tag",
    "Is_incentive": "_incentive_tag",
    "Other_instrument": "_other_instrument",
    "original_rank": "_original_rank",
    "Similarity _score": "_similarity_score",
    "Sentence": "_sentence",
}

# Read the Excel workbooks under data/<rater>/. Frames are collected in lists
# and concatenated once per level rather than growing a DataFrame inside the
# loops, which would re-copy all accumulated rows on every pass.
frames_by_tag = []

for filename in filenames:

    frames_by_rater = []

    for rater in raters:
        ratings = pd.read_excel(
            f"data/{rater}/Unique_sentence_IDs_{filename}.xlsx",
            skiprows=1,
            index_col="Sentence_Id",
        )
        ratings = ratings.rename(
            columns={header: rater + suffix for header, suffix in column_suffixes.items()}
        )
        frames_by_rater.append(ratings.drop(["Comments", "Unnamed: 8"], axis=1))

    # Outer-align the three raters on the sentence id.
    df_rater = pd.concat(frames_by_rater, axis=1)
    # Keep the index unnamed: a later reset_index() is expected to produce a
    # column called "index".
    df_rater.index.name = None
    df_rater["Tag"] = filename
    frames_by_tag.append(df_rater)

# Stack the per-category frames; a sentence id can therefore appear once per category.
df_all_raters = pd.concat(frames_by_tag, axis=0)

 


    


In [34]:

# Drop the rank, similarity-score and sentence columns of Rater_2 and Rater_3;
# Rater_1's copies of those columns stay in the frame.
redundant_columns = [
    "Rater_2_original_rank", "Rater_2_similarity_score", "Rater_2_sentence",
    "Rater_3_original_rank", "Rater_3_similarity_score", "Rater_3_sentence",
]
df_all_raters.drop(columns=redundant_columns, inplace=True)

In [35]:
# Keep only rows holding at least `min_non_null` non-missing values
# (rows are not required to be completely free of NaN).
min_non_null = 5
df_all_raters.dropna(thresh=min_non_null, inplace=True)


In [36]:
# Per-row tag summaries:
#   Valid_*          - number of raters with a non-missing value
#   Pos_*            - sum of the raters' tag values
#   Alternative_tags - number of raters with a non-missing "other instrument" entry
policy_cols = ["Rater_1_policy_tag", "Rater_2_policy_tag", "Rater_3_policy_tag"]
incentive_cols = ["Rater_1_incentive_tag", "Rater_2_incentive_tag", "Rater_3_incentive_tag"]
other_cols = ["Rater_1_other_instrument", "Rater_2_other_instrument", "Rater_3_other_instrument"]

df_all_raters["Valid_policy_tags"] = df_all_raters[policy_cols].count(axis=1)
df_all_raters["Valid_incentive_tags"] = df_all_raters[incentive_cols].count(axis=1)
df_all_raters["Pos_policy_tags"] = df_all_raters[policy_cols].sum(axis=1)
df_all_raters["Pos_incentive_tags"] = df_all_raters[incentive_cols].sum(axis=1)
df_all_raters["Alternative_tags"] = df_all_raters[other_cols].count(axis=1)


In [37]:
# Collapse the three raters into one flag per row: majority share, any positive, none.
# NOTE(review): ">= 0.5" also counts an even split (e.g. 1 of 2 valid tags) as a
# majority -- confirm that ties are meant to be included.
incentive_pos = df_all_raters["Pos_incentive_tags"]
incentive_share = incentive_pos / df_all_raters["Valid_incentive_tags"]
df_all_raters["Incentive_tag_majority"] = np.where(incentive_share >= 0.5, 1, 0)
df_all_raters["Incentive_tag_any"] = np.where(incentive_pos >= 1, 1, 0)
df_all_raters["No_incentive_tag"] = np.where(incentive_pos == 0, 1, 0)

policy_pos = df_all_raters["Pos_policy_tags"]
policy_share = policy_pos / df_all_raters["Valid_policy_tags"]
df_all_raters["Policy_tag_majority"] = np.where(policy_share >= 0.5, 1, 0)
df_all_raters["Policy_tag_any"] = np.where(policy_pos >= 1, 1, 0)
df_all_raters["No_policy_tag"] = np.where(policy_pos == 0, 1, 0)

# Difference of the two "any" flags: 1 = incentive only, 0 = both or neither,
# -1 = policy only.
# NOTE(review): because of the -1 rows, summing this column gives a net figure,
# not the number of incentive-only rows -- confirm that is intended.
df_all_raters["Incentive_uncategorised"] = (
    df_all_raters["Incentive_tag_any"] - df_all_raters["Policy_tag_any"]
)


In [38]:
# Split the sentence id at its first underscore into a document part and a
# sentence part.
df_doc = df_all_raters.reset_index()
refs = df_doc['index'].str.split('_', n=1, expand=True)
df_doc = df_doc.assign(doc_ref=refs[0], sentence_ref=refs[1])



In [39]:
# Number of rows with a document reference, per tag.
df_grouped_inst = df_doc.groupby('Tag')['doc_ref'].count()
df_grouped_inst

Tag
Credit                  541
Direct_payment          512
Fine                    691
Supplies                447
Tax_deduction           450
Technical_assistance    448
Name: doc_ref, dtype: int64

In [40]:
# Per-tag totals of the cross-rater summary flags.
# The flag columns are selected before aggregating, so the sum is never applied
# to the text columns of df_doc and does not depend on pandas' numeric_only default.
# NOTE(review): Incentive_uncategorised holds -1/0/1, so its total is a net figure.
summary_cols = ["Incentive_tag_any", "Policy_tag_any", "Policy_tag_majority", "Incentive_uncategorised"]
df_grouped = df_doc.groupby('Tag')[summary_cols].sum()
df_grouped


Unnamed: 0_level_0,Incentive_tag_any,Policy_tag_any,Policy_tag_majority,Incentive_uncategorised
Tag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Credit,302,203,191,99
Direct_payment,333,193,178,140
Fine,424,402,362,22
Supplies,147,32,24,115
Tax_deduction,186,39,31,147
Technical_assistance,94,84,68,10


In [41]:
# Number of distinct documents per tag.
df_grouped_doc = df_doc.groupby('Tag')['doc_ref'].nunique()
df_grouped_doc

Tag
Credit                  189
Direct_payment          207
Fine                    252
Supplies                189
Tax_deduction           207
Technical_assistance    198
Name: doc_ref, dtype: int64

In [42]:
# Number of distinct documents across all tags.
df_udoc = df_doc['doc_ref'].nunique()
df_udoc

633

In [43]:
# Number of rows with a non-missing document reference, across all tags.
df_inst = df_doc['doc_ref'].count()
df_inst

3089

In [44]:
# Overall totals of the cross-rater summary flags (all tags together).
# The flag columns are selected first, so the sum is never applied to the text
# columns of df_doc and the result keeps a numeric dtype.
# NOTE(review): Incentive_uncategorised holds -1/0/1, so its total is a net figure.
summary_cols = ["Incentive_tag_any", "Policy_tag_any", "Policy_tag_majority", "Incentive_uncategorised"]
df_summary = df_doc[summary_cols].sum()
df_summary


Incentive_tag_any          1486
Policy_tag_any              953
Policy_tag_majority         854
Incentive_uncategorised     533
dtype: object

In [45]:
# Distinct documents for each value of every summary flag.
# A document can appear under several values of the same flag.
df_any_inc_udoc = df_doc.groupby("Incentive_tag_any")["doc_ref"].nunique()
df_any_tags_udoc = df_doc.groupby("Policy_tag_any")["doc_ref"].nunique()
df_maj_tags_udoc = df_doc.groupby("Policy_tag_majority")["doc_ref"].nunique()
df_inc_uncateg = df_doc.groupby("Incentive_uncategorised")["doc_ref"].nunique()

for heading, counts in (
    ("Unique_docs_Incentive_tag_any", df_any_inc_udoc),
    ("Unique_docs_policy_tag_any", df_any_tags_udoc),
    ("Unique_docs_policy_tag_maj", df_maj_tags_udoc),
    ("Unique_docs_incentive_uncategorised", df_inc_uncateg),
):
    print(heading)
    print(counts)

Unique_docs_Incentive_tag_any
Incentive_tag_any
0    513
1    391
Name: doc_ref, dtype: int64
Unique_docs_policy_tag_any
Policy_tag_any
0    558
1    341
Name: doc_ref, dtype: int64
Unique_docs_policy_tag_maj
Policy_tag_majority
0    563
1    320
Name: doc_ref, dtype: int64
Unique_docs_incentive_uncategorised
Incentive_uncategorised
-1      5
 0    612
 1    195
Name: doc_ref, dtype: int64


In [46]:
# Distinct documents with / without any policy tag.
df_grouped_udoc = df_doc.groupby("Policy_tag_any")["doc_ref"].nunique()
df_grouped_udoc

Policy_tag_any
0    558
1    341
Name: doc_ref, dtype: int64

In [47]:
# 2. Inter-rater reliability: how similar is the tagging between raters?
#    Base: instances that were tagged by all three raters.

# Stats from the initial dataset: sum of each rater's policy tags per instrument tag.
# The rater columns are selected before aggregating, so the sum is never applied
# to the text columns of df_all_raters.
rater_policy_cols = ["Rater_1_policy_tag", "Rater_2_policy_tag", "Rater_3_policy_tag"]
df_grouped_raters = df_all_raters.groupby('Tag')[rater_policy_cols].sum()




In [48]:
# Flip to one row per rater and one column per tag for display.
# The same name is reassigned, so running this cell twice flips the table back.
df_grouped_raters = df_grouped_raters.T
df_grouped_raters

Tag,Credit,Direct_payment,Fine,Supplies,Tax_deduction,Technical_assistance
Rater_1_policy_tag,54.0,16.0,30.0,15.0,14.0,21.0
Rater_2_policy_tag,164.0,167.0,196.0,21.0,9.0,12.0
Rater_3_policy_tag,135.0,136.0,268.0,17.0,29.0,69.0


In [49]:
# next - shared sentences across tags, shared documents across tags,
# average instances per doc, max instances per doc

# Distinct tags per document ...
uni_tag_by_doc = df_doc.groupby('doc_ref')['Tag'].nunique()
uni_tags_df = uni_tag_by_doc.reset_index()

# ... then the number of documents for each distinct-tag count.
sum_uniq_tag = uni_tags_df.groupby('Tag')['doc_ref'].nunique()
sum_uniq_tag



Tag
1    327
2    144
3     76
4     46
5     25
6     15
Name: doc_ref, dtype: int64

In [50]:
# Distinct tags per sentence id ...
tag_by_sent = df_doc.groupby('index')['Tag'].nunique()
tag_by_sent_df = tag_by_sent.reset_index()

# ... then the number of sentence ids for each distinct-tag count.
# Bracket access avoids confusing the "index" column with a frame's .index attribute.
sum_uniq_tag_sent = tag_by_sent_df.groupby('Tag')['index'].nunique()
sum_uniq_tag_sent



Tag
1    2274
2     272
3      53
4      28
Name: index, dtype: int64

In [51]:
# Start from df_all_raters without the three "other instrument" columns.
other_instrument_cols = [
    "Rater_1_other_instrument",
    "Rater_2_other_instrument",
    "Rater_3_other_instrument",
]
df_inter = df_all_raters.drop(columns=other_instrument_cols)


In [52]:
# Keep only rows with no missing value in any remaining column.
df_inter_trim = df_inter.dropna(how="any")


In [53]:
# Each rater's incentive tags as a plain list; position i refers to the same row
# of df_inter_trim in all three lists.
coder1 = df_inter_trim.loc[:, "Rater_1_incentive_tag"].tolist()
coder2 = df_inter_trim.loc[:, "Rater_2_incentive_tag"].tolist()
coder3 = df_inter_trim.loc[:, "Rater_3_incentive_tag"].tolist()


In [54]:
# Inter-rater reliability on the yes/no incentive tag.
# Build [coder, item, label] triples: all of coder 1's items first, then coder 2's,
# then coder 3's, with the list position used as the item id.
formatted_codes = [
    [coder_id, item, label]
    for coder_id, labels in ((1, coder1), (2, coder2), (3, coder3))
    for item, label in enumerate(labels)
]

ratingtask = agreement.AnnotationTask(data=formatted_codes)
# NOTE(review): NLTK documents multi_kappa() as the Davies & Fleiss (1982)
# multi-coder kappa -- confirm the printed label names the intended statistic.
print(" Fleiss's Kappa:", ratingtask.multi_kappa())

 Fleiss's Kappa: 0.4950915616545709


In [55]:
# Each rater's policy tags as a plain list; position i refers to the same row
# of df_inter_trim in all three lists.
coder1_tag = df_inter_trim.loc[:, "Rater_1_policy_tag"].tolist()
coder2_tag = df_inter_trim.loc[:, "Rater_2_policy_tag"].tolist()
coder3_tag = df_inter_trim.loc[:, "Rater_3_policy_tag"].tolist()


In [56]:
# Inter-rater reliability on the policy tag.
# Build [coder, item, label] triples: all of coder 1's items first, then coder 2's,
# then coder 3's, with the list position used as the item id.
formatted_codes = [
    [coder_id, item, label]
    for coder_id, labels in ((1, coder1_tag), (2, coder2_tag), (3, coder3_tag))
    for item, label in enumerate(labels)
]

ratingtask = agreement.AnnotationTask(data=formatted_codes)
# NOTE(review): NLTK documents multi_kappa() as the Davies & Fleiss (1982)
# multi-coder kappa -- confirm the printed label names the intended statistic.
print(" Fleiss's Kappa:", ratingtask.multi_kappa())

 Fleiss's Kappa: 0.5627594715810887
