# The code calculates inter-annotator agreement (henceforth, IAA) using Krippendorff's Alpha (2019).
### In the Appendix section, there are confusion matrices for each pair of annotators, and IAA is also calculated using Kendall's W, and

In [1]:
# install requirement
!pip install krippendorff

Collecting krippendorff
  Downloading krippendorff-0.8.0-py3-none-any.whl.metadata (2.8 kB)
Downloading krippendorff-0.8.0-py3-none-any.whl (18 kB)
Installing collected packages: krippendorff
Successfully installed krippendorff-0.8.0


In [2]:
# import packages
import os
import pandas as pd
from nltk.metrics import ConfusionMatrix
import numpy as np
import krippendorff
from itertools import combinations

Mounting my Google Drive, to have access to the datasets.

In [3]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [4]:
filepaths = [f for f in os.listdir("drive/MyDrive/ColabNotebooks/Sarcasm/Datasets/") if f.endswith('.csv')]
filepaths

['Joely_311024.csv',
 'florian_30102024.csv',
 'ivo_30102024.csv',
 'zsofia_30102024.csv',
 'samba_28102024.csv',
 'maiwenn_27102024.csv']

In [5]:
# gather independent annotator data into single dataframe
df = pd.DataFrame() # empty
for file in filepaths:
    if df.empty:
        df = pd.read_csv("drive/MyDrive/ColabNotebooks/Sarcasm/Datasets/" + file)
        df = df[~df['annotation'].isnull()] # remove null annotations
        df = df.rename(columns={'annotation': file[:-4]})
        df = df.drop(columns=['observations'], errors='ignore')
    else:
        new_csv = pd.read_csv("drive/MyDrive/ColabNotebooks/Sarcasm/Datasets/" + file)[['ID', 'annotation']]
        new_csv = new_csv[~new_csv['annotation'].isnull()] # remove null annotations
        new_csv = new_csv.rename(columns={'annotation': file[:-4]})
        df = pd.merge(df, new_csv, how='inner', on='ID')
df

Unnamed: 0,ID,tweet,Joely_311024,florian_30102024,ivo_30102024,zsofia_30102024,samba_28102024,maiwenn_27102024
0,15430,"If its the news, it must be true? No, its not....",0.75,0,0.00,0.25,0.00,025
1,8514,Covid is going to ruin Christmas this year,0.00,0,0.00,0.00,0.25,0
2,275,today is a big day just went over 300 twitter ...,0.75,0,0.25,0.75,0.50,075
3,11283,my family is being so supportive today and mak...,0.00,0,0.75,1.00,0.50,025
4,16276,@pamparoni If they aren’t out yet just wait to...,0.00,0,0.00,0.00,0.00,05
...,...,...,...,...,...,...,...,...
994,4992,"We are an evolved people.. seriously, can we f...",1.00,05,0.00,0.25,0.00,05
995,18233,i feel loved 0 message,1.00,05,1.00,1.00,0.75,1
996,11964,thanks pendot for all the lines being painted ...,0.00,0,0.50,1.00,0.25,05
997,8156,essays and television and twitter and drinking...,0.75,0,0.75,1.00,0.75,075


In [6]:
# replace comma seperated decimals with a period
df['maiwenn_27102024'] = df['maiwenn_27102024'].replace(',','.',regex=True)
df['florian_30102024'] = df['florian_30102024'].replace(',','.',regex=True)
# convert data type
df['maiwenn_27102024'] = df['maiwenn_27102024'].astype(float)
df['florian_30102024'] = df['florian_30102024'].astype(float)
df['zsofia_30102024'] = df['zsofia_30102024'].astype(float)
df['ivo_30102024'] = df['ivo_30102024'].astype(float)
df['samba_28102024'] = df['samba_28102024'].astype(float)

In [7]:
# print the value counts of each ordinal value for each annotator
for column in df.columns[2:]:
    ratings = df[column].value_counts()
    print(pd.DataFrame(ratings))

              count
Joely_311024       
0.00            505
1.00            183
0.75            122
0.50            114
0.25             75
                  count
florian_30102024       
0.00                728
1.00                100
0.50                 78
0.75                 60
0.25                 33
              count
ivo_30102024       
0.00            573
1.00            161
0.50            119
0.25             78
0.75             68
                 count
zsofia_30102024       
1.00               275
0.00               263
0.25               242
0.50               124
0.75                95
                count
samba_28102024       
0.00              496
0.25              170
0.75              141
1.00               99
0.50               93
                  count
maiwenn_27102024       
0.00                518
1.00                186
0.25                110
0.75                 93
0.50                 92


In [8]:
# keep only the columns that we need
df_ratings = df.iloc[:, 2:8]
df_ratings

Unnamed: 0,Joely_311024,florian_30102024,ivo_30102024,zsofia_30102024,samba_28102024,maiwenn_27102024
0,0.75,0.0,0.00,0.25,0.00,0.25
1,0.00,0.0,0.00,0.00,0.25,0.00
2,0.75,0.0,0.25,0.75,0.50,0.75
3,0.00,0.0,0.75,1.00,0.50,0.25
4,0.00,0.0,0.00,0.00,0.00,0.50
...,...,...,...,...,...,...
994,1.00,0.5,0.00,0.25,0.00,0.50
995,1.00,0.5,1.00,1.00,0.75,1.00
996,0.00,0.0,0.50,1.00,0.25,0.50
997,0.75,0.0,0.75,1.00,0.75,0.75


In [9]:
# write a function to generate all possible combinations of an array
def combs(a):
    if len(a) == 0:
        return [[]]
    cs = []
    for c in combs(a[1:]):
        cs += [c, c+[a[0]]]
    return cs

In [10]:
# generate all possible combination of these annotators
possible_combinations = combs(['samba_28102024', 'ivo_30102024', 'zsofia_30102024', 'maiwenn_27102024', 'florian_30102024', 'Joely_311024'])

In [11]:
# filter the empty sets, and the single annotator sets
filtered_combinations = []

for x in possible_combinations:
    if len(x) >= 2:
        filtered_combinations.append(x)

# store the agreements for all these possible combinations of annotators in a dictionary
alphas = {}

for combination in filtered_combinations:
    this_combo = pd.DataFrame(df_ratings.loc[:, combination])
    this_combo = this_combo.T.to_numpy()
    alpha = krippendorff.alpha(reliability_data=this_combo, level_of_measurement="interval")
    alphas[' '.join(map(lambda x: x[0], combination))] = alpha.round(2)

# return the sorted by ascending value dictionary
sorted_alphas = dict(sorted(alphas.items(), key = lambda item: item[1]))
sorted_alphas

{'f z': 0.35,
 'J s': 0.46,
 'J f z': 0.46,
 'f z s': 0.47,
 'f m z': 0.48,
 'f z i': 0.49,
 'J z': 0.49,
 'J z s': 0.49,
 'J f z s': 0.49,
 'J i': 0.5,
 'J z i': 0.5,
 'J f z i': 0.5,
 'J f m z': 0.5,
 'z s': 0.51,
 'J f': 0.51,
 'J f s': 0.51,
 'z i': 0.52,
 'J z i s': 0.52,
 'J m z': 0.52,
 'J f z i s': 0.52,
 'J f m z s': 0.52,
 'm z': 0.53,
 'f z i s': 0.53,
 'f m': 0.53,
 'f m z s': 0.53,
 'J i s': 0.53,
 'J m': 0.53,
 'J m z s': 0.53,
 'J f i': 0.53,
 'J f m': 0.53,
 'J f m z i': 0.53,
 'f m z i': 0.54,
 'J m z i': 0.54,
 'J f i s': 0.54,
 'J f m s': 0.54,
 'J f m z i s': 0.54,
 'f s': 0.55,
 'J m s': 0.55,
 'J m z i s': 0.55,
 'J f m i': 0.55,
 'z i s': 0.56,
 'm z s': 0.56,
 'f m z i s': 0.56,
 'J m i': 0.56,
 'J f m i s': 0.56,
 'm z i': 0.57,
 'J m i s': 0.57,
 'm z i s': 0.58,
 'f m s': 0.58,
 'f i': 0.59,
 'f m i': 0.59,
 'f i s': 0.6,
 'f m i s': 0.61,
 'm s': 0.64,
 'm i': 0.64,
 'i s': 0.65,
 'm i s': 0.65}

# Appendix

## Confusion Matrices

In [12]:
# generate a dictionary of the annotators and their respective ratings
ratings = {}

for column in df_ratings.iloc[:, 0:6]:
    columnSeriesObj = df_ratings[column]
    ratings[column] = list(columnSeriesObj)

In [13]:
# write a function that integrates the combinations tool from itertools
def rSubset(arr, r):
    return list(combinations(arr, r))

subsets = rSubset(ratings.values(), 2)

In [14]:
# generate confusion matrices for all possible pairs of annotators
for x in subsets:
  ref = x[0]
  test = x[1]

  cm = ConfusionMatrix(ref, test)

  key_ref = list(filter(lambda x: ratings[x] == ref, ratings))[0]
  key_test = list(filter(lambda x: ratings[x] == test, ratings))[0]

  print("GOLD: " + key_ref[0:2].upper() + "\t" + "TEST: " + key_test[0:2].upper())
  print(cm)

GOLD: JO	TEST: FL
     |       0       0     |
     |   0   .   0   .   1 |
     |   .   2   .   7   . |
     |   0   5   5   5   0 |
-----+---------------------+
 0.0 |<461>  8  24   8   4 |
0.25 |  62  <3>  9   .   1 |
 0.5 |  83   7 <11> 10   3 |
0.75 |  76   8  19 <13>  6 |
 1.0 |  46   7  15  29 <86>|
-----+---------------------+
(row = reference; col = test)

GOLD: JO	TEST: IV
     |       0       0     |
     |   0   .   0   .   1 |
     |   .   2   .   7   . |
     |   0   5   5   5   0 |
-----+---------------------+
 0.0 |<353> 43  76  16  17 |
0.25 |  48  <9> 10   4   4 |
 0.5 |  72  11 <12>  9  10 |
0.75 |  70   8  10 <14> 20 |
 1.0 |  30   7  11  25<110>|
-----+---------------------+
(row = reference; col = test)

GOLD: JO	TEST: ZS
     |       0       0     |
     |   0   .   0   .   1 |
     |   .   2   .   7   . |
     |   0   5   5   5   0 |
-----+---------------------+
 0.0 |<201>136  73  41  54 |
0.25 |  23 <19> 11  14   8 |
 0.5 |  23  33 <20> 15  23 |
0.75 |  11  35

## Kendall's W

In [15]:
# calculate necessary values for Kendall's W
df_ratings['Sum_of_row'] = df.loc[0:301, ['samba_28102024', 'ivo_30102024', 'zsofia_30102024', 'maiwenn_27102024', 'Joely_311024', 'florian_30102024']].sum(axis = 1)

rates = []
for rate in df_ratings['Sum_of_row']:
    rates.append(rate)
mean_of_ranks = np.mean(rates)

df_ratings.assign(Name='Mean')
df_ratings['Mean'] = mean_of_ranks
df_ratings

df_ratings["Sum_of_squared"] = (df_ratings['Sum_of_row'] - df_ratings['Mean']) ** 2
df_ratings

Unnamed: 0,Joely_311024,florian_30102024,ivo_30102024,zsofia_30102024,samba_28102024,maiwenn_27102024,Sum_of_row,Mean,Sum_of_squared
0,0.75,0.0,0.00,0.25,0.00,0.25,1.25,,
1,0.00,0.0,0.00,0.00,0.25,0.00,0.25,,
2,0.75,0.0,0.25,0.75,0.50,0.75,3.00,,
3,0.00,0.0,0.75,1.00,0.50,0.25,2.50,,
4,0.00,0.0,0.00,0.00,0.00,0.50,0.50,,
...,...,...,...,...,...,...,...,...,...
994,1.00,0.5,0.00,0.25,0.00,0.50,,,
995,1.00,0.5,1.00,1.00,0.75,1.00,,,
996,0.00,0.0,0.50,1.00,0.25,0.50,,,
997,0.75,0.0,0.75,1.00,0.75,0.75,,,


In [16]:
# calculate Kendall's W for all possible combinations of annotators
for combination in filtered_combinations:
    this_combo = pd.DataFrame(df_ratings.loc[:, combination])
    this_combo = this_combo.multiply(4)
    this_combo = this_combo.add(1)
    this_combo['sum'] = this_combo.sum(axis = 1) # this is r[i] in the Kendall's W formula
    sum_of_sum = this_combo["sum"].sum()
    row_count = this_combo.shape[0]
    mean_of_sum = sum_of_sum/row_count # r bar in Kendall's W formula
    this_combo["variance"] = ((this_combo["sum"] - mean_of_sum) ** 2)
    sum_of_squared_deviations = this_combo["variance"].sum() # this is S in the formula
    kendall_w = (12 * sum_of_squared_deviations) / ((len(combination) ** 2) * ((row_count ** 3) - row_count))
    for x in combination:
        print(x[0], end=' ')
    print(kendall_w)

i s 2.180517973665831e-05
z s 2.1539138902995746e-05
z i 2.3332653999392883e-05
z i s 2.0443193264645312e-05
m s 2.263903097401927e-05
m i 2.450389510997213e-05
m i s 2.134436946536956e-05
m z 2.3852320066369785e-05
m z s 2.0818709452567094e-05
m z i 2.1946154932849037e-05
m z i s 2.0234475269169285e-05
f s 1.8574930886039593e-05
f i 2.083076293960525e-05
f i s 1.8790556310039723e-05
f z 2.016726024501335e-05
f z s 1.8259595119019676e-05
f z i 1.9560804118240648e-05
f z i s 1.837447857867075e-05
f m 2.122036339420056e-05
f m s 1.9108265559125238e-05
f m i 2.04411852425932e-05
f m i s 1.908379429638163e-05
f m z 1.9738875513822136e-05
f m z s 1.8555955077647114e-05
f m z i 1.947369444826822e-05
f m z i s 1.8604064715747545e-05
J s 2.039388561381496e-05
J i 2.2529109838864486e-05
J i s 1.9418748606185877e-05
J z 2.3190769169212545e-05
J z s 1.947674831513914e-05
J z i 2.072435383501961e-05
J z i s 1.8957746736126318e-05
J m 2.3708320788968574e-05
J m s 2.0087395853275292e-05
J m i 2.1366