In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
filepaths = [f for f in os.listdir(".") if f.endswith('.csv')]
df = pd.DataFrame() # empty
# Gather everything into a single dataframe
for file in filepaths:
    if df.empty:
        df = pd.read_csv(file)
        df = df[~df['annotation'].isnull()] # remove null annotations
        df = df.rename(columns={'annotation': file[:-4]})
        df = df.drop(columns=['comments'], errors='ignore')
    else:
        new_csv = pd.read_csv(file)[['ID', 'annotation']]
        new_csv = new_csv[~new_csv['annotation'].isnull()] # remove null annotations
        new_csv = new_csv.rename(columns={'annotation': file[:-4]})
        df = pd.merge(df, new_csv, how='inner', on='ID')
df

Unnamed: 0,ID,tweet,samba,zsofia,florian,ivo,joely,maiwenn
0,15430,"If its the news, it must be true? No, its not....",0.0,0.0,0.0,0.0,0.0,0.0
1,8514,Covid is going to ruin Christmas this year,0.0,0.0,0.0,0.0,0.0,0.0
2,275,today is a big day just went over 300 twitter...,1.0,1.0,0.0,0.0,0.0,0.0
3,11283,my family is being so supportive today and ma...,1.0,1.0,0.0,1.0,0.0,0.0
4,16276,@pamparoni If they aren’t out yet just wait to...,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
95,3477,so glad i have so many friends and i'm not on...,1.0,1.0,0.0,1.0,0.0,0.0
96,15535,try to provide evidence on something on faceb...,1.0,1.0,1.0,1.0,1.0,0.0
97,19656,They been ON US since the layover lol,1.0,0.0,0.0,0.0,0.0,0.0
98,10670,@sonofsama1 @Santandave1 It ain’t funny it’s h...,0.0,0.0,0.0,0.0,0.0,0.0


In [3]:
print("How many tweets were marked as sarcastic by this annotator?")
for column in df.columns[2:]:
    sarcastic = df[column].sum()
    print(f'  {column}: {sarcastic} ({sarcastic / df.shape[0] * 100.0 :.2f} %)')

How many tweets were marked as sarcastic by this annotator?
  samba: 58.0 (58.00 %)
  zsofia: 29.0 (29.00 %)
  florian: 14.0 (14.00 %)
  ivo: 31.0 (31.00 %)
  joely: 22.0 (22.00 %)
  maiwenn: 11.0 (11.00 %)


In [4]:
agreement = df[df.nunique(1).eq(3)] # 3 because the 3 different values on each line should be: id, tweet, and only one value that's the same for each annotator

agreement_sarc = agreement.loc[agreement[filepaths[0][:-4]] == 1.0]
agreement_not_sarc = agreement.loc[agreement[filepaths[0][:-4]] == 0.0]

In [5]:
print(f"All annotators agreed that these {agreement_sarc.shape[0]} tweets are sarcastic:")
# Prints the IDs
print(', '.join(map(str, agreement_sarc['ID'])))
# Prints the tweets
# print('\n-----\n'.join(map(str, agreement_sarc['tweet'])))

All annotators agreed that these 8 tweets are sarcastic:
8189, 2387, 1677, 13865, 19625, 2680, 5231, 18286


In [6]:
print(f"All annotators agreed that these {agreement_not_sarc.shape[0]} tweets are not sarcastic:")
# Prints the IDs
print(', '.join(map(str, agreement_not_sarc['ID'])))
# Prints the tweets
# print('\n-----\n'.join(map(str, agreement_not_sarc['tweet'])))

All annotators agreed that these 37 tweets are not sarcastic:
15430, 8514, 16276, 3577, 1211, 4046, 14286, 10210, 4982, 10404, 19171, 16097, 13447, 11576, 369, 16224, 15559, 11744, 17484, 6788, 1553, 15759, 13722, 13491, 8957, 5370, 804, 12163, 16929, 8789, 4175, 7573, 10995, 14195, 11720, 10670, 7036


In [7]:
print("Annotations where Maïwenn and Florian disagree, and Maïwenn annotated as not-sarc.:")
print(df.loc[(df['maiwenn'] != df['florian']) & (df['maiwenn'] == 0.0)])

Annotations where Maïwenn and Florian disagree, and Maïwenn annotated as not-sarc.:
       ID                                              tweet  samba  zsofia  \
13   9700  I see windyspoons have taken all protective ba...    1.0     1.0   
23  17443  At least half of my days are still bad days an...    1.0     1.0   
36  18012  School psych life if fun because you can feel ...    1.0     0.0   
46   6478   ahh the sweet sound of ' zero cares given ' b...    1.0     1.0   
96  15535   try to provide evidence on something on faceb...    1.0     1.0   

    florian  ivo  joely  maiwenn  
13      1.0  1.0    1.0      0.0  
23      1.0  1.0    1.0      0.0  
36      1.0  1.0    1.0      0.0  
46      1.0  1.0    0.0      0.0  
96      1.0  1.0    1.0      0.0  


## Fleiss' Kappa (Inter-annotator agreement)

In [19]:
agreement = df.copy()
agreement['sarcastic'] = agreement['samba'] + agreement['zsofia'] + agreement['florian'] + agreement['ivo'] + agreement['joely'] + agreement['maiwenn']
agreement['not-sarcastic'] = 6.0 - agreement['sarcastic']
agreement = agreement.drop(columns=['ID', 'tweet', 'samba', 'zsofia', 'florian', 'ivo', 'joely', 'maiwenn'])
agreement

Unnamed: 0,sarcastic,not-sarcastic
0,0.0,6.0
1,0.0,6.0
2,2.0,4.0
3,3.0,3.0
4,0.0,6.0
...,...,...
95,3.0,3.0
96,5.0,1.0
97,1.0,5.0
98,0.0,6.0


Let $N$ be the total number of subjects.
Let $n$ be the number of ratings per subject.
Let $k$ be the number of categories into which assignments are made.

In [20]:
N = agreement.shape[0]
n = 6 # 6 annotators
k = 2

First calculate $p_j$, the proportion of all assignments which were to the $j$-th category:

$$
p_{j} = \frac{1}{N n} \sum_{i=1}^N n_{i j},\quad\quad 1 = \sum_{j=1}^k p_{j}
$$

In [21]:
pj = agreement.sum() / (N*n)
pj

sarcastic        0.275
not-sarcastic    0.725
dtype: float64

Now calculate $P_{i}$, the extent to which raters agree for the $i$-th subject (i.e., compute how many rater-rater pairs are in agreement, relative to the number of all possible rater-rater pairs):

$$
\begin{align}
 P_i &= \frac{1}{n(n - 1)} \sum_{j=1}^k n_{i j} (n_{i j} - 1) \\
 &= \frac{1}{n(n - 1)} \sum_{j=1}^k (n_{i j}^2 - n_{i j}) \\
 &= \frac{1}{n(n - 1)} \biggl[ \sum_{j=1}^k \bigl(n_{i j}^2 \bigr) - n\biggr]
\end{align}
$$

In [22]:
nij2 = np.square(agreement) # square all the results in the dataframe
Pi = (1/(n*(n-1))) * (nij2.sum(axis=1)-n)
Pi

0     1.000000
1     1.000000
2     0.466667
3     0.400000
4     1.000000
        ...   
95    0.400000
96    0.666667
97    0.666667
98    1.000000
99    1.000000
Length: 100, dtype: float64

Now compute $\bar{P}$, the mean of the $P_i$'s, and $\bar{P_e}$, which go into the formula for $\kappa$:
$$
\bar{P} = \frac{1}{N} \sum_{i=1}^N P_{i}
$$

$$
\bar{P_e} = \sum_{j=1}^k p_j^2
$$

In [23]:
P_bar = (1/N) * Pi.sum()
P_bar

0.7660000000000001

In [24]:
Pe_bar = np.square(pj).sum()
Pe_bar

0.6012500000000001

Now compute $\kappa$ :

$$
\kappa = \frac{\bar{P} - \bar{P_e}}{1 - \bar{P_e}}
$$

In [25]:
kappa = (P_bar - Pe_bar) / (1 - Pe_bar)
kappa

0.4131661442006272

Landis & Koch (1977) provide this table as a basis to interpret $\kappa$ values for a 2-class 2-annotators example. Its use here is a bit of a stretch, but provides a good idea for our inter-annotator agreement:

| $\kappa$      | Interpretation           |
|-------------|--------------------------|
| < 0         | Poor agreement           |
| 0.01 – 0.20 | Slight agreement         |
| 0.21 – 0.40 | Fair agreement           |
| **0.41 – 0.60** | **Moderate agreement**       |
| 0.61 – 0.80 | Substantial agreement    |
| 0.81 – 1.00 | Almost perfect agreement |