In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
# Location of the annotated-tweets CSV (a relative path, resolved against the
# directory the notebook kernel runs in).
filename = 'annotate.csv'
df = pd.read_csv(filepath_or_buffer=filename)

In [7]:
def cohen_kappa(ann1, ann2):
    """Computes Cohen kappa for pair-wise annotators.

    The two inputs are paired item by item, so they must have the same length.

    :param ann1: annotations provided by first annotator
    :type ann1: list (any iterable of hashable labels is accepted)
    :param ann2: annotations provided by second annotator
    :type ann2: list (any iterable of hashable labels is accepted)
    :rtype: float
    :return: Cohen kappa statistic, rounded to 4 decimals. When both
        annotators used one and the same label for every item the expected
        agreement is 1 and kappa is formally undefined (0 / 0); 1.0 is
        returned in that case because the annotators agree completely.
    :raises ValueError: if the inputs are empty or differ in length
    """
    from collections import Counter

    # Materialise once so tuples, Series, arrays and generators behave exactly
    # like the lists this function was originally written for.
    ann1, ann2 = list(ann1), list(ann2)
    if len(ann1) != len(ann2):
        raise ValueError(
            "ann1 and ann2 must have the same length, got %d and %d"
            % (len(ann1), len(ann2))
        )
    n_items = len(ann1)
    if n_items == 0:
        raise ValueError("cannot compute Cohen kappa for empty annotations")

    # observed agreement A (Po): share of items that received the same label
    agreements = sum(1 for an1, an2 in zip(ann1, ann2) if an1 == an2)
    A = agreements / n_items

    # expected agreement E (Pe): chance that both annotators pick the same
    # label given their individual label frequencies. The products are summed
    # as integers and divided once, so the result does not depend on the
    # iteration order of the labels. A label used by only one annotator
    # contributes 0 (Counter returns 0 for missing keys).
    freq1, freq2 = Counter(ann1), Counter(ann2)
    E = sum(freq1[label] * freq2[label] for label in freq1) / (n_items * n_items)

    if E == 1:
        # Only reachable when every annotation is the same single label.
        return 1.0

    return round((A - E) / (1 - E), 4)

In [8]:
def fleiss_kappa(M):
    """Computes Fleiss' kappa for group of annotators.

    :param M: a matrix of shape (:attr:'N', :attr:'k') with 'N' = number of subjects and 'k' = the number of categories.
        'M[i, j]' represent the number of raters who assigned the 'i'th subject to the 'j'th category.
        Every row must sum to the same number of raters, and that number must be at least 2.
    :type: numpy array, numpy matrix or nested list
    :rtype: float
    :return: Fleiss' kappa score, rounded to 4 decimals. When every rating falls
        into one single category the chance agreement is 1 and kappa is formally
        undefined (0 / 0); 1.0 is returned in that case because agreement is complete.
    :raises ValueError: if M is empty or not 2-D, if rows have different rater
        counts, or if there are fewer than 2 raters per subject
    """
    # np.asarray drops the np.matrix subclass, so `*` below is the elementwise
    # product the formula needs rather than a matrix product.
    counts = np.asarray(M, dtype=float)
    if counts.ndim != 2 or counts.size == 0:
        raise ValueError("M must be a non-empty 2-D matrix of shape (N, k)")

    N = counts.shape[0]  # N is # of items
    raters_per_item = counts.sum(axis=1)
    n_annotators = float(raters_per_item[0])  # # of annotators
    if not np.all(raters_per_item == n_annotators):
        raise ValueError("every subject (row of M) must be rated by the same number of raters")
    if n_annotators < 2:
        raise ValueError("Fleiss' kappa needs at least 2 raters per subject")

    tot_annotations = N * n_annotators  # the total # of annotations
    category_sum = counts.sum(axis=0)  # the sum of each category over all items

    if np.count_nonzero(category_sum) == 1:
        # All ratings landed in one category: PbarE == 1 and the ratio is 0 / 0.
        return 1.0

    # chance agreement
    p = category_sum / tot_annotations  # the distribution of each category over all annotations
    PbarE = float(np.sum(p * p))  # average chance agreement over all categories

    # observed agreement
    P = (np.sum(counts * counts, axis=1) - n_annotators) / (n_annotators * (n_annotators - 1))
    Pbar = float(np.sum(P)) / N  # add all observed agreement chances per item and divide by amount of items

    return round((Pbar - PbarE) / (1 - PbarE), 4)

In [9]:
# Turn the numeric AFINN polarity into a three-way label based on its sign.
# Rows that satisfy none of the rules below keep the '' placeholder.
df['afinn_Sentiment_Type'] = ''

_polarity = df['affin_polarity']
_sign_rules = (
    (_polarity > 0, 'POSITIVE'),
    (_polarity == 0, 'NEUTRAL'),
    (_polarity < 0, 'NEGATIVE'),
)
for _mask, _label in _sign_rules:
    df.loc[_mask, 'afinn_Sentiment_Type'] = _label
df

Unnamed: 0,Datetime,Tweet Id,Text,Username,lang,likeCount,retweetCount,quoteCount,hashtags,place,url,affin_polarity,Vader_Sentiment_Type,Blob_Sentiment_Type,annotator:Qianyi,annotator:Aaron,Cleaned_Text,afinn_Sentiment_Type
0,2022-07-28 11:26:58+00:00,1.550000e+18,@91Grigor91 @johhann79 @caitoz @IAPonomarenko ...,cocotrolololo8,en,0,0,0,,,https://twitter.com/cocotrolololo8/status/1552...,0,POSITIVE,POSITIVE,W,W,91grigor91 johhann79 caitoz iaponomarenko the ...,NEUTRAL
1,2022-07-30 05:59:45+00:00,1.550000e+18,@atypica19508822 @ashleeofjupiter It was a pri...,sassafrasass,en,3,0,0,,,https://twitter.com/sassafrasass/status/155325...,1,POSITIVE,POSITIVE,W,W,atypica19508822 ashleeofjupiter it was a privi...,POSITIVE
2,2022-07-30 02:55:22+00:00,1.550000e+18,@MehdiHasanShow @RachelBitecofer The daft far ...,AyissaBeach,en,0,0,0,,,https://twitter.com/AyissaBeach/status/1553212...,-3,NEGATIVE,POSITIVE,N,N,mehdihasanshow rachelbitecofer the daft far rt...,NEGATIVE
3,2022-07-30 05:55:11+00:00,1.550000e+18,@StuffyWings4 @Breaking911 @VP It’s virtue sig...,ThePostalDude50,en,0,0,0,,,https://twitter.com/ThePostalDude50/status/155...,1,POSITIVE,NEGATIVE,P,P,mehdihasanshow rachelbitecofer the daft far rt...,POSITIVE
4,2022-07-30 00:36:10+00:00,1.550000e+18,New pod being recorded tonight with video be s...,JointPainPod,en,0,0,0,"['shitpost', 'LGBT', 'podcasting', 'comedy']",,https://twitter.com/JointPainPod/status/155317...,1,POSITIVE,POSITIVE,N,W,new pod being recorded tonight with video be s...,POSITIVE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2022-07-30 23:12:16+00:00,1.550000e+18,@Emeraid_Gem @harleysuks @MrBeast No we don’t ...,kay87674832,en,0,0,0,,,https://twitter.com/kay87674832/status/1553518...,-5,NEGATIVE,NEGATIVE,N,N,emeraid_gem harleysuks mrbeast no we don’t it’...,NEGATIVE
996,2022-07-30 14:56:14+00:00,1.550000e+18,@LGBT_Yenta Yes…yes it is…absolutely horrid…,StevensEspresso,en,1,0,0,,,https://twitter.com/StevensEspresso/status/155...,0,NEUTRAL,NEUTRAL,W,N,lgbt_yenta yes…yes it is…absolutely horrid…,NEUTRAL
997,2022-07-28 14:35:22+00:00,1.550000e+18,"Like, both Stonewall and Mermaids have actuall...",Z_R1Nzoe,en,5,2,0,,,https://twitter.com/Z_R1Nzoe/status/1552664011...,0,POSITIVE,NEUTRAL,W,P,"like, both stonewall and mermaids have actuall...",NEUTRAL
998,2022-07-29 00:27:24+00:00,1.550000e+18,Where the fu&amp;&amp; is LGBT??? https://t.co...,JpmChung,en,0,0,0,,,https://twitter.com/JpmChung/status/1552813002...,0,NEUTRAL,NEUTRAL,W,W,where the fu&amp;&amp; is lgbt,NEUTRAL


In [10]:
# Extract each automatic classifier's labels from the frame as plain Python lists.
blob, vader, afinn = (
    df[column].tolist()
    for column in ('Blob_Sentiment_Type', 'Vader_Sentiment_Type', 'afinn_Sentiment_Type')
)

In [11]:
# Expand the annotators' single-letter codes (P / W / N) into the
# POSITIVE / NEUTRAL / NEGATIVE vocabulary used by the automatic classifiers.
# Any other code leaves the '' placeholder in the target column.
for _source, _target in (('annotator:Qianyi', 'a1'), ('annotator:Aaron', 'a2')):
    df[_target] = ''
    for _code, _sentiment in (('P', 'POSITIVE'), ('W', 'NEUTRAL'), ('N', 'NEGATIVE')):
        df.loc[df[_source] == _code, _target] = _sentiment

df

Unnamed: 0,Datetime,Tweet Id,Text,Username,lang,likeCount,retweetCount,quoteCount,hashtags,place,url,affin_polarity,Vader_Sentiment_Type,Blob_Sentiment_Type,annotator:Qianyi,annotator:Aaron,Cleaned_Text,afinn_Sentiment_Type,a1,a2
0,2022-07-28 11:26:58+00:00,1.550000e+18,@91Grigor91 @johhann79 @caitoz @IAPonomarenko ...,cocotrolololo8,en,0,0,0,,,https://twitter.com/cocotrolololo8/status/1552...,0,POSITIVE,POSITIVE,W,W,91grigor91 johhann79 caitoz iaponomarenko the ...,NEUTRAL,NEUTRAL,NEUTRAL
1,2022-07-30 05:59:45+00:00,1.550000e+18,@atypica19508822 @ashleeofjupiter It was a pri...,sassafrasass,en,3,0,0,,,https://twitter.com/sassafrasass/status/155325...,1,POSITIVE,POSITIVE,W,W,atypica19508822 ashleeofjupiter it was a privi...,POSITIVE,NEUTRAL,NEUTRAL
2,2022-07-30 02:55:22+00:00,1.550000e+18,@MehdiHasanShow @RachelBitecofer The daft far ...,AyissaBeach,en,0,0,0,,,https://twitter.com/AyissaBeach/status/1553212...,-3,NEGATIVE,POSITIVE,N,N,mehdihasanshow rachelbitecofer the daft far rt...,NEGATIVE,NEGATIVE,NEGATIVE
3,2022-07-30 05:55:11+00:00,1.550000e+18,@StuffyWings4 @Breaking911 @VP It’s virtue sig...,ThePostalDude50,en,0,0,0,,,https://twitter.com/ThePostalDude50/status/155...,1,POSITIVE,NEGATIVE,P,P,mehdihasanshow rachelbitecofer the daft far rt...,POSITIVE,POSITIVE,POSITIVE
4,2022-07-30 00:36:10+00:00,1.550000e+18,New pod being recorded tonight with video be s...,JointPainPod,en,0,0,0,"['shitpost', 'LGBT', 'podcasting', 'comedy']",,https://twitter.com/JointPainPod/status/155317...,1,POSITIVE,POSITIVE,N,W,new pod being recorded tonight with video be s...,POSITIVE,NEGATIVE,NEUTRAL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2022-07-30 23:12:16+00:00,1.550000e+18,@Emeraid_Gem @harleysuks @MrBeast No we don’t ...,kay87674832,en,0,0,0,,,https://twitter.com/kay87674832/status/1553518...,-5,NEGATIVE,NEGATIVE,N,N,emeraid_gem harleysuks mrbeast no we don’t it’...,NEGATIVE,NEGATIVE,NEGATIVE
996,2022-07-30 14:56:14+00:00,1.550000e+18,@LGBT_Yenta Yes…yes it is…absolutely horrid…,StevensEspresso,en,1,0,0,,,https://twitter.com/StevensEspresso/status/155...,0,NEUTRAL,NEUTRAL,W,N,lgbt_yenta yes…yes it is…absolutely horrid…,NEUTRAL,NEUTRAL,NEGATIVE
997,2022-07-28 14:35:22+00:00,1.550000e+18,"Like, both Stonewall and Mermaids have actuall...",Z_R1Nzoe,en,5,2,0,,,https://twitter.com/Z_R1Nzoe/status/1552664011...,0,POSITIVE,NEUTRAL,W,P,"like, both stonewall and mermaids have actuall...",NEUTRAL,NEUTRAL,POSITIVE
998,2022-07-29 00:27:24+00:00,1.550000e+18,Where the fu&amp;&amp; is LGBT??? https://t.co...,JpmChung,en,0,0,0,,,https://twitter.com/JpmChung/status/1552813002...,0,NEUTRAL,NEUTRAL,W,W,where the fu&amp;&amp; is lgbt,NEUTRAL,NEUTRAL,NEUTRAL


In [12]:
# First annotator's expanded labels as a plain list (the type cohen_kappa documents).
annotator1 = df.loc[:, 'a1'].tolist()
annotator1

['NEUTRAL',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEUTRAL',
 'POSITIVE',
 'NEUTRAL',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEUTRAL',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEUTRAL',
 'NEUTRAL',
 'NEUTRAL',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'POSITIVE',
 'NEUTRAL',
 'NEUTRAL',
 'NEGATIVE',
 'NEGATIVE',
 'NEUTRAL',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'POSITIVE',
 'POSITIVE',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'NEUTRAL',
 'POSITIVE',
 'POSITIVE',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'NEGATIVE',
 'NEUTRAL',
 'NEGATIVE',
 'NEUTRAL',
 'NEUTRAL',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'NEGATIVE',
 'NEGATIVE',


In [13]:
# Second annotator's expanded labels as a plain list (the type cohen_kappa documents).
annotator2 = df.loc[:, 'a2'].tolist()
annotator2

['NEUTRAL',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'NEUTRAL',
 'NEUTRAL',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEUTRAL',
 'NEUTRAL',
 'POSITIVE',
 'NEUTRAL',
 'POSITIVE',
 'NEUTRAL',
 'NEGATIVE',
 'NEUTRAL',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'NEUTRAL',
 'POSITIVE',
 'NEUTRAL',
 'POSITIVE',
 'POSITIVE',
 'NEUTRAL',
 'NEUTRAL',
 'NEUTRAL',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'POSITIVE',
 'NEUTRAL',
 'NEUTRAL',
 'NEGATIVE',
 'NEGATIVE',
 'NEUTRAL',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'NEUTRAL',
 'POSITIVE',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEUTRAL',
 'NEGATIVE',
 'NEUTRAL',
 'NEUTRAL',
 'NEUTRAL',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 

In [14]:
# Pair-wise agreement between the two human annotators.
manual_result = cohen_kappa(ann1=annotator1, ann2=annotator2)
manual_result

0.8152