# Inter-Annotator Agreement Calculation

In [1]:
# imports for this project
import gspread
import pandas as pd
import os
from nltk import agreement

In [2]:
# Google Sheets API
gc = gspread.service_account(filename='fignews-7b178eec49aa.json')
SHEET_ID = "1MN0ynKpEweU52-LcIRgJ_oR2-g429evqjX7T21Uz8y8"

In [3]:
spreadsheet = gc.open_by_key(SHEET_ID)

In [4]:
def fetch_annotations(sheet_name, column_index):
    worksheet = spreadsheet.worksheet(sheet_name)
    # Construct the range string to fetch from row 2 to row 201 (200 rows of data, skipping the header)
    range_string = '{}2:{}201'.format(chr(64 + column_index), chr(64 + column_index))  # Constructs A2:A201 if column_index is 9
    cells = worksheet.range(range_string)
    # Return the values of the cells, not the cell objects themselves
    values = [cell.value for cell in cells]
    if any(value == '' for value in values):
        raise ValueError(f"Empty string found in sheet '{sheet_name}' in column {column_index}")
    return values

def calculate_agreement(sheets, column_index):
    try:
        annotators = [fetch_annotations(sheet, column_index) for sheet in sheets]
        taskdata = []
        for annotator_index, ratings in enumerate(annotators):
            taskdata += [[annotator_index, str(i), str(rating)] for i, rating in enumerate(ratings)]
        rating_task = agreement.AnnotationTask(data=taskdata)
        return {
            "Cohen's Kappa": rating_task.kappa(),
            "Fleiss' Kappa": rating_task.multi_kappa(),
            "Krippendorff's Alpha": rating_task.alpha(),
            "Scott's Pi": rating_task.pi()
        }
    except ValueError as e:
        print(e)
        return None




In [5]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-1', 'IAA-2', 'IAA-3', 'IAA-4']
column_index_1 = 9
column_index_2 = 10
# Calculate agreement for the first task
print("Bias Task Agreement Scores:")
agreement_scores_1 = calculate_agreement(sheets, column_index_1)
if agreement_scores_1:
    for metric, score in agreement_scores_1.items():
        print(f"{metric}: {score}")

Bias Task Agreement Scores:
Cohen's Kappa: 0.433797972797157
Fleiss' Kappa: 0.4321715382743423
Krippendorff's Alpha: 0.4297047067460643
Scott's Pi: 0.42899094542784894


In [6]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-1', 'IAA-2']
column_index_1 = 9
column_index_2 = 10
# Calculate agreement for the first task
print("Bias Task Agreement Scores:")
agreement_scores_1 = calculate_agreement(sheets, column_index_1)
if agreement_scores_1:
    for metric, score in agreement_scores_1.items():
        print(f"{metric}: {score}")

Bias Task Agreement Scores:
Cohen's Kappa: 0.6529052498080967
Fleiss' Kappa: 0.6529052498080967
Krippendorff's Alpha: 0.6503774602318684
Scott's Pi: 0.649501213265031


In [7]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-1', 'IAA-3']
column_index_1 = 9
column_index_2 = 10
# Calculate agreement for the first task
print("Bias Task Agreement Scores:")
agreement_scores_1 = calculate_agreement(sheets, column_index_1)
if agreement_scores_1:
    for metric, score in agreement_scores_1.items():
        print(f"{metric}: {score}")

Bias Task Agreement Scores:
Cohen's Kappa: 0.32525784789384055
Fleiss' Kappa: 0.32525784789384055
Krippendorff's Alpha: 0.31591064955422743
Scott's Pi: 0.3141961399039874


In [8]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-1', 'IAA-4']
column_index_1 = 9
column_index_2 = 10
# Calculate agreement for the first task
print("Bias Task Agreement Scores:")
agreement_scores_1 = calculate_agreement(sheets, column_index_1)
if agreement_scores_1:
    for metric, score in agreement_scores_1.items():
        print(f"{metric}: {score}")

Bias Task Agreement Scores:
Cohen's Kappa: 0.41463414634146345
Fleiss' Kappa: 0.41463414634146345
Krippendorff's Alpha: 0.4106448277559863
Scott's Pi: 0.4091677471237958


In [9]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-2', 'IAA-3']
column_index_1 = 9
column_index_2 = 10
# Calculate agreement for the first task
print("Bias Task Agreement Scores:")
agreement_scores_1 = calculate_agreement(sheets, column_index_1)
if agreement_scores_1:
    for metric, score in agreement_scores_1.items():
        print(f"{metric}: {score}")

Bias Task Agreement Scores:
Cohen's Kappa: 0.4455079543204172
Fleiss' Kappa: 0.4455079543204172
Krippendorff's Alpha: 0.44493392070484583
Scott's Pi: 0.44354277764896816


In [10]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-2', 'IAA-4']
column_index_1 = 9
column_index_2 = 10
# Calculate agreement for the first task
print("Bias Task Agreement Scores:")
agreement_scores_1 = calculate_agreement(sheets, column_index_1)
if agreement_scores_1:
    for metric, score in agreement_scores_1.items():
        print(f"{metric}: {score}")

Bias Task Agreement Scores:
Cohen's Kappa: 0.40773167669874794
Fleiss' Kappa: 0.40773167669874794
Krippendorff's Alpha: 0.4003234607027919
Scott's Pi: 0.39882051198274876


In [11]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-3', 'IAA-4']
column_index_1 = 9
column_index_2 = 10
# Calculate agreement for the first task
print("Bias Task Agreement Scores:")
agreement_scores_1 = calculate_agreement(sheets, column_index_1)
if agreement_scores_1:
    for metric, score in agreement_scores_1.items():
        print(f"{metric}: {score}")

Bias Task Agreement Scores:
Cohen's Kappa: 0.35675096172037585
Fleiss' Kappa: 0.35675096172037585
Krippendorff's Alpha: 0.3498514329531295
Scott's Pi: 0.34822198792293685


In [12]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-1', 'IAA-2', 'IAA-3', 'IAA-4']
print("Propaganda Task Agreement Scores:")
agreement_scores_2 = calculate_agreement(sheets, column_index_2)
if agreement_scores_2:
    for metric, score in agreement_scores_2.items():
        print(f"{metric}: {score}")


Propaganda Task Agreement Scores:
Cohen's Kappa: 0.31565493913276316
Fleiss' Kappa: 0.3144698067029619
Krippendorff's Alpha: 0.3047482739600966
Scott's Pi: 0.3038781216121117


In [13]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-1', 'IAA-2']
print("Propaganda Task Agreement Scores:")
agreement_scores_2 = calculate_agreement(sheets, column_index_2)
if agreement_scores_2:
    for metric, score in agreement_scores_2.items():
        print(f"{metric}: {score}")

Propaganda Task Agreement Scores:
Cohen's Kappa: 0.4687108648628136
Fleiss' Kappa: 0.4687108648628136
Krippendorff's Alpha: 0.4631633574874584
Scott's Pi: 0.4618179022430661


In [14]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-1', 'IAA-3']
print("Propaganda Task Agreement Scores:")
agreement_scores_2 = calculate_agreement(sheets, column_index_2)
if agreement_scores_2:
    for metric, score in agreement_scores_2.items():
        print(f"{metric}: {score}")

Propaganda Task Agreement Scores:
Cohen's Kappa: 0.21977597528003093
Fleiss' Kappa: 0.21977597528003093
Krippendorff's Alpha: 0.2011774500475737
Scott's Pi: 0.19917538851887087


In [15]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-1', 'IAA-4']
print("Propaganda Task Agreement Scores:")
agreement_scores_2 = calculate_agreement(sheets, column_index_2)
if agreement_scores_2:
    for metric, score in agreement_scores_2.items():
        print(f"{metric}: {score}")

Propaganda Task Agreement Scores:
Cohen's Kappa: 0.35885625852094766
Fleiss' Kappa: 0.35885625852094766
Krippendorff's Alpha: 0.3481738803868182
Scott's Pi: 0.34654023096422865


In [16]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-2', 'IAA-3']
print("Propaganda Task Agreement Scores:")
agreement_scores_2 = calculate_agreement(sheets, column_index_2)
if agreement_scores_2:
    for metric, score in agreement_scores_2.items():
        print(f"{metric}: {score}")

Propaganda Task Agreement Scores:
Cohen's Kappa: 0.25127623369256946
Fleiss' Kappa: 0.25127623369256946
Krippendorff's Alpha: 0.21732152410390537
Scott's Pi: 0.21535992391368958


In [17]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-2', 'IAA-4']
print("Propaganda Task Agreement Scores:")
agreement_scores_2 = calculate_agreement(sheets, column_index_2)
if agreement_scores_2:
    for metric, score in agreement_scores_2.items():
        print(f"{metric}: {score}")

Propaganda Task Agreement Scores:
Cohen's Kappa: 0.39038506057566164
Fleiss' Kappa: 0.39038506057566164
Krippendorff's Alpha: 0.3879417475728155
Scott's Pi: 0.38640776699029117


In [18]:
# Define the sheet names and the column index for annotations
sheets = ['IAA-3', 'IAA-4']
print("Propaganda Task Agreement Scores:")
agreement_scores_2 = calculate_agreement(sheets, column_index_2)
if agreement_scores_2:
    for metric, score in agreement_scores_2.items():
        print(f"{metric}: {score}")

Propaganda Task Agreement Scores:
Cohen's Kappa: 0.20492524186455582
Fleiss' Kappa: 0.20492524186455582
Krippendorff's Alpha: 0.1475164968140824
Scott's Pi: 0.14537994668078427


## GPT vs human

In [19]:
def fetch_annotations_from_columns(sheet_name, columns):
    worksheet = spreadsheet.worksheet(sheet_name)
    annotators = []
    for column in columns:
        range_string = '{}2:{}201'.format(chr(64 + column), chr(64 + column))  # Constructs A2:A201 if column is 1
        cells = worksheet.range(range_string)
        values = [cell.value for cell in cells]
        if any(value == '' for value in values):
            raise ValueError(f"Empty string found in sheet '{sheet_name}' in column {column}")
        annotators.append(values)
    print(annotators)
    return annotators

def calculate_agreement_from_columns(sheet_name, columns):
    try:
        annotators = fetch_annotations_from_columns(sheet_name, columns)
        taskdata = []
        for annotator_index, ratings in enumerate(annotators):
            taskdata += [[annotator_index, str(i), str(rating)] for i, rating in enumerate(ratings)]
        rating_task = agreement.AnnotationTask(data=taskdata)
        return {
            "Cohen's Kappa": rating_task.kappa(),
            "Fleiss' Kappa": rating_task.multi_kappa(),
            "Krippendorff's Alpha": rating_task.alpha(),
            "Scott's Pi": rating_task.pi()
        }
    except ValueError as e:
        print(e)
        return None

In [23]:
sheet_name = 'Main'  # Example sheet name
columns = [9,13]  # Assuming annotations are in the first four columns

# Calculate agreement for the annotations in the specified columns
print("Agreement Scores:")
agreement_scores = calculate_agreement_from_columns(sheet_name, columns)
if agreement_scores:
    for metric, score in agreement_scores.items():
        print(f"{metric}: {score}")

Agreement Scores:
[['Biased against others', 'Unclear', 'Biased against others', 'Biased against others', 'Unbiased', 'Unclear', 'Unbiased', 'Biased against Israel', 'Unclear', 'Unbiased', 'Unclear', 'Unbiased', 'Biased against others', 'Unclear', 'Biased against others', 'Unclear', 'Unbiased', 'Unclear', 'Biased against Israel', 'Unclear', 'Biased against others', 'Biased against others', 'Biased against others', 'Biased against Israel', 'Biased against Israel', 'Unclear', 'Not Applicable', 'Biased against others', 'Unbiased', 'Unbiased', 'Biased against others', 'Unbiased', 'Biased against others', 'Not Applicable', 'Unclear', 'Unbiased', 'Unbiased', 'Biased against others', 'Biased against Israel', 'Biased against others', 'Biased against Israel', 'Biased against others', 'Unclear', 'Unbiased', 'Unclear', 'Not Applicable', 'Unbiased', 'Biased against others', 'Unbiased', 'Biased against others', 'Biased against others', 'Unclear', 'Unclear', 'Biased against others', 'Biased against 

In [24]:
sheet_name = 'Main'  # Example sheet name
columns = [10,12]  # Assuming annotations are in the first four columns

# Calculate agreement for the annotations in the specified columns
print("Agreement Scores:")
agreement_scores = calculate_agreement_from_columns(sheet_name, columns)
if agreement_scores:
    for metric, score in agreement_scores.items():
        print(f"{metric}: {score}")

Agreement Scores:
[['Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Not Propaganda', 'Propaganda', 'Unclear', 'Propaganda', 'Propaganda', 'Not Propaganda', 'Unclear', 'Not Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Not Propaganda', 'Unclear', 'Unclear', 'Unclear', 'Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Unclear', 'Unclear', 'Propaganda', 'Unclear', 'Not Propaganda', 'Propaganda', 'Not Propaganda', 'Propaganda', 'Unclear', 'Unclear', 'Not Propaganda', 'Not Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Unclear', 'Not Propaganda', 'Propaganda', 'Unclear', 'Not Propaganda', 'Propaganda', 'Not Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Unclear', 'Propaganda', 'Propaganda', 'Not Propaganda', 'Propaganda', 'Propaganda', 'Propaganda', 'Not Propaganda', 'Not Propaganda', 'Not Propaganda', 'Propaganda', 'Unclear', 'Unclear', 'Not Propaganda', 'Unclear', 'Not Propaganda', 'Propaga

64 times unbiased against someone as label introduced --> Filtered out for bias
1 "please provide me with the text to analyse" --> filtered out
788 biased against palestine --> filtered out

In [25]:
sheet_name = 'MainClean'  # Example sheet name
columns = [9,13]  # Assuming annotations are in the first four columns

# Calculate agreement for the annotations in the specified columns
print("Agreement Scores:")
agreement_scores = calculate_agreement_from_columns(sheet_name, columns)
if agreement_scores:
    for metric, score in agreement_scores.items():
        print(f"{metric}: {score}")

Agreement Scores:
[['Unclear', 'Biased against others', 'Unbiased', 'Unclear', 'Unbiased', 'Biased against Israel', 'Unbiased', 'Unbiased', 'Unclear', 'Unclear', 'Unclear', 'Biased against Israel', 'Unclear', 'Biased against Israel', 'Biased against Israel', 'Unclear', 'Not Applicable', 'Biased against others', 'Unbiased', 'Not Applicable', 'Unbiased', 'Not Applicable', 'Unbiased', 'Unclear', 'Biased against others', 'Unbiased', 'Biased against Israel', 'Unbiased', 'Biased against Israel', 'Unclear', 'Biased against others', 'Biased against Israel', 'Biased against Israel', 'Biased against Israel', 'Unbiased', 'Unbiased', 'Unclear', 'Biased against Israel', 'Unclear', 'Biased against Israel', 'Biased against Israel', 'Biased against others', 'Biased against Israel', 'Biased against Israel', 'Unbiased', 'Unbiased', 'Unbiased', 'Biased against others', 'Unbiased', 'Bias for', 'Bias for', 'Unbiased', 'Unbiased', 'Unbiased', 'Biased against others', 'Unclear', 'Biased against Israel', 'Bia