In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that later
# cells can read the annotation CSVs stored under 'My Drive'.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score

In [None]:
# Directory on the mounted Drive that holds the annotation exports.
DATASET_DIR = '/content/drive/My Drive/AnnotationDataset_paper/datasets'

# Which export to score. Available variants (one CSV each):
#   'onelabel', 'twolabel', 'threelabel', 'unequallabel'
# Change this single value instead of commenting/uncommenting read_csv lines.
DATASET_VARIANT = 'onelabel'

data = pd.read_csv(f'{DATASET_DIR}/multi_selection_{DATASET_VARIANT}_three.csv')
data.head()

In [None]:
# Number of label slots per row; sentiments are read from the columns
# main1_sentiment3 ... main<N>_sentiment3.
N_TOPIC_SLOTS = 3

# Build: review_id -> {annotator -> [(topic_key, sentiment), ...]}
reshaped_data = {}
for _, row in data.iterrows():
    topics = []
    for slot in range(1, N_TOPIC_SLOTS + 1):
        sentiment = row[f'main{slot}_sentiment3']
        if pd.notna(sentiment):  # empty slots are skipped
            # Key each sentiment by its own slot. A key shared by all slots
            # would make every lookup by topic name return only the first
            # slot's sentiment, so slots 2 and 3 would never be compared.
            # NOTE(review): assumes slot i of one annotator corresponds to
            # slot i of another annotator -- confirm against the export format.
            topics.append((f'main{slot}', sentiment))

    # A later row for the same (review_id, annotator) replaces the earlier one.
    reshaped_data.setdefault(row['review_id'], {})[row['annotator']] = topics

In [None]:
def extract_annotations(reshaped_data, annotator):
    """Collect one annotator's labels for every review, in review order.

    Args:
        reshaped_data: mapping of review id -> {annotator: annotation list}.
        annotator: the annotator whose annotations are wanted.

    Returns:
        A list with one element per review in ``reshaped_data`` (iteration
        order of its values): the annotator's stored annotation list, or an
        empty list for reviews that annotator has no entry for.
    """
    return [
        per_review.get(annotator, [])
        for per_review in reshaped_data.values()
    ]

def prepare_comparison_data(annotations1, annotations2, missing_label='none'):
    """Pair up two annotators' sentiments topic by topic.

    The two inputs are walked in parallel (one element per review). For each
    review, every topic named by at least one of the annotators contributes
    exactly one ``(sentiment1, sentiment2)`` pair.

    Args:
        annotations1: per-review lists of ``(topic, sentiment)`` tuples for
            the first annotator.
        annotations2: same, for the second annotator, aligned with
            ``annotations1``.
        missing_label: value used when an annotator has no sentiment for a
            topic the other annotator labelled. Defaults to ``'none'``.

    Returns:
        A list of ``(sentiment1, sentiment2)`` tuples. Topics appear in
        first-seen order (first annotator's topics, then any extra topics of
        the second annotator), review by review.
    """
    comparison_data = []
    for ann1, ann2 in zip(annotations1, annotations2):
        # topic -> first sentiment recorded for that topic by each annotator.
        first_by_topic1 = {}
        for topic, sentiment in ann1:
            first_by_topic1.setdefault(topic, sentiment)
        first_by_topic2 = {}
        for topic, sentiment in ann2:
            first_by_topic2.setdefault(topic, sentiment)

        # Iterate over unique topic *names*, not (topic, sentiment) tuples:
        # when the annotators disagree, both tuples would otherwise be in the
        # union and the same disagreement would be counted twice.
        for topic in dict.fromkeys([*first_by_topic1, *first_by_topic2]):
            comparison_data.append((
                first_by_topic1.get(topic, missing_label),
                first_by_topic2.get(topic, missing_label),
            ))
    return comparison_data

def calculate_kappa(reshaped_data):
    """Compute Cohen's kappa for every pair of annotators.

    Args:
        reshaped_data: mapping of review id -> {annotator: annotation list},
            where each annotation list holds ``(topic, sentiment)`` tuples.

    Returns:
        A dict mapping ``(annotator_a, annotator_b)`` to the kappa score of
        their paired sentiments. Pairs are listed in the order annotators
        first appear in ``reshaped_data``; pairs with nothing to compare are
        omitted.
    """
    from itertools import combinations

    # First-seen order instead of list(set(...)): set iteration order for
    # strings changes between interpreter runs, which made the pair keys
    # (A, B) vs (B, A) and the output order non-reproducible.
    annotators = list(dict.fromkeys(
        annotator
        for entry in reshaped_data.values()
        for annotator in entry
    ))

    # Extract each annotator's per-review annotations once, not once per pair.
    annotations_by_annotator = {
        annotator: extract_annotations(reshaped_data, annotator)
        for annotator in annotators
    }

    kappas = {}
    for first, second in combinations(annotators, 2):
        comparison_data = prepare_comparison_data(
            annotations_by_annotator[first],
            annotations_by_annotator[second],
        )
        if not comparison_data:  # nothing to score for this pair
            continue
        sentiments1, sentiments2 = zip(*comparison_data)
        kappas[(first, second)] = cohen_kappa_score(sentiments1, sentiments2)
    return kappas

# Score every annotator pair on the reshaped annotations.
kappa_results = calculate_kappa(reshaped_data)

# Report each pair's agreement, rounded to three decimals.
for annotators_pair in kappa_results:
    kappa = kappa_results[annotators_pair]
    print(f"Cohen's Kappa between {annotators_pair[0]} and {annotators_pair[1]}: {kappa:.3f}")