In [None]:
!pip install krippendorff

In [None]:
# Mount Google Drive at /content/drive (Colab-only; requires google.colab).
# The dataset path used by the loading cell lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score
import krippendorff
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read single_selection.csv from the mounted Drive folder into `df`.
df = pd.read_csv('/content/drive/My Drive/AnnotationDataset_paper/datasets/single_selection.csv')
# Bare expression: the notebook renders the DataFrame as this cell's output.
df

# Brennan-Prediger & Fleiss’s Generalized Kappa Coefficient


In [None]:
def calculate_brennan_prediger(data1, data2, n_categories=None):
    """Compute the Brennan-Prediger agreement coefficient for two raters.

    The coefficient is ``(p_o - 1/q) / (1 - 1/q)`` where ``p_o`` is the
    observed proportion of matching ratings and ``q`` is the number of
    categories.

    Parameters
    ----------
    data1, data2 : array-like
        Ratings from the two raters, aligned item by item. Lists, NumPy
        arrays and pandas Series are all accepted; they are compared
        positionally.
    n_categories : int, optional
        Number of categories ``q`` on the rating scale. When omitted, ``q``
        is the number of distinct labels observed across both raters
        (the original behaviour).

    Returns
    -------
    float
        The coefficient, or ``nan`` when it is undefined (no items, or
        fewer than two categories).

    Raises
    ------
    ValueError
        If the two rating sequences do not have the same shape.
    """
    # Convert up front so that plain lists are compared element-wise;
    # `list == list` would collapse to a single bool.
    ratings1 = np.asarray(data1)
    ratings2 = np.asarray(data2)
    if ratings1.shape != ratings2.shape:
        raise ValueError(
            f"data1 and data2 must have the same shape, "
            f"got {ratings1.shape} and {ratings2.shape}"
        )
    if ratings1.size == 0:
        return float('nan')

    observed_agreement = float(np.mean(ratings1 == ratings2))

    if n_categories is None:
        n_categories = len(np.unique(np.concatenate([ratings1, ratings2])))
    if n_categories < 2:
        # Chance agreement would be 1, making the denominator zero.
        return float('nan')

    chance_agreement = 1 / n_categories
    return (observed_agreement - chance_agreement) / (1 - chance_agreement)

def calculate_fleiss_kappa(df, annotators, missing_value='0'):
    """Compute Fleiss' generalized kappa across several annotators.

    Items may have a different number of ratings; items with fewer than two
    valid ratings are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per item, one column per annotator.
    annotators : sequence of str
        Names of the annotator columns in ``df``.
    missing_value : object, default '0'
        Marker that denotes "no annotation". NaN cells are always treated as
        missing as well, matching how ``prepare_data`` filters pairs.

    Returns
    -------
    float
        Fleiss' kappa, or ``nan`` when it is undefined (no item has at least
        two ratings, or every rating falls in a single category).
    """
    ratings = df[list(annotators)]

    # A cell is a usable rating only if it is neither NaN nor the marker.
    valid = (ratings.notna() & ratings.ne(missing_value)).to_numpy()
    # Boolean-mask indexing and np.nonzero both walk the array in row-major
    # order, so labels[k] belongs to item item_idx[k].
    labels = ratings.to_numpy(dtype=object)[valid]
    item_idx = np.nonzero(valid)[0]

    # Map each distinct label to an integer column index.
    category_idx, categories = pd.factorize(labels)

    # M[i, j] = number of annotators who assigned category j to item i.
    M = np.zeros((len(ratings), len(categories)))
    np.add.at(M, (item_idx, category_idx), 1)

    n = M.sum(axis=1)
    rated = n > 1  # agreement needs at least two ratings on an item
    if not rated.any():
        return float('nan')
    n = n[rated]
    M = M[rated]

    # Per-item agreement: share of agreeing rater pairs among all rater pairs.
    P_i = np.sum(M * (M - 1), axis=1) / (n * (n - 1))
    P = np.mean(P_i)

    # Expected agreement: sum of squared overall category proportions.
    P_e = np.sum(np.sum(M, axis=0) ** 2) / (np.sum(n) ** 2)
    if np.isclose(P_e, 1.0):
        # All ratings are in one category; kappa is 0/0.
        return float('nan')

    return float((P - P_e) / (1 - P_e))

def prepare_data(df, annotator1_col, annotator2_col):
    """Return the two annotators' ratings restricted to items both rated.

    The string '0' is treated as "no annotation" and converted to NaN; rows
    where either annotator has NaN are dropped from both returned Series.
    The original row index is preserved.
    """
    first, second = (
        df[column].replace('0', np.nan)
        for column in (annotator1_col, annotator2_col)
    )
    both_present = first.notna() & second.notna()
    return first[both_present], second[both_present]

def calculate_agreement_scores(df, annotators=None):
    """Print Fleiss' kappa and return pairwise agreement scores.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per item, one column per annotator.
    annotators : sequence of str, optional
        Annotator columns to compare. Defaults to the three
        ``*_sentiment_five`` columns used by this notebook.

    Returns
    -------
    pandas.DataFrame
        One row per annotator pair that shares at least one item, with
        Cohen's kappa, the Brennan-Prediger coefficient, percentage
        agreement, the number of shared items and the number of distinct
        labels seen for that pair. The columns are present even when no pair
        qualifies.

    Side effects
    ------------
    Prints the overall Fleiss' kappa to stdout.
    """
    if annotators is None:
        annotators = ['ft_sentiment_five', 'a1_sentiment_five', 'a2_sentiment_five']
    else:
        annotators = list(annotators)

    fleiss_kappa = calculate_fleiss_kappa(df, annotators)
    print(f"\nFleiss' Kappa (overall agreement): {fleiss_kappa:.3f}")

    # Every unordered pair of annotators, in column order.
    pairs = [
        (first, second)
        for i, first in enumerate(annotators)
        for second in annotators[i + 1:]
    ]

    columns = [
        'Annotator Pair',
        'Cohen\'s Kappa',
        'Brennan-Prediger',
        'Percentage Agreement',
        'Valid Annotations',
        'Number of Categories',
    ]

    results = []
    for ann1, ann2 in pairs:
        data1, data2 = prepare_data(df, ann1, ann2)
        if len(data1) == 0:
            # No item was rated by both annotators; nothing to score.
            continue

        # Fit one encoder on both raters so identical labels share a code.
        le = LabelEncoder()
        le.fit(pd.concat([data1, data2]))
        data1_encoded = le.transform(data1)
        data2_encoded = le.transform(data2)

        results.append({
            'Annotator Pair': f'{ann1} - {ann2}',
            'Cohen\'s Kappa': cohen_kappa_score(data1_encoded, data2_encoded),
            'Brennan-Prediger': calculate_brennan_prediger(data1_encoded, data2_encoded),
            'Percentage Agreement': (data1 == data2).mean() * 100,
            'Valid Annotations': len(data1),
            'Number of Categories': len(le.classes_)
        })

    return pd.DataFrame(results, columns=columns)

In [None]:
# Score the loaded dataset, then print the pairwise table (rounded to three
# decimals, without the index column) beneath its heading.
results = calculate_agreement_scores(df)
print("\nPairwise Agreement Scores:", results.round(3).to_string(index=False), sep="\n")