### Cohen's kappa for sport and art users' posts between Yuval and Valerie

In [47]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score

# Workbook that holds the manually tagged posts
file = 'posts20_tagged_with_GPT4.xlsx'

# Load the sheet with the posts of users interested in both sport and art
df1 = pd.read_excel(file, sheet_name="sport and art users posts")

# Every post needs a label from both raters on both grades; a missing label
# would otherwise be treated as a rating of its own or make the metric fail
# with an unrelated error message.
rating_columns = ["Yuval's grade1", "Yuval's grade2", "Val's grade1", "Val's grade2"]
if df1[rating_columns].isna().any().any():
    raise ValueError("Some posts are missing a grade from at least one rater.")

# Extract the relevant columns
Yuval_grade_1 = df1["Yuval's grade1"]
Yuval_grade_2 = df1["Yuval's grade2"]
Val_grade_1 = df1["Val's grade1"]
Val_grade_2 = df1["Val's grade2"]

# Cohen's kappa pairs the two raters item by item, so the length check has to
# compare Yuval's column with Val's column for the same grade.
if len(Yuval_grade_1) != len(Val_grade_1) or len(Yuval_grade_2) != len(Val_grade_2):
    raise ValueError("Columns do not have the same number of entries.")

# Calculate Cohen's kappa between Yuval and Val, separately for each grade
kappa_sport_and_art_grade_1 = cohen_kappa_score(Yuval_grade_1, Val_grade_1)
kappa_sport_and_art_grade_2 = cohen_kappa_score(Yuval_grade_2, Val_grade_2)

# Print the results (labelled with the sheet they were actually computed on)
print(f"Kappa for Grade 1 (Sport and Art):{kappa_sport_and_art_grade_1:.4f}")
print(f"Kappa for Grade 2 (Sport and Art):{kappa_sport_and_art_grade_2:.4f}")


Kappa for Grade 1 (Only Art):0.8462
Kappa for Grade 2 (Only Art):1.0000


### Cohen's kappa for only art users' posts between Yuval and Valerie

In [46]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score

# Workbook that holds the manually tagged posts
file = 'posts20_tagged_with_GPT4.xlsx'

# Load the sheet with the posts of users interested in art only
df1 = pd.read_excel(file, sheet_name="only art users posts")

# Every post needs a label from both raters on both grades; a missing label
# would otherwise be treated as a rating of its own or make the metric fail
# with an unrelated error message.
rating_columns = ["Yuval's grade1", "Yuval's grade2", "Val's grade1", "Val's grade2"]
if df1[rating_columns].isna().any().any():
    raise ValueError("Some posts are missing a grade from at least one rater.")

# Extract the relevant columns
Yuval_grade_1 = df1["Yuval's grade1"]
Yuval_grade_2 = df1["Yuval's grade2"]
Val_grade_1 = df1["Val's grade1"]
Val_grade_2 = df1["Val's grade2"]

# Cohen's kappa pairs the two raters item by item, so the length check has to
# compare Yuval's column with Val's column for the same grade.
if len(Yuval_grade_1) != len(Val_grade_1) or len(Yuval_grade_2) != len(Val_grade_2):
    raise ValueError("Columns do not have the same number of entries.")

# Calculate Cohen's kappa between Yuval and Val, separately for each grade.
# The results get their own names so they neither overwrite nor masquerade as
# the "sport and art" results computed in the other cell.
kappa_only_art_grade_1 = cohen_kappa_score(Yuval_grade_1, Val_grade_1)
kappa_only_art_grade_2 = cohen_kappa_score(Yuval_grade_2, Val_grade_2)

# Print the results
print(f"Kappa for Grade 1 (Only Art):{kappa_only_art_grade_1:.4f}")
print(f"Kappa for Grade 2 (Only Art):{kappa_only_art_grade_2:.4f}")


Kappa for Grade 1 (Only Art):0.8387
Kappa for Grade 2 (Only Art):0.8305


### Fleiss' kappa between Yuval, Valerie, and GPT

In [None]:
import pandas as pd
import numpy as np
from statsmodels.stats.inter_rater import fleiss_kappa

# Path of the Excel workbook with the tagged posts; it is a relative path, so
# it is resolved against the notebook's current working directory.
file = 'posts20_tagged_with_GPT4.xlsx'

# Function to calculate Fleiss' Kappa for a specific grade
def calculate_fleiss_kappa(df, grade_columns):
    """Compute Fleiss' kappa for the raters stored in ``grade_columns``.

    Each column of ``grade_columns`` holds one rater's label per row of ``df``.
    Labels are compared case-insensitively and with surrounding whitespace
    removed. Rows in which at least one rater has no label are left out,
    because Fleiss' kappa requires the same number of ratings for every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per rated item.
    grade_columns : list of str
        Names of the columns in ``df`` that hold the raters' labels.

    Returns
    -------
    categories : list of str
        Sorted distinct (normalised) labels found across all raters.
    counts_matrix : numpy.ndarray
        Array of shape (rows kept, len(categories)); entry ``[i, j]`` is the
        number of raters who gave row ``i`` the label ``categories[j]``.
    kappa : float
        Fleiss' kappa computed from ``counts_matrix``.

    Raises
    ------
    KeyError
        If one of ``grade_columns`` is not a column of ``df``.
    ValueError
        If no row has a label from every rater.
    """
    # Normalise the labels; casting to the string dtype first keeps missing
    # values missing and avoids an AttributeError on non-text columns.
    grade_data = df[grade_columns].apply(
        lambda col: col.astype("string").str.strip().str.lower()
    )

    # A row with a missing rating would contribute fewer counts than the
    # others and silently distort the statistic, so keep complete rows only.
    grade_data = grade_data.dropna(how="any")
    if grade_data.empty:
        raise ValueError("No rows with a complete set of ratings.")

    # Identify all unique categories across columns
    categories = sorted(set(grade_data.stack().unique()))

    # One row of counts per rated item: how many raters chose each category
    counts_matrix = np.array(
        [
            [row.count(category) for category in categories]
            for row in grade_data.values.tolist()
        ]
    )

    # Calculate Fleiss' Kappa
    kappa = fleiss_kappa(counts_matrix, method='fleiss')

    return categories, counts_matrix, kappa

# Read all sheets from the Excel file; sheet_name=None yields a dict that maps
# each sheet name to its DataFrame.
sheets = pd.read_excel(file, sheet_name=None)

# Iterate over each sheet, calculate Fleiss' Kappa for grade1 and grade2, and
# collect one summary row per successfully processed sheet.
results = []
for sheet_name, df in sheets.items():
    print(f"Processing sheet: {sheet_name}")
    try:
        # Calculate for Grade1
        categories1, counts_matrix1, kappa1 = calculate_fleiss_kappa(
            df, ["GPT Grade1", "Val's grade1", "Yuval's grade1"]
        )

        # Calculate for Grade2
        categories2, counts_matrix2, kappa2 = calculate_fleiss_kappa(
            df, ["GPT Grade2", "Val's grade2", "Yuval's grade2"]
        )

        average_kappa = (kappa1 + kappa2) / 2

        # Print results for the sheet
        print(f"Grade1 fleiss Kappa for {sheet_name}: {kappa1:.4f}")
        print(f"Grade2 fleiss Kappa for {sheet_name}: {kappa2:.4f}")
        print(f"Average fleiss Kappa for {sheet_name}: {average_kappa:.4f}\n")

        results.append(
            {
                "sheet": sheet_name,
                "grade1_kappa": kappa1,
                "grade2_kappa": kappa2,
                "average_kappa": average_kappa,
            }
        )

    except Exception as e:
        # Best effort: a sheet that lacks the expected columns or labels is
        # reported and skipped so the remaining sheets are still processed.
        print(f"Error processing sheet {sheet_name}: {e}")

# Convert results to a DataFrame and display
results_df = pd.DataFrame(results)
results_df


Processing sheet: sport and art users posts
Grade1 fleiss Kappa for sport and art users posts: 0.6441
Grade2 fleiss Kappa for sport and art users posts: 0.1304
Average fleiss Kappa for sport and art users posts: 0.3873

Processing sheet: only art users posts
Grade1 fleiss Kappa for only art users posts: 0.5636
Grade2 fleiss Kappa for only art users posts: 0.1787
Average fleiss Kappa for only art users posts: 0.3712



### Cohen's kappa between all 100 ads

In [33]:
from sklearn.metrics import cohen_kappa_score

# Collapse the label vocabularies of the two grades onto shared categories so
# that a grade1 label can be compared directly with a grade2 label.
mapping = {
    'rather productive': 'Category A',
    'rather not hard to create': 'Category A',
    'rather not productive': 'Category B',
    'rather hard to create': 'Category B',
    'rather neutral': 'Category C',
    'rather neutral to creation': 'Category C',
    'author asks for a relative or close friend only': 'Category D',
}

# NOTE(review): the section title mentions 100 ads, but the active path is the
# 20-post workbook; switch to the line below if the 100-post file is intended.
# file = 'posts100_tagged_with_GPT4.xlsx'

file = 'posts20_tagged_with_GPT4.xlsx'

# Read all sheets from the Excel file
sheets = pd.read_excel(file, sheet_name=None)

# Columns whose labels are translated to the shared categories
grade_columns = [
    'GPT Grade1', 'GPT Grade2',
    "Val's grade1", "Val's grade2",
    "Yuval's grade1", "Yuval's grade2",
]

# Iterate over each sheet and calculate Cohen's kappa between Yuval's grade1
# and Yuval's grade2 labels (after mapping both to the shared categories).
results = []
for sheet_name, df in sheets.items():
    print(f"Processing sheet: {sheet_name}")
    try:
        # Apply the mapping to the DataFrame, column by column
        for column in grade_columns:
            labels = df[column].str.lower()
            mapped = labels.map(mapping)

            # A label that is absent from `mapping` turns into NaN; report it
            # instead of letting it silently enter the agreement statistic.
            unmapped = sorted(set(labels[labels.notna() & mapped.isna()]))
            if unmapped:
                print(f"Warning: unmapped labels in {column!r} of sheet {sheet_name}: {unmapped}")

            df[column] = mapped

        kappa = cohen_kappa_score(df["Yuval's grade1"], df["Yuval's grade2"])

        # Print results for the sheet
        print(f"Kappa for {sheet_name}: {kappa:.4f}")

        results.append({"sheet": sheet_name, "kappa": kappa})

    except Exception as e:
        # Best effort: report the failing sheet and continue with the others
        print(f"Error processing sheet {sheet_name}: {e}")

# Convert results to a DataFrame and display
results_df = pd.DataFrame(results)
results_df


Processing sheet: sport and art users posts
Kappa for sport and art users posts: 0.6610
Processing sheet: only art users posts
Kappa for only art users posts: 0.8246
