In [104]:
import pandas as pd
from crowdkit.metrics.data._classification import alpha_krippendorff

In [126]:
def precision_analysis(fname):
    df = pd.read_csv(fname, sep="\t")
    a_better = (
        (df[df["INPUT:label"] == 0]["OUTPUT:result"] == "a").sum() +
        (df[df["INPUT:label"] == 1]["OUTPUT:result"] == "b").sum()
    )
    
    b_better = (
        (df[df["INPUT:label"] == 0]["OUTPUT:result"] == "b").sum() +
        (df[df["INPUT:label"] == 1]["OUTPUT:result"] == "a").sum()
    )
    
    df["label"] = df["OUTPUT:result"]
    df[(df["INPUT:label"] == 1) & (df["label"] == "a")] = "b"
    df[(df["INPUT:label"] == 1) & (df["label"] == "b")] = "a"
    
    df_agreement = df[["ASSIGNMENT:task_id", "ASSIGNMENT:worker_id", "label"]]
    df_agreement = df_agreement.rename(columns={
        "ASSIGNMENT:task_id": "task",
        "ASSIGNMENT:worker_id": "worker",
    })
    
    alpha_inc_none = alpha_krippendorff(df_agreement)
    
    df_agreement = df_agreement[df_agreement["label"] != "none"]
    
    none = (df["OUTPUT:result"] == "none").sum()
    
    print(f"A was preferred over B in {a_better / (a_better + b_better) * 100:.2f}% cases (exc. none).")
    print(f"A was equal to B in {none / (a_better + b_better + none) * 100:.2f}% cases.")
    print(f"Alpha: {alpha_krippendorff(df_agreement)} for {len(df_agreement) / len(df) * 100:.2f}% nonequal")
    print(f"Alpha: {alpha_inc_none} for all")
    
    return df, (a_better, b_better, none)

## Precision big

In [133]:
result = precision_analysis("results_precision_sd_ldm_big.tsv")

A was preferred over B in 82.93% cases (exc. none).
A was equal to B in 50.05% cases.
Alpha: 0.7449859451256315 for 49.95% nonequal
Alpha: 0.5618428799730844 for all


In [134]:
result = precision_analysis("results_precision_sd_unclip_big.tsv")

A was preferred over B in 50.87% cases (exc. none).
A was equal to B in 67.83% cases.
Alpha: 0.8241141374837873 for 32.17% nonequal
Alpha: 0.5971487302683917 for all


In [135]:
result = precision_analysis("results_precision_sd_75_sd_25_big.tsv")

A was preferred over B in 74.73% cases (exc. none).
A was equal to B in 57.31% cases.
Alpha: 0.809749959778646 for 42.69% nonequal
Alpha: 0.6109506954335422 for all


## Diversity big

In [136]:
result = precision_analysis("results_diversity_sd_ldm_big.tsv")

A was preferred over B in 78.14% cases (exc. none).
A was equal to B in 22.33% cases.
Alpha: 0.5824673773880005 for 77.67% nonequal
Alpha: 0.5184648489425314 for all


In [137]:
result = precision_analysis("results_diversity_sd_unclip_big.tsv")

A was preferred over B in 73.17% cases (exc. none).
A was equal to B in 21.60% cases.
Alpha: 0.6342236674423759 for 78.40% nonequal
Alpha: 0.546370333060505 for all


In [138]:
result = precision_analysis("results_diversity_sd_75_sd_25_big.tsv")

A was preferred over B in 42.53% cases (exc. none).
A was equal to B in 29.65% cases.
Alpha: 0.5787748934450325 for 70.35% nonequal
Alpha: 0.520690406379795 for all
