In [1]:
import pandas as pd
import numpy as np


def percentage_distro(e, total=72):
    """Express a count as a percentage of ``total``, rounded to two decimals.

    Parameters
    ----------
    e : number (or NumPy array / pandas object supporting arithmetic)
        Count(s) to convert.
    total : number, optional
        Denominator the percentage is taken against. Defaults to 72, the
        value this function has always divided by, so existing one-argument
        callers (e.g. ``Series.apply(percentage_distro)``) are unaffected.

    Returns
    -------
    ``e * 100 / total`` rounded with ``np.around(..., 2)``.
    """
    return np.around((e * 100) / total, 2)
    


# Load the labeling-accuracy table.
df = pd.read_csv('../data/crowdsourced/labeling_accuracies.csv', sep=",")

# Distinct distro labels in a stable, sorted order. Building this list from a
# set() made the row order of every table below depend on string hashing,
# which can differ between kernel restarts. Missing labels are dropped so the
# sort never has to compare NaN with strings.
distros = sorted(df['distro'].dropna().unique())

# Row-wise sum of the four label counts.
df['total'] = df['con_distro'] + df['neu_distro'] + df['pro_distro'] + df['unk_distro']

# Percentage version of each count column, stored as "<column>_per".
for count_col in ('con_distro', 'neu_distro', 'pro_distro', 'unk_distro'):
    df[f'{count_col}_per'] = df[count_col].apply(percentage_distro)

# Row-wise sum of the four percentage columns.
df['total_per'] = (
    df['con_distro_per']
    + df['neu_distro_per']
    + df['pro_distro_per']
    + df['unk_distro_per']
)

### Total Accuracies

In [2]:
# Mean accuracy per distro, then the unweighted mean of those (already
# rounded) per-distro means on the last line.
print("distro|acc_avg")
print("---------------")
total = []
for distro in distros:
    acc_values = df.query(f"distro=='{distro}'")['acc'].values
    distro_acc = np.around(np.mean(acc_values), 2)
    total.append(distro_acc)
    print(f"{distro}   |{distro_acc}")
print("-----------")
overall_acc = np.around(np.mean(np.hstack(total)), 2)
print(f"       {overall_acc}")

distro|acc_avg
---------------
0:2   |0.55
2:2   |0.76
4:4   |0.62
2:4   |0.44
0:4   |0.76
0:0   |0.84
-----------
       0.66


### Total Precisions Pro/Con

In [3]:
# Mean pro/con precision per distro; the final line is the unweighted mean
# of the rounded per-distro values.
total_pro = []
total_con = []
print("dist| pro|con")
print("-------------")
for distro in distros:
    # Both columns come from the same rows, so filter once.
    distro_rows = df.query(f"distro=='{distro}'")
    pro_mean = np.around(np.mean(distro_rows['pro_prec'].values), 2)
    con_mean = np.around(np.mean(distro_rows['con_prec'].values), 2)
    total_pro.append(pro_mean)
    total_con.append(con_mean)
    print(f"{distro} |{pro_mean} |{con_mean}")

overall_pro = np.round(np.mean(total_pro), 2)
overall_con = np.round(np.mean(total_con), 2)
print(f"     {overall_pro}|{overall_con}")

dist| pro|con
-------------
0:2 |0.52 |0.6
2:2 |0.8 |0.66
4:4 |0.61 |0.4
2:4 |0.44 |0.35
0:4 |0.8 |0.62
0:0 |0.8 |0.9
     0.66|0.59


### Precisions for Setup 1 and Setup 2

#### Setup 1

In [4]:
# Setup 1: mean pro/con precision per distro, restricted to setup_1 rows.
print("dist|pro |con")
print("-------------")
total_pro = []
total_con = []
for distro in distros:
    # Both columns come from the same rows, so filter once.
    setup_rows = df.query(f"distro=='{distro}' and setup=='setup_1'")
    pro_mean = np.around(np.mean(setup_rows['pro_prec'].values), 2)
    con_mean = np.around(np.mean(setup_rows['con_prec'].values), 2)
    total_pro.append(pro_mean)
    total_con.append(con_mean)
    print(f"{distro}|{pro_mean} |{con_mean}")

print("--------------")
# nanmean: per-distro means that are NaN are left out of the summary.
overall_pro = np.around(np.nanmean(np.hstack(total_pro)), 2)
overall_con = np.around(np.nanmean(np.hstack(total_con)), 2)
print(f"    {overall_pro} |{overall_con}")


dist|pro |con
-------------
0:2|0.2 |0.28
2:2|0.72 |0.61
4:4|0.57 |0.4
2:4|0.58 |0.25
0:4|0.82 |0.58
0:0|0.71 |0.88
--------------
    0.6 |0.5


#### Setup 2

In [5]:
# Setup 2: mean pro/con precision per distro, restricted to setup_2 rows.
print("dist|pro |con")
print("-------------")
total_pro = []
total_con = []
for distro in distros:
    # Both columns come from the same rows, so filter once.
    setup_rows = df.query(f"distro=='{distro}' and setup=='setup_2'")
    pro_mean = np.around(np.mean(setup_rows['pro_prec'].values), 2)
    con_mean = np.around(np.mean(setup_rows['con_prec'].values), 2)
    total_pro.append(pro_mean)
    total_con.append(con_mean)
    print(f"{distro}|{pro_mean} |{con_mean}")

print("--------------")
# nanmean: per-distro means that are NaN are left out of the summary.
overall_pro = np.around(np.nanmean(np.hstack(total_pro)), 2)
overall_con = np.around(np.nanmean(np.hstack(total_con)), 2)
print(f"    {overall_pro} |{overall_con}")


dist|pro |con
-------------
0:2|0.85 |0.91
2:2|0.89 |0.72
4:4|0.68 |0.42
2:4|0.37 |0.4
0:4|0.8 |0.66
0:0|0.88 |0.92
--------------
    0.74 |0.67
