In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, balanced_accuracy_score, recall_score, f1_score, confusion_matrix

In [4]:
# Load data.csv into `df`. Later cells read the ground-truth columns
# ("damaging", "goodfaith") and the matching "confidence_<model>" score
# columns from this frame.
df = pd.read_csv("data.csv")
df.head()

Unnamed: 0,rev_id,auto_labeled,damaging,goodfaith,rev_link,username,anonymous,first_edit_time,edit_years,label_damaging,confidence_damaging,label_goodfaith,confidence_goodfaith
0,644933637,False,False,True,https://en.wikipedia.org/w/index.php?oldid=644...,WP_1.0_bot,False,2/4/07 22:52,13,False,0.003599,True,0.997142
1,629393521,False,False,True,https://en.wikipedia.org/w/index.php?oldid=629...,Konesama,False,4/9/14 18:10,6,False,0.051282,True,0.981273
2,655365754,False,False,True,https://en.wikipedia.org/w/index.php?oldid=655...,Tachs,False,9/12/05 11:35,15,False,0.008296,True,0.99304
3,616502017,False,False,True,https://en.wikipedia.org/w/index.php?oldid=616...,195.73.134.163,True,9/5/08 13:09,12,True,0.58194,True,0.531591
4,651762922,False,False,True,https://en.wikipedia.org/w/index.php?oldid=651...,Northamerica1000,False,6/8/11 12:22,9,False,0.004405,True,0.997209


In [5]:
def compute_metrics(df, thresh, model="damaging"):
    """Score one model's confidence column against its true labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ground-truth column named ``model`` and a score
        column named ``"confidence_" + model``.
    thresh : float
        Decision threshold; a row is predicted positive when its score is
        strictly greater than ``thresh``.
    model : str
        "damaging" or "goodfaith".

    Returns
    -------
    dict
        Keys "threshold" (``thresh`` rounded to 2 decimals), "accuracy",
        "balanced_accuracy", "FPR", "FNR" and "f1". "FPR" / "FNR" are NaN
        when the data contains no negatives / no positives respectively.
    """
    y_true = df[model]
    y_pred = df["confidence_" + model] > thresh

    # Pin both classes so the matrix is always 2x2. Without `labels`, data in
    # which only one class occurs yields a 1x1 matrix and the four-way unpack
    # below raises ValueError.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    negatives = fp + tn
    positives = fn + tp

    return {
        "threshold": round(thresh, 2),
        "accuracy": accuracy_score(y_true, y_pred),
        "balanced_accuracy": balanced_accuracy_score(y_true, y_pred),
        # Guard the rates explicitly instead of relying on a 0/0 warning.
        "FPR": fp / negatives if negatives else float("nan"),
        "FNR": fn / positives if positives else float("nan"),
        "f1": f1_score(y_true, y_pred),
    }

In [None]:
def compute_metrics(y_true, y_pred_scores, thresh=None, model="damaging"):
    """Compute threshold-dependent classification metrics.

    This definition shares its name with the DataFrame-based
    ``compute_metrics(df, thresh, model)`` defined earlier in the notebook and
    replaces it once this cell runs, so both call forms are accepted:

    * array form: ``compute_metrics(y_true, y_pred_scores, thresh)``
    * DataFrame form: ``compute_metrics(df, thresh[, model])``

    Parameters
    ----------
    y_true : array-like of 1/0 (or bool) labels, or a pandas.DataFrame
        True labels. When a DataFrame is passed, the call is treated as the
        DataFrame form: labels come from column ``model`` and scores from
        column ``"confidence_" + model``.
    y_pred_scores : array-like of float in [0, 1]
        Predicted scores (array form), or the threshold (DataFrame form).
    thresh : float
        Decision threshold; a sample is predicted positive when its score is
        strictly greater than ``thresh``. In the DataFrame form a string in
        this position is taken as ``model``.
    model : str
        "damaging" or "goodfaith". Only used by the DataFrame form.

    Returns
    -------
    dict
        Keys "threshold" (rounded to 2 decimals), "accuracy",
        "balanced_accuracy", "FPR", "FNR" and "f1". "FPR" / "FNR" are NaN
        when the data contains no negatives / no positives respectively.

    Raises
    ------
    TypeError
        If the array form is used without ``thresh``.
    """
    if isinstance(y_true, pd.DataFrame):
        # DataFrame form: arguments arrive shifted by one position.
        frame = y_true
        if isinstance(thresh, str):
            model = thresh
        thresh = y_pred_scores
        y_true = frame[model]
        y_pred_scores = frame["confidence_" + model]
    elif thresh is None:
        raise TypeError("compute_metrics() missing required argument: 'thresh'")

    # Use the scores that were passed in; the previous version overwrote them
    # with a column of the global `df`, silently ignoring the argument.
    y_pred = np.asarray(y_pred_scores) > thresh

    # Pin both classes so the matrix is always 2x2 and the unpack cannot fail
    # when only one class occurs in the data.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

    negatives = fp + tn
    positives = fn + tp

    return {
        "threshold": round(thresh, 2),
        "accuracy": accuracy_score(y_true, y_pred),
        "balanced_accuracy": balanced_accuracy_score(y_true, y_pred),
        "FPR": fp / negatives if negatives else float("nan"),
        "FNR": fn / positives if positives else float("nan"),
        "f1": f1_score(y_true, y_pred),
    }

In [6]:
# Spot-check two operating points: the "damaging" model at threshold 0.5 and
# the "goodfaith" model at threshold 0.1.
d = compute_metrics(df, 0.5, "damaging")
d2 = compute_metrics(df, 0.1, "goodfaith")
print(d)
print(d2)

{'threshold': 0.5, 'accuracy': 0.9688214508418208, 'balanced_accuracy': 0.9519938311185796, 'FPR': 0.029685807150595885, 'FNR': 0.0663265306122449, 'f1': 0.7093023255813954}
{'threshold': 0.1, 'accuracy': 0.9798378715443775, 'balanced_accuracy': 0.6459854014598541, 'FPR': 0.708029197080292, 'FNR': 0.0, 'f1': 0.9897300158814187}


In [7]:
# create data
def create_data(df, interval, model="damaging"):
    """Sweep the decision threshold and write the metrics table to CSV.

    Calls ``compute_metrics(df, t, model)`` for every threshold ``t`` in
    ``np.arange(0, 1.01, interval)`` and writes one row per threshold to
    ``model + str(interval) + ".csv"`` (for example "damaging0.01.csv"),
    without the index column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed through unchanged to ``compute_metrics``.
    interval : float
        Step between consecutive thresholds.
    model : str
        "damaging" or "goodfaith".

    Returns
    -------
    None
    """
    columns = ["threshold", "accuracy", "balanced_accuracy", "FPR", "FNR", "f1"]

    # Collect the rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and appending row by row copies the frame on
    # every iteration.
    records = [
        compute_metrics(df, thresh, model)
        for thresh in np.arange(0, 1.01, interval)
    ]

    metrics = pd.DataFrame(records, columns=columns)
    metrics.to_csv(model + str(interval) + ".csv", index=False)

In [8]:
# Threshold sweep in steps of 0.01 for the "damaging" model; writes "damaging0.01.csv".
create_data(df, 0.01, "damaging")

In [9]:
# Threshold sweep in steps of 0.01 for the "goodfaith" model; writes "goodfaith0.01.csv".
create_data(df, 0.01, "goodfaith")