In [None]:
import warnings
# Ignore all warnings, of every category, for the rest of the kernel session.
# NOTE(review): this also hides deprecation and pandas warnings; narrow the
# filter (category=/module=) if any of those should stay visible.
warnings.filterwarnings('ignore')

In [None]:
import re
import gc
import os
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.metrics import f1_score, classification_report, confusion_matrix

In [None]:
# Columns of the prediction frames that flag an identity term; term-wise FN/FP
# figures are measured for each of them and compared with the overall figures.
wordslists = [
    'Jew',
    'Muslim',
    'Christian',
    'Asian',
    'Black',
    'White',
    'Female',
    'Male',
    'Heterosexual',
    'Homosexual_gay_or_lesbian',
]

# Per-term score containers, keyed by term name once they are filled in.
scoresBeforeROC = dict()
scoresAfterROC = dict()

In [None]:
def evaluations(df, theta):
    """Score the predictions stored in column '@<theta>' against df['Toxic'].

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Toxic' label column and a '@<theta>' prediction column.
        Both columns are cast to int IN PLACE, so the caller's frame is modified.
    theta : float or str
        Threshold identifier; the prediction column name is '@' + str(theta).

    Returns
    -------
    tuple
        (accuracy, precision, recall, f1, classification_report, confusion_matrix,
        tn, fp, fn, tp)
    """
    val = '@'+str(theta)
    # In-place casts are kept because code that runs after this call reads the
    # same columns from the same frame.
    # NOTE(review): astype('int') truncates, so a column of raw probabilities
    # would collapse to 0 -- presumably every '@<theta>' column is already
    # binarised; confirm.
    df['Toxic'] = df['Toxic'].astype('int')
    df[val] = df[val].astype('int')

    y_true, y_pred = df['Toxic'], df[val]
    accuracy = accuracy_score( y_true, y_pred )
    precision = precision_score( y_true, y_pred )
    recall = recall_score( y_true, y_pred )
    f1 = f1_score( y_true, y_pred )
    cr = classification_report( y_true, y_pred )
    # Pin both labels so the matrix is always 2x2; without `labels`, data that
    # contains a single class yields a 1x1 matrix and the 4-way unpack fails.
    cm = confusion_matrix( y_true, y_pred, labels=[0, 1] )

    _tn, _fp, _fn, _tp = cm.ravel()

    return accuracy, precision, recall, f1, cr, cm, _tn, _fp, _fn, _tp

# Term-wise false positives / false negatives and their gap to the overall figures
def EERs(df, theta, wordslists, scores, tnoverall, fpoverall, fnoverall, tpoverall):
    """Measure per-term FP/FN counts and accumulate their difference to the overall counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'Toxic' label column, a '@<theta>' prediction column and one
        indicator column per entry of `wordslists` (rows with value 1 belong
        to that term's subgroup).
    theta : float or str
        Threshold identifier; the prediction column name is '@' + str(theta).
    wordslists : iterable of str
        Indicator column names to evaluate.
    scores : dict
        Updated in place: scores[term] = {'fp': ..., 'fn': ...} for every term
        that has at least one row. Entries for other terms are left untouched.
    tnoverall, fpoverall, fnoverall, tpoverall : int
        Confusion-matrix counts of the whole frame.

    Returns
    -------
    tuple
        (scores, fnrd, fprd, pbr) where fprd / fnrd are the sums over evaluated
        terms of (overall count - term count) and pbr is
        (fpoverall - fnoverall) / (tn + fp + fn + tp).

    NOTE(review): the differences are taken on raw counts, not on rates, even
    though the names suggest rates -- confirm this is the intended metric.
    """
    val = '@'+str(theta)
    fnrd, fprd = 0, 0
    for term in wordslists:
        # Only the label and prediction columns are needed for the counts.
        subgroup = df.loc[df[term] == 1, ['Toxic', val]]
        if len(subgroup) == 0:
            # No rows mention this term: skip it entirely. Accumulating here
            # would reuse the previous term's fp/fn (or fail on the first term).
            continue
        # Count the two cells directly; this equals confusion_matrix(...)[0, 1]
        # and [1, 0] for 0/1 data and also works when the subgroup holds a
        # single class.
        fp = ((subgroup['Toxic'] == 0) & (subgroup[val] == 1)).sum()
        fn = ((subgroup['Toxic'] == 1) & (subgroup[val] == 0)).sum()
        scores[term] = {'fp': fp, 'fn': fn}
        fprd = fprd + (fpoverall - fp)
        fnrd = fnrd + (fnoverall - fn)
    pbr = (fpoverall - fnoverall) / (tnoverall + fpoverall + fnoverall + tpoverall)
    return scores, fnrd, fprd, pbr


def Overall_Discrimination(df, theta):
    """Return the gap in predicted-0 share between the Sum == 1 and Sum == 0 rows.

    For the prediction column '@' + str(theta), computes
    (share of rows with Sum == 1 predicted 0) - (share of rows with Sum == 0 predicted 0).
    Rows with any other 'Sum' value are ignored. Raises ZeroDivisionError when
    either group is empty.
    """
    predicted_zero = df['@' + str(theta)] == 0

    shares = []
    for group_value in (1, 0):
        in_group = df['Sum'] == group_value
        # Plain Python ints keep the division semantics of len(...) / len(...).
        shares.append(int((predicted_zero & in_group).sum()) / int(in_group.sum()))

    return shares[0] - shares[1]

In [1]:
# Names of all prediction files in SR_Results/ that carry the biased-synthetic suffix.
sr_models = [
    entry
    for entry in os.listdir('SR_Results/')
    if entry.endswith('_biased_synthetic_predictions.csv')
]

sr_models

NameError: name 'os' is not defined

In [None]:
# Evaluate every SR prediction file at the baseline threshold and at each swept
# theta, then write one results table per file.
RESULTS_DIR = 'SR_Results'
ROC_RESULTS_DIR = os.path.join(RESULTS_DIR, 'SR_ROC_Results')
PREDICTIONS_SUFFIX = '_biased_synthetic_predictions.csv'
# THETAS[0] is evaluated as the baseline row and reported with Theta 'N/A';
# every other value is reported under its own theta.
THETAS = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
# Column order of the written CSV ('Theta' comes last).
RESULT_COLUMNS = ['Model', 'Data', 'FP', 'FN', 'TP', 'TN', 'Accuracy', 'Precision', 'Recall', 'F1', 'FPRD', 'FNRD', 'Discrimination', 'Theta']

# to_csv does not create missing directories.
os.makedirs(ROC_RESULTS_DIR, exist_ok=True)

for filename in sr_models:
    # os.path.join keeps the paths portable and consistent with the
    # os.listdir('SR_Results/') call that produced `filename`.
    df = pd.read_csv(os.path.join(RESULTS_DIR, filename))
    # Only the '@0.5' column is binarised here; the other '@<theta>' columns
    # are used as stored.
    df['@0.5'] = ( df['@0.5'] >= 0.5 ).astype( 'int32' )

    # Rows are collected as dicts and turned into a frame once. Chained writes
    # such as results[col].iloc[i] = value are not guaranteed to reach the
    # parent frame (and never do under pandas Copy-on-Write).
    rows = []
    for position, theta in enumerate(THETAS):
        accuracy, precision, recall, f1, cr, cm, _tnAfter, _fpAfter, _fnAfter, _tpAfter = evaluations(df, theta)
        scoresAfter, fnrdAfter, fprdAfter, pbrAfter = EERs(df, theta, wordslists, scoresAfterROC, tnoverall=_tnAfter, fpoverall=_fpAfter, fnoverall=_fnAfter, tpoverall=_tpAfter)
        rows.append({
            'Model': 'SR_'+filename,
            'Data': 'Biased',
            'FP': _fpAfter,
            'FN': _fnAfter,
            'TP': _tpAfter,
            'TN': _tnAfter,
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1': f1,
            'FPRD': fprdAfter,
            'FNRD': fnrdAfter,
            'Discrimination': Overall_Discrimination(df, theta),
            'Theta': 'N/A' if position == 0 else theta,
        })

    results = pd.DataFrame(rows, columns=RESULT_COLUMNS)

    output_name = 'SR_D_' + filename[:-len(PREDICTIONS_SUFFIX)] + '.csv'
    results.to_csv(os.path.join(ROC_RESULTS_DIR, output_name), index=False)