In [1]:
# imports
import pandas as pd
import plotly
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import linear_model, metrics, preprocessing
from datetime import datetime, timedelta
from sklearn.linear_model import LogisticRegression
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1.0 Read Data and Rename Colums

In [6]:
# read in data to plot
#df = pd.read_csv("/content/drive/MyDrive/Project/WB_Output/wandb_compas_good.csv")
df = pd.read_csv("/content/drive/MyDrive/Project/WB_Output/wandb_adult_dp.csv")

# rename columns for plotting
df = df.rename( columns = {'alpha_0':'alpha_dp', 
                                         'alpha_1':'alpha_eo', 
                                         'train_dpd':'train_demographic_parity',  
                                         'train_edd':'train_equalized_odds',
                                         'val_dpd':'val_demographic_parity_gap',  
                                         'val_edd':'val_equalized_odds_gap', 
                                         'test_dpd':'test_demographic_parity',  
                                         'test_edd':'test_equalized_odds'})

## 2.0 Train DP, EO, Accuracy, Precision, and Recall Graphs | Demographic Parity 

In [3]:
# columns and alphas to plot
train_cols = [ 'train_demographic_parity',  'train_equalized_odds', 'train_accuracy', 'train_precision', 'train_recall']
alpha = [0,1,10,100,1000]

# subplots
fig_train = make_subplots(rows=2, cols=3, subplot_titles=("train alpha_eo = 0", "train alpha_eo = 1", "train alpha_eo = 10", "train alpha_eo = 100",  "train alpha_eo = 1000")) #, "blank"))
plots = {0:[1,1], 1:[1,2], 10:[1,3], 100:[2,1],  1000:[2,2]}

# trace colours
trace_colour = {'train_demographic_parity' :'#636EFA',
                'train_equalized_odds': '#EF553B',
                'train_accuracy': '#00CC96',
                'train_precision' :'#AB63FA',
                'train_recall':'#FFA15A',}

# to determine which subplot
row, cols = 0, 0

# loop through each alpha
for a in alpha:
  # subset the dataset for that alpha and location on subplot
  subset = df[df["alpha_eo"] == a]
  spot = plots[a] 

  # for each metric we want to plot
  for col in train_cols:
    # add trace  
    fig_train.add_trace(go.Scatter(x=subset["alpha_dp"], y=subset[col], mode = 'lines+markers', name = col[6:], marker=dict(color=trace_colour[col]
                      
                    )),  row=spot[0], col=spot[1])
  # update axes
  fig_train.update_xaxes(title_text="log alpha_dp", type="log",  row=spot[0], col=spot[1])
  fig_train.update_yaxes(title_text="Metric", row=spot[0], col=spot[1])

# show the figure
fig_train['layout'].update(height=800, width=1000)
fig_train.show()

## 3.0 Train DP, EO, Accuracy, Precision, and Recall Graphs | Equalized Odds

In [None]:
# columns and alphas to plot
train_cols = [ 'train_demographic_parity',  'train_equalized_odds', 'train_accuracy', 'train_precision', 'train_recall']
alpha = [0,1,10,100,1000]

# subplots
fig_train = make_subplots(rows=2, cols=3, subplot_titles=("train alpha_dp = 0", "train alpha_dp = 1", "train alpha_dp = 10", "train alpha_dp = 100",  "train alpha_dp = 1000")) #, "blank"))
plots = {0:[1,1], 1:[1,2], 10:[1,3], 100:[2,1],  1000:[2,2]}

# trace colours
trace_colour = {'train_demographic_parity' :'#636EFA',
                'train_equalized_odds': '#EF553B',
                'train_accuracy': '#00CC96',
                'train_precision' :'#AB63FA',
                'train_recall':'#FFA15A',}


# to determine which subplot
row, cols = 0, 0

# loop through each alpha
for a in alpha:
  # subset the dataset for that alpha and location on subplot
  subset = df[df["alpha_dp"] == a]
  spot = plots[a] 

  # for each metric we want to plot
  for col in train_cols: 
    # add trace 
    fig_train.add_trace(go.Scatter(x=subset["alpha_eo"], y=subset[col], mode = 'lines+markers', name = col[6:], marker=dict(color=trace_colour[col]
                      
                    )),  row=spot[0], col=spot[1])
  # udpates axes
  fig_train.update_xaxes(title_text="log alpha_eo", type="log",  row=spot[0], col=spot[1])
  fig_train.update_yaxes(title_text="Metric", row=spot[0], col=spot[1])

# show plots
fig_train['layout'].update(height=800, width=1000)
fig_train.show()

 ## 4.0 Validation DP, EO, Accuracy, Precision, and Recall Graphs | Equalized Odds

In [None]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity_gap',  'val_equalized_odds_gap', 'val_accuracy', 'val_precision', 'val_recall']
alpha = [0,1,10,100,1000]

# subplots
fig_train = make_subplots(rows=2, cols=3, subplot_titles=("validation alpha_dp = 0", "validation alpha_dp = 1", "validation alpha_dp = 10", "validation alpha_dp = 100",  "validation alpha_dp = 1000")) #, "blank"))
plots = {0:[1,1], 1:[1,2], 10:[1,3], 100:[2,1],  1000:[2,2]}

# trace colours 
trace_colour = {'val_demographic_parity_gap' :'#636EFA',
 'val_equalized_odds_gap': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}

# to determine which subplot
row, cols = 0, 0

# loop through each alpha
for a in alpha:
  # subset the dataset for that alpha and location on subplot
  subset = df[df["alpha_dp"] == a]
  spot = plots[a] 

  # for each metric we want to plot
  for col in train_cols:  
    # add trace
    fig_train.add_trace(go.Scatter(x=subset["alpha_eo"], y=subset[col], mode = 'lines+markers', name = col[3:], marker=dict(color=trace_colour[col]
                      
                    )),  row=spot[0], col=spot[1])
  # update axes
  fig_train.update_xaxes(title_text="log alpha_eo", type="log",  row=spot[0], col=spot[1])
  fig_train.update_yaxes(title_text="Metric", row=spot[0], col=spot[1])

# show plots
fig_train['layout'].update(height=800, width=1000) 
fig_train.show()

## 5.0  Validation DP, EO, Accuracy, Precision, and Recall Graphs | Demographic Parity

In [None]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity',  'val_equalized_odds', 'val_accuracy', 'val_precision', 'val_recall']
alpha = [0,1,10,100,1000]

# subplots
fig_train = make_subplots(rows=2, cols=3, subplot_titles=("validation alpha_eo = 0", "validation alpha_eo = 1", "validation alpha_eo = 10", "validation alpha_eo = 100",  "validation alpha_eo = 1000")) #, "blank"))
plots = {0:[1,1], 1:[1,2], 10:[1,3], 100:[2,1],  1000:[2,2]}

# trace colour
trace_colour = {'val_demographic_parity' :'#636EFA',
 'val_equalized_odds': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}

# to determine which subplot
row, cols = 0, 0

# loop through each alpha
for a in alpha:
  # subset the dataset for that alpha and location on subplot
  subset = df[df["alpha_eo"] == a]
  spot = plots[a] 

  # for each metric we want to plot
  for col in train_cols: 
    # add trace 
    fig_train.add_trace(go.Scatter(x=subset["alpha_dp"], y=subset[col], mode = 'lines+markers', name = col[3:], marker=dict(color=trace_colour[col]
                      
                    )),  row=spot[0], col=spot[1])
  # update axes
  fig_train.update_xaxes(title_text="log alpha_dp", type="log",  row=spot[0], col=spot[1])
  fig_train.update_yaxes(title_text="Metric", row=spot[0], col=spot[1])

# show plots
fig_train['layout'].update(height=800, width=1000)  
fig_train.show()

KeyError: ignored

 ## 6.0 Test DP, EO, Accuracy, Precision, and Recall Graphs | Demographic Parity

In [None]:
# columns and alphas to plot
train_cols = [ 'test_demographic_parity',  'test_equalized_odds', 'test_accuracy', 'test_precision', 'test_recall']
alpha = [0,1,10,100,1000]

# subplots
fig_train = make_subplots(rows=2, cols=3, subplot_titles=("test alpha_dp = 0", "test alpha_dp = 1", "test alpha_dp = 10", "test alpha_dp = 100",  "test alpha_dp = 1000")) #, "blank"))
plots = {0:[1,1], 1:[1,2], 10:[1,3], 100:[2,1],  1000:[2,2]}

# trace colour
trace_colour = {'test_demographic_parity' :'#636EFA',
 'test_equalized_odds': '#EF553B',
 'test_accuracy': '#00CC96',
  'test_precision' :'#AB63FA',
 'test_recall':'#FFA15A',}

# to determine which subplot
row, cols = 0, 0

# loop through each alpha
for a in alpha:
  # subset the dataset for that alpha and location on subplot
  subset = df[df["alpha_dp"] == a]
  spot = plots[a] 

  # for each metric we want to plot
  for col in train_cols:  
    # add trace
    fig_train.add_trace(go.Scatter(x=subset["alpha_eo"], y=subset[col], mode = 'lines+markers', name = col[5:], marker=dict(color=trace_colour[col]
                      
                    )),  row=spot[0], col=spot[1])
  # updates axes
  fig_train.update_xaxes(title_text="log alpha_eo", type="log",  row=spot[0], col=spot[1])
  fig_train.update_yaxes(title_text="Metric", row=spot[0], col=spot[1])

# show plots
fig_train['layout'].update(height=800, width=1000)
fig_train.show()

## 7.0  Test DP, EO, Accuracy, Precision, and Recall Graphs | Equalized Odds

In [None]:
# columns and alphas to plot
train_cols = [ 'test_demographic_parity',  'test_equalized_odds', 'test_accuracy', 'test_precision', 'test_recall']
alpha = [0,1,10,100,1000]

# subplots
fig_train = make_subplots(rows=2, cols=3, subplot_titles=("test alpha_eo = 0", "test alpha_eo = 1", "test alpha_eo = 10", "test alpha_eo = 100",  "test alpha_eo = 1000")) #, "blank"))
plots = {0:[1,1], 1:[1,2], 10:[1,3], 100:[2,1],  1000:[2,2]}

# trace colours
trace_colour = {'test_demographic_parity' :'#636EFA',
 'test_equalized_odds': '#EF553B',
 'test_accuracy': '#00CC96',
  'test_precision' :'#AB63FA',
 'test_recall':'#FFA15A',}

# to determine which subplots
row, cols = 0, 0

# loop through each alpha
for a in alpha:
  # subset the dataset for that alpha and location on subplot
  subset = df[df["alpha_eo"] == a]
  spot = plots[a] 

  # for each metric we want to plot
  for col in train_cols:  
    # add trace
    fig_train.add_trace(go.Scatter(x=subset["alpha_dp"], y=subset[col], mode = 'lines+markers', name = col[5:], text = subset[col], textposition="bottom center", marker=dict(color=trace_colour[col]
                      
                    )),  row=spot[0], col=spot[1])
  # updates axes
  fig_train.update_xaxes(title_text="log alpha_dp", type="log",  row=spot[0], col=spot[1])
  fig_train.update_yaxes(title_text="Metric", row=spot[0], col=spot[1])

# show figure
fig_train['layout'].update(height=800, width=1000)
fig_train.show()

# 8.0 Validation Single Fairness Regularizers | Demographic Parity

In [7]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity_gap',  'val_equalized_odds_gap', 'val_accuracy', 'val_precision', 'val_recall']


# make figure
fig_train = go.Figure()

# trace colours
trace_colour = {'val_demographic_parity_gap' :'#636EFA',
 'val_equalized_odds_gap': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}


# only care for alpha == 0
subset = df[df["alpha_eo"] == 0]

 
# plot each metric
for col in train_cols:  
  fig_train.add_trace(go.Scatter(x=subset["alpha_dp"], y=subset[col], mode = 'lines+markers', name =col[4:], marker=dict(color=trace_colour[col])))

# update the figure and show
fig_train.update_xaxes(title_text=r"$\alpha_{Demographic Parity}$", type="log")
fig_train.update_yaxes(title_text=r"$\text{Metric}$")
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout(
    title=r"$\text{ADULT Validation } \alpha_{Equalized Odds} = 0$")


fig_train.show()

# 9.0 Validation Single Fairness Regularizers | Equalized Odds

In [9]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity_gap',  'val_equalized_odds_gap', 'val_accuracy', 'val_precision', 'val_recall']

# make figure
fig_train = go.Figure()

# trace colours
trace_colour = {'val_demographic_parity_gap' :'#636EFA',
 'val_equalized_odds_gap': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}



# only care about alpha = 0
subset = df[df["alpha_dp"] == 0]

# plot each metric
for col in train_cols:  
  fig_train.add_trace(go.Scatter(x=subset["alpha_eo"], y=subset[col], mode = 'lines+markers', name =col[4:], marker=dict(color=trace_colour[col])))

# update the figrue and show
fig_train.update_xaxes(title_text=r"$\alpha_{Equalized Odds}$", type="log")
fig_train.update_yaxes(title_text=r"$\text{Metric}$")
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout(
    title=r"$\text{ADULT Validation } \alpha_{Demographic Parity} = 0$")



fig_train.show()

## 10 Accuracy vs. Fairness Tradeoff for Presentation

In [None]:
fig_train = go.Figure()
fig_train.update_xaxes(title_text="Accuracy")
fig_train.update_yaxes(title_text="Fairness", )
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout(
    title="Accuracy vs. Fairness Tradeoff", yaxis_range=[0,1], xaxis_range=[0,1])