# Single Regularizer Analysis 

This notebook looks at the impacts of using a single non-convex, true definition fairneess regularizer (i.e. for both demographic parity and equalized odds) on on accuracy, precision, recall, demographic parity gap, and equalized odds gap. This is completed for two dasets: COMPAS Recidivism (Section 1) and Adult Income (Section 2).

This code can be used to recreate the graphs in section 5.3.1 of the report. 

## 0.0 Notebook Setup

In [1]:
import pandas as pd
import plotly
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

Uncomment this if opening in Google Drive.

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1.0 COMPAS Recidivism


### 1.1 Load Data

In [3]:
# Load data
dfc = pd.read_csv("/content/drive/MyDrive/Project/WB_Output/wandb_compas.csv")

# Rename columns 
dfc = dfc.rename( columns = {'alpha_0':'alpha_dp', 
                                         'alpha_1':'alpha_eo', 
                                         'train_dpd':'train_demographic_parity',  
                                         'train_edd':'train_equalized_odds',
                                         'val_dpd':'val_demographic_parity_gap',  
                                         'val_edd':'val_equalized_odds_gap', 
                                         'test_dpd':'test_demographic_parity',  
                                         'test_edd':'test_equalized_odds'})


In [4]:
# Get baseline with no regularization for future comparisons 
baseline = dfc[(dfc['alpha_dp'] == 0)& (dfc['alpha_eo'] == 0)]
baseline = baseline[['val_accuracy', 'val_demographic_parity_gap','val_equalized_odds_gap']]
baseline

Unnamed: 0,val_accuracy,val_demographic_parity_gap,val_equalized_odds_gap
99,0.653533,0.129877,0.155741


### 1.2 Regularizing for Demographic Parity 

In [5]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity_gap',  'val_equalized_odds_gap', 'val_accuracy', 'val_precision', 'val_recall']


# make figure
fig_train = go.Figure()

# trace colours
trace_colour = {'val_demographic_parity_gap' :'#636EFA',
 'val_equalized_odds_gap': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}

# set equalized odds alpha == 0 so we are isolating demographic parity regularizer 
subset = dfc[dfc["alpha_eo"] == 0]

# plot each metric
for col in train_cols:  
  fig_train.add_trace(go.Scatter(x=subset["alpha_dp"], y=subset[col], mode = 'lines+markers', name =col[4:], marker=dict(color=trace_colour[col])))

# update the figure and show
fig_train.update_xaxes(title_text=r"$\alpha_{Demographic Parity}$", type="log")
fig_train.update_yaxes(title_text=r"$\text{Metric}$")
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout(
    title=r"$\text{COMPAS Validation } \alpha_{Equalized Odds} = 0$")


fig_train.show()

### 1.3 Regularizing for Equalized Odds 

In [6]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity_gap',  'val_equalized_odds_gap', 'val_accuracy', 'val_precision', 'val_recall']

# make figure
fig_train = go.Figure()

# trace colours
trace_colour = {'val_demographic_parity_gap' :'#636EFA',
 'val_equalized_odds_gap': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}


# set demographic parity alpha == 0 so we are isolating equalized odds regularizer 
subset = dfc[dfc["alpha_dp"] == 0]

# plot each metric
for col in train_cols:  
  fig_train.add_trace(go.Scatter(x=subset["alpha_eo"], y=subset[col], mode = 'lines+markers', name =col[4:], marker=dict(color=trace_colour[col])))

# update the figrue and show
fig_train.update_xaxes(title_text=r"$\alpha_{Equalized Odds}$", type="log")
fig_train.update_yaxes(title_text=r"$\text{Metric}$")
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout(
    title=r"$\text{COMPAS Validation } \alpha_{Demographic Parity} = 0$")


fig_train.show()

 ### 1.4 Combination View
 
 Validation DP, EO, Accuracy, Precision, and Recall Graphs 

In [7]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity_gap',  'val_equalized_odds_gap', 'val_accuracy', 'val_precision', 'val_recall']
alpha = [0,1,10,100]

# subplots
fig_train = make_subplots(rows=2, cols=2, subplot_titles=("validation alpha_dp = 0", "validation alpha_dp = 1", "validation alpha_dp = 10", "validation alpha_dp = 100",  "validation alpha_dp = 1000")) #, "blank"))
plots = {0:[1,1], 1:[1,2], 10:[2,1], 100:[2,2]}

# trace colours 
trace_colour = {'val_demographic_parity_gap' :'#636EFA',
 'val_equalized_odds_gap': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}

# to determine which subplot
row, cols = 0, 0

# loop through each alpha
for a in alpha:
  # subset the dataset for that alpha and location on subplot
  subset = dfc[dfc["alpha_dp"] == a]
  spot = plots[a] 

  # for each metric we want to plot
  for col in train_cols:  
    # add trace
    fig_train.add_trace(go.Scatter(x=subset["alpha_eo"], y=subset[col], mode = 'lines+markers', name = col[3:], marker=dict(color=trace_colour[col]
                      
                    )),  row=spot[0], col=spot[1])
  # update axes
  fig_train.update_xaxes(title_text="log alpha_eo", type="log",  row=spot[0], col=spot[1])
  fig_train.update_yaxes(title_text="Metric", row=spot[0], col=spot[1])

# show plots
fig_train['layout'].update(height=800, width=1000) 
fig_train.show()

### 1.5 DP Tradoeff 

In [8]:
alpha = [200, 150, 100,  50,  40,  30,  20,  10,   1,   0]
fig_train = go.Figure()

# single regularizer 
subset = dfc[dfc["alpha_eo"] ==0]

# store X and Y values for each alpha to plot
Y, X = [], []
for a in alpha:
  new_subset = subset[subset["alpha_dp"] == a]

  Y.append((new_subset["val_demographic_parity_gap"].values[0]-baseline["val_demographic_parity_gap"].values[0])/baseline["val_demographic_parity_gap"].values[0])
  X.append((new_subset["val_accuracy"].values[0]-baseline["val_accuracy"].values[0])/baseline["val_accuracy"].values[0])


# plot results
fig_train.add_trace(go.Scatter(y=Y, x=X, mode = 'markers+text', text=alpha, marker=dict(
        
        cmax=np.log(200),
        cmin=1,
        color=np.log(alpha),
       
        colorbar=dict(
            title=r'$\alpha_$', 
            ticktext = alpha, 
            tickvals = np.log(alpha)
        ),
        colorscale="Bluered"
    ),))   #trace_colour[a] ,

# update figure and plot
fig_train.update_xaxes(title_text= "Accuracy")
fig_train.update_yaxes(title_text= "Demographic Parity")
fig_train.update_traces(textposition='bottom center') #text=alpha,
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout( 
    legend_title_text=r'$\alpha_ \text{values}$',
    title=r"$\text{COMPAS Validation } \alpha_{Equalized Odds} = 0$")

fig_train.show()


divide by zero encountered in log


divide by zero encountered in log



### 1.6 EO Tradeoff

In [9]:
alpha = [200, 150, 100,  50,  40,  30,  20,  10,   1,   0]
fig_train = go.Figure() 

# single regularizer 
subset = dfc[dfc["alpha_dp"] ==0]

# store X and Y values for each alpha to plot
Y, X = [], []
for a in alpha:
  new_subset = subset[subset["alpha_eo"] == a]

  Y.append((new_subset["val_equalized_odds_gap"].values[0]-baseline["val_equalized_odds_gap"].values[0])/baseline["val_equalized_odds_gap"].values[0])
  X.append((new_subset["val_accuracy"].values[0]-baseline["val_accuracy"].values[0])/baseline["val_accuracy"].values[0])

# plot

fig_train.add_trace(go.Scatter(y=Y, x=X, mode = 'markers+text',  text=alpha, marker=dict(
        
        cmax=np.log(200),
        cmin=1,
        color=np.log(alpha),
       
        colorbar=dict(
            title=r'$\alpha_$', 
            ticktext = alpha, 
            tickvals = np.log(alpha)
        ),
        colorscale="Bluered"
    ),))   #trace_colour[a] ,

# update figure and plot
fig_train.update_xaxes(title_text= "Accuracy")
fig_train.update_yaxes(title_text= "Equalized Odds")
fig_train.update_traces(textposition='bottom center') 
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout( 
    legend_title_text=r'$\alpha_ \text{values}$',
    title=r"$\text{COMPAS Validation } \alpha_{Demographic Parity} = 0$")

  
fig_train.show()


divide by zero encountered in log


divide by zero encountered in log



## 2.0 Adult Income (Balanced)


### 2.1 Load Data

In [10]:
# Load data
dfa = pd.read_csv("/content/drive/MyDrive/Project/WB_Output/wandb_adult.csv")

# Rename columns 
dfa = dfa.rename( columns = {'alpha_0':'alpha_dp', 
                                         'alpha_1':'alpha_eo', 
                                         'train_dpd':'train_demographic_parity',  
                                         'train_edd':'train_equalized_odds',
                                         'val_dpd':'val_demographic_parity_gap',  
                                         'val_edd':'val_equalized_odds_gap', 
                                         'test_dpd':'test_demographic_parity',  
                                         'test_edd':'test_equalized_odds'})


In [11]:
# Get baseline with no regularization for future comparisons 
baseline = dfa[(dfa['alpha_dp'] == 0)& (dfa['alpha_eo'] == 0)]
baseline = baseline[['val_accuracy', 'val_demographic_parity_gap','val_equalized_odds_gap']]
baseline

Unnamed: 0,val_accuracy,val_demographic_parity_gap,val_equalized_odds_gap
99,0.590369,0.007384,0.105257


### 2.2 Regularizing for Demographic Parity 

In [12]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity_gap',  'val_equalized_odds_gap', 'val_accuracy', 'val_precision', 'val_recall']


# make figure
fig_train = go.Figure()

# trace colours
trace_colour = {'val_demographic_parity_gap' :'#636EFA',
 'val_equalized_odds_gap': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}

# set equalized odds alpha == 0 so we are isolating demographic parity regularizer 
subset = dfa[dfa["alpha_eo"] == 0]

# plot each metric
for col in train_cols:  
  fig_train.add_trace(go.Scatter(x=subset["alpha_dp"], y=subset[col], mode = 'lines+markers', name =col[4:], marker=dict(color=trace_colour[col])))

# update the figure and show
fig_train.update_xaxes(title_text=r"$\alpha_{Demographic Parity}$", type="log")
fig_train.update_yaxes(title_text=r"$\text{Metric}$")
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout(
    title=r"$\text{ADULT Validation } \alpha_{Equalized Odds} = 0$")


fig_train.show()

### 2.3 Regularizing for Equalized Odds 

In [13]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity_gap',  'val_equalized_odds_gap', 'val_accuracy', 'val_precision', 'val_recall']

# make figure
fig_train = go.Figure()

# trace colours
trace_colour = {'val_demographic_parity_gap' :'#636EFA',
 'val_equalized_odds_gap': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}


# set demographic parity alpha == 0 so we are isolating equalized odds regularizer 
subset = dfa[dfa["alpha_dp"] == 0]

# plot each metric
for col in train_cols:  
  fig_train.add_trace(go.Scatter(x=subset["alpha_eo"], y=subset[col], mode = 'lines+markers', name =col[4:], marker=dict(color=trace_colour[col])))

# update the figrue and show
fig_train.update_xaxes(title_text=r"$\alpha_{Equalized Odds}$", type="log")
fig_train.update_yaxes(title_text=r"$\text{Metric}$")
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout(
    title=r"$\text{ADULT Validation } \alpha_{Demographic Parity} = 0$")


fig_train.show()

 ### 2.4 Combination View
 
 Validation DP, EO, Accuracy, Precision, and Recall Graphs 

In [14]:
# columns and alphas to plot
train_cols = [ 'val_demographic_parity_gap',  'val_equalized_odds_gap', 'val_accuracy', 'val_precision', 'val_recall']
alpha = [0,1,10,100]

# subplots
fig_train = make_subplots(rows=2, cols=2, subplot_titles=("validation alpha_dp = 0", "validation alpha_dp = 1", "validation alpha_dp = 10", "validation alpha_dp = 100",  "validation alpha_dp = 1000")) #, "blank"))
plots = {0:[1,1], 1:[1,2], 10:[2,1], 100:[2,2]}

# trace colours 
trace_colour = {'val_demographic_parity_gap' :'#636EFA',
 'val_equalized_odds_gap': '#EF553B',
 'val_accuracy': '#00CC96',
  'val_precision' :'#AB63FA',
 'val_recall':'#FFA15A',}

# to determine which subplot
row, cols = 0, 0

# loop through each alpha
for a in alpha:
  # subset the dataset for that alpha and location on subplot
  subset = dfa[dfa["alpha_dp"] == a]
  spot = plots[a] 

  # for each metric we want to plot
  for col in train_cols:  
    # add trace
    fig_train.add_trace(go.Scatter(x=subset["alpha_eo"], y=subset[col], mode = 'lines+markers', name = col[3:], marker=dict(color=trace_colour[col]
                      
                    )),  row=spot[0], col=spot[1])
  # update axes
  fig_train.update_xaxes(title_text="log alpha_eo", type="log",  row=spot[0], col=spot[1])
  fig_train.update_yaxes(title_text="Metric", row=spot[0], col=spot[1])

# show plots
fig_train['layout'].update(height=800, width=1000) 
fig_train.show()

### 2.5 DP Tradoeff 

In [15]:
alpha = [200, 150, 100,  50,  40,  30,  20,  10,   1,   0]
fig_train = go.Figure()

# single regularizer 
subset = dfa[dfa["alpha_eo"] ==0]

# store X and Y values for each alpha to plot
Y, X = [], []
for a in alpha:
  new_subset = subset[subset["alpha_dp"] == a]

  Y.append((new_subset["val_demographic_parity_gap"].values[0]-baseline["val_demographic_parity_gap"].values[0])/baseline["val_demographic_parity_gap"].values[0])
  X.append((new_subset["val_accuracy"].values[0]-baseline["val_accuracy"].values[0])/baseline["val_accuracy"].values[0])


# plot results
fig_train.add_trace(go.Scatter(y=Y, x=X, mode = 'markers+text', text=alpha, marker=dict(
        
        cmax=np.log(200),
        cmin=1,
        color=np.log(alpha),
       
        colorbar=dict(
            title=r'$\alpha_$', 
            ticktext = alpha, 
            tickvals = np.log(alpha)
        ),
        colorscale="Bluered"
    ),))   #trace_colour[a] ,

# update figure and plot
fig_train.update_xaxes(title_text= "Accuracy")
fig_train.update_yaxes(title_text= "Demographic Parity")
fig_train.update_traces(textposition='bottom center') #text=alpha,
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout( 
    legend_title_text=r'$\alpha_ \text{values}$',
    title=r"$\text{ADULT Validation } \alpha_{Equalized Odds} = 0$")

fig_train.show()


divide by zero encountered in log


divide by zero encountered in log



### 2.6 EO Tradoeff 

In [16]:
alpha = [200, 150, 100,  50,  40,  30,  20,  10,   1,   0]
fig_train = go.Figure() 

# single regularizer 
subset = dfa[dfa["alpha_dp"] ==0]

# store X and Y values for each alpha to plot
Y, X = [], []
for a in alpha:
  new_subset = subset[subset["alpha_eo"] == a]

  Y.append((new_subset["val_equalized_odds_gap"].values[0]-baseline["val_equalized_odds_gap"].values[0])/baseline["val_equalized_odds_gap"].values[0])
  X.append((new_subset["val_accuracy"].values[0]-baseline["val_accuracy"].values[0])/baseline["val_accuracy"].values[0])

# plot

fig_train.add_trace(go.Scatter(y=Y, x=X, mode = 'markers+text',  text=alpha, marker=dict(
        
        cmax=np.log(200),
        cmin=1,
        color=np.log(alpha),
       
        colorbar=dict(
            title=r'$\alpha_$', 
            ticktext = alpha, 
            tickvals = np.log(alpha)
        ),
        colorscale="Bluered"
    ),))   #trace_colour[a] ,

# update figure and plot
fig_train.update_xaxes(title_text= "Accuracy")
fig_train.update_yaxes(title_text= "Equalized Odds")
fig_train.update_traces(textposition='bottom center') 
fig_train['layout'].update(height=400, width=600)
fig_train.update_layout( 
    legend_title_text=r'$\alpha_ \text{values}$',
    title=r"$\text{ADULT Validation } \alpha_{Demographic Parity} = 0$")

  
fig_train.show()


divide by zero encountered in log


divide by zero encountered in log

