In [25]:
# Import libraries

import pandas as pd
import numpy as np

import warnings
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): a blanket 'ignore' filter also hides warnings raised by pandas
# in the cells below -- confirm this is intended rather than a narrower filter.
warnings.filterwarnings('ignore')

In [2]:
# Connect to Google Drive (to download raw data, upload clean data)
# NOTE(review): this cell imports google.colab, so it presumably only runs inside
# a Google Colab runtime -- confirm before running elsewhere.

# Install PyDrive quietly (-q), upgrading it if already present (-U).
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.

# Run Colab's user authentication first; the application-default credentials
# looked up below presumably depend on it -- TODO confirm.
auth.authenticate_user()
gauth = GoogleAuth()
# Attach the application-default credentials to the PyDrive auth object.
gauth.credentials = GoogleCredentials.get_application_default()
# `drive` is the client the download cell uses to fetch files by Drive id.
drive = GoogleDrive(gauth)

In [44]:
# Fetch Alice's Hiring Prediction dataset from Google Drive.
# (This file is already augmented with predictions.)

# Local dataset name -> Google Drive file id.
csv_files = {
  'file0_TEST': '1RDwpiO5NvUUf6GApzvm0lDsOyL4pAxSd',
}

# Parsed DataFrames, keyed by the same names as csv_files.
dfs = {}

for name, file_id in csv_files.items():
  csv_name = f"{name}.csv"

  # Copy the Drive file into the local working directory.
  downloaded = drive.CreateFile({'id': file_id})
  downloaded.GetContentFile(csv_name)

  # The file is parsed as space-separated with no header row, so pandas labels
  # the columns with integers; they are given names in the next cell.
  # NOTE(review): no index_col is passed, so pandas builds a fresh default index
  # rather than reusing one stored in the file -- confirm that is intended.
  dfs[name] = pd.read_csv(csv_name, sep=" ", header=None, low_memory=False)
  print("Saved: ", name, "\n")

f0_test = dfs['file0_TEST']


Saved:  file0_TEST 



In [45]:
# Replace the integer column labels with readable names, then preview the
# first rows as a sanity check.
# NOTE(review): the meaning of each column is not documented in this notebook;
# later cells treat A as gender, Q as qualifications and Y as the hiring
# outcome, and Y_hat is presumably the model prediction -- confirm.
column_names = ['A', 'M', 'D', 'Q', 'Y', 'M0', 'D0', 'D1', 'Y_hat']

# Position i in the list above becomes the new name of integer-labelled column i.
f0_test = f0_test.rename(columns=dict(enumerate(column_names)))

f0_test.head()

Unnamed: 0,A,M,D,Q,Y,M0,D0,D1,Y_hat
0,1,4.499319,2.0,5.0,1.0,1.499319,1.0,2.0,1
1,0,4.019354,4.0,4.0,0.0,4.019354,4.0,5.0,1
2,0,7.04825,4.0,6.0,1.0,7.04825,4.0,5.0,1
3,1,6.038554,1.0,3.0,1.0,3.038554,0.0,1.0,1
4,1,7.842121,6.0,9.0,1.0,4.842121,5.0,6.0,1


In [49]:
# Hiring rate per stratum, used to evaluate counterfactual fairness.
# By default the strata are the levels of A only; pass group_cols=['A', 'Q']
# to stratify at all levels of Q and A.

def stratifed_hiring_rates(df, group_cols=('A',), outcome='Y'):
  """Print the percentage of positive outcomes within each stratum of `df`.

  Args:
    df: DataFrame holding the grouping column(s) and the outcome column.
    group_cols: a column name or a sequence of column names to stratify by.
      Defaults to ('A',), which reproduces the original hard-coded grouping.
    outcome: name of the outcome column to aggregate. Defaults to 'Y'.

  Returns:
    None. The per-stratum rates are printed, not returned.

  Raises:
    KeyError: if a grouping column or the outcome column is not in `df`.

  The rate is 100 * sum(outcome) / count(outcome) per stratum, so it is only a
  percentage when the outcome is coded 0/1. `count` skips missing values, so
  rows with a missing outcome do not enter the denominator.
  """
  # Accept a bare column name as well as a sequence of names.
  keys = [group_cols] if isinstance(group_cols, str) else list(group_cols)

  # Fail early with a message that names every missing column.
  missing = [col for col in keys + [outcome] if col not in df.columns]
  if missing:
    raise KeyError(f"columns not found in DataFrame: {missing}")

  strat = df.groupby(keys)[outcome].agg(['sum', 'count'])
  strat['rate'] = 100.0 * strat['sum'] / strat['count']
  print("Hiring rate \n", strat['rate'])
  return


In [50]:
# Print a legend for the table, then the hiring rates themselves.
# NOTE(review): the legend describes A as 1/2 and Q as 1/2 and mentions
# qualifications, but the rows previewed by f0_test.head() show A coded 0/1 and
# Q taking values such as 3-9, and the call below stratifies by A only.
# Confirm the encoding and bring the legend in line with it.
legend = (
    "Hiring rate by gender and qualifications: \n\n"
    "Female: A=1; Male: A=2 \n"
    "Unqualified: Q=1: Qualified: Q=2:"
)
print(legend)
stratifed_hiring_rates(f0_test)

Hiring rate by gender and qualifications: 

Female: A=1; Male: A=2 
Unqualified: Q=1: Qualified: Q=2:


FACTUAL
Hiring rate 
 A
0    35.096154
1    71.404110
Name: rate, dtype: float64
