# Reproduction study - DECAF: Generating Fair Synthetic Data Using Causally-Aware Generative Networks

This notebook can be used to run the experiments as described in our report. Note that this notebook uses pretrained models.

In [27]:
from pyparsing import col
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import statistics
import pickle

from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.metrics import precision_score, recall_score, roc_auc_score
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier

from util import data, metrics, adult_data, credit_data
from model.DECAF import DECAF

## Experiment 1 - Debiasing Census Data

First, load the adult data, the DAG and the biased edges for different fairness definitions.

In [41]:
dfr, Xy, min_max_scaler = adult_data.load()
X = Xy[:, :-1].astype(float32)
y = np.round(Xy[:, -1]).astype(int)
dm = data.DataModule(Xy)

dag_seed = [
        [0, 6],
        [0, 12],
        [0, 14],
        [0, 1],
        [0, 5],
        [0, 3],
        [1, 14],
        [3, 6],
        [3, 12],
        [3, 14],
        [3, 1],
        [3, 7],
        [5, 6],
        [5, 12],
        [5, 14],
        [5, 1],
        [5, 7],
        [5, 3],
        [6, 14],
        [7, 14],
        [8, 6],
        [8, 14],
        [8, 12],
        [8, 3],
        [8, 5],
        [9, 6],
        [9, 5],
        [9, 14],
        [9, 12],
        [9, 1],
        [9, 3],
        [9, 7],
        [12, 14],
        [13, 5],
        [13, 12],
        [13, 3],
        [13, 1],
        [13, 14],
        [13, 7],
    ]
bias_dict_FTU = {14: [9]}
bias_dict_CF = {14: [7, 9, 5]}
bias_dict_DP = {14: [7, 13, 1, 9, 5, 12, 6]}

NameError: name 'float32' is not defined

In [None]:
DECAF_ND = {
    "precision": [],
    "recall": [],
    "auroc": [],
    "FTU": [],
    "DP": []
}
DECAF_FTU = {
    "precision": [],
    "recall": [],
    "auroc": [],
    "FTU": [],
    "DP": []
}
DECAF_CF = {
    "precision": [],
    "recall": [],
    "auroc": [],
    "FTU": [],
    "DP": []
}
DECAF_DP = {
    "precision": [],
    "recall": [],
    "auroc": [],
    "FTU": [],
    "DP": []
}

def calculate_scores(Xy_synth, fairness_type):
    ## Takes Xy_synthetic data (including label) of the adult data set and fairness type
    ## Calculates the precision, recall, auroc, FTU and DP metrics
    X_synth = Xy_synth[:, :14]
    print("nan in X_synth", np.isnan(X_synth).any())
    y_synth = np.round(Xy_synth[:, 14]).astype(int)
    print("nan in y_synth", np.isnan(y_synth).any())    
    
    synth_clf = MLPClassifier().fit(X_synth, y_synth)
    y_pred_synth = synth_clf.predict(X)
    print("nan in y_pred_synth", np.isnan(y_pred_synth).any())
    y_pred_synth_proba = synth_clf.predict_proba(X)
    print("nan in y_pred_synth_proba", np.isnan(y_pred_synth_proba).any())
    
    if fairness_type == "ND":
        DECAF_ND["precision"].append(precision_score(y, y_pred_synth))
        DECAF_ND["recall"].append(recall_score(y, y_pred_synth))
        DECAF_ND["auroc"].append(roc_auc_score(y, y_pred_synth_proba[:, 1]))
        DECAF_ND["FTU"].append(metrics.ftu(synth_clf, X_synth, 4))
        DECAF_ND["DP"].append(metrics.dp(synth_clf, X_synth, 4))
    elif fairness_type == "FTU":
        DECAF_FTU["precision"].append(precision_score(y, y_pred_synth))
        DECAF_FTU["recall"].append(recall_score(y, y_pred_synth))
        DECAF_FTU["auroc"].append(roc_auc_score(y, y_pred_synth_proba[:, 1]))
        DECAF_FTU["FTU"].append(metrics.ftu(synth_clf, X_synth, 4))
        DECAF_FTU["DP"].append(metrics.dp(synth_clf, X_synth, 4))
    elif fairness_type == "CF":
        DECAF_CF["precision"].append(precision_score(y, y_pred_synth))
        DECAF_CF["recall"].append(recall_score(y, y_pred_synth))
        DECAF_CF["auroc"].append(roc_auc_score(y, y_pred_synth_proba[:, 1]))
        DECAF_CF["FTU"].append(metrics.ftu(synth_clf, X_synth, 4))
        DECAF_CF["DP"].append(metrics.dp(synth_clf, X_synth, 4))    
    elif fairness_type == "DP":
        DECAF_DP["precision"].append(precision_score(y, y_pred_synth))
        print("precision done")
        DECAF_DP["recall"].append(recall_score(y, y_pred_synth))
        print("recall done")
        DECAF_DP["auroc"].append(roc_auc_score(y, y_pred_synth_proba[:, 1]))
        print("auroc done")
        DECAF_DP["FTU"].append(metrics.ftu(synth_clf, X_synth, 4))
        print("ftu done")
        DECAF_DP["DP"].append(metrics.dp(synth_clf, X_synth, 4))
        print("dp done")
    else:
        print("Warning: fairness_type not recognized")
    return

If you would like to generate the synthetic data from the pre-trained models yourself, run the cell below. This will take approximately 1 hour for the 10 models (UPDATE THIS NUMBER). Otherwise, run the second cell.

In [None]:
# Run if you want to generate synthetic data yourself. Otherwise, skip this cell.

# models = ["logs/DECAF_adult/version_0/checkpoints/epoch=49-step=21249.ckpt","logs/DECAF_adult/version_1/checkpoints/epoch=49-step=21249.ckpt" ]
models = ["logs/DECAF_adult/version_2/checkpoints/epoch=49-step=21249.ckpt", "logs/DECAF_adult/version_4/checkpoints/epoch=49-step=21249.ckpt"] # to be added
# only version 1 & 3 has error

data_all_models = [] # delete if file generated date already exists

for trained_model in models:
    model = DECAF(input_dim=dm.dims[0])    
    model = model.load_from_checkpoint(trained_model)
    
    synthetic_data = model.gen_synthetic(dm.dataset.x, gen_order=model.get_gen_order(), biased_edges={}).detach().numpy()
    calculate_scores(synthetic_data, fairness_type="ND")
    
    synthetic_data_FTU = model.gen_synthetic(dm.dataset.x, gen_order=model.get_gen_order(), biased_edges=bias_dict_FTU).detach().numpy()
    calculate_scores(synthetic_data_FTU, fairness_type="FTU")
    
    synthetic_data_CF = model.gen_synthetic(dm.dataset.x, gen_order=model.get_gen_order(), biased_edges=bias_dict_CF).detach().numpy()
    calculate_scores(synthetic_data_CF, fairness_type="CF")
    
    synthetic_data_DP = model.gen_synthetic(dm.dataset.x, gen_order=model.get_gen_order(), biased_edges=bias_dict_DP).detach().numpy()    
    calculate_scores(synthetic_data_DP, fairness_type="DP")
    
#     synthetic_data_to_store = {}        
#     synthetic_data_to_store["synthetic_data"] = synthetic_data
#     synthetic_data_to_store["synthetic_data_FTU"] = synthetic_data_FTU
#     synthetic_data_to_store["synthetic_data_CF"] = synthetic_data_CF
#     synthetic_data_to_store["synthetic_data_DP"] = synthetic_data_DP
    
#     data_all_models.append(synthetic_data_to_store)
    
# with open('synthetic_data.pkl', 'wb') as f:
#     pickle.dump(data_all_models, f)


Initialised adjacency matrix as parsed:
 Parameter containing:
tensor([[0., 1., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1.],
        [0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 1., 0., 1.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 1., 0., 1., 0.

In [41]:
# Run if you want to use pre-generated synthetic data

with open('synthetic_data.pkl', 'rb') as f:
    all_syn_data = pickle.load(f)

for synthetic_data_stored in all_syn_data:
    synthetic_data = synthetic_data_stored["synthetic_data"]
    calculate_scores(synthetic_data, fairness_type="ND")
    synthetic_data_FTU = synthetic_data_stored["synthetic_data_FTU"]
    calculate_scores(synthetic_data_FTU, fairness_type="FTU")
    synthetic_data_CF = synthetic_data_stored["synthetic_data_CF"]
    calculate_scores(synthetic_data_CF, fairness_type="CF")
    synthetic_data_DP = synthetic_data_stored["synthetic_data_DP"]
    calculate_scores(synthetic_data_DP, fairness_type="DP")


In [40]:
def calc_metrics(data):
    # Calculate mean and stdev of the data, returns a string
    mean = statistics.mean(data)
    stdev = statistics.stdev(data, mean)
    return "{:.3f}±{:.3f}".format(mean, stdev)
    
d = [ ["Original Data", 0 ,0,0,0,0],
     ["GAN", 0 ,0,0,0,0],
     ["WGAN-GP", 0 ,0,0,0,0],
     ["GAN-PR", 0 ,0,0,0,0],
     ["WGAN-GP-PR", 0 ,0,0,0,0],
     ["DECAF-ND", calc_metrics(DECAF_ND["precision"]), calc_metrics(DECAF_ND["recall"]), calc_metrics(DECAF_ND["auroc"]), calc_metrics(DECAF_ND["FTU"]), calc_metrics(DECAF_ND["DP"])],
     ["DECAF-FTU", calc_metrics(DECAF_FTU["precision"]), calc_metrics(DECAF_FTU["recall"]), calc_metrics(DECAF_FTU["auroc"]), calc_metrics(DECAF_FTU["FTU"]), calc_metrics(DECAF_FTU["DP"])],
     ["DECAF-CF", calc_metrics(DECAF_CF["precision"]), calc_metrics(DECAF_CF["recall"]), calc_metrics(DECAF_CF["auroc"]), calc_metrics(DECAF_CF["FTU"]), calc_metrics(DECAF_CF["DP"])],
     ["DECAF-DP", calc_metrics(DECAF_DP["precision"]), calc_metrics(DECAF_DP["recall"]), calc_metrics(DECAF_DP["auroc"]), calc_metrics(DECAF_DP["FTU"]), calc_metrics(DECAF_DP["DP"])]
]

df = pd.DataFrame(d, columns = ["Model", "Precision", "Recall", "AUROC", "FTU", "DP"])
df

TypeError: don't know how to coerce float32 and float64

## Experiment 2 - Fair Credit Approval

First, load the credit data, the DAG and the biased edges for different fairness definitions of the credit data.

In [5]:
X, y, dfr, Xy, min_max_scaler = credit_data.load(0.5)
dm = data.DataModule(Xy)

dag_seed = [[1, 7], [8, 1], [10, 2], [6, 1], [7, 10], [13, 2], [7, 2], [9, 10], [9, 15], [8, 7], [14, 15], [4, 9], [4, 3], [8, 15], [7, 11], [13, 11], [12, 9], [9, 8]]
bias_dict_FT = {15: [6]}
bias_dict_DP = {15: [6, 3]}

             male         age        debt     married  bankcustomer  \
count  678.000000  678.000000  678.000000  678.000000    678.000000   
mean     1.659292   31.568171    4.777625    2.213864      1.463127   
std      0.510302   11.957862    4.997240    0.476927      0.858168   
min      0.000000   13.750000    0.000000    0.000000      0.000000   
25%      1.000000   22.602500    1.000000    2.000000      1.000000   
50%      2.000000   28.460000    2.750000    2.000000      1.000000   
75%      2.000000   38.230000    7.437500    2.000000      1.000000   
90%      2.000000   48.524000   12.000000    3.000000      3.000000   
95%      2.000000   56.432000   14.075000    3.000000      3.000000   
99%      2.000000   68.785000   21.615000    3.000000      3.000000   
max      2.000000   80.250000   28.000000    3.000000      3.000000   

       educationlevel   ethnicity  yearsemployed  priordefault    employed  \
count      678.000000  678.000000     678.000000    678.000000  678.0