In [1]:
import numpy as np
from dataLoad import dataset_from_name
from optimizer.linear import Optimal
import itertools
from matplotlib import pyplot as plt
from tqdm import tqdm_notebook
import pickle

## Auxiliary functions

In [2]:
def learn(X, y):
    """Tabulate empirical cell frequencies of binary rows and a per-cell label ratio.

    Parameters
    ----------
    X : ndarray of shape (N, n)
        Integer array with entries in {0, 1}; every row is used directly as an
        index into a table of shape ``(2,) * n``.
    y : ndarray of shape (N, 1)
        Label column. Rows whose (last) label entry equals 1 are the positives.

    Returns
    -------
    p : ndarray of shape ``(2,) * n``
        Fraction of the N rows that fall into each cell.
    py : ndarray of shape ``(2,) * n``
        Fraction of the N rows that fall into each cell *and* are positive,
        divided by ``p + 10e-9``. Cells never observed therefore yield 0
        instead of a division by zero.
    """
    N = X.shape[0]

    def get_counts(rows):
        """Count how many rows land in each cell of the 2 x ... x 2 table."""
        counts = np.zeros((2,) * rows.shape[1])
        # Unbuffered scatter-add: repeated index tuples are each counted once,
        # exactly like incrementing the cell row by row.
        np.add.at(counts, tuple(rows.T), 1)
        return counts

    positive_rows = X[y[:, -1] == 1]
    p = get_counts(X) / N
    # NOTE: 10e-9 equals 1e-8 (not 1e-9); value kept unchanged so results match.
    py = get_counts(positive_rows) / N / (p + 10e-9)
    return p, py

def get_fairness(optimizer, alpha3, fairness_def):
    """Sweep a 5 x 5 grid of (alpha1, alpha2) values and collect optimizer results.

    ``alpha1`` takes 5 evenly spaced values in ``[alpha3, 1]``; for each of
    them ``alpha2`` takes 5 evenly spaced values in ``[alpha3, alpha1]``.
    Every pair is passed, together with ``alpha3``, to
    ``optimizer.minimize_violation(..., fairness_def=fairness_def, num_stage=3)``,
    which is expected to return a 3-tuple ``(gf, lf, volf)``.

    Returns
    -------
    ratios : ndarray
        Element-wise ``gf / lf`` over the 25 grid points, in sweep order.
    volfs : list
        The third returned value for each grid point, in sweep order.
    """
    alpha_pairs = (
        (a1, a2)
        for a1 in np.linspace(alpha3, 1, 5)
        for a2 in np.linspace(alpha3, a1, 5)
    )

    numerators, denominators, third_values = [], [], []
    for a1, a2 in alpha_pairs:
        gf, lf, volf = optimizer.minimize_violation(
            [a1, a2, alpha3], fairness_def=fairness_def, num_stage=3
        )
        numerators.append(gf)
        denominators.append(lf)
        third_values.append(volf)

    ratios = np.array(numerators) / np.array(denominators)
    return ratios, third_values

## Simulation procedure

Choose dataset_name:
* adult
* compas
* german

and fairness_def $\in$ {dp, eo}

In [3]:
dataset_name = "german"
fairness_def = "eo"

optimizer = Optimal()
alpha3 = 0.3

data = dataset_from_name(dataset_name).astype(int)
# Every column except the last is treated as a feature; the last one is the label.
features = list(data)[:-1]
label = list(data)[-1]


def _cache_path(kind):
    """Return the pickle path of one cached artefact ("polf", "volf" or "key")."""
    return "./data/{}-3-fair-{}-{}.pickle".format(kind, dataset_name, fairness_def)


def _load_cache(kind):
    """Load one cached artefact, closing the file handle afterwards."""
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # cache files this notebook wrote itself.
    with open(_cache_path(kind), "rb") as fh:
        return pickle.load(fh)


def _save_cache(kind, obj):
    """Write one artefact to its cache file, closing the file handle afterwards."""
    with open(_cache_path(kind), "wb") as fh:
        pickle.dump(obj, fh)


# Resume from previously saved results when all three cache files are readable;
# otherwise start from scratch. Only I/O and unpickling failures are treated as
# "no usable cache" so that Ctrl-C and genuine bugs are not silently swallowed.
try:
    polfs = _load_cache("polf")
    volfs = _load_cache("volf")
    keys = _load_cache("key")
except (OSError, EOFError, pickle.UnpicklingError):
    polfs = {}
    volfs = {}
    keys = []

for Xs in features:
    if Xs not in polfs:
        polfs[Xs] = []
        volfs[Xs] = []
    print(Xs)
    # The cache key sorts the three feature names, so their order is irrelevant:
    # combinations() visits each triple once, in the same first-seen order that
    # permutations() would produce, without the 6x duplicate iterations.
    for feature_comb in tqdm_notebook(itertools.combinations(features, 3)):
        if Xs in feature_comb:
            continue
        columns = [Xs] + list(np.sort(feature_comb))
        key = tuple(columns)
        if key in keys:
            continue  # already computed in a previous (possibly interrupted) run

        # p / py are 2x2x2x2 tables indexed by (xs, x1, x2, x3).
        p, py = learn(data[columns].values, data[[label]].values)

        # Stages 1-3 use 7-axis tables, stage 4 the plain 4-axis table.
        ps = {1: np.zeros((2,) * 7), 2: np.zeros((2,) * 7), 3: np.zeros((2,) * 7), 4: np.zeros((2,) * 4)}
        pys = {1: np.zeros((2,) * 7), 2: np.zeros((2,) * 7), 3: np.zeros((2,) * 7), 4: np.zeros((2,) * 4)}
        for xs, x1, x2, x3 in itertools.product([0, 1], repeat=4):
            prob = p[xs, x1, x2, x3]
            cond = py[xs, x1, x2, x3]

            # Each 7-axis table repeats xs in one extra slot and duplicates two
            # of the three features; the slot holding xs moves with the stage.
            ps[1][xs, xs, x1, x2, x2, x3, x3] = prob
            pys[1][xs, xs, x1, x2, x2, x3, x3] = cond

            ps[2][xs, x1, x1, xs, x2, x3, x3] = prob
            pys[2][xs, x1, x1, xs, x2, x3, x3] = cond

            ps[3][xs, x1, x1, x2, x2, x3, xs] = prob
            pys[3][xs, x1, x1, x2, x2, x3, xs] = cond

            ps[4][xs, x1, x2, x3] = prob
            pys[4][xs, x1, x2, x3] = cond

        polf_temp, volf_temp = [], []
        for stage in [1, 2, 3, 4]:
            optimizer.p = ps[stage]
            optimizer.py = pys[stage]
            # Feature count matches the table rank minus the leading xs axis.
            optimizer.num_feat = 3 if stage == 4 else 6
            polf, volf = get_fairness(optimizer, alpha3, fairness_def)
            polf_temp.append(polf)
            volf_temp.append(volf)

        polfs[Xs].append(polf_temp)
        volfs[Xs].append(volf_temp)
        keys.append(key)

        # Checkpoint only after new results exist, so an interrupted run can resume.
        _save_cache("key", keys)
        _save_cache("polf", polfs)
        _save_cache("volf", volfs)

job


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


housing


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


sex


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


savings


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


credit_history


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


age


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


