In [10]:
import numpy as np
from matplotlib import pyplot as plt
import pickle

## Synthetic

In [21]:
# Settings for the synthetic experiment.
significance = 0.05  # drawn as a dashed reference level in the rejection-ratio panels
num_itr = 100        # divisor of the binomial standard-error bands in the plotting cell

# Evaluation grid: five evenly spaced alphas and the four compared methods.
methods = ['case-control', 'stealth', 'sliced', 'tree']
alphas = np.linspace(0.4, 0.8, 5)

# One independent, initially empty list per (alpha, method) pair; the loading
# cell appends one entry per seed to each list.
res = {a: {m: [] for m in methods} for a in alphas}

In [22]:
# Load the per-seed evaluation pickles of the synthetic experiment and reduce
# them to two tables, `result_average` and `result_std`, indexed as
# [alpha index, row, metric] where row 0 is the baseline and rows 1.. follow
# the order of `methods`.
n_metrics = 7  # width of the result tables (each pickled per-method entry must broadcast to it)

# Start from empty buckets so that re-running this cell does not append the
# same seeds a second time.
res = {alpha: {method: [] for method in methods} for alpha in alphas}

# No baseline files are read for the synthetic data, so this list stays empty;
# see the explicit NaN handling below.
baseline = []
# One file per seed; `num_itr` is also the divisor of the standard-error bands
# in the plotting cell, so the two must agree.
for seed in range(num_itr):
    for alpha in alphas:
        # NOTE: pickle.load can execute arbitrary code; only read files that
        # were produced by your own evaluation run.
        with open('./synthetic/eval/synthetic_alpha%0.2f_%03d.pkl' % (alpha, seed), 'rb') as f:
            r = pickle.load(f)
        for method in methods:
            res[alpha][method].append(r[method])
baseline = np.array(baseline)

# Integer-keyed array copies (0..len(methods)-1) next to the per-method lists.
# Nothing in the cells shown here reads them; kept so existing users of `res`
# keep working.
for alpha in alphas:
    for i, method in enumerate(methods):
        res[alpha][i] = np.array(res[alpha][method])

if baseline.size:
    base_mean = np.mean(baseline, axis=0)
    base_std = np.std(baseline, axis=0)
else:
    # Reducing an empty array yields NaN plus a RuntimeWarning; state the
    # "no baseline available" case explicitly instead.
    base_mean = np.float64('nan')
    base_std = np.float64('nan')

# Size the tables from the evaluation grid instead of hard-coding 5 x 5.
result_average = np.zeros((len(alphas), len(methods) + 1, n_metrics))
result_std = np.zeros((len(alphas), len(methods) + 1, n_metrics))

# Row 0 carries the (alpha-independent) baseline for every alpha.
result_average[:, 0, :] = base_mean
result_std[:, 0, :] = base_std

for i, alpha in enumerate(alphas):
    for j, method in enumerate(methods):
        runs = np.array(res[alpha][method])  # one row per seed
        result_average[i, j+1, :] = np.mean(runs, axis=0)
        result_std[i, j+1, :] = np.std(runs, axis=0)


In [None]:
# Figure for the synthetic data: four panels side by side, each showing the
# four methods as a function of alpha with a shaded band around the mean.
# Panel i plots metric column 2*i of `result_average` / `result_std`.
test_names = ['Pr[x]', 'Pr[x|s=1]', 'Pr[x|s=0]']
method_labels = ['Case-Control', 'Stealth', 'Sliced', 'Tree-Sliced']
method_colors = ['orange', 'g', 'b', 'r']  # rows 1..4 of the result tables, in this order
panel_tags = ['(a)', '(b)', '(c)', '(d)']

plt.figure(figsize=(20, 5))
for i in range(4):
    plt.subplot(1, 4, i+1)
    metric = 2*i

    curve_handles = []
    for row, color in enumerate(method_colors, start=1):
        mean = result_average[:, row, metric]
        if i == 0:
            # First panel: band is the standard deviation over seeds.
            shade = result_std[:, row, metric]
        else:
            # Other panels: binomial standard error of a ratio over num_itr runs.
            shade = np.sqrt(mean * (1 - mean) / num_itr)
        plt.fill_between(alphas, mean-shade, mean+shade, color=color, alpha=0.2)
        line, = plt.plot(alphas, mean, color=color)
        curve_handles.append(line)

    plt.xticks(np.linspace(0.4, 0.8, 5), fontsize=20)
    if i == 0:
        plt.yticks(np.linspace(0, 0.20, 5), fontsize=20)
        plt.plot(alphas, [0.2]*alphas.size, 'k--')
        plt.xlabel('alpha', fontsize=20)
        plt.ylabel('Average DP', fontsize=20)
        plt.title('(a) Demographic Parity (DP)', fontsize=24)
    else:
        plt.yticks(fontsize=20)
        # Baseline reference line, read from the same metric column as the
        # curves of this panel (previously column i, which does not match the
        # 2*i column plotted above).
        plt.plot(alphas, [result_average[0][0][metric]]*alphas.size, 'k--')
        plt.plot(alphas, [significance]*alphas.size, 'k--')
        plt.xlabel('alpha', fontsize=20)
        plt.ylabel('Rejection ratio', fontsize=20)
        plt.title('%s Test results on %s' % (panel_tags[i], test_names[i-1]), fontsize=24)
    # Pass the line handles explicitly: a labels-only legend() pairs labels
    # with artists purely by creation order, which can attach them to the
    # shaded bands instead of the curves.
    plt.legend(curve_handles, method_labels, fontsize=18)
plt.tight_layout()
plt.show()

## COMPAS

In [8]:
# Evaluation grid for the COMPAS experiment: the four compared methods and
# five evenly spaced alphas.
methods = ['case-control', 'stealth', 'sliced', 'tree']
alphas = np.linspace(0.4, 0.8, 5)

# One independent, initially empty list per (alpha, method) pair; the loading
# cell appends one entry per seed to each list.
res = {a: {m: [] for m in methods} for a in alphas}

In [18]:
# Read the COMPAS evaluation pickles (for each of 100 seeds: one baseline file
# and one file per alpha) and reduce them to mean / standard-deviation tables
# indexed as [alpha index, row, metric]; row 0 is the baseline, rows 1..4
# follow the order of `methods`.
baseline = []
for seed in range(100):
    with open('./compas/eval/compas_baseline_%03d.pkl' % (seed,), 'rb') as f:
        baseline.append(pickle.load(f))
    for alpha in alphas:
        with open('./compas/eval/compas_alpha%0.2f_%03d.pkl' % (alpha, seed), 'rb') as f:
            r = pickle.load(f)
        for method in methods:
            res[alpha][method].append(r[method])
baseline = np.array(baseline)

# Besides the per-method lists, store an array copy of each under the
# method's integer position.
for alpha in alphas:
    res[alpha].update({i: np.array(res[alpha][method]) for i, method in enumerate(methods)})

# Baseline statistics over seeds.
base_mean = baseline.mean(axis=0)
base_std = baseline.std(axis=0)

result_average = np.zeros((5, 5, 4))
result_std = np.zeros((5, 5, 4))

# The baseline does not depend on alpha: copy it into row 0 for every alpha.
result_average[:, 0, :] = base_mean
result_std[:, 0, :] = base_std

# Per-method statistics over seeds fill rows 1..4.
for i, alpha in enumerate(alphas):
    for j, method in enumerate(methods):
        runs = np.array(res[alpha][method])
        result_average[i, j+1, :] = runs.mean(axis=0)
        result_std[i, j+1, :] = runs.std(axis=0)


In [None]:
# Figure for the COMPAS data: four panels side by side, panel i showing metric
# column i of the result tables for the four methods as a function of alpha,
# with a +/- one standard deviation band and the baseline as a dashed line.
test_names = ['Pr[x]', 'Pr[x|s=1]', 'Pr[x|s=0]']
method_labels = ['Case-Control', 'Stealth', 'Sliced', 'Tree-Sliced']
method_colors = ['orange', 'g', 'b', 'r']  # rows 1..4 of the result tables, in this order
panel_tags = ['(a)', '(b)', '(c)', '(d)']

plt.figure(figsize=(20, 5))
for i in range(4):
    plt.subplot(1, 4, i+1)

    curve_handles = []
    for row, color in enumerate(method_colors, start=1):
        mean = result_average[:, row, i]
        shade = result_std[:, row, i]
        plt.fill_between(alphas, mean-shade, mean+shade, color=color, alpha=0.2)
        line, = plt.plot(alphas, mean, color=color)
        curve_handles.append(line)

    plt.xticks(np.linspace(0.4, 0.8, 5), fontsize=20)
    plt.yticks(fontsize=20)
    # Baseline value (row 0) of this metric as a horizontal reference line.
    plt.plot(alphas, [result_average[0][0][i]]*alphas.size, 'k--')
    plt.xlabel('alpha', fontsize=20)
    if i == 0:
        plt.ylabel('Average DP', fontsize=20)
        plt.title('(a) Demographic Parity (DP)', fontsize=24)
    else:
        plt.ylabel('Average WD ', fontsize=20)
        plt.title('%s WD on %s' % (panel_tags[i], test_names[i-1]), fontsize=24)
    # Pass the line handles explicitly: a labels-only legend() pairs labels
    # with artists purely by creation order, which can attach them to the
    # shaded bands instead of the curves.
    plt.legend(curve_handles, method_labels, fontsize=18)
plt.tight_layout()
plt.show()

## Adult

In [None]:
# Evaluation grid for the Adult experiment: the four compared methods and
# four evenly spaced alphas.
methods = ['case-control', 'stealth', 'sliced', 'tree']
alphas = np.linspace(0.1, 0.4, 4)

# One independent, initially empty list per (alpha, method) pair; the loading
# cell appends one entry per seed to each list.
res = {a: {m: [] for m in methods} for a in alphas}

In [None]:
# Load the per-seed evaluation pickles of the Adult experiment (one baseline
# file and one file per alpha for every seed) and reduce them to two tables,
# `result_average` and `result_std`, indexed as [alpha index, row, metric]
# where row 0 is the baseline and rows 1.. follow the order of `methods`.
n_seeds = 100
n_metrics = 5  # width of the result tables (each pickled entry must broadcast to it)

# Start from empty buckets so that re-running this cell does not append the
# same seeds a second time.
res = {alpha: {method: [] for method in methods} for alpha in alphas}

baseline = []
for seed in range(n_seeds):
    # NOTE: pickle.load can execute arbitrary code; only read files that were
    # produced by your own evaluation run.
    with open('./adult/eval/adult_baseline_%03d.pkl' % (seed,), 'rb') as f:
        r = pickle.load(f)
    baseline.append(r)
    for alpha in alphas:
        with open('./adult/eval/adult_alpha%0.2f_%03d.pkl' % (alpha, seed), 'rb') as f:
            r = pickle.load(f)
        for method in methods:
            res[alpha][method].append(r[method])
baseline = np.array(baseline)

# Integer-keyed array copies (0..len(methods)-1) next to the per-method lists.
# Nothing in the cells shown here reads them; kept so existing users of `res`
# keep working.
for alpha in alphas:
    for i, method in enumerate(methods):
        res[alpha][i] = np.array(res[alpha][method])

base_mean = np.mean(baseline, axis=0)
base_std = np.std(baseline, axis=0)

# The first axis must have exactly one row per alpha. The previous fixed size
# of 5 did not match the 4 alphas used here, leaving an all-zero method row
# and making the plotting cell fail on mismatched x/y lengths.
result_average = np.zeros((len(alphas), len(methods) + 1, n_metrics))
result_std = np.zeros((len(alphas), len(methods) + 1, n_metrics))

# Row 0 carries the (alpha-independent) baseline for every alpha.
result_average[:, 0, :] = base_mean
result_std[:, 0, :] = base_std

for i, alpha in enumerate(alphas):
    for j, method in enumerate(methods):
        runs = np.array(res[alpha][method])  # one row per seed
        result_average[i, j+1, :] = np.mean(runs, axis=0)
        result_std[i, j+1, :] = np.std(runs, axis=0)


In [None]:

# Figure for the Adult data: four panels side by side. Panel k (1-based) shows
# metric column k of the result tables (column 0 is not plotted) for the four
# methods as a function of alpha, with a +/- one standard deviation band and
# the baseline as a dashed line.
test_names = ['Pr[x]', 'Pr[x|s=1]', 'Pr[x|s=0]']
method_labels = ['Case-Control', 'Stealth', 'Sliced', 'Tree-Sliced']
method_colors = ['orange', 'g', 'b', 'r']  # rows 1..4 of the result tables, in this order
panel_tags = ['(a)', '(b)', '(c)', '(d)']

plt.figure(figsize=(20, 5))
for i in range(1, 5):
    plt.subplot(1, 4, i)

    curve_handles = []
    for row, color in enumerate(method_colors, start=1):
        mean = result_average[:, row, i]
        shade = result_std[:, row, i]
        plt.fill_between(alphas, mean-shade, mean+shade, color=color, alpha=0.2)
        line, = plt.plot(alphas, mean, color=color)
        curve_handles.append(line)

    plt.xticks(np.linspace(0, 0.4, 5), fontsize=20)
    plt.yticks(fontsize=20)
    # Baseline value (row 0) of this metric as a horizontal reference line.
    plt.plot(alphas, [result_average[0][0][i]]*alphas.size, 'k--')
    plt.xlabel('alpha', fontsize=20)
    if i == 1:
        plt.ylabel('Average DP', fontsize=20)
        plt.title('(a) Demographic Parity (DP)', fontsize=24)
    else:
        plt.ylabel('Average WD ', fontsize=20)
        plt.title('%s WD on %s' % (panel_tags[i-1], test_names[i-2]), fontsize=24)
    # Pass the line handles explicitly: a labels-only legend() pairs labels
    # with artists purely by creation order, which can attach them to the
    # shaded bands instead of the curves.
    plt.legend(curve_handles, method_labels, fontsize=18)
plt.tight_layout()
plt.show()