In [1]:

from aif360.datasets import MEPSDataset19,AdultDataset
from methods.mitigate_disparity import MultiLevelReweighing , BiasRemoverModel
from aif360.algorithms.preprocessing import Reweighing
# Fairness metrics
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from collections import defaultdict



import numpy as np


from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from metrics.eval_metrics import print_metrics_binary


pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
2023-05-05 10:17:02.439103: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-05 10:17:02.583914: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-05 10:17:03.385324: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object

In [2]:
# 评价数据集
def test(dataset, model, thresh_arr,unprivileged_groups,privileged_groups):
    try:
        # sklearn classifier
        # 得到一个n行2列的概率矩阵，每行两个概率和为1
        y_val_pred_prob = model.predict_proba(dataset.features)
        # 得到favorable_label对应位置
        pos_ind = np.where(model.classes_ == dataset.favorable_label)[0][0]
    except AttributeError:
        # aif360 inprocessing algorithm
        y_val_pred_prob = model.predict(dataset).scores
        pos_ind = 0
    #创建一个dict，其中每个value是一个list
    metric_arrs = defaultdict(list)
    # 这里的thresh_arr为等间隔区间
    for thresh in thresh_arr:
        # 阈值筛选，对应概率大于阈值为1，小于为0
        y_val_pred = (y_val_pred_prob[:, pos_ind] > thresh).astype(np.float64)

        dataset_pred = dataset.copy()
        dataset_pred.labels = y_val_pred
        # 生成计算指标
        metric = ClassificationMetric(
                dataset, dataset_pred,
                unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
        acc_metrics_binary = print_metrics_binary(dataset.labels.ravel(),y_val_pred)
        # 添加指标属性
        metric_arrs['acc'].append(acc_metrics_binary['acc'])
        metric_arrs['auroc'].append(acc_metrics_binary['auroc'])
        metric_arrs['auprc'].append(acc_metrics_binary['auprc'])
        metric_arrs['bal_acc'].append((metric.true_positive_rate()
                                     + metric.true_negative_rate()) / 2)
        metric_arrs['avg_odds_diff'].append(metric.average_odds_difference())
        metric_arrs['disp_imp'].append(metric.disparate_impact())
        metric_arrs['stat_par_diff'].append(metric.statistical_parity_difference())
        metric_arrs['eq_opp_diff'].append(metric.equal_opportunity_difference())
        metric_arrs['theil_ind'].append(metric.theil_index())
    
    return metric_arrs
def describe_metrics(metrics, thresh_arr):
    best_ind = np.argmax(metrics['bal_acc'])
    print("Threshold corresponding to Best balanced accuracy: {:6.4f}".format(thresh_arr[best_ind]))
    print("Best balanced accuracy: {:6.4f}".format(metrics['bal_acc'][best_ind]))
    print("acc value: {:6.4f}".format(metrics['acc'][best_ind]))
    print("auroc value: {:6.4f}".format(metrics['auroc'][best_ind]))
    print("auprc value: {:6.4f}".format(metrics['auprc'][best_ind]))
#     disp_imp_at_best_ind = np.abs(1 - np.array(metrics['disp_imp']))[best_ind]
    disp_imp_at_best_ind = min(metrics['disp_imp'][best_ind], 1/metrics['disp_imp'][best_ind])
    print("Corresponding min(DI, 1/DI) value: {:6.4f}".format(disp_imp_at_best_ind))
    print("Corresponding average odds difference value: {:6.4f}".format(metrics['avg_odds_diff'][best_ind]))
    print("Corresponding statistical parity difference value: {:6.4f}".format(metrics['stat_par_diff'][best_ind]))
    print("Corresponding equal opportunity difference value: {:6.4f}".format(metrics['eq_opp_diff'][best_ind]))
    print("Corresponding Theil index value: {:6.4f}".format(metrics['theil_ind'][best_ind]))


In [3]:
multi_privileged_groups = [
    {"feature_name": "race", "privileged_value": 1, "level": 1},
    {"feature_name": "sex", "privileged_value": 1, "level": 2},
]
multi_unprivileged_groups = [
    {"feature_name": "race", "unprivileged_value": 0, "level": 1},
    {"feature_name": "sex", "unprivileged_value": 0, "level": 2},
]
privileged_groups1 = [{"sex": 1}]
unprivileged_groups1 = [{"sex": 0}]
privileged_groups2 = [{"race": 1}]
unprivileged_groups2 = [{"race": 0}]


dataset = AdultDataset()

(dataset_orig_train,
 dataset_orig_val) = dataset.split([0.7], shuffle=True)



In [4]:
ori_sex = BinaryLabelDatasetMetric(
    dataset=dataset, unprivileged_groups=unprivileged_groups1, privileged_groups=privileged_groups1)
ori_race = BinaryLabelDatasetMetric(
    dataset=dataset, unprivileged_groups=unprivileged_groups2, privileged_groups=privileged_groups2)

print(
    "before reweighing ,sex disparate impact and spd is "

    + str(ori_sex.disparate_impact())+" "
    + str(ori_sex.statistical_parity_difference())
)
print(
    "before reweighing ,race disparate impact and spd is "

    + str(ori_race.disparate_impact())+" "
    + str(ori_race.statistical_parity_difference())
)
# 逻辑回归的pipeline
model = make_pipeline(StandardScaler(),
                      LogisticRegression(solver='liblinear', random_state=1))
fit_params = {
    'logisticregression__sample_weight': dataset_orig_train.instance_weights}
# 原始训练数据逻辑回归
lr_orig = model.fit(dataset_orig_train.features,
                    dataset_orig_train.labels.ravel(), **fit_params)
thresh_arr = np.linspace(0.01, 0.5, 50)
# 评价验证数据逻辑回归
print("sex:")
val_metrics = test(dataset=dataset_orig_val,
                   model=lr_orig,
                   thresh_arr=thresh_arr,
                   unprivileged_groups=unprivileged_groups1, privileged_groups=privileged_groups1)
lr_orig_best_ind = np.argmax(val_metrics['bal_acc'])
describe_metrics(val_metrics, thresh_arr)
print("race:")
val_metrics = test(dataset=dataset_orig_val,
                   model=lr_orig,
                   thresh_arr=thresh_arr,
                   unprivileged_groups=unprivileged_groups2, privileged_groups=privileged_groups2)
lr_orig_best_ind = np.argmax(val_metrics['bal_acc'])
describe_metrics(val_metrics, thresh_arr)


before reweighing ,sex disparate impact and spd is 0.3634695423643793 -0.198901432678815
before reweighing ,race disparate impact and spd is 0.6037688467181627 -0.10395937026830099
sex:
Threshold corresponding to Best balanced accuracy: 0.2200
Best balanced accuracy: 0.8217
acc value: 0.7986
auroc value: 0.8217
auprc value: 0.7332
Corresponding min(DI, 1/DI) value: 0.3056
Corresponding average odds difference value: -0.1890
Corresponding statistical parity difference value: -0.3451
Corresponding equal opportunity difference value: -0.1360
Corresponding Theil index value: 0.0788
race:
Threshold corresponding to Best balanced accuracy: 0.2200
Best balanced accuracy: 0.8217
acc value: 0.7986
auroc value: 0.8217
auprc value: 0.7332
Corresponding min(DI, 1/DI) value: 0.6072
Corresponding average odds difference value: -0.0712
Corresponding statistical parity difference value: -0.1607
Corresponding equal opportunity difference value: -0.0413
Corresponding Theil index value: 0.0788


In [5]:
mmrw = MultiLevelReweighing(multi_unprivileged_groups, multi_privileged_groups)
trans_adult_dataset = mmrw.fit(dataset).transform(dataset)
trans_adult_dataset_train = mmrw.fit(dataset_orig_train).transform(dataset_orig_train)
# 给定测量数据集的指标
trans_sex = BinaryLabelDatasetMetric(dataset=trans_adult_dataset,unprivileged_groups=unprivileged_groups1,privileged_groups=privileged_groups1)
trans_race = BinaryLabelDatasetMetric(dataset=trans_adult_dataset,unprivileged_groups=unprivileged_groups2,privileged_groups=privileged_groups2)


print(
    "after reweighing ,sex disparate impact and spd are "

    + str(trans_sex.disparate_impact())+" "
    + str(trans_sex.statistical_parity_difference())
)


print(
    "after reweighing ,race disparate impact and spd are "

    + str(trans_race.disparate_impact())+" "
    + str(trans_race.statistical_parity_difference())
)

# 逻辑回归的pipeline
model = make_pipeline(StandardScaler(),
                      LogisticRegression(solver='liblinear', random_state=1))
fit_params = {
    'logisticregression__sample_weight': trans_adult_dataset_train.instance_weights}
# 原始训练数据逻辑回归
lr_mmrw = model.fit(trans_adult_dataset_train.features,
                    trans_adult_dataset_train.labels.ravel(), **fit_params)
thresh_arr = np.linspace(0.01, 0.5, 50)
# 评价验证数据逻辑回归
print("sex:")
val_metrics = test(dataset=dataset_orig_val,
                   model=lr_mmrw,
                   thresh_arr=thresh_arr,
                   unprivileged_groups=unprivileged_groups1, privileged_groups=privileged_groups1)

describe_metrics(val_metrics, thresh_arr)
print("race:")
val_metrics = test(dataset=dataset_orig_val,
                   model=lr_mmrw,
                   thresh_arr=thresh_arr,
                   unprivileged_groups=unprivileged_groups2, privileged_groups=privileged_groups2)

describe_metrics(val_metrics, thresh_arr)

after reweighing ,sex disparate impact and spd are 0.9999999999999998 -5.551115123125783e-17
after reweighing ,race disparate impact and spd are 0.9999999999999999 -2.7755575615628914e-17
sex:
Threshold corresponding to Best balanced accuracy: 0.1900
Best balanced accuracy: 0.8088
acc value: 0.7816
auroc value: 0.8088
auprc value: 0.7198
Corresponding min(DI, 1/DI) value: 0.5501
Corresponding average odds difference value: -0.0450
Corresponding statistical parity difference value: -0.2109
Corresponding equal opportunity difference value: 0.0204
Corresponding Theil index value: 0.0824
race:
Threshold corresponding to Best balanced accuracy: 0.1900
Best balanced accuracy: 0.8088
acc value: 0.7816
auroc value: 0.8088
auprc value: 0.7198
Corresponding min(DI, 1/DI) value: 0.7806
Corresponding average odds difference value: -0.0019
Corresponding statistical parity difference value: -0.0908
Corresponding equal opportunity difference value: 0.0306
Corresponding Theil index value: 0.0824


In [6]:

rw_sex = Reweighing(unprivileged_groups=unprivileged_groups1,
                    privileged_groups=privileged_groups1)
rw_race = Reweighing(unprivileged_groups=unprivileged_groups2,
                     privileged_groups=privileged_groups2)

trans_sex_dataset = rw_sex.fit(dataset).transform(dataset)
trans_sex_race_dataset = rw_race.fit(
    trans_sex_dataset).transform(trans_sex_dataset)

trans_sex_dataset_train = rw_sex.fit(dataset_orig_train).transform(dataset_orig_train)
trans_sex_race_dataset_train = rw_race.fit(
    trans_sex_dataset_train).transform(trans_sex_dataset_train)

trans_sex_metric = BinaryLabelDatasetMetric(
    dataset=trans_sex_race_dataset, unprivileged_groups=unprivileged_groups1, privileged_groups=privileged_groups1)
trans_race_metric = BinaryLabelDatasetMetric(
    dataset=trans_sex_race_dataset, unprivileged_groups=unprivileged_groups2, privileged_groups=privileged_groups2)

print(
    "after reweighing ,sex disparate impact and spd are "

    + str(trans_sex_metric.disparate_impact())+" "
    + str(trans_sex_metric.statistical_parity_difference())
)


print(
    "after reweighing ,race disparate impact and spd are "

    + str(trans_race_metric.disparate_impact())+" "
    + str(trans_race_metric.statistical_parity_difference())
)
# 逻辑回归的pipeline
model = make_pipeline(StandardScaler(),
                      LogisticRegression(solver='liblinear', random_state=1))
fit_params = {
    'logisticregression__sample_weight': trans_sex_race_dataset_train.instance_weights}
# 原始训练数据逻辑回归
lr_rw = model.fit(trans_sex_race_dataset_train.features,
                    trans_sex_race_dataset_train.labels.ravel(), **fit_params)
thresh_arr = np.linspace(0.01, 0.5, 50)
# 评价验证数据逻辑回归
print("sex:")
val_metrics = test(dataset=dataset_orig_val,
                   model=lr_rw,
                   thresh_arr=thresh_arr,
                   unprivileged_groups=unprivileged_groups1, privileged_groups=privileged_groups1)

describe_metrics(val_metrics, thresh_arr)
print("race:")
val_metrics = test(dataset=dataset_orig_val,
                   model=lr_rw,
                   thresh_arr=thresh_arr,
                   unprivileged_groups=unprivileged_groups2, privileged_groups=privileged_groups2)

describe_metrics(val_metrics, thresh_arr)



after reweighing ,sex disparate impact and spd are 1.0218649052280424 0.005380859115217379
after reweighing ,race disparate impact and spd are 0.9999999999999996 -1.1102230246251565e-16
sex:
Threshold corresponding to Best balanced accuracy: 0.1900
Best balanced accuracy: 0.8083
acc value: 0.7816
auroc value: 0.8083
auprc value: 0.7192
Corresponding min(DI, 1/DI) value: 0.5589
Corresponding average odds difference value: -0.0402
Corresponding statistical parity difference value: -0.2058
Corresponding equal opportunity difference value: 0.0245
Corresponding Theil index value: 0.0828
race:
Threshold corresponding to Best balanced accuracy: 0.1900
Best balanced accuracy: 0.8083
acc value: 0.7816
auroc value: 0.8083
auprc value: 0.7192
Corresponding min(DI, 1/DI) value: 0.7851
Corresponding average odds difference value: -0.0001
Corresponding statistical parity difference value: -0.0887
Corresponding equal opportunity difference value: 0.0322
Corresponding Theil index value: 0.0828


In [7]:
rw_sex = Reweighing(unprivileged_groups=unprivileged_groups1,
                    privileged_groups=privileged_groups1)
rw_race = Reweighing(unprivileged_groups=unprivileged_groups2,
                     privileged_groups=privileged_groups2)
trans_race_dataset = rw_race.fit(
    dataset).transform(dataset)
trans_race_sex_dataset = rw_sex.fit(trans_race_dataset).transform(trans_race_dataset)


trans_race_dataset_train = rw_race.fit(dataset_orig_train).transform(dataset_orig_train)
trans_race_sex_dataset_train = rw_sex.fit(
    trans_race_dataset_train).transform(trans_race_dataset_train)

trans_sex_metric2 = BinaryLabelDatasetMetric(
    dataset=trans_race_sex_dataset, unprivileged_groups=unprivileged_groups1, privileged_groups=privileged_groups1)
trans_race_metric2 = BinaryLabelDatasetMetric(
    dataset=trans_race_sex_dataset, unprivileged_groups=unprivileged_groups2, privileged_groups=privileged_groups2)

print(
    "after reweighing ,sex disparate impact and spd are "

    + str(trans_sex_metric2.disparate_impact())+" "
    + str(trans_sex_metric2.statistical_parity_difference())
)


print(
    "after reweighing ,race disparate impact and spd are "

    + str(trans_race_metric2.disparate_impact())+" "
    + str(trans_race_metric2.statistical_parity_difference())
)
# 逻辑回归的pipeline
model = make_pipeline(StandardScaler(),
                      LogisticRegression(solver='liblinear', random_state=1))
fit_params = {
    'logisticregression__sample_weight': trans_race_sex_dataset_train.instance_weights}
# 原始训练数据逻辑回归
lr_rw = model.fit(trans_race_sex_dataset_train.features,
                    trans_race_sex_dataset_train.labels.ravel(), **fit_params)
thresh_arr = np.linspace(0.01, 0.5, 50)
# 评价验证数据逻辑回归
print("sex:")
val_metrics = test(dataset=dataset_orig_val,
                   model=lr_rw,
                   thresh_arr=thresh_arr,
                   unprivileged_groups=unprivileged_groups1, privileged_groups=privileged_groups1)

describe_metrics(val_metrics, thresh_arr)
print("race:")
val_metrics = test(dataset=dataset_orig_val,
                   model=lr_rw,
                   thresh_arr=thresh_arr,
                   unprivileged_groups=unprivileged_groups2, privileged_groups=privileged_groups2)

describe_metrics(val_metrics, thresh_arr)


after reweighing ,sex disparate impact and spd are 1.0000000000000002 5.551115123125783e-17
after reweighing ,race disparate impact and spd are 1.0949907626554443 0.02323425989713221
sex:
Threshold corresponding to Best balanced accuracy: 0.1800
Best balanced accuracy: 0.8079
acc value: 0.7758
auroc value: 0.8079
auprc value: 0.7191
Corresponding min(DI, 1/DI) value: 0.5538
Corresponding average odds difference value: -0.0497
Corresponding statistical parity difference value: -0.2143
Corresponding equal opportunity difference value: 0.0145
Corresponding Theil index value: 0.0811
race:
Threshold corresponding to Best balanced accuracy: 0.1800
Best balanced accuracy: 0.8079
acc value: 0.7758
auroc value: 0.8079
auprc value: 0.7191
Corresponding min(DI, 1/DI) value: 0.8238
Corresponding average odds difference value: 0.0069
Corresponding statistical parity difference value: -0.0744
Corresponding equal opportunity difference value: 0.0281
Corresponding Theil index value: 0.0811
