In [1]:
from aif360.algorithms.preprocessing import Reweighing
# Fairness metrics
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from collections import defaultdict
from methods.mitigate_disparity import MultiLevelReweighing
import numpy as np

from sklearn.pipeline import make_pipeline
from metrics.eval_metrics import print_metrics_binary

from sklearn.linear_model import LogisticRegression

from aif360.datasets import StandardDataset
import pandas as pd
np.random.seed(1)

pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.
2023-05-04 22:36:26.339758: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-04 22:36:26.484198: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-05-04 22:36:27.282441: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object

In [2]:
# Evaluate a fitted model on a dataset over a range of decision thresholds.
def test(dataset, model, thresh_arr, unprivileged_groups, privileged_groups):
    """Score ``dataset`` with ``model`` and collect metrics per threshold.

    Args:
        dataset: aif360 dataset; this function reads ``features``, ``labels``,
            ``favorable_label``, ``unfavorable_label`` and calls ``copy()``.
        model: either an estimator exposing ``predict_proba`` and ``classes_``
            (sklearn style), or an object whose ``predict(dataset)`` result
            has a ``scores`` attribute (aif360 in-processing style).
        thresh_arr: iterable of decision thresholds.
        unprivileged_groups: group spec forwarded to ``ClassificationMetric``.
        privileged_groups: group spec forwarded to ``ClassificationMetric``.

    Returns:
        ``defaultdict(list)`` mapping a metric name to a list holding one
        value per threshold, in the order of ``thresh_arr``.
    """
    try:
        # sklearn classifier: one probability column per class.
        y_val_pred_prob = model.predict_proba(dataset.features)
        # Column that corresponds to the favorable label.
        pos_ind = np.where(model.classes_ == dataset.favorable_label)[0][0]
    except AttributeError:
        # aif360 in-processing algorithm: a single score column.
        y_val_pred_prob = model.predict(dataset).scores
        pos_ind = 0

    # Loop-invariant pieces, computed once.
    pos_scores = y_val_pred_prob[:, pos_ind]
    y_true = dataset.labels.ravel()

    # Every value is a list that grows by one entry per threshold.
    metric_arrs = defaultdict(list)
    for thresh in thresh_arr:
        # Scores strictly above the threshold get the favorable label, the rest
        # the unfavorable one. Using the dataset's own label values keeps this
        # consistent with the favorable_label lookup above; for a 1.0/0.0
        # encoding it is identical to a plain boolean-to-float cast.
        y_val_pred = np.where(pos_scores > thresh,
                              dataset.favorable_label,
                              dataset.unfavorable_label).astype(np.float64)

        dataset_pred = dataset.copy()
        dataset_pred.labels = y_val_pred
        # Fairness / classification metrics for this threshold.
        metric = ClassificationMetric(
                dataset, dataset_pred,
                unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
        # NOTE(review): thresholded labels (not scores) are passed here, so the
        # 'auroc' / 'auprc' values are presumably computed from hard
        # predictions -- confirm against print_metrics_binary.
        acc_metrics_binary = print_metrics_binary(y_true, y_val_pred)

        metric_arrs['acc'].append(acc_metrics_binary['acc'])
        metric_arrs['auroc'].append(acc_metrics_binary['auroc'])
        metric_arrs['auprc'].append(acc_metrics_binary['auprc'])
        metric_arrs['bal_acc'].append((metric.true_positive_rate()
                                     + metric.true_negative_rate()) / 2)
        metric_arrs['avg_odds_diff'].append(metric.average_odds_difference())
        metric_arrs['disp_imp'].append(metric.disparate_impact())
        metric_arrs['stat_par_diff'].append(metric.statistical_parity_difference())
        metric_arrs['eq_opp_diff'].append(metric.equal_opportunity_difference())
        metric_arrs['theil_ind'].append(metric.theil_index())

    return metric_arrs
def describe_metrics(metrics, thresh_arr):
    """Print the metrics at the threshold with the highest balanced accuracy.

    Args:
        metrics: mapping from metric name to a sequence with one value per
            threshold. The keys read are 'bal_acc', 'acc', 'auroc', 'auprc',
            'disp_imp', 'avg_odds_diff', 'stat_par_diff', 'eq_opp_diff' and
            'theil_ind'.
        thresh_arr: thresholds, index-aligned with the metric sequences.

    Returns:
        None. The report is written to stdout.
    """
    best_ind = np.argmax(metrics['bal_acc'])
    print("Threshold corresponding to Best balanced accuracy: {:6.4f}".format(thresh_arr[best_ind]))
    print("Best balanced accuracy: {:6.4f}".format(metrics['bal_acc'][best_ind]))
    print("acc value: {:6.4f}".format(metrics['acc'][best_ind]))
    print("auroc value: {:6.4f}".format(metrics['auroc'][best_ind]))
    print("auprc value: {:6.4f}".format(metrics['auprc'][best_ind]))
    disp_imp = metrics['disp_imp'][best_ind]
    # min(DI, 1/DI) makes the ratio symmetric around 1. A DI of exactly 0 has
    # 1/DI = inf, so the minimum is 0; handle it explicitly so a plain Python
    # float does not raise ZeroDivisionError (and numpy does not warn).
    disp_imp_at_best_ind = 0.0 if disp_imp == 0 else min(disp_imp, 1 / disp_imp)
    print("Corresponding min(DI, 1/DI) value: {:6.4f}".format(disp_imp_at_best_ind))
    print("Corresponding average odds difference value: {:6.4f}".format(metrics['avg_odds_diff'][best_ind]))
    print("Corresponding statistical parity difference value: {:6.4f}".format(metrics['stat_par_diff'][best_ind]))
    print("Corresponding equal opportunity difference value: {:6.4f}".format(metrics['eq_opp_diff'][best_ind]))
    print("Corresponding Theil index value: {:6.4f}".format(metrics['theil_ind'][best_ind]))


In [3]:
# Protected attributes mapped to the `level` value that is repeated in the
# multi-level group specs further down in this cell.
feature_name_level_dic = {'age': 2, 'gender': 1}
data_df = pd.read_csv('./datasets/tjh.csv')

# Collapse the table to one row per patient.
# NOTE(review): GroupBy.first() takes the first non-null value of each column
# within a group, so a row may combine values from several records of the same
# patient -- confirm this is the intended "first visit" definition.
grouped_data = data_df.groupby('patiend_id')
# reset_index(drop=True) discards the 'patiend_id' group key directly instead
# of re-inserting it as a column and dropping it in place afterwards.
data_first_visit = grouped_data.first().reset_index(drop=True)

# Binarise every protected attribute: values strictly above the column mean
# become 1 (declared as the privileged class), everything else becomes 0.
protect_feature_list = list(feature_name_level_dic.keys())
privileged_classes = []
protected_attribute_names = []
for protect_feature_name in protect_feature_list:
    avg = data_first_visit[protect_feature_name].mean()
    data_first_visit[protect_feature_name] = (
        data_first_visit[protect_feature_name] > avg).astype(int)
    privileged_classes.append([1.0])
    protected_attribute_names.append(protect_feature_name)

dataset = StandardDataset(df=data_first_visit, label_name='outcome',
                            favorable_classes=[1.0],
                            protected_attribute_names=protected_attribute_names,
                            privileged_classes=privileged_classes)
# 70 % train / 30 % validation. The explicit seed matches the global
# np.random.seed(1) set right after the imports, so a fresh "Run All" yields
# the same split, while re-running this cell alone no longer reshuffles.
(dataset_orig_train,
 dataset_orig_val) = dataset.split([0.7], shuffle=True, seed=1)

# Group specs consumed by MultiLevelReweighing.
multi_privileged_groups = [
    {"feature_name": "age", "privileged_value": 1, "level": 2},
    {"feature_name": "gender", "privileged_value": 1, "level": 1},
]
multi_unprivileged_groups = [
    {"feature_name": "age", "unprivileged_value": 0, "level": 2},
    {"feature_name": "gender", "unprivileged_value": 0, "level": 1},
]
# Single-attribute group specs in aif360 format: 1 = age, 2 = gender.
privileged_groups1 = [{"age": 1}]
unprivileged_groups1 = [{"age": 0}]
privileged_groups2 = [{"gender": 1}]
unprivileged_groups2 = [{"gender": 0}]

In [4]:
# Dataset-level fairness metrics on the full dataset, before any reweighing.
ori_age = BinaryLabelDatasetMetric(dataset=dataset,unprivileged_groups=unprivileged_groups1,privileged_groups=privileged_groups1)
ori_gender = BinaryLabelDatasetMetric(dataset=dataset,unprivileged_groups=unprivileged_groups2,privileged_groups=privileged_groups2)
print(
    "before reweighing ,age disparate impact and spd are "
    + str(ori_age.disparate_impact())+" "+str(ori_age.statistical_parity_difference())
)
# Message aligned with the age line: both DI and SPD are printed.
print(
    "before reweighing ,gender disparate impact and spd are "
    + str(ori_gender.disparate_impact())+" "+str(ori_gender.statistical_parity_difference())
)

# Logistic-regression pipeline.
model = make_pipeline(LogisticRegression(solver='liblinear', random_state=1))
fit_params = {
    'logisticregression__sample_weight': dataset_orig_train.instance_weights}
# Baseline: fit on the original training split with its instance weights.
lr_orig = model.fit(dataset_orig_train.features,
                    dataset_orig_train.labels.ravel(), **fit_params)
thresh_arr = np.linspace(0.01, 0.5, 50)

# Evaluate the baseline on the validation split, once per protected attribute.
for attr_label, unpriv_groups, priv_groups in (
        ("age:", unprivileged_groups1, privileged_groups1),
        ("gender:", unprivileged_groups2, privileged_groups2)):
    print(attr_label)
    val_metrics = test(dataset=dataset_orig_val,
                       model=lr_orig,
                       thresh_arr=thresh_arr,
                       unprivileged_groups=unpriv_groups,
                       privileged_groups=priv_groups)
    describe_metrics(val_metrics, thresh_arr)

before reweighing ,age disparate impact and spd are 0.27816769605090774 -0.4823952470293934
before reweighing ,gender disparate impact are 0.545413870246085 -0.2573129036342915
age:
Threshold corresponding to Best balanced accuracy: 0.2900
Best balanced accuracy: 0.8357
acc value: 0.8349
auroc value: 0.8357
auprc value: 0.8696
Corresponding min(DI, 1/DI) value: 0.3147
Corresponding average odds difference value: -0.2585
Corresponding statistical parity difference value: -0.5227
Corresponding equal opportunity difference value: -0.2922
Corresponding Theil index value: 0.0989
gender:
Threshold corresponding to Best balanced accuracy: 0.2900
Best balanced accuracy: 0.8357
acc value: 0.8349
auroc value: 0.8357
auprc value: 0.8696
Corresponding min(DI, 1/DI) value: 0.6732
Corresponding average odds difference value: -0.0699
Corresponding statistical parity difference value: -0.1942
Corresponding equal opportunity difference value: -0.0018
Corresponding Theil index value: 0.0989


In [5]:
# Multi-level reweighing over both protected attributes at once.
mmrw = MultiLevelReweighing(multi_unprivileged_groups, multi_privileged_groups)
trans_dataset = mmrw.fit(dataset).transform(dataset)
# Refit on the training split only; these weights are used for training below.
trans_dataset_train = mmrw.fit(dataset_orig_train).transform(dataset_orig_train)
# Dataset-level metrics of the reweighed training split.
trans_age = BinaryLabelDatasetMetric(dataset=trans_dataset_train,unprivileged_groups=unprivileged_groups1,privileged_groups=privileged_groups1)
trans_gender = BinaryLabelDatasetMetric(dataset=trans_dataset_train,unprivileged_groups=unprivileged_groups2,privileged_groups=privileged_groups2)

print(
    "after reweighing ,age disparate impact and spd are "
    + str(trans_age.disparate_impact())+" "
    + str(trans_age.statistical_parity_difference())
)

print(
    "after reweighing ,gender disparate impact and spd are "
    + str(trans_gender.disparate_impact())+" "
    + str(trans_gender.statistical_parity_difference())
)

# Logistic-regression pipeline.
model = make_pipeline(LogisticRegression(solver='liblinear', random_state=1))
fit_params = {
    'logisticregression__sample_weight': trans_dataset_train.instance_weights}
# Fit on the reweighed training split.
lr_mmrw = model.fit(trans_dataset_train.features,
                    trans_dataset_train.labels.ravel(), **fit_params)
thresh_arr = np.linspace(0.01, 0.5, 50)

# Evaluate on the validation split, once per protected attribute. The section
# labels name the attribute actually evaluated: groups1 is age and groups2 is
# gender (they previously read "sex:" and "race:").
for attr_label, unpriv_groups, priv_groups in (
        ("age:", unprivileged_groups1, privileged_groups1),
        ("gender:", unprivileged_groups2, privileged_groups2)):
    print(attr_label)
    val_metrics = test(dataset=dataset_orig_val,
                       model=lr_mmrw,
                       thresh_arr=thresh_arr,
                       unprivileged_groups=unpriv_groups,
                       privileged_groups=priv_groups)
    # Not read by any visible cell; the name is kept unchanged in case a later
    # cell uses it. Despite the name, it refers to lr_mmrw.
    lr_orig_best_ind = np.argmax(val_metrics['bal_acc'])
    describe_metrics(val_metrics, thresh_arr)

after reweighing ,age disparate impact and spd are 1.0 0.0
after reweighing ,gender disparate impact and spd are 0.9999999999999991 -3.885780586188048e-16
sex:
Threshold corresponding to Best balanced accuracy: 0.1500
Best balanced accuracy: 0.8095
acc value: 0.8073
auroc value: 0.8095
auprc value: 0.8500
Corresponding min(DI, 1/DI) value: 0.4104
Corresponding average odds difference value: -0.2191
Corresponding statistical parity difference value: -0.4597
Corresponding equal opportunity difference value: -0.3160
Corresponding Theil index value: 0.0969
race:
Threshold corresponding to Best balanced accuracy: 0.1500
Best balanced accuracy: 0.8095
acc value: 0.8073
auroc value: 0.8095
auprc value: 0.8500
Corresponding min(DI, 1/DI) value: 0.7622
Corresponding average odds difference value: -0.0392
Corresponding statistical parity difference value: -0.1482
Corresponding equal opportunity difference value: -0.0281
Corresponding Theil index value: 0.0969


In [6]:
# Sequential single-attribute reweighing: age first, then gender.
rw_age = Reweighing(privileged_groups=privileged_groups1,
                    unprivileged_groups=unprivileged_groups1)
rw_gender = Reweighing(privileged_groups=privileged_groups2,
                       unprivileged_groups=unprivileged_groups2)

# Full dataset: used only for the dataset-level metrics printed below.
rw_age.fit(dataset)
trans_age_dataset = rw_age.transform(dataset)
rw_gender.fit(trans_age_dataset)
trans_age_gender_dataset = rw_gender.transform(trans_age_dataset)

# Training split: both reweighers are refitted; these weights train the model.
rw_age.fit(dataset_orig_train)
trans_age_dataset_train = rw_age.transform(dataset_orig_train)
rw_gender.fit(trans_age_dataset_train)
trans_age_gender_dataset_train = rw_gender.transform(trans_age_dataset_train)

trans_age_metric = BinaryLabelDatasetMetric(
    dataset=trans_age_gender_dataset,
    privileged_groups=privileged_groups1,
    unprivileged_groups=unprivileged_groups1)
trans_gender_metric = BinaryLabelDatasetMetric(
    dataset=trans_age_gender_dataset,
    privileged_groups=privileged_groups2,
    unprivileged_groups=unprivileged_groups2)

print(
    "after reweighing ,age disparate impact and spd are "
    f"{trans_age_metric.disparate_impact()!s} "
    f"{trans_age_metric.statistical_parity_difference()!s}"
)
print(
    "after reweighing ,gender disparate impact and spd are "
    f"{trans_gender_metric.disparate_impact()!s} "
    f"{trans_gender_metric.statistical_parity_difference()!s}"
)

# Logistic-regression pipeline, fitted on the sequentially reweighed train split.
model = make_pipeline(LogisticRegression(solver='liblinear', random_state=1))
fit_params = {
    'logisticregression__sample_weight':
        trans_age_gender_dataset_train.instance_weights,
}
lr_rw = model.fit(
    trans_age_gender_dataset_train.features,
    trans_age_gender_dataset_train.labels.ravel(),
    **fit_params,
)
thresh_arr = np.linspace(0.01, 0.5, 50)

# Validation-split evaluation, once per protected attribute.
for attr_label, unpriv_groups, priv_groups in (
        ("age:", unprivileged_groups1, privileged_groups1),
        ("gender:", unprivileged_groups2, privileged_groups2)):
    print(attr_label)
    val_metrics = test(dataset=dataset_orig_val,
                       model=lr_rw,
                       thresh_arr=thresh_arr,
                       unprivileged_groups=unpriv_groups,
                       privileged_groups=priv_groups)
    describe_metrics(val_metrics, thresh_arr)

after reweighing ,age disparate impact and spd are 1.010969596827694 0.005020922431002661
after reweighing ,gender disparate impact and spd are 0.9999999999999997 -1.6653345369377348e-16
age:
Threshold corresponding to Best balanced accuracy: 0.1600
Best balanced accuracy: 0.8095
acc value: 0.8073
auroc value: 0.8095
auprc value: 0.8500
Corresponding min(DI, 1/DI) value: 0.4104
Corresponding average odds difference value: -0.2191
Corresponding statistical parity difference value: -0.4597
Corresponding equal opportunity difference value: -0.3160
Corresponding Theil index value: 0.0969
gender:
Threshold corresponding to Best balanced accuracy: 0.1600
Best balanced accuracy: 0.8095
acc value: 0.8073
auroc value: 0.8095
auprc value: 0.8500
Corresponding min(DI, 1/DI) value: 0.7622
Corresponding average odds difference value: -0.0392
Corresponding statistical parity difference value: -0.1482
Corresponding equal opportunity difference value: -0.0281
Corresponding Theil index value: 0.0969


In [7]:
# Sequential single-attribute reweighing in the opposite order: gender, then age.
rw_age = Reweighing(privileged_groups=privileged_groups1,
                    unprivileged_groups=unprivileged_groups1)
rw_gender = Reweighing(privileged_groups=privileged_groups2,
                       unprivileged_groups=unprivileged_groups2)

# Full dataset: used only for the dataset-level metrics printed below.
rw_gender.fit(dataset)
trans_gender_dataset = rw_gender.transform(dataset)
rw_age.fit(trans_gender_dataset)
trans_gender_age_dataset = rw_age.transform(trans_gender_dataset)

# Training split: both reweighers are refitted; these weights train the model.
rw_gender.fit(dataset_orig_train)
trans_gender_dataset_train = rw_gender.transform(dataset_orig_train)
rw_age.fit(trans_gender_dataset_train)
trans_gender_age_dataset_train = rw_age.transform(trans_gender_dataset_train)

trans_age_metric2 = BinaryLabelDatasetMetric(
    dataset=trans_gender_age_dataset,
    privileged_groups=privileged_groups1,
    unprivileged_groups=unprivileged_groups1)
trans_gender_metric2 = BinaryLabelDatasetMetric(
    dataset=trans_gender_age_dataset,
    privileged_groups=privileged_groups2,
    unprivileged_groups=unprivileged_groups2)

print(
    "after reweighing ,age disparate impact and spd are "
    f"{trans_age_metric2.disparate_impact()!s} "
    f"{trans_age_metric2.statistical_parity_difference()!s}"
)
print(
    "after reweighing ,gender disparate impact and spd are "
    f"{trans_gender_metric2.disparate_impact()!s} "
    f"{trans_gender_metric2.statistical_parity_difference()!s}"
)

# Logistic-regression pipeline, fitted on the sequentially reweighed train split.
model = make_pipeline(LogisticRegression(solver='liblinear', random_state=1))
fit_params = {
    'logisticregression__sample_weight':
        trans_gender_age_dataset_train.instance_weights,
}
lr_rw = model.fit(
    trans_gender_age_dataset_train.features,
    trans_gender_age_dataset_train.labels.ravel(),
    **fit_params,
)
thresh_arr = np.linspace(0.01, 0.5, 50)

# Validation-split evaluation, once per protected attribute.
for attr_label, unpriv_groups, priv_groups in (
        ("age:", unprivileged_groups1, privileged_groups1),
        ("gender:", unprivileged_groups2, privileged_groups2)):
    print(attr_label)
    val_metrics = test(dataset=dataset_orig_val,
                       model=lr_rw,
                       thresh_arr=thresh_arr,
                       unprivileged_groups=unpriv_groups,
                       privileged_groups=priv_groups)
    describe_metrics(val_metrics, thresh_arr)


after reweighing ,age disparate impact and spd are 0.9999999999999999 -5.551115123125783e-17
after reweighing ,gender disparate impact and spd are 1.0214939048830591 0.009800601436590783
age:
Threshold corresponding to Best balanced accuracy: 0.1600
Best balanced accuracy: 0.8095
acc value: 0.8073
auroc value: 0.8095
auprc value: 0.8500
Corresponding min(DI, 1/DI) value: 0.4104
Corresponding average odds difference value: -0.2191
Corresponding statistical parity difference value: -0.4597
Corresponding equal opportunity difference value: -0.3160
Corresponding Theil index value: 0.0969
gender:
Threshold corresponding to Best balanced accuracy: 0.1600
Best balanced accuracy: 0.8095
acc value: 0.8073
auroc value: 0.8095
auprc value: 0.8500
Corresponding min(DI, 1/DI) value: 0.7622
Corresponding average odds difference value: -0.0392
Corresponding statistical parity difference value: -0.1482
Corresponding equal opportunity difference value: -0.0281
Corresponding Theil index value: 0.0969
