# Bias and Fairness Assessment (Binary Classification: Adult Income)

### Step 1: Install and import dependencies


In [None]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

In [None]:
# Download the UCI repository dataset with id 2 and merge its feature columns
# with its target column(s) into one DataFrame (inner join on the row index).
adult_repo = fetch_ucirepo(id=2)
adult = adult_repo.data.features.join(adult_repo.data.targets, how="inner")

In [None]:
# Preview the first three rows of the loaded data.
adult.head(n=3)

## Basic Preprocessing Steps

### 1. Drop missing values

In [None]:
# Remove, in place, every row that contains at least one missing value.
adult.dropna(axis=0, how="any", inplace=True)

### 2. Copy DataFrame for posterity

In [None]:
# Work on an independent deep copy so later edits do not touch `adult`.
df = adult.copy(deep=True)

In [None]:
# Distribution of the raw income labels before they are encoded.
adult.loc[:, "income"].value_counts()

### 3. Encode categorical variables

In [None]:
def outcome_merge(val):
    """Encode a raw income label as a binary target.

    Returns 0 when ``val`` equals ``"<=50K"`` or ``"<=50K."`` (the same
    label with a trailing period) and 1 for every other value.
    """
    low_income_labels = ("<=50K", "<=50K.")
    return 0 if val in low_income_labels else 1

In [None]:
# Replace the raw income strings with the 0/1 encoding from outcome_merge.
df["income"] = df["income"].map(outcome_merge)

In [None]:
# Per sex group: number of rows and the percentage of rows whose encoded
# income is 1 (i.e. not one of the "<=50K" labels).
income_by_sex = df.groupby("sex")["income"].agg(
    count="count",
    percentage_above_50k=lambda grp: (grp.sum() / grp.count()) * 100,
)
income_by_sex

In [None]:
# Per race group: number of rows and the percentage of rows whose encoded
# income is 1 (i.e. not one of the "<=50K" labels).
income_by_race = df.groupby("race")["income"].agg(
    count="count",
    percentage_above_50k=lambda grp: (grp.sum() / grp.count()) * 100,
)
income_by_race

In [None]:
# Rename the "Amer-Indian-Eskimo" race category; every later cell sees the
# new label "Native American or Inuit".
df['race'] = df['race'].replace({"Amer-Indian-Eskimo": "Native American or Inuit"})

### 4. Split the data

In [None]:
# Separate the binary income target (y) from the predictor columns (X).
y = df["income"]
X = df.drop(columns=["income"])

In [None]:
# Cast only the text-valued feature columns to pandas' categorical dtype so
# the model (built with enable_categorical=True) can consume them directly.
# The check has to inspect the column's dtype: `isinstance(X[col], object)`
# is true for every Python object and would turn the numeric columns into
# categories as well.
for col in X.columns:
    if pd.api.types.is_object_dtype(X[col]) or pd.api.types.is_string_dtype(X[col]):
        X[col] = X[col].astype("category")

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
split_kwargs = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

In [None]:
# Class balance of the training labels.
y_train.value_counts(dropna=True)

## Train XGBoost Model

In [None]:
# XGBoost classifier with categorical-feature support enabled and a fixed
# seed; fit on the training split.
xgb_params = {
    "eval_metric": "logloss",
    "random_state": 42,
    "enable_categorical": True,
}
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

## Evaluate XGBoost Model

In [None]:
# Score the held-out split and print the classification report.
# y_prob holds the full predict_proba output here (one column per class).
y_prob = model.predict_proba(X_test)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Bias and Fairness Analysis with EquiBoots

**EquiBoots supports point-estimate fairness analysis at a model's operating point (e.g., the optimal threshold) as well as analysis across multiple bootstrap samples drawn with replacement.**


To initialize an analysis with EquiBoots:

1. Define a fairness DataFrame containing the variables of interest.
2. Initialize an EquiBoots object using:
    - Ground truth (y_true)
    - Model probabilities (y_prob)
    - Model predictions (y_pred)
3. Identify the columns/variables that we will be assessing (e.g., race, sex)

In [None]:
import equiboots as eqb

In [None]:
# Hard predictions, the probability column at index 1 (class label 1), and
# the ground truth converted to a NumPy array.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]
y_test = y_test.to_numpy()

# Store the sensitive attributes as plain strings instead of categoricals.
sensitive_cols = ['race', 'sex']
X_test[sensitive_cols] = X_test[sensitive_cols].astype(str)

## Point Estimates

In [None]:
sensitive_features = ['race', 'sex']

# Keep only the sensitive attributes and reset the index to 0..n-1 so the
# rows line up positionally with the NumPy arrays passed below.
fairness_df = X_test[sensitive_features].reset_index(drop=True)

# Point-estimate EquiBoots object over both sensitive attributes.
eq = eqb.EquiBoots(
    y_true=y_test,
    y_prob=y_prob,
    y_pred=y_pred,
    fairness_df=fairness_df,
    fairness_vars=sensitive_features,
)

eq.grouper(groupings_vars=sensitive_features)

In [None]:
# Slice the data and compute metrics, one sensitive attribute at a time.
sliced_race_data = eq.slicer("race")
race_metrics = eq.get_metrics(sliced_race_data)

sliced_sex_data = eq.slicer("sex")
sex_metrics = eq.get_metrics(sliced_sex_data)

In [None]:
# Plot ROC curves for the race groups, leaving out the groups listed in
# exclude_groups. The race label "Amer-Indian-Eskimo" is renamed to
# "Native American or Inuit" earlier in this notebook, so the renamed label
# is the one that must be listed here for the exclusion to take effect.
eqb.eq_plot_group_curves(
    sliced_race_data,
    curve_type="roc",
    title="ROC AUC by Race Group",
    exclude_groups=['Other', 'Native American or Inuit'],
)

In [None]:
# Settings passed to the point-estimate significance tests below.
test_config = dict(
    test_type="chi_square",
    alpha=0.05,
    adjust_method="bonferroni",
    confidence_level=0.95,
    classification_task="binary_classification",
)

In [None]:
# Significance test on the per-race point-estimate metrics.
stat_test_results_race = eq.analyze_statistical_significance(
    metric_dict=race_metrics,
    var_name="race",
    test_config=test_config,
)

In [None]:
# Significance test on the per-sex point-estimate metrics.
stat_test_results_sex = eq.analyze_statistical_significance(
    metric_dict=sex_metrics,
    var_name="sex",
    test_config=test_config,
)

In [None]:
# Display the significance-test results computed for the sex groups.
stat_test_results_sex

In [None]:
# Drop the 'Other' group from the race metrics before plotting. The default
# makes the cell safe to re-run (or to run when the group is absent) instead
# of raising KeyError on the second execution.
race_metrics.pop('Other', None)

In [None]:
# Forest plot of Precision per race group, with 'White' passed as the
# reference group and the race significance-test results attached.
eqb.eq_plot_metrics_forest(
    group_metrics=race_metrics,
    metric_name="Precision",
    reference_group="White",
    statistical_tests=stat_test_results_race,
    title="Forest Plot: Precision Across Groups",
)

In [None]:
# Forest plot of Recall per race group, with 'White' passed as the
# reference group and the race significance-test results attached.
eqb.eq_plot_metrics_forest(
    group_metrics=race_metrics,
    metric_name="Recall",
    reference_group="White",
    statistical_tests=stat_test_results_race,
    title="Forest Plot: Recall Across Groups",
)

In [None]:
# Seed list handed to the bootstrap EquiBoots object: len(y_test) values
# spaced evenly from 0 to len(y_test) inclusive, cast to int.
int_list = np.linspace(
    start=0,
    stop=len(y_test),
    num=len(y_test),
    dtype=int,
).tolist()


In [None]:
# Bootstrap EquiBoots object for the race variable, with 'White' as the
# explicit reference group.
eqb_bootstrap = eqb.EquiBoots(
    # model outputs and ground truth
    y_true=y_test,
    y_prob=y_prob,
    y_pred=y_pred,
    # fairness variable and its reference group
    fairness_df=fairness_df,
    fairness_vars=['race'],
    reference_groups=['White'],
    task="binary_classification",
    # bootstrap settings
    bootstrap_flag=True,
    num_bootstraps=5001,
    boot_sample_size=len(y_test),
    seeds=int_list,
    balanced=True,
    stratify_by_outcome=True,
)

In [None]:
# Print the seeds stored on the bootstrap object as a sanity check.
print(eqb_bootstrap.seeds)

In [None]:
# Run the grouping step on the bootstrap object for the 'race' variable.
eqb_bootstrap.grouper(groupings_vars=['race'])

In [None]:
# Slice the bootstrap data by race.
bootstrap_race_data = eqb_bootstrap.slicer("race")

In [None]:
# Compute metrics from the race-sliced bootstrap data.
boots_race_metrics = eqb_bootstrap.get_metrics(bootstrap_race_data)

In [None]:
# Compute the metric differences for the race groups from the bootstrap
# metrics (presumably each group vs. the 'White' reference group set on
# eqb_bootstrap — confirm against the EquiBoots docs).
diffs = eqb_bootstrap.calculate_differences(boots_race_metrics, "race")

In [None]:
# Difference metrics to run the statistical test on.
metrics_boot = [
    "F1_Score_diff",
    "ROC_AUC_diff",
    "Average_Precision_Score_diff",
]

# Parameters for the bootstrap-based significance test. This rebinds
# `test_config`, replacing the chi-square settings used for the point
# estimates.
test_config = {
    "test_type": "bootstrap_test",
    "alpha": 0.05,
    "adjust_method": "bonferroni",
    "confidence_level": 0.95,
    "classification_task": "binary_classification",
    "tail_type": "two_tailed",
    "metrics": metrics_boot,
}

# Run the test through the bootstrap object rather than the point-estimate
# `eq` object: `eqb_bootstrap` produced `boots_race_metrics` and `diffs`, and
# its reference group for race is explicitly set to 'White'.
stat_test_results = eqb_bootstrap.analyze_statistical_significance(
    metric_dict=boots_race_metrics,  # pass variable sliced metrics
    var_name="race",  # variable name
    test_config=test_config,  # configuration
    differences=diffs,  # the differences of each race group
)

In [None]:
# Violin plots of the bootstrapped difference metrics for race, coloured by
# group, with the significance-test results attached.
eqb.eq_group_metrics_plot(
    group_metrics=diffs,
    metric_cols=metrics_boot,
    name="race",
    categories="all",
    statistical_tests=stat_test_results,
    plot_type="violinplot",
    color_by_group=True,
    show_grid=True,
    max_cols=3,
    figsize=(14, 5),
)

In [None]:
# Find a decision threshold for each sex group, with 'Male' passed as the
# reference group. The arguments request 100 steps over the range 0.1–0.9
# and a default threshold of 0.5.
group_thresholds_sex = eqb.find_group_thresholds(
    y_true=y_test,
    y_prob=y_prob,
    group_vec=fairness_df['sex'],
    reference_group='Male',
    threshold_range=(0.1, 0.9),
    n_steps=100,
    default_threshold=0.5,
)

In [None]:
# Find a decision threshold for each race group, with 'White' passed as the
# reference group. The arguments request 100 steps over the range 0.1–0.9
# and a default threshold of 0.5.
group_thresholds_race = eqb.find_group_thresholds(
    y_true=y_test,
    y_prob=y_prob,
    group_vec=fairness_df['race'],
    reference_group='White',
    threshold_range=(0.1, 0.9),
    n_steps=100,
    default_threshold=0.5,
)

In [None]:
# Per-row group labels, then predictions built from the group-specific
# thresholds (one set of predictions for race, one for sex).
race_labels = fairness_df["race"].values
sex_labels = fairness_df["sex"].values
y_pred_grouped_thresh_race = eqb.grouped_threshold_predict(
    y_prob, race_labels, group_thresholds_race
)
y_pred_grouped_thresh = eqb.grouped_threshold_predict(
    y_prob, sex_labels, group_thresholds_sex
)

In [None]:
# EquiBoots object evaluated on the predictions produced with the
# race-specific thresholds.
eq_adjusted = eqb.EquiBoots(
    y_true=y_test,
    y_pred=y_pred_grouped_thresh_race,
    y_prob=y_prob,
    fairness_df=fairness_df,
    fairness_vars=["race", "sex"],
    group_min_size=90,
)

eq_adjusted.grouper(groupings_vars=["race", "sex"])


In [None]:
# Extract subgroup data by race
sliced_race_data_adjusted = eq_adjusted.slicer("race")
sliced_sex_data_adjusted = eq_adjusted.slicer("sex")

# Compute fairness performance metrics for each racial group using adjusted predictions
sex_metrics_adjusted = eq_adjusted.get_metrics(sliced_sex_data_adjusted)
race_metrics_adjusted = eq_adjusted.get_metrics(sliced_race_data_adjusted)


In [None]:
# Forest plot of Recall per race group for the threshold-adjusted
# predictions, with 'White' passed as the reference group.
eqb.eq_plot_metrics_forest(
    group_metrics=race_metrics_adjusted,
    reference_group="White",
    metric_name="Recall",
    title="Forest Plot: Recall Across Groups",
)

In [None]:
# Forest plot of Recall per sex group for the threshold-adjusted
# predictions, with 'Male' passed as the reference group.
eqb.eq_plot_metrics_forest(
    group_metrics=sex_metrics_adjusted,
    reference_group="Male",
    metric_name="Recall",
    title="Forest Plot: Recall Across Groups",
)