In [1]:
"""
ACS income binary classification.

Fits a set of classifiers on the ACSincome dataset and prints fairness
metrics (overall and per sensitive group) for each of them.
"""

import sys

# Add the parent directory to the import path so that the `src` package
# imported below can be found (assumes the notebook is launched from its own
# directory, one level below the project root — confirm).
sys.path.append('../')

from src.data.datasets import fetch_openml_dataset
from src.models.sklearn_estimators import SklearnClfs, DEFAULT_SKLEARN_CLFS
from src.eval.fairness import eval_binary_clf_fairness
from src.data.utils import num_to_percentile_category

In [2]:
# Fairness results collected by this notebook, keyed by experiment name
# (test_acs_binary_pipeline stores its results under "raw").
METRICS = {}

In [3]:
def acs_binary_pipeline(sensitive, clfs_dict):
    """Fit classifiers on the ACSincome dataset and evaluate their fairness.

    The labels are binarized with ``num_to_percentile_category(labels, 2)``
    before fitting, and every classifier is evaluated against those binary
    labels.

    Args:
        sensitive (str): name of the sensitive feature (e.g. ``"RAC1P"``),
            forwarded to ``fetch_openml_dataset``.
        clfs_dict (dict): dictionary of classifiers, passed to ``SklearnClfs``.

    Returns:
        dict: one entry per key of the ``predict_all`` result, holding the
            value returned by ``eval_binary_clf_fairness`` for those
            predictions.
    """
    # Fetch dataset. The sensitive feature is taken from the argument; it was
    # previously hard-coded to "RAC1P", which silently ignored the parameter.
    acs_income = fetch_openml_dataset("ACSincome", sensitive)
    X = acs_income["features"]
    y_true = acs_income["labels"]
    y_true_binary = num_to_percentile_category(y_true, 2)
    sensitive_features = acs_income["sensitive"]

    # Fit classifiers
    print("Fitting classifiers...")
    clfs = SklearnClfs(clfs_dict)
    clfs.fit_estimator_all(X, y_true_binary)

    # Predict.
    # NOTE(review): predictions are made on the same X that was used for
    # fitting, so the reported metrics are in-sample — confirm this is intended.
    y_pred = clfs.predict_all(X)

    # Evaluate fairness of each classifier's predictions.
    print("Evaluating fairness...")
    return {
        clf_name: eval_binary_clf_fairness(
            y_true_binary, y_pred[clf_name], sensitive_features
        )
        for clf_name in y_pred
    }

In [4]:
def test_acs_binary_pipeline():
    """Test acs_binary_pipeline function."""
    estimator_dict = DEFAULT_SKLEARN_CLFS
    METRICS["raw"] = acs_binary_pipeline("RAC1P", estimator_dict)
    stats_list = ["accuracy", "selection rate", "true positive rate", "false positive rate"]
    for key, val in METRICS["raw"].items():
        print(f"Fairness metrics for {key}:")
        print(val.overall[stats_list])
        print(val.by_group[stats_list])
        print("\n")


# Run the pipeline and print the fairness report shown below.
# Takes about 15 minutes to run.
test_acs_binary_pipeline()

Fitting classifiers...
Evaluating fairness...
Fairness metrics for Logistic regression:
accuracy               0.752055
selection rate         0.512136
true positive rate     0.765118
false positive rate    0.260916
dtype: object
       accuracy  selection rate  true positive rate  false positive rate
RAC1P                                                                   
1.0    0.755065        0.550396            0.792162             0.285570
2.0    0.729436        0.366226            0.627818             0.209800
3.0    0.727854        0.391604            0.672330             0.242693
4.0    0.694260        0.364238            0.594982             0.261563
5.0    0.743821        0.310361            0.585329             0.182451
6.0    0.746346        0.505617            0.727370             0.230075
7.0    0.722716        0.330252            0.566459             0.173759
8.0    0.751260        0.232295            0.472789             0.128154
9.0    0.763315        0.335023         