In [1]:
"""
UCI adult dataset with Fairlearn mitigator.
"""

import sys

sys.path.append('../')

from src.data.datasets import fetch_openml_dataset
from src.models.fairlearn_mitigators import ExpGradMitigator
from fairlearn.reductions import DemographicParity
from src.models.sklearn_estimators import SklearnClfs
from sklearn.linear_model import LogisticRegression
from src.eval.fairness import eval_binary_clf_fairness

In [2]:
# Notebook-level store for fairness results. test_uci_adult_exp_grad_pipline
# writes the baseline results under "raw" and the mitigated ones under "expgrad".
METRICS = {}

In [3]:
def uci_adult_exp_grad_pipline(sensitive, clfs_dict, constraint=None):
    """Fit baseline and mitigated classifiers on UCI adult and evaluate fairness.

    The dataset is fetched with ``fetch_openml_dataset("UCIadult", sensitive)``.
    The classifiers in ``clfs_dict`` are fitted as-is through ``SklearnClfs``
    and again through ``ExpGradMitigator`` under ``constraint``. Both sets of
    predictions are then scored with ``eval_binary_clf_fairness``.

    Args:
        sensitive (str): sensitive feature, forwarded to the dataset loader.
        clfs_dict (dict): dictionary of classifiers; the same dictionary is
            handed to both ``SklearnClfs`` and ``ExpGradMitigator``.
        constraint: fairness constraint passed to ``ExpGradMitigator``.
            When ``None`` (the default) a new ``DemographicParity()`` is
            created for this call.

    Returns:
        tuple: ``(fairness, fairness_mitigated)``. Each is a dict with one
        ``eval_binary_clf_fairness`` result per key of the dictionary returned
        by ``clfs.predict_all``; the first holds the unmitigated results and
        the second the mitigated ones.
    """
    # Build the default per call: a default evaluated in the signature would be
    # created once at definition time and shared by every call that omits it.
    if constraint is None:
        constraint = DemographicParity()

    # Fetch dataset
    uci_adult = fetch_openml_dataset("UCIadult", sensitive)
    X = uci_adult["features"]
    y_true = uci_adult["labels"]
    sensitive_features = uci_adult["sensitive"]

    # Fit classifiers
    print("Fitting classifiers...")
    clfs = SklearnClfs(clfs_dict)
    clfs.fit_estimator_all(X, y_true)

    # Fit mitigator
    print("Fitting mitigators...")
    mitigators = ExpGradMitigator(clfs_dict, constraint)
    mitigators.fit_estimator_all(X, y_true, sensitive_features=sensitive_features)

    # Predict
    # NOTE: predictions are made on the same X used for fitting, so the
    # metrics below are in-sample.
    y_pred = clfs.predict_all(X)
    y_pred_mitigated = mitigators.predict_all(X)

    # Evaluate fairness
    print("Evaluating fairness...")
    fairness = {}
    fairness_mitigated = {}
    # `clf_name` rather than `type`, to avoid shadowing the builtin.
    # Assumes the mitigated predictions use the same keys as y_pred -- TODO confirm.
    for clf_name in y_pred:
        fairness[clf_name] = eval_binary_clf_fairness(
            y_true, y_pred[clf_name], sensitive_features
        )
        fairness_mitigated[clf_name] = eval_binary_clf_fairness(
            y_true, y_pred_mitigated[clf_name], sensitive_features
        )

    return fairness, fairness_mitigated

In [4]:
def test_uci_adult_exp_grad_pipline():
    """Run the UCI adult pipeline with logistic regression and print group stats.

    The baseline results are stored in ``METRICS["raw"]`` and the mitigated
    results in ``METRICS["expgrad"]``; selected ``by_group`` columns are then
    printed for each.
    """
    model_label = "Logistic regression"
    estimators = {model_label: LogisticRegression(max_iter=1000)}

    # The pipeline returns (baseline results, mitigated results), in that order.
    METRICS["raw"], METRICS["expgrad"] = uci_adult_exp_grad_pipline("sex", estimators)

    columns = ["accuracy", "selection rate", "true positive rate", "false positive rate"]
    sections = (
        ("raw", "Logistic Regression - Raw:"),
        ("expgrad", "Logistic Regression - Exponentiated Gradient:"),
    )
    for position, (metrics_key, heading) in enumerate(sections):
        if position:
            # Blank line between the two reports only.
            print()
        print(heading)
        print(METRICS[metrics_key][model_label].by_group[columns])

# Execute the check: fits the models, fills METRICS and prints the per-group tables.
test_uci_adult_exp_grad_pipline()

Fitting classifiers...
Fitting mitigators...
Evaluating fairness...
