In [1]:
!pip install fairlearn

Collecting fairlearn
  Downloading fairlearn-0.12.0-py3-none-any.whl.metadata (7.0 kB)
Downloading fairlearn-0.12.0-py3-none-any.whl (240 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m240.0/240.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fairlearn
Successfully installed fairlearn-0.12.0


In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from fairlearn.metrics import MetricFrame, true_positive_rate, false_positive_rate

In [34]:
df = pd.read_csv("/content/survey.csv")

In [35]:
# Keep only the needed columns
df = df[df['Gender'].isin(['Male', 'Female'])]
df = df.dropna(subset=['Age', 'Gender', 'treatment'])

In [36]:
print(df.columns)

Index(['Timestamp', 'Age', 'Gender', 'Country', 'state', 'self_employed',
       'family_history', 'treatment', 'work_interfere', 'no_employees',
       'remote_work', 'tech_company', 'benefits', 'care_options',
       'wellness_program', 'seek_help', 'anonymity', 'leave',
       'mental_health_consequence', 'phys_health_consequence', 'coworkers',
       'supervisor', 'mental_health_interview', 'phys_health_interview',
       'mental_vs_physical', 'obs_consequence', 'comments'],
      dtype='object')


In [9]:
# Select all required columns
features = ['Age']
X = df[features]
y = df['treatment']
race = df['Gender']

In [37]:
X_train, X_test, y_train, y_test, race_train, race_test = \
    train_test_split(X, y, race, test_size=0.3, stratify=race)

# Convert 'Yes' to 1 and 'No' to 0
y_train = y_train.apply(lambda x: 1 if x == 'Yes' else 0)
y_test = y_test.apply(lambda x: 1 if x == 'Yes' else 0)

In [38]:
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [39]:
metric_frame = MetricFrame(
    metrics={'TPR': true_positive_rate, 'FPR': false_positive_rate},
    y_true=y_test,
    y_pred=y_pred,
    sensitive_features=race_test
)
print(metric_frame.by_group)

             TPR       FPR
Gender                    
Female  0.275862  0.142857
Male    0.377778  0.305263


In [40]:
tpr_gap = abs(0.65 - 0.48)         # example ~0.17
fpr_gap = abs(0.34 - 0.15)         # example ~0.19
selection_gap = abs(0.50 - 0.27)   # if selection rate is available

In [41]:
#Fairness Mitigation Code with Reweighing

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [42]:
from fairlearn.reductions import ExponentiatedGradient, DemographicParity
from fairlearn.metrics import MetricFrame, selection_rate, true_positive_rate, false_positive_rate, false_negative_rate

In [43]:
df = pd.read_csv("/content/survey.csv")

In [44]:
# Filter to Male and Female only and drop missing values for relevant columns
df = df[df['Gender'].isin(['Male', 'Female'])]
df = df.dropna(subset=['Age', 'Gender', 'treatment'])

In [45]:
# Select features, target, and sensitive attribute for mitigation
features_mitigation = ['Age']
X_mitigation = df[features_mitigation]
y_mitigation = df['treatment']
sensitive_features_mitigation = df['Gender']

# Convert 'Yes' to 1 and 'No' to 0 for the target variable
y_mitigation = y_mitigation.apply(lambda x: 1 if x == 'Yes' else 0)

# Split data into training and testing sets for mitigation
X_mitigation_train, X_mitigation_test, y_mitigation_train, y_mitigation_test, sensitive_features_mitigation_train, sensitive_features_mitigation_test = \
    train_test_split(X_mitigation, y_mitigation, sensitive_features_mitigation, test_size=0.3, stratify=sensitive_features_mitigation)

In [50]:
# Instantiate the base estimator (Logistic Regression)
estimator = LogisticRegression()

# Instantiate the fairness mitigation algorithm (Exponentiated Gradient with Demographic Parity)
mitigator = ExponentiatedGradient(estimator, DemographicParity())

# Fit the mitigator on the training data
mitigator.fit(X_mitigation_train, y_mitigation_train, sensitive_features=sensitive_features_mitigation_train)

# Predict on the test data using the mitigated estimator
y_pred_mitigated = mitigator.predict(X_mitigation_test)

In [51]:
# Evaluate fairness metrics for the mitigated model
metric_frame_mitigated = MetricFrame(
    metrics={'TPR': true_positive_rate, 'FPR': false_positive_rate},
    y_true=y_mitigation_test,
    y_pred=y_pred_mitigated,
    sensitive_features=sensitive_features_mitigation_test
)

print("Fairness metrics for the mitigated model:")
display(metric_frame_mitigated.by_group)

Fairness metrics for the mitigated model:


Unnamed: 0_level_0,TPR,FPR
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,0.423077,0.5
Male,0.611765,0.47
