In [None]:
import pandas as pd
import mitigate_disparity
import measure_disparity
import prepare_example_dataset

In [None]:
# Example data: a CSV prepared from the Synthea covid data.
# NOTE(review): the bare module name below is an expression statement and
# calls nothing by itself; presumably the CSV is written as a side effect of
# `import prepare_example_dataset` -- confirm against that module.
prepare_example_dataset

# Load the example dataset from disk.
data = pd.read_csv('MLPE_example_dataset.csv')

# The first 10k records form the fitting set; .copy() detaches the slice
# from `data` so later changes to one do not touch the other.
fit_data = data.iloc[:10000].copy()

# Create the MLPE object from mitigate_disparity and fit it on that slice.
mlpe = mitigate_disparity.MLPE()
mlpe.fit(
    fit_data,
    desired_points_in_lattice=5000,
    r_multiple=1.3,
)

In [None]:
# Query the fitted MLPE object through its feedback function.
# NOTE(review): the meaning of `level`, `sort_by` and `information_source`
# is defined in mitigate_disparity and is not visible here.
feedback = mlpe.feedback(
    level=3,
    sort_by='lowest',
    information_source='csv',
)

# Bare last expression so the notebook renders the result.
feedback

In [None]:
# Predict model sensitivity for the patient records that were not used to fit.

# Everything from row 10000 onward, i.e. the rows after the first 10k records.
new_patient_data = data.iloc[10000:].copy()

print(
    'predicting model sensitivity for',
    len(new_patient_data),
    'new patient records',
)

# Feature vectors only: keep just the float-typed columns.
new_patient_features = new_patient_data.select_dtypes(include=float)

# Run the predict function of the fitted MLPE object on those features.
predict = mlpe.predict(new_patient_features)

# Bare last expression so the notebook renders the predictions.
predict

In [None]:
# Measure disparity on the new patient records.

# Demographic columns only, taken from the same rows that were predicted on.
new_patient_demographics = new_patient_data[
    ['COUNTY', 'RACE', 'ETHNICITY', 'GENDER', 'AGE_BRACKET']
]

# The 5th percentile confidence interval scores on sensitivity serve as the
# produced "model predictions" of disparity.
low_ci_scores = predict['low_ci']

# Build the measurement object from the demographic records, then hand it
# the 5th percentile confidence interval scores via their `.values`.
measure = measure_disparity.MeasureDisparityMLPE(new_patient_demographics)
measure.prepare_data(low_ci_scores.values)

# Disparity can be measured at various "levels" of demographic intersection;
# this cell uses level 1.
measure.measure_disparity(level=1)