In [7]:
import zipfile
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from lightgbm import LGBMClassifier
import lightgbm as lgb
import pandas as pd
import sklearn
import matplotlib.pyplot as plt

# Loan Dataset

In [4]:
loan_dir = "../data/out/feature_engineered_loan_data.csv.zip"
zf = zipfile.ZipFile(loan_dir)
loan = pd.read_csv(zf.open('feature_engineered_loan_data.csv'))

## LGBM Model

In [8]:
model = LGBMClassifier(objective = 'binary', predict_raw_score = True)

X = loan.drop(['Target','Unnamed: 0'], 1)
columns = X.columns
X.columns = range(X.shape[1])
y = loan[['Target']]

train_X, test_X, train_y, test_y = train_test_split(X, y, random_state = 42)
model = model.fit(train_X, train_y)

  X = loan.drop(['Target','Unnamed: 0'], 1)
  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [9]:
train_X.columns = columns
test_X.columns = columns

In [10]:
pred_test_y = model.predict(test_X)
accuracy_score(pred_test_y, test_y)

0.9196649236452561

In [11]:
test_data = test_X
test_data['test_y'] = test_y
test_data['predicted_y'] = pred_test_y

### Group Fairness

A classifier satisfies
this definition if subjects in both protected and unprotected groups
have equal probability of being assigned to the positive predicted
class. 

P(d =
1|G = m) = P(d = 1|G = f ).

In [24]:
p_group_fairness_m = test_data.loc[test_data['CODE_GENDER: M'] == 1].loc[test_data.predicted_y == 1].shape[0]/test_data.loc[test_data['CODE_GENDER: M'] == 1].shape[0]
p_group_fairness_f = test_data.loc[test_data['CODE_GENDER: M'] == 0].loc[test_data.predicted_y == 1].shape[0]/test_data.loc[test_data['CODE_GENDER: M'] == 0].shape[0]

In [25]:
p_group_fairness_m

0.004101659830566949

In [26]:
p_group_fairness_f

0.0018113445295426355

### Predictive Parity

A classifier satisfies this definition if both protected and unprotected
groups have equal PPV – the probability of a subject with positive predictive value to truly belong to the positive class. 

P(Y = 1|d = 1,G = m) = P(Y = 1|d = 1,G = f )

In [28]:
p_predictive_parity_m = test_data.loc[test_data['CODE_GENDER: M'] == 1].loc[test_data.predicted_y == 1].loc[test_data.test_y == 1].shape[0]\
                        /test_data.loc[test_data['CODE_GENDER: M'] == 1].loc[test_data.predicted_y == 1].shape[0]
p_predictive_parity_f = test_data.loc[test_data['CODE_GENDER: M'] == 0].loc[test_data.predicted_y == 1].loc[test_data.test_y == 1].shape[0]\
                        /test_data.loc[test_data['CODE_GENDER: M'] == 0].loc[test_data.predicted_y == 1].shape[0]

In [29]:
p_predictive_parity_m, p_predictive_parity_f

(0.48598130841121495, 0.5978260869565217)

### False positive error rate balance(predictive equality)

 A classifier satisfies this definition if both
protected and unprotected groups have equal FPR – the probability
of a subject in the negative class to have a positive predictive value.

P(d = 1|Y = 0,G = m) = P(d = 1|Y = 0,G = f )

In [30]:
p_predictive_eqaulity_m = test_data.loc[test_data['CODE_GENDER: M'] == 1].loc[test_data.test_y == 0].loc[test_data.predicted_y == 1].shape[0]\
                        /test_data.loc[test_data['CODE_GENDER: M'] == 1].loc[test_data.test_y == 0].shape[0]
p_predictive_eqaulity_f = test_data.loc[test_data['CODE_GENDER: M'] == 0].loc[test_data.test_y == 0].loc[test_data.predicted_y == 1].shape[0]\
                        /test_data.loc[test_data['CODE_GENDER: M'] == 0].loc[test_data.test_y == 0].shape[0]

In [31]:
p_predictive_eqaulity_m, p_predictive_eqaulity_f

(0.002346016038218734, 0.0007831848104481087)

### False negative error rate balance(equal opportunity)

A classifier satisfies this definition if both
protected and unprotected groups have equal FNR – the probability
of a subject in a positive class to have a negative predictive value.

P(d = 0|Y = 1,G = m) = P(d = 0|Y = 1,G = f )

In [32]:
p_predictive_opportunity_m = test_data.loc[test_data['CODE_GENDER: M'] == 1].loc[test_data.test_y == 1].loc[test_data.predicted_y == 0].shape[0]\
                        /test_data.loc[test_data['CODE_GENDER: M'] == 1].loc[test_data.test_y == 1].shape[0]
p_predictive_opportunity_f = test_data.loc[test_data['CODE_GENDER: M'] == 0].loc[test_data.test_y == 1].loc[test_data.predicted_y == 0].shape[0]\
                        /test_data.loc[test_data['CODE_GENDER: M'] == 0].loc[test_data.test_y == 1].shape[0]

In [33]:
p_predictive_opportunity_m, p_predictive_opportunity_f

(0.9803253878168747, 0.9844983089064262)