# Credit Risk Ensemble Techniques

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter

In [3]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

# Read the CSV and Perform Basic Data Cleaning

In [5]:
# Load the data
file_path = Path('Resources/lending_data.csv')
df = pd.read_csv(file_path)

df.head()

Unnamed: 0,loan_size,interest_rate,homeowner,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt,loan_status
0,10700.0,7.672,own,52800,0.431818,5,1,22800,low_risk
1,8400.0,6.692,own,43600,0.311927,3,0,13600,low_risk
2,9000.0,6.963,rent,46100,0.349241,3,0,16100,low_risk
3,10700.0,7.664,own,52700,0.43074,5,1,22700,low_risk
4,10800.0,7.698,mortgage,53000,0.433962,5,1,23000,low_risk


# Split the Data into Training and Testing

In [5]:
# Create our features
X = # YOUR CODE HERE

# Create our target
y = # YOUR CODE HERE

In [6]:
X.describe()

Unnamed: 0,loan_size,interest_rate,borrower_income,debt_to_income,num_of_accounts,derogatory_marks,total_debt,homeowner_mortgage,homeowner_own,homeowner_rent
count,55000.0,55000.0,55000.0,55000.0,55000.0,55000.0,55000.0,55000.0,55000.0,55000.0
mean,11980.701981,8.216798,57922.807923,0.239614,6.281636,1.360855,27922.807923,0.503309,0.396982,0.099709
std,2428.659146,1.03218,9714.636584,0.048573,2.203214,0.590277,9714.636584,0.499994,0.489277,0.299614
min,5000.0,5.25,30000.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0
25%,10439.956986,7.561982,51759.827942,0.208799,5.0,1.0,21759.827942,0.0,0.0,0.0
50%,11609.670503,8.05911,56438.682013,0.232193,6.0,1.0,26438.682013,1.0,0.0,0.0
75%,12902.612993,8.608611,61610.45197,0.258052,7.0,2.0,31610.45197,1.0,1.0,0.0
max,22769.252782,12.801932,101077.011129,0.455385,16.0,4.0,71077.011129,1.0,1.0,1.0


In [7]:
# Check the balance of our target values
# YOUR CODE HERE

low_risk     50000
high_risk     5000
Name: loan_status, dtype: int64

In [8]:
# Split the X and y into X_train, X_test, y_train, y_test
# YOUR CODE HERE

# Ensemble Learners

In this section, you will compare two ensemble algorithms to determine which algorithm results in the best performance. You will train a Balanced Random Forest Classifier and an Easy Ensemble AdaBoost classifier . For each algorithm, be sure to complete the folliowing steps:

1. Train the model using the training data. 
2. Calculate the balanced accuracy score from sklearn.metrics.
3. Print the confusion matrix from sklearn.metrics.
4. Generate a classication report using the `imbalanced_classification_report` from imbalanced-learn.
5. For the Balanced Random Forest Classifier onely, print the feature importance sorted in descending order (most important feature to least important) along with the feature score

Note: Use a random state of 1 for each algorithm to ensure consistency between tests

### Balanced Random Forest Classifier

In [9]:
# Resample the training data with the RandomOversampler
# YOUR CODE HERE

BalancedRandomForestClassifier(random_state=1)

In [10]:
# Calculated the balanced accuracy score
# YOUR CODE HERE

0.9927988250218349

In [11]:
# Display the confusion matrix
# YOUR CODE HERE

array([[  615,     4],
       [  149, 18616]])

In [12]:
# Print the imbalanced classification report
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

  high_risk       0.80      0.99      0.99      0.89      0.99      0.99       619
   low_risk       1.00      0.99      0.99      1.00      0.99      0.99     18765

avg / total       0.99      0.99      0.99      0.99      0.99      0.99     19384



In [13]:
# List the features sorted in descending order by feature importance
# YOUR CODE HERE

loan_size: (0.21788480603139448)
interest_rate: (0.19595880072166166)
borrower_income: (0.1770964986362893)
debt_to_income: (0.13872579131432708)
num_of_accounts: (0.12149751059258558)
derogatory_marks: (0.11796397199368643)
total_debt: (0.028820980671490287)
homeowner_mortgage: (0.0008975938107589897)
homeowner_own: (0.0008326989735687767)
homeowner_rent: (0.00032134725423733495)


### Easy Ensemble AdaBoost Classifier

In [14]:
# Train the Classifier
# YOUR CODE HERE

EasyEnsembleClassifier(n_estimators=100, random_state=1)

In [None]:
# Calculated the balanced accuracy score
# YOUR CODE HERE

In [None]:
# Display the confusion matrix
# YOUR CODE HERE

In [None]:
# Print the imbalanced classification report
# YOUR CODE HERE