## A Credit Scoring Use Case for Loan Approval: Using XGBoost and Feast
![](images/feast_xgboost.png)

In [1]:
import sys

# Put the parent directory at the front of the module search path
# (presumably so the project packages imported later, e.g. `utils` and
# `queries`, resolve from there — confirm against the repo layout).
# The guard keeps the cell idempotent: re-running it no longer stacks
# duplicate "../" entries onto sys.path.
if sys.path[:1] != ["../"]:
    sys.path.insert(0, "../")

### Import General Python libs and modules

In [2]:
import os
import time
from pathlib import Path

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder

### Import Feast-related modules and definitions from feast_repo directories

In [3]:
from feast import FeatureStore
from utils.data_fetcher import DataFetcher
from queries.train_model import CreditXGBClassifier

### Create instances of:
 * feature store
 * data fetcher utility class
 * XGBoost classifier

In [4]:
# [IMPORTANT] Point this at the `feature_repo` directory of your own checkout.
# Preferred: set the FEAST_REPO_PATH environment variable before starting
# Jupyter, e.g.  export FEAST_REPO_PATH=/path/to/module_3/feature_repo
# When the variable is unset, the original author's local path is used as a
# fallback so existing setups keep working.
REPO_PATH = Path(
    os.environ.get(
        "FEAST_REPO_PATH",
        "/Users/kike/Library/CloudStorage/OneDrive-VMware,Inc/OCTO/2022-H1/Taurus/Feast/feast_workshops-master/module_3/feature_repo",
    )
)

# Fail fast with an actionable message instead of a less obvious error
# from deeper inside the feature store or the data fetcher.
if not REPO_PATH.is_dir():
    raise FileNotFoundError(
        f"Feast feature repo not found at {REPO_PATH}. "
        "Set the FEAST_REPO_PATH environment variable to your feature_repo directory."
    )

# Both helpers are built from the same repository path.
store = FeatureStore(repo_path=REPO_PATH)
fetcher = DataFetcher(store, REPO_PATH)

In [5]:
# Build the credit classifier from the feature store and the data fetcher.
# Construction prints the training DataFrame's columns and a
# "Columns to drop" list (see the cell output).
xgboost_cls = CreditXGBClassifier(store, fetcher)

Columns int training df: Index(['loan_id', 'dob_ssn', 'zipcode', 'person_age', 'person_income',
       'person_home_ownership', 'person_emp_length', 'loan_intent',
       'loan_amnt', 'loan_int_rate', 'loan_status', 'event_timestamp',
       'created_timestamp__', 'city', 'state', 'location_type',
       'tax_returns_filed', 'population', 'total_wages', 'credit_card_due',
       'mortgage_due', 'student_loan_due', 'vehicle_loan_due', 'hard_pulls',
       'missed_payments_2y', 'missed_payments_1y', 'missed_payments_6m',
       'bankruptcies'],
      dtype='object')
Columns to drop: ['event_timestamp', 'created_timestamp__', 'loan_id', 'loan_status']


### Train the XGBoost classifier on localhost using a single process and a single core
![](images/xgboost_single_core.png)

In [6]:
%timeit xgboost_cls.train()

 predictions: [2.7212387e-02 4.6638330e-04 8.2361093e-03 ... 2.5680298e-02 6.0962387e-03
 9.9624002e-01]
Numpy array precision: 1.0
 predictions: [2.7212387e-02 4.6638330e-04 8.2361093e-03 ... 2.5680298e-02 6.0962387e-03
 9.9624002e-01]
Numpy array precision: 1.0
 predictions: [2.7212387e-02 4.6638330e-04 8.2361093e-03 ... 2.5680298e-02 6.0962387e-03
 9.9624002e-01]
Numpy array precision: 1.0
 predictions: [2.7212387e-02 4.6638330e-04 8.2361093e-03 ... 2.5680298e-02 6.0962387e-03
 9.9624002e-01]
Numpy array precision: 1.0
 predictions: [2.7212387e-02 4.6638330e-04 8.2361093e-03 ... 2.5680298e-02 6.0962387e-03
 9.9624002e-01]
Numpy array precision: 1.0
 predictions: [2.7212387e-02 4.6638330e-04 8.2361093e-03 ... 2.5680298e-02 6.0962387e-03
 9.9624002e-01]
Numpy array precision: 1.0
 predictions: [2.7212387e-02 4.6638330e-04 8.2361093e-03 ... 2.5680298e-02 6.0962387e-03
 9.9624002e-01]
Numpy array precision: 1.0
 predictions: [2.7212387e-02 4.6638330e-04 8.2361093e-03 ... 2.5680298e-02 6

### Define loan requests

In [7]:
 loan_requests = [
    {
        "zipcode": [76104],
        "person_age": [22],
        "person_income": [59000],
        "person_home_ownership": ["RENT"],
        "person_emp_length": [123.0],
        "loan_intent": ["PERSONAL"],
        "loan_amnt": [35000],
        "loan_int_rate": [16.02],
        "dob_ssn": ["19530219_5179"]
    },
    {
        "zipcode": [69033],
        "person_age": [66],
        "person_income": [42000],
        "person_home_ownership": ["RENT"],
        "person_emp_length": [2.0],
        "loan_intent": ["MEDICAL"],
        "loan_amnt": [6475],
        "loan_int_rate": [9.99],
        "dob_ssn": ["19960703_3449"]
    }
]

### Predict the loan requests

In [8]:
# Score each request and print the decision next to its zipcode.
# The rounded value returned by predict() is treated as a class label:
# 1 is reported as "approved", any other value as "rejected".
# NOTE(review): verify against the training data that loan_status == 1 really
# denotes an approved loan (and not, e.g., a default) — TODO confirm.
for loan_request in loan_requests:
    result = round(xgboost_cls.predict(loan_request))
    if result == 1:
        loan_status = "approved"
    else:
        loan_status = "rejected"
    print(f"Loan for {loan_request['zipcode'][0]} code {loan_status}: status_code={result}")

Loan for 76104 code approved: status_code=1
Loan for 69033 code rejected: status_code=0


  loop = asyncio.get_event_loop()
