## A Credit Scoring Use Case for Loan Approval: Using Distributed Training with Ray, XGBoost, and Feast
![](images/feast_ray_xgboost.png)

In [1]:
import sys
# Put the parent directory first on the module search path. Presumably this is
# what lets the `utils` and `queries` packages imported further down resolve
# when the notebook is run from its own directory — TODO confirm.
sys.path.insert(0, "../")

### Import General Python libs and modules

In [2]:
import pandas as pd
import numpy as np
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import precision_score

from xgboost_ray import RayXGBClassifier, RayParams
import xgboost as xgb

### Import Feast-related modules and definitions from the feast_repo directories, along with the Ray training module

In [3]:
from feast import FeatureStore
from utils.data_fetcher import DataFetcher
from queries.ray_train_model import CreditRayXGBClassifier

### Create instances of 
 * feature store
 * data fetcher utility class
 * RayXGBoost classifier for distributed training

In [4]:
# [IMPORTANT] Point this at the Feast feature repo inside your own git checkout.
# Either set the FEAST_REPO_PATH environment variable before starting the
# kernel, or edit the default path below. The default is kept unchanged so the
# notebook behaves as before when the variable is not set.
import os

REPO_PATH = Path(
    os.environ.get(
        "FEAST_REPO_PATH",
        "/Users/kike/Library/CloudStorage/OneDrive-VMware,Inc/OCTO/2022-H1/Taurus/Feast/feast_workshops-master/module_3/feature_repo",
    )
).expanduser()

# Fail fast with an actionable message rather than a less obvious error from
# inside Feast when the path does not exist on this machine.
if not REPO_PATH.is_dir():
    raise FileNotFoundError(
        f"Feast feature repo not found at {REPO_PATH!s}. "
        "Set the FEAST_REPO_PATH environment variable or edit REPO_PATH in this cell."
    )

# Feature store backed by the repo above.
store = FeatureStore(repo_path=REPO_PATH)
# Data fetcher utility class; it is given both the store and the repo path.
fetcher = DataFetcher(store, REPO_PATH)
# RayXGBoost classifier used for distributed training and for prediction below.
xgboost_cls = CreditRayXGBClassifier(store, fetcher)

Columns int training df: Index(['loan_id', 'dob_ssn', 'zipcode', 'person_age', 'person_income',
       'person_home_ownership', 'person_emp_length', 'loan_intent',
       'loan_amnt', 'loan_int_rate', 'loan_status', 'event_timestamp',
       'created_timestamp__', 'city', 'state', 'location_type',
       'tax_returns_filed', 'population', 'total_wages', 'credit_card_due',
       'mortgage_due', 'student_loan_due', 'vehicle_loan_due', 'hard_pulls',
       'missed_payments_2y', 'missed_payments_1y', 'missed_payments_6m',
       'bankruptcies'],
      dtype='object')
Columns to drop: ['event_timestamp', 'created_timestamp__', 'loan_id', 'loan_status']


  if XGBOOST_LOOSE_VERSION < LooseVersion("1.4.0"):


### Train the RayXGBoost classifier with distributed training on localhost, using four cores (processes)
![](images/xgboost_multi_core.png)

In [5]:
# NOTE(review): %timeit calls train() repeatedly (the recorded output reports
# 7 runs of 1 loop each), so the model is retrained on every run and the
# training logs are emitted each time. Consider %time if a single timed
# training pass is sufficient.
%timeit xgboost_cls.train()

2022-06-15 13:30:39,197	INFO main.py:980 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for training.
2022-06-15 13:30:41,834	INFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.
[2m[36m(_RemoteRayXGBoostActor pid=99281)[0m [13:30:41] task [xgboost.ray]:140204653047056 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=99282)[0m [13:30:41] task [xgboost.ray]:140225325165840 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=99283)[0m [13:30:41] task [xgboost.ray]:140467452996880 got new rank 2
[2m[36m(_RemoteRayXGBoostActor pid=99284)[0m [13:30:41] task [xgboost.ray]:140409201159440 got new rank 3
2022-06-15 13:30:46,393	INFO main.py:1516 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 7.35 seconds (4.55 pure XGBoost training time).
2022-06-15 13:30:47,121	INFO main.py:1556 -- [RayXGBoost] Created 4 remote actors.
2022-06-15 13:30:49,601	INFO main.py:1573 -- [RayXGBoost] Starting XGBoost predicti

 predictions: [0 0 0 ... 0 0 1]


2022-06-15 13:30:52,747	INFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.
[2m[36m(_RemoteRayXGBoostActor pid=99322)[0m [13:30:52] task [xgboost.ray]:140154193276176 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=99321)[0m [13:30:52] task [xgboost.ray]:140430544989456 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=99324)[0m [13:30:52] task [xgboost.ray]:140640191900944 got new rank 3
[2m[36m(_RemoteRayXGBoostActor pid=99323)[0m [13:30:52] task [xgboost.ray]:140461143354640 got new rank 2
2022-06-15 13:30:57,298	INFO main.py:1516 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 7.22 seconds (4.54 pure XGBoost training time).
2022-06-15 13:30:58,055	INFO main.py:1556 -- [RayXGBoost] Created 4 remote actors.
2022-06-15 13:31:00,337	INFO main.py:1573 -- [RayXGBoost] Starting XGBoost prediction.
2022-06-15 13:31:00,986	INFO main.py:980 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for traini

 predictions: [0 0 0 ... 0 0 1]


2022-06-15 13:31:03,483	INFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.
[2m[36m(_RemoteRayXGBoostActor pid=99367)[0m [13:31:03] task [xgboost.ray]:140243843247376 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=99369)[0m [13:31:03] task [xgboost.ray]:140692402695440 got new rank 3
[2m[36m(_RemoteRayXGBoostActor pid=99368)[0m [13:31:03] task [xgboost.ray]:140599654182160 got new rank 2
[2m[36m(_RemoteRayXGBoostActor pid=99366)[0m [13:31:03] task [xgboost.ray]:140443020127504 got new rank 0
2022-06-15 13:31:08,694	INFO main.py:1516 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 7.76 seconds (5.21 pure XGBoost training time).
2022-06-15 13:31:09,008	INFO main.py:1556 -- [RayXGBoost] Created 4 remote actors.
[2m[36m(_RemoteRayXGBoostActor pid=99369)[0m 2022-06-15 13:31:09,012	ERROR worker.py:451 -- SystemExit was raised from the worker.
[2m[36m(_RemoteRayXGBoostActor pid=99369)[0m Traceback (most recent call last):
[2m[3

 predictions: [0 0 0 ... 0 0 1]


2022-06-15 13:31:14,319	INFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.
[2m[36m(_RemoteRayXGBoostActor pid=99403)[0m [13:31:14] task [xgboost.ray]:140528120196368 got new rank 3
[2m[36m(_RemoteRayXGBoostActor pid=99401)[0m [13:31:14] task [xgboost.ray]:140159688371472 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=99400)[0m [13:31:14] task [xgboost.ray]:140399803657488 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=99402)[0m [13:31:14] task [xgboost.ray]:140588375731376 got new rank 2
2022-06-15 13:31:18,670	INFO main.py:1516 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 6.81 seconds (4.35 pure XGBoost training time).
2022-06-15 13:31:18,823	INFO main.py:1556 -- [RayXGBoost] Created 4 remote actors.
2022-06-15 13:31:21,281	INFO main.py:1573 -- [RayXGBoost] Starting XGBoost prediction.
2022-06-15 13:31:21,779	INFO main.py:980 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for traini

 predictions: [0 0 0 ... 0 0 1]


2022-06-15 13:31:24,287	INFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.
[2m[36m(_RemoteRayXGBoostActor pid=99426)[0m [13:31:24] task [xgboost.ray]:140531074575536 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=99429)[0m [13:31:24] task [xgboost.ray]:140312163904784 got new rank 3
[2m[36m(_RemoteRayXGBoostActor pid=99427)[0m [13:31:24] task [xgboost.ray]:140590396271888 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=99428)[0m [13:31:24] task [xgboost.ray]:140546636868880 got new rank 2
2022-06-15 13:31:28,641	INFO main.py:1516 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 6.92 seconds (4.35 pure XGBoost training time).
2022-06-15 13:31:28,733	INFO main.py:1556 -- [RayXGBoost] Created 4 remote actors.
2022-06-15 13:31:31,233	INFO main.py:1573 -- [RayXGBoost] Starting XGBoost prediction.
2022-06-15 13:31:31,781	INFO main.py:980 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for traini

 predictions: [0 0 0 ... 0 0 1]


2022-06-15 13:31:34,274	INFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.
[2m[36m(_RemoteRayXGBoostActor pid=99454)[0m [13:31:34] task [xgboost.ray]:140395508198672 got new rank 2
[2m[36m(_RemoteRayXGBoostActor pid=99455)[0m [13:31:34] task [xgboost.ray]:140161436101808 got new rank 3
[2m[36m(_RemoteRayXGBoostActor pid=99452)[0m [13:31:34] task [xgboost.ray]:140453625490704 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=99453)[0m [13:31:34] task [xgboost.ray]:140495233100976 got new rank 1
2022-06-15 13:31:38,634	INFO main.py:1516 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 6.91 seconds (4.35 pure XGBoost training time).
2022-06-15 13:31:38,703	INFO main.py:1556 -- [RayXGBoost] Created 4 remote actors.
2022-06-15 13:31:41,025	INFO main.py:1573 -- [RayXGBoost] Starting XGBoost prediction.
2022-06-15 13:31:41,649	INFO main.py:980 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for traini

 predictions: [0 0 0 ... 0 0 1]


2022-06-15 13:31:44,142	INFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.
[2m[36m(_RemoteRayXGBoostActor pid=99489)[0m [13:31:44] task [xgboost.ray]:140503289556240 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=99490)[0m [13:31:44] task [xgboost.ray]:140663680003344 got new rank 2
[2m[36m(_RemoteRayXGBoostActor pid=99488)[0m [13:31:44] task [xgboost.ray]:140290417650960 got new rank 0
[2m[36m(_RemoteRayXGBoostActor pid=99491)[0m [13:31:44] task [xgboost.ray]:140434036026640 got new rank 3
2022-06-15 13:31:48,601	INFO main.py:1516 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 7.01 seconds (4.45 pure XGBoost training time).
2022-06-15 13:31:48,645	INFO main.py:1556 -- [RayXGBoost] Created 4 remote actors.
2022-06-15 13:31:50,925	INFO main.py:1573 -- [RayXGBoost] Starting XGBoost prediction.
2022-06-15 13:31:51,570	INFO main.py:980 -- [RayXGBoost] Created 4 new actors (4 total actors). Waiting until actors are ready for traini

 predictions: [0 0 0 ... 0 0 1]


2022-06-15 13:31:53,948	INFO main.py:1025 -- [RayXGBoost] Starting XGBoost training.
[2m[36m(_RemoteRayXGBoostActor pid=99517)[0m [13:31:53] task [xgboost.ray]:140287864990992 got new rank 1
[2m[36m(_RemoteRayXGBoostActor pid=99519)[0m [13:31:53] task [xgboost.ray]:140239283154192 got new rank 3
[2m[36m(_RemoteRayXGBoostActor pid=99518)[0m [13:31:53] task [xgboost.ray]:140153782398224 got new rank 2
[2m[36m(_RemoteRayXGBoostActor pid=99516)[0m [13:31:53] task [xgboost.ray]:140513626352912 got new rank 0
2022-06-15 13:31:58,409	INFO main.py:1516 -- [RayXGBoost] Finished XGBoost training on training data with total N=21,478 in 6.90 seconds (4.46 pure XGBoost training time).
2022-06-15 13:31:58,455	INFO main.py:1556 -- [RayXGBoost] Created 4 remote actors.
2022-06-15 13:32:00,664	INFO main.py:1573 -- [RayXGBoost] Starting XGBoost prediction.


 predictions: [0 0 0 ... 0 0 1]
10.2 s ± 458 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Define loan requests

In [6]:
# Two sample loan applications. Every field is wrapped in a single-element
# list, and the prediction cell reads the zipcode back as the first element.
loan_requests = [
    # Request 1: personal loan.
    # NOTE(review): person_emp_length=123.0 looks like an outlier next to
    # person_age=22 — confirm this value is intentional.
    dict(
        zipcode=[76104],
        person_age=[22],
        person_income=[59000],
        person_home_ownership=["RENT"],
        person_emp_length=[123.0],
        loan_intent=["PERSONAL"],
        loan_amnt=[35000],
        loan_int_rate=[16.02],
        dob_ssn=["19530219_5179"],
    ),
    # Request 2: medical loan.
    dict(
        zipcode=[69033],
        person_age=[66],
        person_income=[42000],
        person_home_ownership=["RENT"],
        person_emp_length=[2.0],
        loan_intent=["MEDICAL"],
        loan_amnt=[6475],
        loan_int_rate=[9.99],
        dob_ssn=["19960703_3449"],
    ),
]

### Predict the loan requests

In [7]:
# Score each sample request with the trained classifier and print the decision.
for loan_request in loan_requests:
    # The prediction is rounded to an integer status code before comparison.
    result = round(xgboost_cls.predict(loan_request))
    # NOTE(review): confirm that a status code of 1 really means "approved".
    # If the model is trained on a loan_status label where 1 marks a default,
    # this mapping would be inverted.
    if result == 1:
        loan_status = "approved"
    else:
        loan_status = "rejected"
    print(f"Loan for {loan_request['zipcode'][0]} code {loan_status}: status_code={result}")

2022-06-15 13:32:01,337	INFO main.py:1556 -- [RayXGBoost] Created 1 remote actors.
2022-06-15 13:32:03,367	INFO main.py:1573 -- [RayXGBoost] Starting XGBoost prediction.
2022-06-15 13:32:04,136	INFO main.py:1556 -- [RayXGBoost] Created 1 remote actors.


Loan for 76104 code rejected: status_code=0


2022-06-15 13:32:06,181	INFO main.py:1573 -- [RayXGBoost] Starting XGBoost prediction.


Loan for 69033 code rejected: status_code=0
