# Credit Scoring Model

We develop and evaluate a model that predicts the chance of a given loan applicant defaulting on loan repayment while keeping the user's data private. Using a dataset from Kaggle (https://www.kaggle.com/code/ajay1735/my-credit-scoring-model/input), and borrowing some ideas from an existing notebook (https://www.kaggle.com/code/ajay1735/my-credit-scoring-model), we compare Scikit Learn models and Concrete ML models. 

In [1]:
# Importing necessary libraries
import time
from functools import partial

import numpy as np
import pandas as pd

In [2]:
# Importing the models, from both scikit-learn and Concrete ML
from sklearn.ensemble import RandomForestClassifier as SklearnRandomForestClassifier
from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier as SklearnDecisionTreeClassifier
from xgboost import XGBClassifier as SklearnXGBoostClassifier

from concrete.ml.sklearn import DecisionTreeClassifier as ConcreteDecisionTreeClassifier
from concrete.ml.sklearn import LogisticRegression as ConcreteLogisticRegression
from concrete.ml.sklearn import RandomForestClassifier as ConcreteRandomForestClassifier
from concrete.ml.sklearn import XGBClassifier as ConcreteXGBoostClassifier

In [3]:
# Reading the dataset
df = pd.read_csv("hmeq.csv")

### Cleaning the dataset

Details on data science aspects can be found in the original notebook https://www.kaggle.com/code/ajay1735/my-credit-scoring-model. We start with the best setting described in the linked notebook and focus on converting the model to FHE with Concrete ML.

In [4]:
# Replacement of NaN variables
df["REASON"].fillna(value="DebtCon", inplace=True)
df["JOB"].fillna(value="Other", inplace=True)
df["DEROG"].fillna(value=0, inplace=True)
df["DELINQ"].fillna(value=0, inplace=True)

df.fillna(value=df.mean(), inplace=True)

# Checking if there is anything left out
assert np.array_equal(df.isnull().sum(), [0] * len(df.isnull().sum()))

In [5]:
# How the dataset is
df.head()

Unnamed: 0,BAD,LOAN,MORTDUE,VALUE,REASON,JOB,YOJ,DEROG,DELINQ,CLAGE,NINQ,CLNO,DEBTINC
0,1,1100,25860.0,39025.0,HomeImp,Other,10.5,0.0,0.0,94.366667,1.0,9.0,33.779915
1,1,1300,70053.0,68400.0,HomeImp,Other,7.0,0.0,2.0,121.833333,0.0,14.0,33.779915
2,1,1500,13500.0,16700.0,HomeImp,Other,4.0,0.0,0.0,149.466667,1.0,10.0,33.779915
3,1,1500,73760.8172,101776.048741,DebtCon,Other,8.922268,0.0,0.0,179.766275,1.186055,21.296096,33.779915
4,0,1700,97800.0,112000.0,HomeImp,Office,3.0,0.0,0.0,93.333333,0.0,14.0,33.779915


In [6]:
# Removing the features BAD, JOB, REASON from the input features set
x_basic = df.drop(columns=["BAD", "JOB", "REASON"])
y = df["BAD"]

### Credit scoring task with Concrete ML

In [7]:
# pylint: disable=too-many-locals


def evaluate(
    model_class, name, x, y, test_size=0.33, show_circuit=False, predict_in_fhe=True, n_bits=None
):
    """Function to evaluate a model class on a given (x, y). This returns different metrics, notably
    in simulate and FHE for Concrete ML models, as well as execution times."""

    print(f"Evaluating {name}")

    # Splitting the data into test and train sets. Remark the use of stratify, to make sure that
    # the testset contains some representative class distribution in our targets
    x_local_tr, x_local_te, y_local_tr, y_local_te = train_test_split(
        x, y, stratify=y, test_size=test_size, random_state=1
    )
    len_x_local_te = len(x_local_te)

    # With a normalization
    model = Pipeline(
        [
            ("preprocessor", StandardScaler()),
            ("model", model_class()),
        ]
    )

    # Training
    model.fit(x_local_tr, y_local_tr)

    # Predicting
    before_time = time.time()
    y_local_pre = model.predict(x_local_te)
    local_t = (time.time() - before_time) / len_x_local_te

    local_a = accuracy_score(y_local_te, y_local_pre)
    local_f = f1_score(y_local_te, y_local_pre, average="macro")
    local_p = precision_score(y_local_te, y_local_pre, average="macro")
    local_r = recall_score(y_local_te, y_local_pre, average="macro")

    max_bit_width = None
    local_a_simulate = None
    local_a_fhe = None
    local_t_simulate = None
    local_t_fhe = None

    # For Concrete ML models
    if getattr(model_class(), "_is_a_public_cml_model", False):
        circuit = model["model"].compile(x)  # pylint: disable=no-member

        # To see the circuit
        if show_circuit:
            print(circuit)

        # Max bitwidth of the circuit
        max_bit_width = circuit.graph.maximum_integer_bit_width()

        # Prediction in simulation
        before_time = time.time()
        y_local_pre_simulate = model.predict(x_local_te, fhe="simulate")
        local_t_simulate = (time.time() - before_time) / len_x_local_te

        local_a_simulate = accuracy_score(y_local_te, y_local_pre_simulate)

        # Prediction in FHE
        if predict_in_fhe:
            before_time = time.time()
            y_local_pre_fhe = model.predict(x_local_te, fhe="execute")
            local_t_fhe = (time.time() - before_time) / len_x_local_te

            local_a_fhe = accuracy_score(y_local_te, y_local_pre_fhe)

    ans = (
        name,
        local_a,
        local_a_simulate,
        local_a_fhe,
        local_f,
        local_p,
        local_r,
        max_bit_width,
        local_t,
        local_t_simulate,
        local_t_fhe,
        len_x_local_te,
        n_bits,
    )

    return ans

In [8]:
list_of_results = []

# For fast models, take a large test_size
test_size = 0.33

# Logistic regression
list_of_results += evaluate(
    SklearnLogisticRegression, "SklearnLogisticRegression", x_basic, y, test_size=test_size
)
list_of_results += evaluate(
    ConcreteLogisticRegression, "ConcreteLogisticRegression", x_basic, y, test_size=test_size
)

# If you want, make it smaller, to avoid to have too long execution
test_size_short = test_size

# Options to tree-based models
n_bits = 3

extra_flags_dt = {"max_depth": 10}
extra_flags_rf = {"max_depth": 7, "n_estimators": 5}
extra_flags_xgb = {"max_depth": 7, "n_estimators": 5}
extra_flags_cml = {"n_bits": n_bits}

# Options
use_dt = False
use_rf = False
use_xgb = True
use_full_dataset_for_cml_models = True

# Decision tree
if use_dt:
    list_of_results += evaluate(
        partial(SklearnDecisionTreeClassifier, **extra_flags_dt),
        "SklearnDecisionTreeClassifier",
        x_basic,
        y,
        test_size=test_size,
    )
    if use_full_dataset_for_cml_models:
        list_of_results += evaluate(
            partial(ConcreteDecisionTreeClassifier, **extra_flags_dt, **extra_flags_cml),
            "ConcreteDecisionTreeClassifier",
            x_basic,
            y,
            test_size=test_size,
            n_bits=n_bits,
            predict_in_fhe=False,
        )
    list_of_results += evaluate(
        partial(ConcreteDecisionTreeClassifier, **extra_flags_dt, **extra_flags_cml),
        "ConcreteDecisionTreeClassifier",
        x_basic,
        y,
        test_size=test_size_short,
        n_bits=n_bits,
    )

# Random Forest
if use_rf:
    list_of_results += evaluate(
        partial(SklearnRandomForestClassifier, **extra_flags_rf),
        "SklearnRandomForestClassifier",
        x_basic,
        y,
        test_size=test_size,
    )
    if use_full_dataset_for_cml_models:
        list_of_results += evaluate(
            partial(ConcreteRandomForestClassifier, **extra_flags_rf, **extra_flags_cml),
            "ConcreteRandomForestClassifier",
            x_basic,
            y,
            test_size=test_size,
            n_bits=n_bits,
            predict_in_fhe=False,
        )
    list_of_results += evaluate(
        partial(ConcreteRandomForestClassifier, **extra_flags_rf, **extra_flags_cml),
        "ConcreteRandomForestClassifier",
        x_basic,
        y,
        test_size=test_size_short,
        n_bits=n_bits,
    )

# XGBoost
if use_xgb:
    list_of_results += evaluate(
        partial(SklearnXGBoostClassifier, **extra_flags_xgb),
        "SklearnXGBoostClassifier",
        x_basic,
        y,
        test_size=test_size,
    )
    if use_full_dataset_for_cml_models:
        list_of_results += evaluate(
            partial(ConcreteXGBoostClassifier, **extra_flags_xgb, **extra_flags_cml),
            "ConcreteXGBoostClassifier",
            x_basic,
            y,
            test_size=test_size,
            n_bits=n_bits,
            predict_in_fhe=False,
        )
    list_of_results += evaluate(
        partial(ConcreteXGBoostClassifier, **extra_flags_xgb, **extra_flags_cml),
        "ConcreteXGBoostClassifier",
        x_basic,
        y,
        test_size=test_size_short,
        n_bits=n_bits,
    )

Evaluating SklearnLogisticRegression
Evaluating ConcreteLogisticRegression
Evaluating SklearnXGBoostClassifier
Evaluating ConcreteXGBoostClassifier
Evaluating ConcreteXGBoostClassifier


# Comparing all the models

In [9]:
# Extract information from list_of_results
size_of_info = 13

model_names = list_of_results[0::size_of_info]

accuracies = list_of_results[1::size_of_info]
accuracies_simulate = list_of_results[2::size_of_info]
accuracies_fhe = list_of_results[3::size_of_info]

recalls = list_of_results[4::size_of_info]
f1s = list_of_results[5::size_of_info]
precisions = list_of_results[6::size_of_info]
max_bit_widths = list_of_results[7::size_of_info]

t = list_of_results[8::size_of_info]
t_simulate = list_of_results[9::size_of_info]
t_fhe = list_of_results[10::size_of_info]

length_dataset = list_of_results[11::size_of_info]
n_bits = list_of_results[12::size_of_info]

# And make a nice table
results_dataframe = pd.DataFrame(
    {
        "Model name": model_names,
        "Quantization (bits)": n_bits,
        "Len of the dataset": length_dataset,
        "Accuracy Score (original)": accuracies,
        "Accuracy Score (simulate)": accuracies_simulate,
        "Accuracy Score (FHE)": accuracies_fhe,
        "Recall Score": recalls,
        "F1 Score": f1s,
        "Precision Score": precisions,
        "Max bitwidth": max_bit_widths,
        "Execution time (original, in seconds)": t,
        "Execution time (simulate, in seconds)": t_simulate,
        "Execution time (FHE, in seconds)": t_fhe,
    }
)

In [10]:
pd.set_option("display.precision", 3)
results_dataframe = results_dataframe.fillna("")
results_dataframe  # pylint: disable=W0104

Unnamed: 0,Model name,Quantization (bits),Len of the dataset,Accuracy Score (original),Accuracy Score (simulate),Accuracy Score (FHE),Recall Score,F1 Score,Precision Score,Max bitwidth,"Execution time (original, in seconds)","Execution time (simulate, in seconds)","Execution time (FHE, in seconds)"
0,SklearnLogisticRegression,,1967,0.83,,,0.645,0.762,0.621,,1.018e-06,,
1,ConcreteLogisticRegression,,1967,0.83,0.83,0.83,0.645,0.765,0.62,18.0,1.469e-06,0.0,0.001
2,SklearnXGBoostClassifier,,1967,0.883,,,0.805,0.828,0.788,,2.694e-06,,
3,ConcreteXGBoostClassifier,3.0,1967,0.84,0.84,,0.649,0.825,0.621,4.0,2.003e-05,0.001,
4,ConcreteXGBoostClassifier,3.0,1967,0.84,0.84,0.84,0.649,0.825,0.621,4.0,1.638e-05,0.001,0.225
