# Testing

In [11]:
import pandas as pd
import numpy as np
import joblib
from xgboost import XGBClassifier

In [12]:
import sys
import os
# Make the project root importable: it is taken to be two directory levels
# above the current working directory (so this depends on where the kernel
# was started).
root_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
already_on_path = root_path in sys.path
if not already_on_path:
    sys.path.append(root_path)
from fileDir import getDataDir, getModelDir, getPredDir

Load the model, scaler, and feature list

In [13]:
# Locations of the three persisted artifacts, resolved through the project's
# fileDir helpers.
# NOTE(review): the second argument (0) is presumably the model version
# (the prediction cell mentions "model_v0") — confirm against fileDir.
MODEL_PATH, SCALER_PATH, FEATURES_PATH = (
    getModelDir(artifact, 0) for artifact in ("model", "scaler", "feature")
)

# Location of the dataset to score.
TEST_PATH = getDataDir("test")

In [14]:
# Restore the persisted classifier, scaler and training feature list.
# NOTE: joblib.load unpickles its input, so these paths must only ever point
# at trusted, project-produced files.
model: XGBClassifier
model, scaler, train_features = map(
    joblib.load, (MODEL_PATH, SCALER_PATH, FEATURES_PATH)
)

Load the new dataset and align its columns with the training features

In [15]:
# Columns removed before encoding; any that are absent from the file are
# skipped silently (errors="ignore").
drop_cols = [
    "ID",
    "Shop Name",
    "pms_i_ymd",
    "date_of_birth",
    "c_postal_code",
    "postal_code",
]

raw_test_df = pd.read_csv(TEST_PATH)

# Keep the row identifiers aside: they are dropped from the feature frame
# but are needed again when the predictions are exported.
ids = raw_test_df["ID"]

test_df = raw_test_df.drop(columns=drop_cols, errors="ignore")

In [16]:
# One-hot encode every categorical level present in the test data.
# drop_first is deliberately off here: it would drop whichever level sorts
# first *in this file*, which is not necessarily the level that is absent
# from train_features. When the two differ, the dropped column would be
# re-added below as all zeros and those rows would be silently mis-encoded.
test_df = pd.get_dummies(test_df, drop_first=False)

# Align with the training feature list in a single step: columns missing from
# the test data are added filled with 0, columns not in train_features are
# discarded, and the resulting column order follows train_features.
test_df = test_df.reindex(columns=train_features, fill_value=0)

Scale

In [17]:
# Transform the aligned feature frame with the scaler loaded from SCALER_PATH;
# the result is what the model is asked to score in the next cell.
X_test = scaler.transform(test_df)

Predict

In [18]:
# model_v0 threshold recommendation: probabilities strictly above this value
# are labelled 1, everything else 0.
threshold = 0.178

# Keep only the second predict_proba column (index 1) and binarise it.
y_proba = model.predict_proba(X_test)[:, 1]
preds = np.greater(y_proba, threshold).astype(int)

Export output

In [19]:
# Pair every test-row ID with its predicted label, then write the table
# (without the DataFrame index) to the path returned by getPredDir.
output = pd.DataFrame(dict(ID=ids, default_12month=preds))
pred_path = getPredDir(0)
output.to_csv(pred_path, index=False)