# Testing

In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.linear_model import LogisticRegression

In [18]:
import sys
import os
# Put the directory two levels above the current working directory on the
# import path (presumably where the project's `fileDir` module lives - confirm),
# without adding it a second time when the cell is re-run.
root_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if root_path not in sys.path:
    sys.path.append(root_path)
from fileDir import getDataDir, getModelDir, getPredDir

In [None]:
# Locations of every file this notebook reads or writes, resolved through the
# project's fileDir helpers.
# NOTE(review): the meaning of the positional arguments (1, True) is defined in
# fileDir and is not visible here - confirm before changing them.

# CSV that is read with pd.read_csv and scored.
TEST_PATH = getDataDir("test")
# Artifacts restored with joblib: the classifier, its scaler, and the list of
# feature columns the scoring frame is aligned to.
LOGISTIC_MODEL_PATH = getModelDir("model_logistic", 1, True)
LOGISTIC_SCALER_PATH = getModelDir("scaler_model_logistic", 1, True)
LOGISTIC_FEATURES_PATH = getModelDir("train_features_model_logistic", 1, True)

# Destination of the prediction CSV.
LOGISTIC_PRED_PATH = getPredDir(1, "prediction_logistic")

Load Models and Test Data

In [None]:
# Restore the persisted artifacts.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so these files
# must come from a trusted source.
logistic_model: LogisticRegression = joblib.load(LOGISTIC_MODEL_PATH)
# Object whose .transform(...) is applied to the aligned feature frame.
logistic_scaler = joblib.load(LOGISTIC_SCALER_PATH)
# Column labels (and their order) the feature frame is aligned to before scaling.
logistic_train_features = joblib.load(LOGISTIC_FEATURES_PATH)

# Raw rows to score.
df = pd.read_csv(TEST_PATH)

# Keep the identifiers now: the "ID" column is dropped during preparation but
# is needed again when the prediction file is written.
ids = df["ID"]

Prepare Test Data

In [None]:
# --- Columns that are not model inputs ---------------------------------------
# errors="ignore" tolerates any of these being absent from the file.
drop_cols = [
    'ID', 'pms_i_ymd', 'Area', 'Province', 'Shop Name', 'date_of_birth_week',
    'c_postal_code', 'c_date_of_salary_payment', 'media',
    'place_for_sending_information', 'r_generalcode4',
]
df = df.drop(columns=drop_cols, errors="ignore")

# --- Durations: fold each "*_year" column into its "*_month" partner ---------
# total months = months + years * 12; the year columns are removed afterwards.
for month_col, year_col in (
    ('living_period_month', 'living_period_year'),
    ('c_number_of_working_month', 'c_number_of_working_year'),
):
    df[month_col] = df[month_col] + df[year_col] * 12
df = df.drop(columns=['living_period_year', 'c_number_of_working_year'])

# --- Age ---------------------------------------------------------------------
# Unparseable dates become NaT (errors='coerce'), which yields a missing age
# that the median fill below takes care of.
# NOTE(review): 2025 is a fixed reference year; it presumably has to match the
# year used when the model was trained - confirm before changing it.
birth_dates = pd.to_datetime(df['date_of_birth'], errors='coerce')
df = df.drop(columns=['date_of_birth'])
df['age'] = 2025 - birth_dates.dt.year

# --- Numeric columns: fill gaps with the per-column median -------------------
# NOTE(review): the medians are computed from this frame itself, not taken from
# the training data - confirm that this matches how the model was trained.
num_cols = [
    'number_of_children', 'number_of_resident', 'living_period_month',
    'c_number_of_employee', 'c_monthly_salary', 'c_number_of_working_month',
    'r_expected_credit_limit', 'r_allloan_case', 'r_allloan_amount',
    'r_additional_income', 'r_spouse_income', 'age',
]
numeric_part = df[num_cols]
df[num_cols] = numeric_part.fillna(numeric_part.median())

# --- Categorical columns: missing values become the level 'Unknown' ----------
# NOTE(review): a column stored as numbers with no missing cells stays numeric
# here and is therefore not one-hot encoded by pd.get_dummies later - confirm
# this is the same treatment the training data received.
cols = [
    'gender', 'marital_status', 'postal_code', 'tel_category',
    'type_of_residence', 'c_business_type', 'c_position', 'c_occupation',
    'c_employment_status', 'c_salary_payment_methods', 'r_propose',
    'r_generalcode1', 'r_generalcode2', 'r_generalcode3', 'r_generalcode5',
    'apply',
]
df[cols] = df[cols].fillna('Unknown')

# Frame handed to the feature-alignment step (same object as df, not a copy).
logistic_train_df = df

In [None]:
# Align features
# One-hot encode EVERY category level here (no drop_first). With drop_first=True
# pandas drops whichever level sorts first among the values present in *this*
# frame; if that differs from the level dropped when the training feature list
# was built, a real training column would be removed and then re-created as all
# zeros, silently encoding those rows as the baseline category.
logistic_train_df = pd.get_dummies(logistic_train_df)

# Conform to the training layout in one step: keep only the training columns,
# in the training order, and create any column missing from this frame as 0.
# Levels that the training list does not contain (its baseline level, or levels
# unseen at training time) are discarded by the same call.
logistic_train_df = logistic_train_df.reindex(
    columns=logistic_train_features, fill_value=0
)


# Scaler
# Apply the already-fitted scaler (transform only; nothing is re-fitted here).
X_test_logistic = logistic_scaler.transform(logistic_train_df)

Predict

In [None]:
# predict_proba returns one column per class; column 1 is the probability of
# the model's second class (logistic_model.classes_[1]).
class_probabilities = logistic_model.predict_proba(X_test_logistic)
logistic_y_proba = class_probabilities[:, 1]

# Decision cut-off: probabilities at or above it are labelled 1, the rest 0.
logistic_threshold = 0.486
logistic_pred = np.where(logistic_y_proba >= logistic_threshold, 1, 0)

Export Output

In [None]:
# Two-column result: the identifiers saved before preparation, paired
# positionally with the 0/1 predictions.
logistic_output_df = pd.DataFrame({"ID": ids})
logistic_output_df["default_12month"] = logistic_pred

# index=False keeps the row index out of the written file.
logistic_output_df.to_csv(LOGISTIC_PRED_PATH, index=False)