IMPORT LIBRARY

In [23]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

IMPORT FILE

In [24]:
import os

base_dir = os.getcwd()
file_path = os.path.join(base_dir, 'csv', 'hasil_clustering_final_basefit.csv')

df = pd.read_csv(file_path)
df.head()

Unnamed: 0,loan_number,amount_borrowed,term,borrower_rate,installment,grade,origination_date,principal_balance,principal_paid,interest_paid,...,last_payment_date,next_payment_due_date,days_past_due,loan_status_description,data_source,borrower_id,loan_due_date,grade_encoded,cluster,cluster_elbow
0,1047556,4500.0,36,0.3182,195.55,G,2026-05-30,4346.27,153.73,237.37,...,2026-09-30,2026-08-30,0,CURRENT,Prosper,104707556,2029-06-30,7,2,3
1,1047487,10000.0,36,0.2444,394.64,E,2026-05-30,0.0,10000.0,552.77,...,2026-10-30,2026-08-30,0,COMPLETED,Prosper,104707487,2029-06-30,5,2,3
2,1045856,9000.0,36,0.0899,286.16,B,2026-05-30,8561.29,438.71,133.61,...,2026-09-30,2026-08-30,0,CURRENT,Prosper,104505856,2029-06-30,2,3,0
3,1050525,30000.0,36,0.0799,939.95,A,2026-05-30,28515.86,1484.14,395.76,...,2026-09-30,2026-08-30,0,CURRENT,Prosper,105000525,2029-06-30,1,3,2
4,1047529,6001.0,36,0.3182,260.77,G,2026-05-30,4389.44,1611.56,188.44,...,2026-08-30,2026-08-30,0,CURRENT,Prosper,104707529,2029-06-30,7,2,3


SET COLUMN AND X,Y

In [25]:
features_pred = ['amount_borrowed', 'term', 'borrower_rate', 'installment', 'grade_encoded']
target_pred = 'cluster_elbow' # Kita prediksi Label Cluster yang sudah Anda buat

X = df[features_pred]
y = df[target_pred]


SPLIT DATA TEST TRAIN

In [26]:
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier

In [27]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [28]:
import joblib

SCALER_FEATURES = [
    'amount_borrowed', 
    'borrower_rate', 
    'installment',
    'term', 
    'borrower_rate',
    'grade_encoded'
]

X_train_scaler_data = X_train[SCALER_FEATURES]
scaler = StandardScaler()
scaler.fit(X_train_scaler_data)

output_path = os.path.join(os.getcwd(), 'model')
os.makedirs(output_path, exist_ok=True) # Pastikan folder 'model' ada

scaler_file_path = os.path.join(output_path, 'scaler.pkl')
joblib.dump(scaler, scaler_file_path)

print(f"✅ StandardScaler berhasil dilatih dan disimpan di: {scaler_file_path}")

# --- 3. TRANSFORMASI DATA ---
# Terapkan scaling ke data pelatihan dan pengujian Anda

X_train_scaled = scaler.transform(X_train_scaler_data)
X_test_scaled = scaler.transform(X_test[SCALER_FEATURES])

✅ StandardScaler berhasil dilatih dan disimpan di: d:\project\model\scaler.pkl


TRAINING MODEL

In [29]:
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

# 1. Encode Target dulu (XGBoost wajib angka 0, 1, 2...)
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)

# 2. Setup Model XGBoost Khusus CPU
model_cpu = XGBClassifier(
    tree_method='hist',       # Metode 'histogram' ini SANGAT CEPAT di CPU
    n_jobs=-1,                # Pakai semua core CPU
    n_estimators=100,
    random_state=42
)

print("Sedang melatih XGBoost di CPU...")
model_cpu.fit(X_train_scaled, y_train_enc)
print("Selesai!")


Sedang melatih XGBoost di CPU...
Selesai!


EVALUASI

In [30]:
# # Evaluasi
# acc = model_cpu.score(X_test, y_test_enc)
# print(f"Akurasi: {acc*100:.2f}%")

from sklearn.metrics import classification_report

# 1. Prediksi menggunakan data test yang SUDAH DI-SCALING
y_pred = model_cpu.predict(X_test_scaled)


# 2. Akurasi
acc = model_cpu.score(X_test_scaled, y_test_enc) # Ganti X_test menjadi X_test_scaled
print(f"Akurasi: {acc*100:.2f}%")


Akurasi: 99.17%


In [31]:

# 3. Laporan Klasifikasi (PENTING untuk risiko pinjaman!)
print("\nLaporan Klasifikasi:")
print(classification_report(y_test_enc, y_pred))


Laporan Klasifikasi:
              precision    recall  f1-score   support

           0       0.98      1.00      0.99    362463
           1       1.00      0.99      1.00    243047
           2       0.99      0.99      0.99    151561
           3       1.00      0.98      0.99    335664
           4       0.98      1.00      0.99     57324

    accuracy                           0.99   1150059
   macro avg       0.99      0.99      0.99   1150059
weighted avg       0.99      0.99      0.99   1150059



SAVE MODEL

In [33]:
from skl2onnx import convert_sklearn 

from onnxmltools.convert.common.data_types import FloatTensorType
import onnxmltools 
import os

# --- Definisi Fitur (7 Fitur) ---
# Menggunakan FloatTensorType dari ONNXMLTOOLS
initial_type = [('float_input', FloatTensorType([None, 6]))]
core_model = model_cpu.get_booster()


# 1. Konversi Model XGBoost ke ONNX (Percobaan Pertama & Terkuat)
try:
    print("Percobaan 1: Menggunakan onnxmltools.convert_xgboost dengan tipe data yang benar...")
    onnx_model = onnxmltools.convert_xgboost(
        core_model,  # Objek booster
        initial_types=initial_type,
        target_opset=12 # Opset yang stabil
    )
    
except Exception as e:
    # Jika convert_xgboost masih gagal, kita kembali ke convert_sklearn
    print(f"Percobaan 1 gagal. Mencoba convert_sklearn... Detail: {e}")
    
    # NOTE: Jika Anda menggunakan convert_sklearn, Anda HARUS menggunakan FloatTensorType dari skl2onnx
    from skl2onnx.common.data_types import FloatTensorType as SklearnFloatTensorType
    
    onnx_model = convert_sklearn(
        model_cpu,  # Objek wrapper Scikit-learn
        initial_types=[('float_input', SklearnFloatTensorType([None, 6]))],
        target_opset=12
    )

# 2. Simpan Model ONNX
onnx_file_path = os.path.join(output_path, 'loan_model.onnx')
onnxmltools.utils.save_model(onnx_model, onnx_file_path)

print(f"\n✅ SUKSES! Model ONNX tersimpan di: {onnx_file_path}")

Percobaan 1: Menggunakan onnxmltools.convert_xgboost dengan tipe data yang benar...

✅ SUKSES! Model ONNX tersimpan di: d:\project\model\loan_model.onnx
