In [1]:
# Load data
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Read the training and test splits from the working directory.
train_df = pd.read_csv('./train_data_balanced.csv')
test_df = pd.read_csv('./test_data.csv')

# Split each frame into predictors (everything except 'target') and labels.
X_train = train_df.drop(columns='target')
X_test = test_df.drop(columns='target')
y_train = train_df['target']
y_test = test_df['target']

# Standardize the predictors: the scaler is fitted on the training split only
# and the same fitted transform is then applied to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def summary(y_pred, model=None):
    """Print evaluation metrics for a set of test predictions.

    Compares ``y_pred`` against the notebook-level ``y_test`` and prints the
    accuracy, the confusion matrix and the classification report. If a model
    is supplied and it exposes ``feature_importances_``, the importances are
    printed as well, paired with the ``X_train`` column names and sorted from
    most to least important.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels, one per row of ``y_test``.
    model : fitted estimator, optional
        Estimator whose ``feature_importances_`` should be reported. An
        estimator without that attribute is accepted; a short notice is
        printed instead of the importance table.

    Returns
    -------
    None
        All results are written to stdout.
    """
    print("Model Accuracy:", accuracy_score(y_test, y_pred))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_test, y_pred))
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    if model is None:
        return

    # Not every estimator exposes importances; look the attribute up
    # defensively so passing such a model does not raise AttributeError
    # after the metrics above have already been printed.
    importances = getattr(model, 'feature_importances_', None)
    if importances is None:
        print("\nFeature Importance: not available for", type(model).__name__)
        return

    feature_importance = pd.DataFrame({
        'Feature': X_train.columns,
        'Importance': importances
    }).sort_values('Importance', ascending=False)
    print("\nFeature Importance:")
    print(feature_importance)

In [2]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier

# Build a 100-tree random forest (random_state pinned to 42) and fit it on the
# standardized training features; fit() hands back the estimator itself.
forest = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict on the standardized test features and print the metrics together
# with the model's feature importances.
y_pred = forest.predict(X_test_scaled)
summary(y_pred, model=forest)

Model Accuracy: 0.9239204934886909

Confusion Matrix:
[[1121   47]
 [  64  227]]

Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.95      1168
           1       0.83      0.78      0.80       291

    accuracy                           0.92      1459
   macro avg       0.89      0.87      0.88      1459
weighted avg       0.92      0.92      0.92      1459


Feature Importance:
    Feature  Importance
7    DELINQ    0.197799
11  DEBTINC    0.151179
6     DEROG    0.094162
9      NINQ    0.090819
8     CLAGE    0.086759
2     VALUE    0.067014
0      LOAN    0.061304
1   MORTDUE    0.058081
10     CLNO    0.058002
5       YOJ    0.055491
4       JOB    0.040867
3    REASON    0.038523


In [3]:
# Decision Tree
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree (at most 5 levels, random_state pinned to 42),
# fitted on the standardized training features.
tree = DecisionTreeClassifier(
    max_depth=5,
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict on the standardized test features and print the metrics together
# with the tree's feature importances.
y_pred = tree.predict(X_test_scaled)
summary(y_pred, model=tree)

Model Accuracy: 0.8217957505140507

Confusion Matrix:
[[1008  160]
 [ 100  191]]

Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.86      0.89      1168
           1       0.54      0.66      0.60       291

    accuracy                           0.82      1459
   macro avg       0.73      0.76      0.74      1459
weighted avg       0.84      0.82      0.83      1459


Feature Importance:
    Feature  Importance
7    DELINQ    0.508229
11  DEBTINC    0.173498
6     DEROG    0.144267
8     CLAGE    0.093356
9      NINQ    0.042525
2     VALUE    0.032598
10     CLNO    0.004295
5       YOJ    0.001231
0      LOAN    0.000000
1   MORTDUE    0.000000
3    REASON    0.000000
4       JOB    0.000000


In [4]:
# KNN
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier fitted on the standardized training features.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)

# Predict on the standardized test features; no model is passed to summary(),
# so only the accuracy, confusion matrix and classification report are printed.
y_pred = knn.predict(X_test_scaled)
summary(y_pred)

Model Accuracy: 0.9424263193968472

Confusion Matrix:
[[1155   13]
 [  71  220]]

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.99      0.96      1168
           1       0.94      0.76      0.84       291

    accuracy                           0.94      1459
   macro avg       0.94      0.87      0.90      1459
weighted avg       0.94      0.94      0.94      1459



In [5]:
# SVM
from sklearn.svm import SVC

# RBF-kernel support vector classifier with C=1.0 and gamma='scale',
# fitted on the standardized training features.
svm = SVC(
    kernel='rbf',
    C=1.0,
    gamma='scale',
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict on the standardized test features; no model is passed to summary(),
# so only the accuracy, confusion matrix and classification report are printed.
y_pred = svm.predict(X_test_scaled)
summary(y_pred)

Model Accuracy: 0.8615490061686086

Confusion Matrix:
[[1049  119]
 [  83  208]]

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.90      0.91      1168
           1       0.64      0.71      0.67       291

    accuracy                           0.86      1459
   macro avg       0.78      0.81      0.79      1459
weighted avg       0.87      0.86      0.86      1459

