# Credit Card Default Prediction

**Objective:** Predict whether a customer will default on their credit card payment next month using the UCI/Kaggle Default of Credit Card Clients dataset.

In [None]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import SMOTE
import joblib
import warnings
# Suppress every Python warning from this point on. Note that this also hides
# deprecation notices raised by the libraries imported above.
warnings.filterwarnings('ignore')

## Step 2: Load the Dataset

In [None]:
# Read the raw credit-card dataset into a DataFrame and preview the first rows.
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook on another machine.
DATA_PATH = r"C:\Users\Admin\Documents\Personal\GOMYCODE\CHECKPOINTS\Streamlit_Machine_learning project.UCI_Credit_Card.csv"
df = pd.read_csv(DATA_PATH)
df.head()

## Step 3: Clean and Preprocess Data

In [None]:
# Tidy the frame in one pass:
#   * give the target column the short name 'default',
#   * discard the 'ID' column when it is present,
#   * discard every row that contains a missing value.
df = (
    df.rename(columns={'default.payment.next.month': 'default'})
    .drop(columns=['ID'], errors='ignore')
    .dropna()
)

# Separate the predictors from the target column.
y = df['default']
X = df.drop(columns='default')

# Standardize the predictors (fit on X, then transform X).
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

## Step 4: Visualize Class Distribution

In [None]:
# Bar chart of how many customers fall in each target class.
ax = sns.countplot(x=y)
ax.set_title("Default Payment Distribution")
ax.set_xlabel("Default (1=Yes, 0=No)")
ax.set_ylabel("Count")
plt.show()

## Step 5: Handle Class Imbalance with SMOTE

In [None]:
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)
print("Class distribution after SMOTE:")
print(pd.Series(y_resampled).value_counts())

## Step 6: Train Model (Random Forest)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

## Step 7: Evaluate Model

In [None]:
# Compute the evaluation metrics on the test split first, then report them.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("\nConfusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", report)

## Step 8: Save the Trained Model

In [None]:
# Persist the trained classifier.
joblib.dump(model, "credit_default_model.pkl")
print("Model saved as 'credit_default_model.pkl'")

# The model was trained on standardized features, so also persist the fitted
# scaler: inference code must apply it to raw inputs before calling predict.
joblib.dump(scaler, "credit_default_scaler.pkl")
print("Scaler saved as 'credit_default_scaler.pkl'")