# Credit Card Default Prediction

**Objective:** Predict whether a customer will default on their credit card payment next month using the UCI/Kaggle Default of Credit Card Clients dataset.

In [12]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from imblearn.over_sampling import SMOTE
import joblib
import warnings
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'matplotlib'

## Step 2: Load the Dataset

In [10]:
# Read the raw credit-card dataset from a CSV file resolved relative to the
# current working directory, then preview the first rows as a sanity check.
DATA_FILE = "UCI_Credit_Card.csv"
df = pd.read_csv(DATA_FILE)
df.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,default.payment.next.month
0,1,20000.0,2,2,1,24,2,2,-1,-1,...,0.0,0.0,0.0,0.0,689.0,0.0,0.0,0.0,0.0,1
1,2,120000.0,2,2,2,26,-1,2,0,0,...,3272.0,3455.0,3261.0,0.0,1000.0,1000.0,1000.0,0.0,2000.0,1
2,3,90000.0,2,2,2,34,0,0,0,0,...,14331.0,14948.0,15549.0,1518.0,1500.0,1000.0,1000.0,1000.0,5000.0,0
3,4,50000.0,2,2,1,37,0,0,0,0,...,28314.0,28959.0,29547.0,2000.0,2019.0,1200.0,1100.0,1069.0,1000.0,0
4,5,50000.0,1,2,1,57,-1,0,-1,0,...,20940.0,19146.0,19131.0,2000.0,36681.0,10000.0,9000.0,689.0,679.0,0


## Step 3: Clean and Preprocess Data

In [11]:
# Clean the loaded frame in a single chained expression:
#   1. give the target column the short name 'default',
#   2. drop the ID column when it is present (ignored otherwise),
#   3. discard every row that contains a missing value.
df = (
    df.rename(columns={'default.payment.next.month': 'default'})
      .drop(columns=['ID'], errors='ignore')
      .dropna()
)

# Separate the target from the predictors.
y = df['default']
X = df.drop(columns=['default'])

# Standardize every feature column.
# NOTE(review): the scaler is fitted on all rows, i.e. before any train/test
# split happens later in the notebook -- confirm that is acceptable.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

NameError: name 'StandardScaler' is not defined

## Step 4: Visualize Class Distribution

In [None]:
# Bar chart of how many rows fall into each target class, drawn on an
# explicit Axes so the labels are attached to a known object.
fig, ax = plt.subplots()
sns.countplot(x=y, ax=ax)
ax.set(
    title="Default Payment Distribution",
    xlabel="Default (1=Yes, 0=No)",
    ylabel="Count",
)
plt.show()

## Step 5: Handle Class Imbalance with SMOTE

In [None]:
# Oversample the minority class with SMOTE and report the resulting class counts.
# NOTE(review): this resamples the FULL dataset, so any split taken from
# X_resampled / y_resampled will contain synthetic rows on both sides.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)
resampled_counts = pd.Series(y_resampled).value_counts()
print("Class distribution after SMOTE:", resampled_counts, sep="\n")

## Step 6: Train Model (Random Forest)

In [None]:
# Split the ORIGINAL (scaled) data first, stratified on the target so that both
# partitions keep the real default rate. Oversampling before the split would
# place SMOTE-synthesized rows (interpolated between neighbouring samples) in
# the test set and inflate every metric computed on it.
X_train_raw, X_test, y_train_raw, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Balance the classes on the training partition only; the test set is left
# untouched so evaluation reflects the real class distribution.
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train_raw, y_train_raw)

# Fit the classifier on the balanced training data and predict the held-out rows.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

## Step 7: Evaluate Model

In [None]:
# Compute each metric on the held-out predictions once, then print them.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)

## Step 8: Save the Trained Model

In [4]:
# Persist the fitted classifier.
joblib.dump(model, "credit_default_model.pkl")
print("Model saved as 'credit_default_model.pkl'")

# The classifier was trained on StandardScaler output, so anything that uses
# the saved model must transform raw inputs with the SAME fitted scaler.
# Persist it next to the model; without it the preprocessing cannot be reproduced.
joblib.dump(scaler, "credit_default_scaler.pkl")
print("Scaler saved as 'credit_default_scaler.pkl'")

NameError: name 'joblib' is not defined

In [None]:
# Streamlit User Interface for Credit Card Default Prediction
import streamlit as st
import numpy as np
import pandas as pd
import joblib

st.title('Credit Card Default Prediction')
st.write('Enter customer information to predict the probability of defaulting next month.')

# Month label for each of the six history columns, most recent first.
# NOTE(review): taken from the dataset documentation (PAY_0 / BILL_AMT1 /
# PAY_AMT1 = September ... PAY_6 / BILL_AMT6 / PAY_AMT6 = April) -- verify.
MONTHS = ['Sept', 'Aug', 'July', 'June', 'May', 'April']

# Column order the model is expected to have been trained on: the dataset's
# columns with ID and the target removed.
# NOTE(review): confirm against the columns of the training feature frame.
FEATURE_COLUMNS = (
    ['LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE']
    + ['PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6']
    + [f'BILL_AMT{i}' for i in range(1, 7)]
    + [f'PAY_AMT{i}' for i in range(1, 7)]
)

# Load the trained model.
# joblib files are pickles: only load artifacts you produced yourself.
model = joblib.load('credit_default_model.pkl')

# The model was fitted on standardized features, so raw inputs must go through
# the same fitted scaler before prediction. The scaler file is optional here so
# the app still starts without it, but the user is warned that the output is
# not trustworthy in that case.
SCALER_PATH = 'credit_default_scaler.pkl'
try:
    scaler = joblib.load(SCALER_PATH)
except FileNotFoundError:
    scaler = None
    st.warning(
        f"Scaler file '{SCALER_PATH}' not found: inputs are passed to the model "
        "unscaled, so predictions will be unreliable. Save the fitted "
        "StandardScaler used in training with joblib.dump to enable scaling."
    )

# Define input fields: demographics and credit limit
LIMIT_BAL = st.number_input('Credit Limit (NT dollar)', min_value=0, value=20000)
SEX = st.selectbox('Gender', [1, 2], format_func=lambda x: 'Male' if x == 1 else 'Female')
EDUCATION = st.selectbox('Education', [1, 2, 3, 4], format_func=lambda x: ['Graduate School', 'University', 'High School', 'Other'][x-1])
MARRIAGE = st.selectbox('Marital Status', [1, 2, 3], format_func=lambda x: ['Married', 'Single', 'Other'][x-1])
AGE = st.number_input('Age', min_value=18, max_value=100, value=30)

# Six months of history for each of the three column families. Every widget
# label is unique (family prefix + month), which Streamlit requires.
pay_status = [
    st.number_input(f'Repayment Status ({month})', min_value=-2, max_value=8, value=0)
    for month in MONTHS
]
bill_amounts = [st.number_input(f'Bill Statement ({month})', value=0) for month in MONTHS]
paid_amounts = [st.number_input(f'Amount Paid ({month})', value=0) for month in MONTHS]

# Keep the original single-field names bound to the most recent month.
PAY_0 = pay_status[0]
BILL_AMT1 = bill_amounts[0]
PAY_AMT1 = paid_amounts[0]

# Collect all features in the same order as FEATURE_COLUMNS
input_data = np.array([[
    LIMIT_BAL, SEX, EDUCATION, MARRIAGE, AGE,
    *pay_status,
    *bill_amounts,
    *paid_amounts,
]], dtype=float)

# Fail with a readable message instead of a shape error inside predict() when
# the form and the model disagree on the number of features.
expected_features = getattr(model, 'n_features_in_', None)
if expected_features is not None and input_data.shape[1] != expected_features:
    st.error(
        f'The model expects {expected_features} features but the form provides '
        f'{input_data.shape[1]}. Update the input fields to match the training data.'
    )
    st.stop()

# Predict button
if st.button('Predict Default Probability'):
    if scaler is not None:
        # Pass a named frame so the scaler can match columns to those it was fitted on.
        features = scaler.transform(pd.DataFrame(input_data, columns=FEATURE_COLUMNS))
    else:
        features = input_data
    prediction = model.predict(features)
    probability = model.predict_proba(features)[0][1]
    st.write(f'Prediction: {"Will Default" if prediction[0] == 1 else "Will Not Default"}')
    st.write(f'Probability of Default: {probability:.2%}')