In [1]:
# Import necessary libraries
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [3]:
# Load the "Default of Credit Card Clients" dataset from UCI ML Repository
#url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls"
df =pd.read_csv('default of credit card clients.csv', sep=';') 

# Drop the ID column as it is not needed for modeling
df = df.drop(columns=["ID"])

# Convert target variable to binary (1 for default, 0 for no default)
df["default payment next month"] = df["default payment next month"].astype(int)

# Split the data into features and target
X = df.drop(columns=["default payment next month"])
y = df["default payment next month"]

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define machine learning models
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(random_state=42),
    "Support Vector Machine": SVC(random_state=42),
}

In [8]:

# Streamlit app
st.title("Credit Card Default Prediction Comparison")

# Display dataset information
st.write("Dataset Information:")
st.write(f"Number of samples: {X.shape[0]}")
st.write(f"Number of features: {X.shape[1]}")

# Train and evaluate models
metrics = {"Algorithm": [], "Accuracy": [], "Precision": [], "Recall": [], "F1 Score": []}

for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Store metrics in the dictionary
    metrics["Algorithm"].append(model_name)
    metrics["Accuracy"].append(accuracy)
    metrics["Precision"].append(precision)
    metrics["Recall"].append(recall)
    metrics["F1 Score"].append(f1)

# Create a DataFrame from the metrics dictionary
metrics_df = pd.DataFrame(metrics)

# Display metrics in a table
st.write("Accuracy Metrics for Different Algorithms:")
st.table(metrics_df)

2023-12-27 22:58:21.428 
  command:

    streamlit run C:\Users\gpman\anaconda3\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


DeltaGenerator()

In [9]:
# Create .py file
!jupyter nbconvert --to script streamlit_app.ipynb

[NbConvertApp] Converting notebook streamlit_app.ipynb to script
[NbConvertApp] Writing 2817 bytes to streamlit_app.py
