# Binary Classification — Bank Marketing (UCI)


# Version-constrained installs for Colab (ranges, not exact pins, so reruns may resolve different versions)






In [None]:
%pip -q install "pycaret>=3.0.4,<4" "pandas-datareader>=0.10.0" xgboost lightgbm catboost --upgrade


In [None]:
# === PyCaret Binary Classification (CPU fallback; no GPU) ===
# Run this if the GPU run crashes. Safe + lighter settings.

import io
import subprocess
import sys
import zipfile

import pandas as pd
import requests

print("Python:", sys.version)

# 1) Load UCI Bank Marketing data
# The archive is downloaded into memory and read from there; nothing is
# written to disk.
zip_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip"

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP failure directly instead of letting an
# error page reach ZipFile and surface as a confusing BadZipFile.
response = requests.get(zip_url, timeout=60)
response.raise_for_status()

# Context managers close both the archive and the member file once the CSV
# has been parsed.
with zipfile.ZipFile(io.BytesIO(response.content)) as zf:
    with zf.open("bank-additional/bank-additional-full.csv") as f:
        df = pd.read_csv(f, sep=";")

# Basic cleanup: drop the "duration" column if present, drop rows containing
# NaN, and renumber the index from 0.
# NOTE(review): "duration" is presumably removed to avoid target leakage —
# confirm against the dataset documentation.
# NOTE(review): dropna() only removes real NaN values; if the dataset marks
# missing categories with a string such as "unknown", those rows are kept —
# confirm this is intended.
df = df.drop(columns=["duration"], errors="ignore").dropna().reset_index(drop=True)
print("Data shape:", df.shape)
print(df["y"].value_counts())

# 2) PyCaret workflow on CPU (no GPU)
from pycaret.classification import (
    compare_models,
    finalize_model,
    plot_model,
    predict_model,
    save_model,
    setup,
    tune_model,
)

# Experiment settings, kept deliberately light so the run stays stable.
setup_options = {
    "session_id": 42,
    "use_gpu": False,  # <<< CPU ONLY
    "fold": 3,         # lighter than 10 folds
    "n_jobs": 1,       # conservative threads to avoid crashes
}

# Initialise the experiment on the cleaned frame with "y" as the target column.
exp = setup(data=df, target="y", **setup_options)

# Evaluate a stable subset of models and keep the one ranked first by AUC
candidate_models = ["xgboost", "catboost", "lightgbm", "lr"]
top = compare_models(include=candidate_models, sort="AUC")

# Tune, plot, finalize, save
# NOTE(review): choose_better=True is presumably there so tuning cannot hand
# back a model that scores worse than `top` — confirm against the PyCaret docs
# for the installed version.
best = tune_model(top, optimize="AUC", choose_better=True)
plot_model(best, plot="confusion_matrix")

final = finalize_model(best)

# save_model returns a (pipeline, filename) tuple rather than a bare path.
# Unpack it so `path` really is the saved file name and the message below does
# not dump the whole pipeline repr.
_saved_pipeline, path = save_model(final, "bank_marketing_classifier_cpu")
print("Saved:", path)

# Quick inference demo: score five reproducibly sampled rows.
# The rows come from the same `df` that was passed to setup(), so this is a
# smoke test of the saved pipeline, not an evaluation.
sample = df.sample(n=5, random_state=7)
sample_predictions = predict_model(final, data=sample)
display(sample_predictions)


In [None]:
# Report the versions of the core libraries, with labels padded so the
# colons line up.
import numpy
import pandas
import pycaret
import sklearn

for label, module in (
    ("pycaret", pycaret),
    ("pandas", pandas),
    ("numpy", numpy),
    ("sklearn", sklearn),
):
    print(f"{label:<7}:", module.__version__)
