# Binary Classification — Bank Marketing (UCI)


# Install dependencies for Colab.
# NOTE: only pycaret (>=3.0.4,<4) and pandas-datareader (>=0.10.0) carry
# version constraints; xgboost, lightgbm and catboost are unconstrained, and
# --upgrade asks pip for the newest release that satisfies each constraint,
# so the exact versions installed can differ from run to run.
!pip -q install "pycaret>=3.0.4,<4" "pandas-datareader>=0.10.0" xgboost lightgbm catboost --upgrade

import os, sys, platform, subprocess, pandas as pd, numpy as np
import matplotlib.pyplot as plt

# Show GPU status (Colab: Runtime -> Change runtime type -> GPU).
# Best-effort probe: a missing or failing `nvidia-smi` is reported, not raised,
# so the rest of the notebook keeps running on CPU-only machines.
try:
    gpu_report = subprocess.check_output(["nvidia-smi"])
except (OSError, subprocess.CalledProcessError):
    # OSError: the binary is not installed or not executable.
    # CalledProcessError: the binary ran but exited with a non-zero status.
    print("No NVIDIA GPU detected (this is OK; PyCaret will fall back to CPU).")
else:
    # errors="replace" keeps unexpected bytes in the tool output from
    # aborting what is only an informational printout.
    print(gpu_report.decode("utf-8", errors="replace"))

# Report the installed library versions so a run can be traced back to the
# environment that produced it.
print("PyCaret version check:")
import pycaret
import sklearn

pycaret_version = pycaret.__version__
sklearn_version = sklearn.__version__
print("pycaret", pycaret_version, "| sklearn", sklearn_version)



# Download & load bank marketing dataset (UCI) - not a PyCaret demo dataset
# Source: https://archive.ics.uci.edu/ml/datasets/Bank+Marketing
import io
import zipfile

import pandas as pd
import requests

zip_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip"

# Bound the request so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of handing an error page to ZipFile (which
# would otherwise surface as a confusing BadZipFile).
response = requests.get(zip_url, timeout=60)
response.raise_for_status()
content = response.content

# The archive is read from memory; the context managers close both the archive
# and the member file once the CSV has been parsed.
with zipfile.ZipFile(io.BytesIO(content)) as zf:
    with zf.open("bank-additional/bank-additional-full.csv") as f:
        # The file is semicolon-delimited.
        df = pd.read_csv(f, sep=";")

# Quick sanity check: row/column counts and the first ten column names.
print(df.shape, df.columns.tolist()[:10])

# 'duration' is commonly removed because it is known to leak target
# information. errors="ignore" makes the drop a no-op if the column is already
# gone (e.g. when this cell is re-run). Rows containing NaN are then discarded.
df = df.drop(columns=["duration"], errors="ignore").dropna()

# Target is 'y' (yes/no)
from pycaret.classification import setup, compare_models, tune_model, finalize_model, evaluate_model, pull, save_model, predict_model, plot_model

target_col = "y"

# Treat every string-typed column as categorical, except the target itself:
# the target is not a feature, and PyCaret rejects categorical_features entries
# that are not feature columns.
categorical_cols = [
    col for col in df.select_dtypes(include="object").columns if col != target_col
]

# NOTE: `silent=True` is intentionally not passed. The parameter was removed in
# PyCaret 3.x (the version range installed above), where passing it raises
# TypeError; setup() no longer asks for interactive confirmation.
exp = setup(
    data=df,
    target=target_col,
    session_id=42,     # fixed seed for reproducible splits and models
    use_gpu=True,      # GPU where supported, else CPU fallback
    categorical_features=categorical_cols,
)

# Rank the candidate models by AUC and keep the single best one.
top = compare_models(n_select=1, sort="AUC")

# Hyperparameter-tune the winner, again optimizing AUC.
best = tune_model(top, optimize="AUC")
plot_model(best, plot="confusion_matrix")

# Finalize the tuned model before persisting it.
final = finalize_model(best)

# save_model returns a (pipeline, filename) tuple; keep only the filename so
# the message below prints the path rather than the whole pipeline object.
_, save_path = save_model(final, "bank_marketing_classifier")
print("Saved pipeline at:", save_path)

# Example inference: score five rows drawn reproducibly from the loaded data.
# NOTE(review): these rows still carry the 'y' column and were presumably part
# of the data the model was fit on, so this is a smoke test, not an evaluation.
sample = df.sample(n=5, random_state=7)
preds = predict_model(final, data=sample)
preds.head(n=5)
