<a href="https://colab.research.google.com/github/yashaswinidinesh/pycaret-assignment-yashaswinidinesh/blob/main/notebooks/a_multiclass_classification_penguins.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Multiclass Classification — Penguins (PyCaret 3)

This notebook uses **PyCaret 3** on the **Palmer Penguins** dataset for a multiclass classification task. It contains three code cells: (1) package installs, (2) the full pipeline (run on CPU with `use_gpu=False` for stability in Colab), and (3) a short report of the installed library versions.

In [None]:
# Version-constrained installs for Colab / Jupyter.
# Only pycaret and pandas-datareader carry version bounds (ranges, not exact
# pins); xgboost, lightgbm and catboost are unconstrained, so `--upgrade`
# pulls whatever release is newest at install time.
# NOTE(review): pandas-datareader is not imported by any cell visible in this
# notebook — confirm it is still needed.
# After this cell **restart the runtime** so new binaries load.
%pip -q install "pycaret>=3.0.4,<4" "pandas-datareader>=0.10.0" xgboost lightgbm catboost --upgrade


In [None]:
# === Multiclass Classification — Penguins (PyCaret 3) ===
# End-to-end pipeline: load -> setup -> compare -> tune -> finalize -> save -> predict.
# GPU is disabled (use_gpu=False) for stability; switch it on once your session is stable.

import seaborn as sns
import pandas as pd
from pycaret.classification import (
    compare_models,
    finalize_model,
    plot_model,
    predict_model,
    save_model,
    setup,
    tune_model,
)

# --- 1) Load & clean ---------------------------------------------------------
# Fetch the penguins table, drop `year` when that column exists, discard rows
# with missing values, and renumber the index from zero.
df = (
    sns.load_dataset("penguins")
    .drop(columns=["year"], errors="ignore")
    .dropna()
    .reset_index(drop=True)
)
print("Shape:", df.shape)
print(df["species"].value_counts())

# --- 2) PyCaret setup --------------------------------------------------------
# Fixed session_id for repeatable runs, 3-fold CV, a single worker, CPU only.
setup_options = dict(
    data=df,
    target="species",
    session_id=42,
    use_gpu=False,
    fold=3,
    n_jobs=1,
)
exp = setup(**setup_options)

# --- 3) Compare candidate models, then tune the winner -----------------------
# Both the comparison ranking and the tuning objective use F1.
candidate_models = ["lightgbm", "xgboost", "catboost", "lr", "ridge", "svm"]
metric = "F1"

top = compare_models(include=candidate_models, sort=metric)

# choose_better=True asks tune_model to hand back the untuned model when
# tuning does not improve the optimized metric.
best = tune_model(top, optimize=metric, choose_better=True)

# --- 4) Evaluate, finalize, save, quick inference ----------------------------
plot_model(best, plot="confusion_matrix")

final = finalize_model(best)
path = save_model(final, "penguins_multiclass_classifier_cpu")
print("Saved:", path)

# Score five rows drawn from the cleaned frame as a smoke test of the saved pipeline.
sample = df.sample(n=5, random_state=7)
predictions = predict_model(final, data=sample)
display(predictions)


In [None]:
# Print the installed version of each core library used by this notebook,
# one per line, with labels padded so the colons line up.
import pycaret
import pandas
import numpy
import sklearn

for label, module in (
    ("pycaret:", pycaret),
    ("pandas :", pandas),
    ("numpy  :", numpy),
    ("sklearn:", sklearn),
):
    print(label, module.__version__)
