# Multiclass Classification — Palmer Penguins


# Install dependencies for Colab (version ranges, not exact pins; --upgrade pulls the newest allowed release)
!pip -q install "pycaret>=3.0.4,<4" "pandas-datareader>=0.10.0" xgboost lightgbm catboost --upgrade

import os, sys, platform, subprocess, pandas as pd, numpy as np
import matplotlib.pyplot as plt

# Show GPU status (Colab: Runtime -> Change runtime type -> GPU).
# A missing or failing `nvidia-smi` is reported as "no GPU" rather than
# raised; only the errors that launching the command can produce are caught,
# so unrelated failures are not silently mislabelled.
try:
    gpu_report = subprocess.check_output(["nvidia-smi"])
except (OSError, subprocess.CalledProcessError):
    print("No NVIDIA GPU detected (this is OK; PyCaret will fall back to CPU).")
else:
    # Decode leniently so odd bytes in the tool output cannot raise here.
    print(gpu_report.decode("utf-8", errors="replace"))

# Report the library versions actually installed in this runtime.
print("PyCaret version check:")
import pycaret
import sklearn
print("pycaret", pycaret.__version__, "| sklearn", sklearn.__version__)


In [None]:

import pandas as pd

from pycaret.classification import (
    compare_models,
    finalize_model,
    plot_model,
    predict_model,
    save_model,
    setup,
    tune_model,
)

# Load penguins from the seaborn-data repo (not a PyCaret demo dataset).
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv"
df = pd.read_csv(url)

# Rows without a label cannot be used for supervised training.
df = df.dropna(subset=["species"]).copy()
# Forward-fill the remaining gaps in the feature columns.
# DataFrame.ffill() is the non-deprecated spelling of fillna(method="ffill").
df = df.ffill()

# Initialise the experiment. The `silent` argument is not passed because
# PyCaret 3.x (the version range installed by this notebook) no longer
# accepts it and raises TypeError if it is supplied.
exp = setup(
    data=df,
    target="species",
    session_id=42,
    use_gpu=True,
)

# Pick the best baseline by accuracy, tune it, and inspect its errors.
top = compare_models(sort="Accuracy")
best = tune_model(top, optimize="Accuracy")
plot_model(best, plot="confusion_matrix")

# Finalize the tuned model and persist the pipeline to disk.
final = finalize_model(best)
# save_model returns (pipeline, filename); keep only the filename so the
# message below prints a path rather than the whole tuple.
_, save_path = save_model(final, "penguins_multiclass")
print("Saved pipeline at:", save_path)

# Sanity-check the saved pipeline on a few rows of the input data.
sample = df.sample(5, random_state=7)
predict_model(final, data=sample).head()
