In [2]:
# ==========================================================
# IRIS FLOWER CLASSIFICATION USING MULTIPLE ALGORITHMS
# ==========================================================

# 1. Import libraries
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 2. Load the Iris dataset
iris = load_iris()

# X holds the features (sepal length, sepal width, petal length, petal width);
# y holds the integer labels (0 = setosa, 1 = versicolor, 2 = virginica).
X, y = iris.data, iris.target
target_names = iris.target_names

# Echo the column and class names so the later reports are easy to read.
print("Feature names:", iris.feature_names)
print("Target names:", target_names)

# 3. Train-test split
# 20% of the rows are held out; stratify=y keeps the class proportions equal
# in both splits, and random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# 4. Feature scaling (important for distance-based and SVM models)
# The scaler is fitted on the training split only and then applied to both
# splits, so no statistics from the test rows leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 5. Define multiple classification algorithms
# Maps a display name to an unfitted estimator. Every estimator with a
# randomized component is seeded with the same value so reruns are repeatable.
models = {
    "Logistic Regression": LogisticRegression(max_iter=200),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    # probability=True makes SVC shuffle the data internally to calibrate
    # predict_proba; random_state pins that shuffle. predict() is unaffected.
    "Support Vector Machine (SVM)": SVC(
        kernel='rbf', probability=True, random_state=42
    ),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Naive Bayes": GaussianNB(),
}

# 6. Train and evaluate each model
# `results` maps each model's display name to its accuracy on the test split.
results = {}

for name, model in models.items():
    # All models share the standardized features. Tree-based models do not
    # require scaling, but using one input keeps the loop uniform.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    acc = accuracy_score(y_test, y_pred)
    results[name] = acc

    # Compute the detailed metrics first, then print everything in one go.
    report = classification_report(y_test, y_pred, target_names=target_names)
    matrix = confusion_matrix(y_test, y_pred)

    print("\n======================================")
    print("MODEL:", name)
    print("Accuracy: {:.2f}%".format(acc * 100))
    print("\nClassification Report:")
    print(report)
    print("Confusion Matrix:")
    print(matrix)

# 7. Show a comparison of all models
# One row per model, in the order the models were evaluated; the name is
# padded to 30 characters so the accuracy column lines up.
header = "\n\n======== MODEL COMPARISON (Accuracy) ========"
print(header)
for name, acc in results.items():
    row = f"{name:30s} : {acc*100:.2f}%"
    print(row)

# 8. Example: Using the best model for a single prediction
# Select the model with the highest test accuracy recorded in `results`
# instead of hard-coding one. On ties, max() keeps the first entry it
# encounters, i.e. the earliest model in the dictionary's insertion order.
best_name = max(results, key=results.get)
best_model = models[best_name]

# A single flower given as a 2-D array (1 row, 4 features), which is the
# layout the scaler and the models were fitted on. Looks like setosa.
sample = np.array([[5.1, 3.5, 1.4, 0.2]])
# The sample must pass through the same fitted scaler as the training data.
sample_scaled = scaler.transform(sample)
sample_pred = best_model.predict(sample_scaled)

print("\nBest model:", best_name)
print("\nSample Input:", sample[0])
# predict() returns an array of class indices; map the first to its name.
print("Predicted Class:", target_names[sample_pred[0]])


Feature names: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Target names: ['setosa' 'versicolor' 'virginica']

MODEL: Logistic Regression
Accuracy: 93.33%

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.90      0.90      0.90        10
   virginica       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30

Confusion Matrix:
[[10  0  0]
 [ 0  9  1]
 [ 0  1  9]]

MODEL: K-Nearest Neighbors
Accuracy: 93.33%

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.83      1.00      0.91        10
   virginica       1.00      0.80      0.89        10

    accuracy                           0.93        30
   mac