<a href="https://colab.research.google.com/github/wolfzxcv/ml-examples/blob/master/iris_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [48]:
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [49]:
# Load the iris dataset: X holds the feature matrix, y the integer class labels
iris = load_iris()
X = iris.data
y = iris.target

# Split dataset into training and test sets (20% held out for the final evaluation).
# RANDOM_STATE is reused for every stochastic estimator below so that a
# Restart & Run All reproduces the same numbers.
RANDOM_STATE = 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)

# Define models as (display name, unfitted estimator) pairs
models = [
    # The one-vs-rest scheme is made explicit with a wrapper: the
    # `multi_class='ovr'` argument is deprecated in scikit-learn >= 1.5, and
    # using liblinear directly on a multiclass target emits a FutureWarning there.
    ('Logistic Regression', OneVsRestClassifier(LogisticRegression(solver='liblinear'))),
    ('Naive Bayes', GaussianNB()),
    ('Support Vector Classifier', SVC(gamma='auto')),
    ('K-Nearest Neighbors', KNeighborsClassifier(n_neighbors=3)),
    # Tree-based models are randomized; seed them so their scores are repeatable
    ('Decision Tree', DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ('Random Forest', RandomForestClassifier(random_state=RANDOM_STATE))
]

# Perform 5-fold cross-validation on the training split only (the test split is
# left untouched) and print mean accuracy with its standard deviation across folds
for name, model in models:
    cv_results = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    print(f'{name}: {cv_results.mean():.6f} ({cv_results.std():.6f})')


Logistic Regression: 0.933333 (0.042492)
Naive Bayes: 0.941667 (0.033333)
Support Vector Classifier: 0.975000 (0.020412)
K-Nearest Neighbors: 0.958333 (0.026352)
Decision Tree: 0.941667 (0.056519)
Random Forest: 0.941667 (0.056519)


In [50]:
# Fit the model on the training data and make predictions on the test data.
# This is the same SVC configuration that appears in the cross-validation comparison.
final_model = SVC(gamma='auto')
final_model.fit(X_train, y_train)
predictions = final_model.predict(X_test)

# Integer class ids in the same order as iris.target_names. Passing them
# explicitly keeps the confusion matrix square and the report rows aligned with
# the class names even if some class is absent from the test split.
class_labels = list(range(len(iris.target_names)))

# Evaluate on test set
print('Accuracy:')
print(accuracy_score(y_test, predictions))

print('\nConfusion Matrix:')
conf_matrix = confusion_matrix(y_test, predictions, labels=class_labels)
# Rows are the true classes, columns the predicted classes
conf_matrix_df = pd.DataFrame(conf_matrix, index=iris.target_names, columns=iris.target_names)
conf_matrix_df.index.name = "True Label"
conf_matrix_df.columns.name = "Predicted Label"
print(conf_matrix_df)

print('\nClassification Report:')
print(classification_report(y_test, predictions, labels=class_labels, target_names=iris.target_names))

Accuracy:
0.9666666666666667

Confusion Matrix:
Predicted Label  setosa  versicolor  virginica
True Label                                    
setosa               11           0          0
versicolor            0          12          1
virginica             0           0          6

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        11
  versicolor       1.00      0.92      0.96        13
   virginica       0.86      1.00      0.92         6

    accuracy                           0.97        30
   macro avg       0.95      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30

