In [1]:
# Step 1: import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [5]:
# Step 2: load the data
from sklearn.datasets import load_iris
# Later cells read iris.data, iris.target and iris.target_names from this object.
iris = load_iris()

In [6]:
# Step 3: separate the feature matrix (X) from the target labels (y)
X, y = iris.data, iris.target

In [7]:
# Step 4: train/test split
# 20% of the samples are held out for evaluation; the fixed random_state
# makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Feature scaling (optional):
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics influence the scaling.
# NOTE: X_train / X_test are overwritten with their scaled versions, so
# re-running this cell on its own would scale already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Sanity check: shapes of the train/test features and labels
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((120, 4), (30, 4), (120,), (30,))

In [10]:
# Step 5: train (fit) the model
k = 3  # number of neighbors consulted for each prediction
# fit() returns the estimator itself, so construction and training can be chained.
classifier = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
classifier

In [11]:
# Make predictions: classify the held-out test samples with the fitted model.
y_pred = classifier.predict(X_test)

In [12]:
# Evaluate the model: compare test-set predictions against the true labels.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Overall fraction of correct predictions
print(f"Accuracy: {accuracy:.2f}")

# Per-class precision / recall / F1
print("Classification Report:", report, sep="\n")

# Rows are true classes, columns are predicted classes
print("Confusion Matrix:", conf_matrix, sep="\n")

Accuracy: 1.00
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [14]:
# Cross-validation (optional):
# The classifier above was trained on standardized features, so the
# cross-validated model must be scaled too. Putting StandardScaler inside a
# pipeline makes each fold fit the scaler on its own training portion only,
# which keeps the validation portion out of the scaling statistics.
# get_params() carries over the same hyperparameters (e.g. n_neighbors) as
# `classifier` without reusing its fitted state.
cv_pipeline = make_pipeline(
    StandardScaler(),
    KNeighborsClassifier(**classifier.get_params()),
)
cv_scores = cross_val_score(cv_pipeline, X, y, cv=5)
print("Cross-Validation Scores:", cv_scores)
print(f"Mean Accuracy: {np.mean(cv_scores):.2f}")

Cross-Validation Scores: [0.96666667 0.96666667 0.93333333 0.96666667 1.        ]
Mean Accuracy: 0.97


In [17]:
# Hyperparameter tuning:
# Candidate values for 'k' (number of neighbors)
param_grid = {'n_neighbors': [1, 3, 5, 7, 9]}

# Search on the scaled training split only. Tuning on the full dataset would
# let the held-out test rows influence the choice of k, and tuning on unscaled
# features would select k in a different feature space than the one the final
# model is trained and used in.
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid_search.fit(X_train, y_train)

best_k = grid_search.best_params_['n_neighbors']
print(f"Best 'k' value: {best_k}")

# With the default refit=True, GridSearchCV retrains the winning configuration
# on all of (X_train, y_train), so the tuned model can be taken directly.
best_classifier = grid_search.best_estimator_

Best 'k' value: 7


In [20]:
import joblib

# Persist the tuned classifier to disk.
# NOTE: only the classifier is written; the fitted `scaler` is not part of
# this file, so inputs must still be scaled separately before predicting.
model_path = 'iris_classifier_model.pkl'
joblib.dump(best_classifier, model_path)

# Read the model back, as a later session would.
loaded_model = joblib.load(model_path)


In [21]:
# One example sample to classify (edit as needed); the four values must be in
# the same column order as the features the scaler and model were fitted on.
new_data = np.array([[5.1, 3.5, 1.4, 0.2]])

# The model was trained on standardized features, so apply the same fitted scaler.
scaled_new_data = scaler.transform(new_data)

# predict() returns one class index per input row; take the single result
# and map it to its species name.
predicted_class = loaded_model.predict(scaled_new_data)
predicted_species = iris.target_names[predicted_class[0]]

print(f"The predicted species is: {predicted_species}")

The predicted species is: setosa
