In [None]:
'''
Classify the iris dataset using the KNN classifier.
Divide the dataset into training, validation, and testing in the ratio 70:15:15. 
Use the functions from the sklearn package. 
Find the best value for k. Normalize the dataset before applying the model. 
Display the training, validation, and testing accuracy, confusion matrix.
'''

In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score

In [2]:
iris = load_iris()
X = iris.data
y = iris.target

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [3]:
X_train, X_temp, y_train, y_temp = train_test_split(X_scaled, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

best_k = 0
best_val_acc = 0


k_values = list(range(1, 11))

In [5]:
cv_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    scores = cross_val_score(knn, X_scaled, y, cv=5, scoring='accuracy')
    cv_scores.append(scores.mean())

In [6]:
best_k = k_values[np.argmax(cv_scores)]

best_knn = KNeighborsClassifier(n_neighbors=best_k)
best_knn.fit(X_train, y_train)

In [7]:
train_pred = best_knn.predict(X_train)
val_pred = best_knn.predict(X_val)
test_pred = best_knn.predict(X_test)

In [8]:
train_acc = accuracy_score(y_train, train_pred)
val_acc = accuracy_score(y_val, val_pred)
test_acc = accuracy_score(y_test, test_pred)

In [9]:
train_cm = confusion_matrix(y_train, train_pred)
val_cm = confusion_matrix(y_val, val_pred)
test_cm = confusion_matrix(y_test, test_pred)

In [10]:
print("Best K value:", best_k)
print("Training Accuracy:", train_acc)
print("Validation Accuracy:", val_acc)
print("Testing Accuracy:", test_acc)

print("\nTraining Confusion Matrix:")
print(train_cm)
print("\nValidation Confusion Matrix:")
print(val_cm)
print("\nTesting Confusion Matrix:")
print(test_cm)

Best K value: 6
Training Accuracy: 0.9523809523809523
Validation Accuracy: 1.0
Testing Accuracy: 1.0

Training Confusion Matrix:
[[31  0  0]
 [ 0 36  1]
 [ 0  4 33]]

Validation Confusion Matrix:
[[13  0  0]
 [ 0  3  0]
 [ 0  0  6]]

Testing Confusion Matrix:
[[ 6  0  0]
 [ 0 10  0]
 [ 0  0  7]]
