Source: https://www.tutorialspoint.com/machine_learning_with_python/machine_learning_with_python_knn_algorithm_finding_nearest_neighbors.htm

# Importing packages

In [41]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Load data

In [42]:
# The remote file is read with explicit column labels: four measurements followed by the class label.
path = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
headernames = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

# header=None states explicitly that the first line of the file is data, not column names.
dataset = pd.read_csv(path, header=None, names=headernames)

# Preview the first rows as a sanity check on the parse.
dataset.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


# Split Train/Test data


In [43]:
# Features are every column except the last; the last column is the class label.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Hold out 30% of the rows for testing; the fixed seed makes the split reproducible.
XTrain, XTest, yTrain, yTest = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
)

# Data scaling

In [44]:
# Feature scaling matters for algorithms that compute distances between samples:
# without it, the feature with the widest value range dominates the distance.
# The scaler statistics are learned from the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
XTrain = scaler.fit_transform(XTrain)
XTest = scaler.transform(XTest)

# Apply GridSearchCV

In [45]:
# Search over k = 2..14 and both neighbour-weighting schemes, scoring each
# combination by accuracy with 5-fold cross-validation on the training split.
# NOTE(review): XTrain was standardized before this search, so every validation
# fold has already contributed to the scaler statistics; wrapping the scaler and
# classifier in a Pipeline would avoid that — confirm whether it matters here.
param_grid = {
    'n_neighbors': list(range(2, 15)),
    'weights': ['uniform', 'distance'],
}

model = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
model.fit(XTrain, yTrain)  # runs the cross-validated search

# Parameter combination selected by the search (displayed as the cell output).
model.best_params_

{'n_neighbors': 13, 'weights': 'uniform'}

# Fit the classifier and predict on the test set

In [47]:
# Build the final k-NN classifier from the hyper-parameters chosen by the grid
# search (both n_neighbors and weights) instead of a hard-coded k, so this cell
# stays in sync if the data, the split seed, or the parameter grid changes.
# A separate name is used so that `model` keeps referring to the fitted
# GridSearchCV object and this cell can be re-run on its own.
knn = KNeighborsClassifier(**model.best_params_)
knn.fit(XTrain, yTrain)

# Predicted class labels for the held-out test rows.
yPred = knn.predict(XTest)

# Classification report

In [48]:
# Compute the three evaluation artefacts up front, then render them in order.
result = confusion_matrix(yTest, yPred)
result1 = classification_report(yTest, yPred)
result2 = accuracy_score(yTest, yPred) * 100  # accuracy as a percentage

display("Confusion Matrix:", result)

display("Classification Report:")
# The report is a multi-line text table, so it is printed rather than displayed.
print(result1)

display("Accuracy:")
display(result2)

'Confusion Matrix:'

array([[18,  0,  0],
       [ 0,  9,  1],
       [ 0,  3, 14]], dtype=int64)

'Classification Report:'

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        18
Iris-versicolor       0.75      0.90      0.82        10
 Iris-virginica       0.93      0.82      0.87        17

       accuracy                           0.91        45
      macro avg       0.89      0.91      0.90        45
   weighted avg       0.92      0.91      0.91        45



'Accuracy:'

91.11111111111111