# 22 APRIL ASSIGNMENT

Q1. Write Python code to implement the KNN classifier algorithm on the load_iris dataset in
sklearn.datasets.

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fetch the iris bunch and unpack its feature matrix and label vector
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 3-nearest-neighbour model and fit it on the training portion
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Label the held-out samples and compare against the true classes
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0


Q3. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using
cross-validation on the load_iris dataset in sklearn.datasets.

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Fetch the iris bunch and unpack its feature matrix and label vector
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples; only the training portion is searched over below
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate neighbour counts: every K from 1 through 20
param_grid = {'n_neighbors': range(1, 21)}

# Base estimator whose n_neighbors is filled in by the search
knn = KNeighborsClassifier()

# 5-fold cross-validated search over the candidate K values
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Report the K selected by the search
best_k = grid_search.best_params_['n_neighbors']
print("Best K value:", best_k)


Best K value: 3


Q5. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on
the load_iris dataset in sklearn.datasets.

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fetch the iris bunch and unpack its feature matrix and label vector
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 3-nearest-neighbour model configured with weights='distance', fitted on the training portion
knn = KNeighborsClassifier(n_neighbors=3, weights='distance').fit(X_train, y_train)

# Label the held-out samples and compare against the true classes
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0


Q6. Implement a function to standardise the features before applying the KNN classifier.

In [5]:
from sklearn.preprocessing import StandardScaler

def standardize_features(X_train, X_test):
    """Scale both splits with a StandardScaler fitted on the training split only.

    Args:
        X_train: Training feature data; the scaler is fitted on this.
        X_test: Test feature data; transformed with the already-fitted scaler.

    Returns:
        tuple: ``(X_train_std, X_test_std)``, the transformed training and
        test data, in that order.
    """
    # Fit on the training data only, then reuse the same scaler for both splits
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test)


Q7. Write a Python function to calculate the Euclidean distance between two points.

In [6]:
import math

def euclidean_distance(point1, point2):
    """Return the Euclidean (straight-line) distance between two points.

    Args:
        point1: Sequence of numeric coordinates.
        point2: Sequence of numeric coordinates with the same length as
            ``point1``.

    Returns:
        float: Square root of the sum of squared coordinate differences.
        Two empty points are at distance ``0.0``.

    Raises:
        ValueError: If the points do not have the same number of coordinates.
    """
    # Reject mismatched dimensions explicitly; otherwise the trailing
    # coordinates of the longer point would be dropped and a wrong distance
    # returned without any error.
    if len(point1) != len(point2):
        raise ValueError(
            "points must have the same number of dimensions: "
            f"{len(point1)} != {len(point2)}"
        )

    squared_sum = 0.0
    for a, b in zip(point1, point2):
        squared_sum += (a - b) ** 2

    return math.sqrt(squared_sum)


In [7]:
# Two sample 3-D points used to exercise euclidean_distance
point1, point2 = [2, 3, 5], [1, 4, 6]

distance = euclidean_distance(point1, point2)
print("Euclidean distance:", distance)


Euclidean distance: 1.7320508075688772


Q8. Write a Python function to calculate the Manhattan distance between two points.

In [8]:
def manhattan_distance(point1, point2):
    """Return the Manhattan (city-block) distance between two points.

    Args:
        point1: Sequence of numeric coordinates.
        point2: Sequence of numeric coordinates with the same length as
            ``point1``.

    Returns:
        float: Sum of the absolute coordinate differences. Two empty points
        are at distance ``0.0``.

    Raises:
        ValueError: If the points do not have the same number of coordinates.
    """
    # Reject mismatched dimensions explicitly; otherwise the trailing
    # coordinates of the longer point would be dropped and a wrong distance
    # returned without any error.
    if len(point1) != len(point2):
        raise ValueError(
            "points must have the same number of dimensions: "
            f"{len(point1)} != {len(point2)}"
        )

    # Start from a float so the result is a float even for integer inputs
    total = 0.0
    for a, b in zip(point1, point2):
        total += abs(a - b)

    return total


In [9]:
# Two sample 3-D points used to exercise manhattan_distance
point1, point2 = [2, 3, 5], [1, 4, 6]

distance = manhattan_distance(point1, point2)
print("Manhattan distance:", distance)


Manhattan distance: 3.0
