Q1. Write Python code to implement the KNN classifier algorithm on the load_iris dataset from sklearn.datasets.

ANS-

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fetch the iris data and pull out the feature matrix and the target vector
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create a KNN classifier that uses 5 neighbours (tune this value as needed)
knn = KNeighborsClassifier(n_neighbors=5)

# Fit the classifier on the training portion
knn.fit(X_train, y_train)

# Predict labels for the held-out samples
predictions = knn.predict(X_test)

# Compare predictions with the true labels and report the accuracy
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy of KNN Classifier: {accuracy:.4f}")


Accuracy of KNN Classifier: 1.0000


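Q2. Write Python code to implement the KNN regressor algorithm on the load_boston dataset from sklearn.datasets. (Note: load_boston was removed from scikit-learn in version 1.2, so the code below reads the same housing data from the UCI ML Repository instead.)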

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from urllib.request import urlopen

# URL link to the Boston dataset in UCI ML Repository
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"

# Define column names as per the dataset description (the file itself has no header row)
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'
]

# Read the whitespace-separated file from the URL.
# - The `with` block closes the HTTP response as soon as parsing is done, so the
#   connection is not left open for the lifetime of the kernel.
# - sep=r"\s+" replaces the deprecated delim_whitespace=True option.
with urlopen(url) as response:
    boston_data = pd.read_csv(response, sep=r"\s+", names=column_names, header=None)

# Separate features and target variable
X = boston_data.drop(columns=['MEDV'])  # Features
y = boston_data['MEDV']  # Target variable (housing prices)

# Split the data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the KNN regressor
knn_regressor = KNeighborsRegressor(n_neighbors=5)  # Set the number of neighbors (you can adjust this value)

# Train the KNN regressor
knn_regressor.fit(X_train, y_train)

# Make predictions on the test set
predictions = knn_regressor.predict(X_test)

# Calculate the mean squared error of the model
mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error of KNN Regressor: {mse:.4f}")


Mean Squared Error of KNN Regressor: 25.8601


Q3. Write a Python code snippet to find the optimal value of K for the KNN classifier algorithm using
cross-validation on the load_iris dataset from sklearn.datasets.

In [5]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Fetch the iris data and pull out the feature matrix and the target vector
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for the final test; the seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate k values to try: 1 through 20 inclusive
param_grid = {'n_neighbors': range(1, 21)}

# Base KNN classifier; the grid search sets n_neighbors for each candidate
knn = KNeighborsClassifier()

# 5-fold cross-validated search over k, scored by accuracy, run on the training data only
grid_search = GridSearchCV(
    knn,
    param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)

# Read back the winning k and its cross-validation score
best_k = grid_search.best_params_['n_neighbors']
best_score = grid_search.best_score_

print(f"Best k value: {best_k}")
print(f"Corresponding Cross-Validation Accuracy Score: {best_score:.4f}")

# Score the best estimator found by the search on the held-out test set
best_model = grid_search.best_estimator_
test_accuracy = best_model.score(X_test, y_test)
print(f"Accuracy on Test Set with Best k value: {test_accuracy:.4f}")


Best k value: 3
Corresponding Cross-Validation Accuracy Score: 0.9583
Accuracy on Test Set with Best k value: 1.0000


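Q4. Implement the KNN regressor algorithm with feature scaling on the load_boston dataset from sklearn.datasets. (Note: load_boston was removed from scikit-learn in version 1.2, so the code below reads the same housing data from the UCI ML Repository instead.)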

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from urllib.request import urlopen

# URL link to the Boston dataset in UCI ML Repository
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"

# Define column names as per the dataset description (the file itself has no header row)
column_names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV'
]

# Read the whitespace-separated file from the URL into a Pandas DataFrame.
# - The `with` block closes the HTTP response as soon as parsing is done, so the
#   connection is not left open for the lifetime of the kernel.
# - sep=r"\s+" replaces the deprecated delim_whitespace=True option.
with urlopen(url) as response:
    boston_data = pd.read_csv(response, sep=r"\s+", names=column_names, header=None)

# Separate features and target variable
X = boston_data.drop(columns=['MEDV'])  # Features
y = boston_data['MEDV']  # Target variable (housing prices)

# Split the data into training and testing sets (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize StandardScaler for feature scaling
scaler = StandardScaler()

# Fit the scaler on the training data only, then transform it;
# the test set must not influence the learned mean/variance
X_train_scaled = scaler.fit_transform(X_train)

# Transform the test data using the statistics learned from the training data
X_test_scaled = scaler.transform(X_test)

# Initialize the KNN regressor
knn_regressor = KNeighborsRegressor(n_neighbors=5)  # Set the number of neighbors (you can adjust this value)

# Train the KNN regressor with scaled features
knn_regressor.fit(X_train_scaled, y_train)

# Make predictions on the scaled test set
predictions = knn_regressor.predict(X_test_scaled)

# Calculate the mean squared error of the model
mse = mean_squared_error(y_test, predictions)
print(f"Mean Squared Error of KNN Regressor with Feature Scaling: {mse:.4f}")


Mean Squared Error of KNN Regressor with Feature Scaling: 20.6055


Q5. Write a Python code snippet to implement the KNN classifier algorithm with weighted voting on the load_iris dataset from sklearn.datasets.

In [9]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fetch the iris data and pull out the feature matrix and the target vector
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# KNN classifier with 5 neighbours; weights='distance' selects distance-weighted voting
knn = KNeighborsClassifier(
    n_neighbors=5,
    weights='distance',
)

# Fit the classifier on the training portion
knn.fit(X_train, y_train)

# Predict labels for the held-out samples
predictions = knn.predict(X_test)

# Compare predictions with the true labels and report the accuracy
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy of KNN Classifier with Weighted Voting: {accuracy:.4f}")


Accuracy of KNN Classifier with Weighted Voting: 1.0000


Q6. Implement a function to standardise the features before applying KNN classifier.

In [10]:
from sklearn.preprocessing import StandardScaler

def knn_with_standardization(X_train, X_test, y_train, y_test, n_neighbors=5):
    """Standardise the features, fit a KNN classifier and return its test accuracy.

    Args:
        X_train: Training features; used to fit the scaler and the classifier.
        X_test: Test features; transformed with the scaler fitted on X_train.
        y_train: Training labels.
        y_test: Test labels the predictions are compared against.
        n_neighbors: Number of neighbours passed to KNeighborsClassifier.

    Returns:
        The value of accuracy_score(y_test, predictions) for the test set.
    """
    # Fit the scaler on the training features only, then apply it to both sets
    standardizer = StandardScaler()
    train_std = standardizer.fit_transform(X_train)
    test_std = standardizer.transform(X_test)

    # Fit the classifier on the standardised training features
    classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
    classifier.fit(train_std, y_train)

    # Predict the standardised test features and score against the true labels
    test_predictions = classifier.predict(test_std)
    return accuracy_score(y_test, test_predictions)

# Usage example:
# The function expects an existing train/test split (X_train, X_test, y_train, y_test).
# The iris split built below is only an illustration; substitute your own data as needed.

# Example usage of the function with load_iris dataset
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Fetch the iris data and pull out the feature matrix and the target vector
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Run KNN on standardised features and report the resulting test accuracy
result_accuracy = knn_with_standardization(
    X_train,
    X_test,
    y_train,
    y_test,
    n_neighbors=5,
)
print(f"Accuracy of KNN Classifier with Feature Standardization: {result_accuracy:.4f}")


Accuracy of KNN Classifier with Feature Standardization: 1.0000


Q7. Write a Python function to calculate the Euclidean distance between two points.

In [11]:
import numpy as np  # Using numpy for efficient calculations

def euclidean_distance(point1, point2):
    """Return the Euclidean distance between two points.

    Args:
        point1: Sequence of coordinates of the first point.
        point2: Sequence of coordinates of the second point.

    Returns:
        The square root (via np.sqrt) of the summed squared coordinate differences.

    Raises:
        ValueError: If the two points do not have the same length.
    """
    same_dimensionality = len(point1) == len(point2)
    if not same_dimensionality:
        raise ValueError("Points must have the same dimensionality")

    # Accumulate the squared difference along each dimension
    squared_total = sum((a - b) ** 2 for a, b in zip(point1, point2))

    return np.sqrt(squared_total)

# Example usage:
# Two points in a 3-dimensional space
point_a, point_b = [1, 2, 3], [4, 5, 6]

# Compute the Euclidean distance between them and print it to four decimal places
distance = euclidean_distance(point_a, point_b)
print(f"Euclidean Distance between point_a and point_b: {distance:.4f}")


Euclidean Distance between point_a and point_b: 5.1962


Q8. Write a Python function to calculate the Manhattan distance between two points.

In [12]:
def manhattan_distance(point1, point2):
    """Return the Manhattan distance between two points.

    Args:
        point1: Sequence of coordinates of the first point.
        point2: Sequence of coordinates of the second point.

    Returns:
        The sum of the absolute coordinate differences.

    Raises:
        ValueError: If the two points do not have the same length.
    """
    same_dimensionality = len(point1) == len(point2)
    if not same_dimensionality:
        raise ValueError("Points must have the same dimensionality")

    # Add up the absolute difference along each dimension
    return sum(abs(a - b) for a, b in zip(point1, point2))

# Example usage:
# Two points in a 3-dimensional space
point_a, point_b = [1, 2, 3], [4, 5, 6]

# Compute the Manhattan distance between them and print it
distance = manhattan_distance(point_a, point_b)
print(f"Manhattan Distance between point_a and point_b: {distance}")


Manhattan Distance between point_a and point_b: 9
