# Part 1: KNN Classification

1. **Import Libraries and Load Data**

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
# Load the Iris dataset
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

In [3]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

2. **Hyperparameter Tuning with Grid Search**

In [4]:
# Define the model
knn = KNeighborsClassifier()

In [5]:
# Define the parameter grid
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

In [6]:
# Set up the GridSearchCV
grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5, scoring='accuracy')

In [7]:
# Fit the model
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'metric': ['euclidean', 'manhattan'],
                         'n_neighbors': [3, 5, 7, 9],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy')

In [8]:
# Print the best parameters and best score
print("Best parameters found by Grid Search:", grid_search.best_params_)
print("Best Grid Search score:", grid_search.best_score_)

Best parameters found by Grid Search: {'metric': 'euclidean', 'n_neighbors': 3, 'weights': 'uniform'}
Best Grid Search score: 0.9583333333333334


3. **Evaluate the Model**

In [15]:
# Predict on the test data
y_pred = grid_search.predict(X_test)

In [16]:
# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [17]:
# Precision
precision = precision_score(y_test, y_pred, average='weighted')
print("Precision:", precision)

Precision: 1.0


In [18]:
# Recall
recall = recall_score(y_test, y_pred, average='weighted')
print("Recall:", recall)

Recall: 1.0


In [19]:
# F1 Score
f1 = f1_score(y_test, y_pred, average='weighted')
print("F1 Score:", f1)

F1 Score: 1.0


In [20]:
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


### Part 2: KNN Regression

1. **Import Libraries and Load Data**

In [21]:
import warnings
warnings.filterwarnings('ignore')

In [22]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.datasets import load_boston
from sklearn.metrics import mean_squared_error, r2_score

In [23]:
# Load the Boston housing dataset
data = load_boston()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

In [24]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

2. **Hyperparameter Tuning with Grid Search**

In [25]:
# Define the model
knn_reg = KNeighborsRegressor()

In [26]:
# Define the parameter grid
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

In [27]:
# Set up the GridSearchCV
grid_search_reg = GridSearchCV(estimator=knn_reg, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error')

In [28]:
# Fit the model
grid_search_reg.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=KNeighborsRegressor(),
             param_grid={'metric': ['euclidean', 'manhattan'],
                         'n_neighbors': [3, 5, 7, 9],
                         'weights': ['uniform', 'distance']},
             scoring='neg_mean_squared_error')

In [29]:
# Print the best parameters and best score
print("Best parameters found by Grid Search:", grid_search_reg.best_params_)
print("Best Grid Search score (negative MSE):", grid_search_reg.best_score_)

Best parameters found by Grid Search: {'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'distance'}
Best Grid Search score (negative MSE): -33.2971448976826


3. **Evaluate the Model**

In [30]:
# Predict on the test data
y_pred = grid_search_reg.predict(X_test)

In [31]:
# Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 20.998924877624457


In [32]:
# Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)
print("Root Mean Squared Error:", rmse)

Root Mean Squared Error: 4.582458387986132


In [33]:
# R-squared (R²)
r2 = r2_score(y_test, y_pred)
print("R-squared:", r2)

R-squared: 0.7136527808511335
