In [4]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV 
from sklearn.model_selection import StratifiedKFold
import warnings
import os 

# Suppress every warning category for the remainder of the kernel session.
# NOTE(review): this blanket filter also hides library warnings that may be
# informative for the metrics printed in this notebook (e.g. undefined
# precision when a class is never predicted) -- consider narrowing it to the
# specific categories that are known to be noise.
warnings.simplefilter('ignore')

In [10]:
# Directory that holds the dataset file.
# A raw string is used on purpose: in a normal string literal "\D" is an
# invalid escape sequence (DeprecationWarning since Python 3.6, SyntaxWarning
# since 3.12, planned to become an error). The resulting value is unchanged.
# NOTE(review): this is a machine-specific absolute path -- adjust DATA_DIR
# when running the notebook elsewhere.
DATA_DIR = r"D:\Datasets"

# Change the current working directory so the relative file name below
# resolves inside DATA_DIR
os.chdir(DATA_DIR)

# Load the Kyphosis dataset
kyp = pd.read_csv("Kyphosis.csv")

# Extract the target variable "y" (Kyphosis) from the DataFrame
y = kyp['Kyphosis']

# Extract the feature variables "X" (all columns except 'Kyphosis') from the DataFrame
X = kyp.drop('Kyphosis', axis=1)

# Split the data into training (70%) and testing (30%) sets.
# stratify=y keeps the class proportions the same in both parts, and the
# fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                   test_size=0.3, stratify=y,
                                   random_state=24)

# Create a K-Nearest Neighbors (KNN) classifier that votes among 5 neighbours.
# NOTE(review): the features are passed to KNN unscaled; because KNN is
# distance-based, columns with larger numeric ranges may dominate -- consider
# standardising the features before fitting.
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model on the training data
knn.fit(X_train, y_train)

# Make predictions on the test data
y_pred = knn.predict(X_test)

# Print the confusion matrix (rows: true class, columns: predicted class)
print(confusion_matrix(y_test, y_pred))

# Print the accuracy score.
# NOTE(review): in the recorded run every test row is predicted as "absent",
# so accuracy alone is misleading here -- check per-class recall below.
print(accuracy_score(y_test, y_pred))

# Print the classification report (per-class precision / recall / f1)
print(classification_report(y_test, y_pred))


[[20  0]
 [ 5  0]]
0.8
              precision    recall  f1-score   support

      absent       0.80      1.00      0.89        20
     present       0.00      0.00      0.00         5

    accuracy                           0.80        25
   macro avg       0.40      0.50      0.44        25
weighted avg       0.64      0.80      0.71        25



 # GridSearchCV

In [9]:
# Hyperparameter search: choose the number of neighbours for KNN.
# Candidate values are every integer k from 1 through 10.
# NOTE(review): relies on X and y created by the data-loading cell; run that
# cell first on a fresh kernel.
params = {'n_neighbors': list(range(1, 11))}

# Stratified 5-fold splitter; shuffling is seeded so the folds are repeatable
kfold = StratifiedKFold(random_state=24, shuffle=True, n_splits=5)

# Base KNN estimator; the search supplies n_neighbors for each candidate
knn = KNeighborsClassifier()

# Evaluate every candidate by cross-validated accuracy on the full X / y
gcv = GridSearchCV(
    estimator=knn,
    param_grid=params,
    scoring='accuracy',
    cv=kfold,
)
gcv.fit(X, y)

# Print the best parameters, then the best cross-validated score
print(gcv.best_params_, gcv.best_score_, sep='\n')

{'n_neighbors': 7}
0.8029411764705883
