In [10]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix , accuracy_score , precision_score , recall_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

In [3]:
df = pd.read_csv("C:/Users/gokup/Downloads/heart (1).csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [15]:
x = df.drop("target",axis=1)
y = df['target']

x_train , x_test , y_train , y_test = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.2
)

In [16]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled =  scaler.transform(x_test)

In [17]:
knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(x_train_scaled,y_train)

y_pred = knn_classifier.predict(x_test_scaled)

In [19]:
print("acc_score:",accuracy_score(y_test,y_pred))
print("precision_score:",precision_score(y_test,y_pred))
print("recall_score:",recall_score(y_test,y_pred))

acc_score: 0.8524590163934426
precision_score: 0.9259259259259259
recall_score: 0.78125


In [20]:
# KNN == 7 best score value
knn_classifier = KNeighborsClassifier(n_neighbors=7)
knn_classifier.fit(x_train_scaled,y_train)

y_pred = knn_classifier.predict(x_test_scaled)

print("acc_score:",accuracy_score(y_test,y_pred))
print("precision_score:",precision_score(y_test,y_pred))
print("recall_score:",recall_score(y_test,y_pred))

acc_score: 0.9180327868852459
precision_score: 0.9354838709677419
recall_score: 0.90625


In [28]:
from sklearn.model_selection import GridSearchCV
knn_classifier = KNeighborsClassifier()
param_grid = {"n_neighbors":[2,3,4,5,6,7,8,9]}

cv = GridSearchCV(
    knn_classifier ,
    param_grid,
    cv=5,
    scoring="recall"
)

cv.fit(x_train_scaled,y_train)
ypred = cv.predict(x_test_scaled)

print("acc_score:",accuracy_score(y_test,ypred))
print("precision_score:",precision_score(y_test,ypred))
print("recall_score:",recall_score(y_test,ypred))

res = pd.DataFrame(cv.cv_results_)
print(res[["params","mean_test_score"]])
print(cv.best_params_)

acc_score: 0.9180327868852459
precision_score: 0.9354838709677419
recall_score: 0.90625
               params  mean_test_score
0  {'n_neighbors': 2}         0.616809
1  {'n_neighbors': 3}         0.864387
2  {'n_neighbors': 4}         0.767806
3  {'n_neighbors': 5}         0.857550
4  {'n_neighbors': 6}         0.789744
5  {'n_neighbors': 7}         0.871795
6  {'n_neighbors': 8}         0.811396
7  {'n_neighbors': 9}         0.856980
{'n_neighbors': 7}


In [42]:
# using pipeline -- 

from sklearn.pipeline import Pipeline

# first train test split
x_train , x_test , y_train , y_test = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.2
)

# making pipeline 
pipeline = Pipeline([
    ("scaler",StandardScaler()),
    ("knn",KNeighborsClassifier())
])

# using pipeline in Cross validation 
param_grid = {'knn__n_neighbors':[3,5,6,7]}
cross_v = GridSearchCV(
    pipeline,
    param_grid,
    cv=5,
    scoring="recall" 
)

# train model
cross_v.fit(x_train,y_train)
yp= cross_v.predict(x_test)

print("acc_score",accuracy_score(y_test,yp))
print("recall_score",recall_score(y_test,yp))
print(cv.best_params_)

# display
from sklearn import set_config
set_config(display="diagram")
pipeline

acc_score 0.9180327868852459
recall_score 0.90625
{'n_neighbors': 7}


0,1,2
,steps,"[('scaler', ...), ('knn', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,
