In [105]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Load the training instances from Google Drive.
# Point train_csv_path at wherever heart-extracted.csv is located.
train_csv_path = '/content/drive/My Drive/heart-extracted.csv'
dataset = pd.read_csv(train_csv_path)

# Report how many rows were read, then dump the whole frame for inspection.
n_instances = dataset.shape[0]
print('Dataset has ', n_instances, ' instances')
print(dataset)

Dataset has  14  instances
    Age  RestingBP  Cholesterol  FastingBS  MaxHR  Oldpeak  HeartDisease
0    40        140          289          0    172      0.0             0
1    49        160          180          0    156      1.0             1
2    37        130          283          0     98      0.0             0
3    48        138          214          0    108      1.5             1
4    54        150          195          0    122      0.0             0
5    39        120          339          0    170      0.0             0
6    45        130          237          0    170      0.0             0
7    54        110          208          0    142      0.0             0
8    37        140          207          0    130      1.5             1
9    48        120          284          0    120      0.0             0
10   37        130          211          0    142      0.0             0
11   58        136          164          0     99      2.0             1
12   39        120      

In [106]:
# Separate predictors from the target by column position:
# columns 0-5 hold the features, column 6 holds the label.
label_train = dataset.iloc[:, 6]
features_train = dataset.iloc[:, :6]

# Show the features, a blank separator line, then the label.
print(features_train, '', label_train, sep='\n')




    Age  RestingBP  Cholesterol  FastingBS  MaxHR  Oldpeak
0    40        140          289          0    172      0.0
1    49        160          180          0    156      1.0
2    37        130          283          0     98      0.0
3    48        138          214          0    108      1.5
4    54        150          195          0    122      0.0
5    39        120          339          0    170      0.0
6    45        130          237          0    170      0.0
7    54        110          208          0    142      0.0
8    37        140          207          0    130      1.5
9    48        120          284          0    120      0.0
10   37        130          211          0    142      0.0
11   58        136          164          0     99      2.0
12   39        120          204          0    145      0.0
13   49        140          234          0    140      1.0

0     0
1     1
2     0
3     1
4     0
5     0
6     0
7     0
8     1
9     0
10    0
11    1
12    0
13    1
Na

In [107]:
# Load the new instance that the classifiers will be asked to label.
# Point new_instance_csv_path at wherever heart-new-instance.csv is located.
new_instance_csv_path = '/content/drive/My Drive/heart-new-instance.csv'
new_instance = pd.read_csv(new_instance_csv_path)

# Same positional split as the training data:
# columns 0-5 are the features, column 6 is the label.
label_test = new_instance.iloc[:, 6]
features_test = new_instance.iloc[:, :6]

# Show the features, a blank separator line, then the label.
print(features_test, '', label_test, sep='\n')

   Age  RestingBP  Cholesterol  FastingBS  MaxHR  Oldpeak
0   45        132          297          0    144        0

0    0
Name: HeartDisease, dtype: int64


In [108]:
# Build one K-NN classifier per neighbourhood size under study (k = 1, 3, 6, 9, 11).
# Every model is created with the same distance settings, so they are shared here.
knn_options = {'p': 2, 'metric': 'euclidean'}
classifierK1 = KNeighborsClassifier(n_neighbors=1, **knn_options)
classifierK3 = KNeighborsClassifier(n_neighbors=3, **knn_options)
classifierK6 = KNeighborsClassifier(n_neighbors=6, **knn_options)
classifierK9 = KNeighborsClassifier(n_neighbors=9, **knn_options)
classifierK11 = KNeighborsClassifier(n_neighbors=11, **knn_options)

# Train every classifier on the same (unscaled) training features and label.
for knn_model in (classifierK1, classifierK3, classifierK6, classifierK9):
    knn_model.fit(features_train, label_train)
# The k = 11 fit stays as the final expression so the cell's displayed value is unchanged.
classifierK11.fit(features_train, label_train)

In [109]:
# k = 1: look up the single closest training row and the label predicted from it.
label_predict = classifierK1.predict(features_test)
distances, indices = classifierK1.kneighbors(features_test)

# Report the neighbor's row index first, then the predicted label.
print("The nearest neighbor is row", indices, sep="")
print("Based on its nearest neighbors, the new instance's label is predicted to be ", label_predict, sep="")

The nearest neighbor is row[[9]]
Based on its nearest neighbors, the new instance's label is predicted to be [0]


In [110]:
# k = 3: look up the three closest training rows and the label predicted from them.
label_predict = classifierK3.predict(features_test)
distances, indices = classifierK3.kneighbors(features_test)

# Report the neighbors' row indices first, then the predicted label.
print("The nearest neighbors are rows", indices, sep="")
print("Based on its nearest neighbors, the new instance's label is predicted to be ", label_predict, sep="")

The nearest neighbors are rows[[9 0 2]]
Based on its nearest neighbors, the new instance's label is predicted to be [0]


In [111]:
# k = 6: look up the six closest training rows and the label predicted from them.
label_predict = classifierK6.predict(features_test)
distances, indices = classifierK6.kneighbors(features_test)

# Report the neighbors' row indices first, then the predicted label.
print("The nearest neighbors are rows", indices, sep="")
print("Based on its nearest neighbors, the new instance's label is predicted to be ", label_predict, sep="")

The nearest neighbors are rows[[ 9  0  2  5 13  6]]
Based on its nearest neighbors, the new instance's label is predicted to be [0]


In [112]:
# k = 9: look up the nine closest training rows and the label predicted from them.
label_predict = classifierK9.predict(features_test)
distances, indices = classifierK9.kneighbors(features_test)

# Report the neighbors' row indices first, then the predicted label.
print("The nearest neighbors are rows", indices, sep="")
print("Based on its nearest neighbors, the new instance's label is predicted to be ", label_predict, sep="")

The nearest neighbors are rows[[ 9  0  2  5 13  6 10  3  8]]
Based on its nearest neighbors, the new instance's label is predicted to be [0]


In [113]:
# when k = 11
# Look up the eleven training rows closest to the new instance.
distances, indices = classifierK11.kneighbors(features_test)
print("The nearest neighbors are rows" + str(indices))

# Predicting the test set results
# Predict with the k = 11 model so the reported label comes from the same
# eleven neighbors listed above; classifierK3 would report the 3-neighbor vote.
label_predict = classifierK11.predict(features_test)
print("Based on its nearest neighbors, the new instance's label is predicted to be " + str(label_predict))

The nearest neighbors are rows[[ 9  0  2  5 13  6 10  3  8  7 12]]
Based on its nearest neighbors, the new instance's label is predicted to be [0]
