# Data Mining Project (1)

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import sklearn.metrics as met
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import CategoricalNB, GaussianNB
import random

## Import And Read Data

In [2]:
data = pd.read_csv('heart_disease.csv')

data.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3,30,No,Female,55-59,White,Yes,Yes,Very good,5,Yes,No,Yes
1,No,20.34,No,No,Yes,0,0,No,Female,80 or older,White,No,Yes,Very good,7,No,No,No
2,No,26.58,Yes,No,No,20,30,No,Male,65-69,White,Yes,Yes,Fair,8,Yes,No,No
3,No,24.21,No,No,No,0,0,No,Female,75-79,White,No,No,Good,6,No,No,Yes
4,No,23.71,No,No,No,28,0,Yes,Female,40-44,White,No,Yes,Very good,8,No,No,No


In [3]:
data.shape

(10000, 18)

## Data Types, Missing data And Drop Columns

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   HeartDisease      10000 non-null  object 
 1   BMI               10000 non-null  float64
 2   Smoking           10000 non-null  object 
 3   AlcoholDrinking   10000 non-null  object 
 4   Stroke            10000 non-null  object 
 5   PhysicalHealth    10000 non-null  int64  
 6   MentalHealth      10000 non-null  int64  
 7   DiffWalking       10000 non-null  object 
 8   Sex               10000 non-null  object 
 9   AgeCategory       10000 non-null  object 
 10  Race              10000 non-null  object 
 11  Diabetic          10000 non-null  object 
 12  PhysicalActivity  10000 non-null  object 
 13  GenHealth         10000 non-null  object 
 14  SleepTime         10000 non-null  int64  
 15  Asthma            10000 non-null  object 
 16  KidneyDisease     10000 non-null  object 

In [5]:
data = data.drop(['PhysicalHealth', 'MentalHealth', 'SkinCancer', 'KidneyDisease', 'Asthma', 'Stroke', 'Race', 'DiffWalking', 'GenHealth'], axis=1)

data.shape

(10000, 9)

In [6]:
data.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Sex,AgeCategory,Diabetic,PhysicalActivity,SleepTime
0,No,16.6,Yes,No,Female,55-59,Yes,Yes,5
1,No,20.34,No,No,Female,80 or older,No,Yes,7
2,No,26.58,Yes,No,Male,65-69,Yes,Yes,8
3,No,24.21,No,No,Female,75-79,No,No,6
4,No,23.71,No,No,Female,40-44,No,Yes,8


In [7]:
# Missing Values
data.isnull().sum().sort_values(ascending = False)

HeartDisease        0
BMI                 0
Smoking             0
AlcoholDrinking     0
Sex                 0
AgeCategory         0
Diabetic            0
PhysicalActivity    0
SleepTime           0
dtype: int64

In [8]:
"""cleanup_nums = {'HeartDisease': {'No': 0, 'Yes': 1}, 
                'Smoking': {'Yes': 1, 'No': 0},
               'AlcoholDrinking': {'Yes': 1, 'No': 0},
               'PhysicalActivity': {'Yes': 1, 'No': 0},
               'Sex': {'Female': 1, 'Male': 0}}

data = data.replace(cleanup_nums)"""

"cleanup_nums = {'HeartDisease': {'No': 0, 'Yes': 1}, \n                'Smoking': {'Yes': 1, 'No': 0},\n               'AlcoholDrinking': {'Yes': 1, 'No': 0},\n               'PhysicalActivity': {'Yes': 1, 'No': 0},\n               'Sex': {'Female': 1, 'Male': 0}}\n\ndata = data.replace(cleanup_nums)"

In [9]:
"""data['Diabetic'] = data['Diabetic'].astype('category').cat.codes"""

"data['Diabetic'] = data['Diabetic'].astype('category').cat.codes"

In [10]:
data.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Sex,AgeCategory,Diabetic,PhysicalActivity,SleepTime
0,No,16.6,Yes,No,Female,55-59,Yes,Yes,5
1,No,20.34,No,No,Female,80 or older,No,Yes,7
2,No,26.58,Yes,No,Male,65-69,Yes,Yes,8
3,No,24.21,No,No,Female,75-79,No,No,6
4,No,23.71,No,No,Female,40-44,No,Yes,8


In [11]:
data['AgeCategory'].value_counts()

65-69          1232
70-74          1105
60-64          1100
80 or older     891
55-59           871
75-79           806
50-54           770
45-49           623
40-44           589
35-39           572
30-34           536
18-24           484
25-29           421
Name: AgeCategory, dtype: int64

In [12]:
data['AgeCategory'] = data['AgeCategory'].astype('category').cat.codes

In [13]:
data.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Sex,AgeCategory,Diabetic,PhysicalActivity,SleepTime
0,No,16.6,Yes,No,Female,7,Yes,Yes,5
1,No,20.34,No,No,Female,12,No,Yes,7
2,No,26.58,Yes,No,Male,9,Yes,Yes,8
3,No,24.21,No,No,Female,11,No,No,6
4,No,23.71,No,No,Female,4,No,Yes,8


In [14]:
# Normalization

N = len(data)
def normalize(X):
    Max = max(X)
    Min = min(X)
    for i in range(0, N):
        X[i] = (X[i] - Min) / (Max - Min)
        
B = normalize(data['BMI'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[i] = (X[i] - Min) / (Max - Min)


In [15]:
data.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Sex,AgeCategory,Diabetic,PhysicalActivity,SleepTime
0,No,0.058423,Yes,No,Female,7,Yes,Yes,5
1,No,0.111458,No,No,Female,12,No,Yes,7
2,No,0.199943,Yes,No,Male,9,Yes,Yes,8
3,No,0.166336,No,No,Female,11,No,No,6
4,No,0.159246,No,No,Female,4,No,Yes,8


## Dataset Preparation and Splitting 

In [16]:
X = data.drop('HeartDisease', axis=1)
Y = data['HeartDisease'] #Target

# First Try
num_of_rows = int(len(data) * 0.8)

X_train = X[:num_of_rows]
X_test = X[num_of_rows:]

Y_train = Y[:num_of_rows]
Y_test = Y[num_of_rows:]
x = Y[num_of_rows:]

## Modeling

## Naive Bayes

In [17]:
# Calculate P(Y=y) for all possible y

def calculate_prior(df, Y):
    classes = sorted(list(Y.unique()))
    print(classes)
    prior = []
    for i in classes:
        prior.append(len(df[Y==i])/len(df))
    return prior

In [18]:
x = calculate_prior(X_train, Y_train)
x

['No', 'Yes']


[0.904625, 0.095375]

In [19]:
# Calculate P(X=x|Y=y) Categorical

def calculate_likelihood_categorical(df, feat_name, feat_val, Y, label):
    df = df.drop(['BMI', 'AgeCategory', 'SleepTime'], axis=1)
    df = df[Y==label]
    p_x_given_y = len(df[df[feat_name]==feat_val])/len(df)
    return p_x_given_y

In [20]:
x = calculate_likelihood_categorical(X_train, 'Smoking', "Yes", Y_train, "No")
x

0.4294597208788172

In [21]:
# Calculate P(X=x|Y=y) Using Gaussain dist

def calculate_likelihood_gaussian(df, feat_name, feat_val, Y, label):
    df = df.drop(['Smoking', 'AlcoholDrinking', 'Sex', 'Diabetic', 'PhysicalActivity'], axis=1)
    df = df[Y == label]
    mean, std = df[feat_name].mean(), df[feat_name].std()
    p_x_given_y = (1 / (np.sqrt(2 * np.pi) * std)) * np.exp(-((feat_val - mean)**2 / (2 * std**2)))
    return p_x_given_y

In [67]:
x = calculate_likelihood_gaussian(X_train, 'BMI',0.201361, Y_train, 'Yes')
x

3.9434810434464582

In [68]:
# Calculate P(X=x1|Y=y)P(X=x2|Y=y)...P(X=xn|Y=y)*P(Y=y)

def naive_bayes(df, X, Y):
    
    features = list(df.columns)[:]
    print(features)
    prior = calculate_prior(df, Y)
    
    Y_pred = []
    
    X = X.iterrows()
    print(X)
    for x in X:
        print(x)
        
        # Calculate likelihood
        labels = sorted(list(Y.unique()))
        likelihood = [1]*len(labels)
        for i in range(len(labels)): 
            for j in range(len(features)): 
                print(x)
                if(j==0):
                    likelihood[i] *= calculate_likelihood_gaussian(df,features[j], x[j], Y, labels[i])
                    print(likelihood[i])
                elif(j==4):
                    likelihood[i] *= calculate_likelihood_gaussian(df,features[j], x[j], Y, labels[i])
                elif(j==7):
                    likelihood[i] *= calculate_likelihood_gaussian(df,features[j], x[j], Y, labels[i])
                else:
                    likelihood[i] *= calculate_likelihood_categorical(df,features[j], x[j], Y, labels[i])
                
           
                
        post_prob = [1]*len(labels)
        for i in range(len(labels)):
            post_prob[i] = likelihood[i] * prior[i]
        
        Y_pred.append(np.argmax(post_prob))
        
        for i in range(len(X)):
            if(Y_pred[i] == 0):
                Y_pred[i] = 'No'
            else:
                Y_pred[i] = 'Yes'
        
    return np.array(Y_pred)

In [69]:
Y_pred = naive_bayes(X_train, X_test, Y_train)
Y_pred

['BMI', 'Smoking', 'AlcoholDrinking', 'Sex', 'AgeCategory', 'Diabetic', 'PhysicalActivity', 'SleepTime']
['No', 'Yes']
<generator object DataFrame.iterrows at 0x7fdf72b266d0>
(8000, BMI                 0.201361
Smoking                  Yes
AlcoholDrinking           No
Sex                     Male
AgeCategory                3
Diabetic                  No
PhysicalActivity         Yes
SleepTime                  5
Name: 8000, dtype: object)
(8000, BMI                 0.201361
Smoking                  Yes
AlcoholDrinking           No
Sex                     Male
AgeCategory                3
Diabetic                  No
PhysicalActivity         Yes
SleepTime                  5
Name: 8000, dtype: object)
0.0
(8000, BMI                 0.201361
Smoking                  Yes
AlcoholDrinking           No
Sex                     Male
AgeCategory                3
Diabetic                  No
PhysicalActivity         Yes
SleepTime                  5
Name: 8000, dtype: object)


ValueError: Can only compare identically-labeled Series objects

In [36]:
# Accuracy Score
acc = np.sum(np.equal(Y_test, x)) / len(Y_test)
error = 1 - acc
acc, error

(1.0, 0.0)

In [None]:
# Second Try
data = data.sample(frac = 1)

X = data.drop('HeartDisease', axis=1)
Y = data['HeartDisease'] #Target

num_of_rows = int(len(data) * 0.8)

X_train = X[:num_of_rows]
X_test = X[num_of_rows:]

Y_train = Y[:num_of_rows]
Y_test = Y[num_of_rows:]

In [None]:
Y_pred = naive_bayes(X_train, X_test, Y_train)

In [None]:
# Accuracy Score
test_acc = met.accuracy_score(Y_test, Y_pred)
test_acc

In [None]:
# Third Try
data = data.sample(frac = 1)

X = data.drop('HeartDisease', axis=1)
Y = data['HeartDisease'] #Target

num_of_rows = int(len(data) * 0.8)

X_train = X[:num_of_rows]
X_test = X[num_of_rows:]

Y_train = Y[:num_of_rows]
Y_test = Y[num_of_rows:]

In [None]:
Y_pred = naive_bayes(X_train, X_test, Y_train)

In [None]:
# Accuracy Score
test_acc = met.accuracy_score(Y_test, Y_pred)
test_acc

In [None]:
# Forth Try
data = data.sample(frac = 1)

X = data.drop('HeartDisease', axis=1)
Y = data['HeartDisease'] #Target

num_of_rows = int(len(data) * 0.8)

X_train = X[:num_of_rows]
X_test = X[num_of_rows:]

Y_train = Y[:num_of_rows]
Y_test = Y[num_of_rows:]

In [None]:
Y_pred = naive_bayes(X_train, X_test, Y_train)

In [None]:
# Accuracy Score
test_acc = met.accuracy_score(Y_test, Y_pred)
test_acc

In [None]:
# Fifth Try
data = data.sample(frac = 1)

X = data.drop('HeartDisease', axis=1)
Y = data['HeartDisease'] #Target

num_of_rows = int(len(data) * 0.8)

X_train = X[:num_of_rows]
X_test = X[num_of_rows:]

Y_train = Y[:num_of_rows]
Y_test = Y[num_of_rows:]

In [None]:
Y_pred = naive_bayes(X_train, X_test, Y_train)

In [None]:
# Accuracy Score
test_acc = met.accuracy_score(Y_test, Y_pred)
test_acc

### Naive Bayes SKLearn

In [None]:
NB = CategoricalNB()
NB.fit(X, Y)
Y_pred = NB.predict(X_test)
acc_NB = round(NB.score(X, Y) * 100, 2)
acc_NB

In [None]:
GNB = GaussianNB()
GNB.fit(X, Y)
Y_pred = GNB.predict(X_test)
acc_GNB = round(GNB.score(X, Y) * 100, 2)
acc_GNB

## K-Nearest_Neighbors (KNN)

In [35]:
def Distance(a, b):
    d_ab = a - b
    d = np.linalg.norm(d_ab)
    return d

In [36]:
def CalculateAllDistances(X, a):
    N = np.shape(X)[0] # Data Size
    D = np.zeros(N)
    for i in range(0, N):
        b = X[i]
        D[i] = Distance(a, b)
    return D

In [37]:
def KNN(K, X, a):
    D = CalculateAllDistances(X, a)
    A = np.argsort(D) #Sorting By Distance
    knn = A[:K]
    return knn

In [38]:
def WeightdKNN(knn, knn_Labels):
    L = knn_Labels.copy()
    A = [] # Yes Distances 
    B = [] # No Distances
    for i in L:
        if L[i] == 0:
            A.append(knn[i])
        else:
            B.append(knn[i])
    
    for i in A:
        sum_A = sum(1/A[i])
    for i in B:
        sum_B = sum(1/B[i]) 
        
    return sum_A, sum_B

In [39]:
def GetClass(sum_A, sum_B):
    if sum_A > sum_B:
        predict = 0
    elif sum_A < sum_B:
        predict = 1
    else:
        predict = random.randint(0,1)
    return predict

In [40]:
def Classify(K, X, Y, a):
    knn = KNN(K, X, a)
    knn_Labels = Y[knn, 0]
    WK = WeightdKNN(knn, knn_Labels)
    P = GetClass(sum_A, sum_B)
    return P

In [41]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [42]:
K = 10

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_train, Y_train[:,0], a)
    test_Prediction[i] = pred

KeyError: 0

In [None]:
# Accuracy Score
test_acc11 = met.accuracy_score(Y_test, test_Prediction)
test_acc11

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 20

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc12 = met.accuracy_score(Y_test, test_Prediction)
test_acc12

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 50

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc11 = met.accuracy_score(Y_test, test_Prediction)
test_acc11

In [None]:
# Second Try
data = data.sample(frac = 1)

X = data.drop('HeartDisease', axis=1)
Y = data['HeartDisease'] #Target

num_of_rows = int(len(data) * 0.8)

X_train = X[:num_of_rows]
X_test = X[num_of_rows:]

Y_train = Y[:num_of_rows]
Y_test = Y[num_of_rows:]

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 10

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc21 = met.accuracy_score(Y_test, test_Prediction)
test_acc21

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 20

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc22 = met.accuracy_score(Y_test, test_Prediction)
test_acc22

In [None]:
K = 50

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc23 = met.accuracy_score(Y_test, test_Prediction)
test_acc23

In [None]:
# Third Try
data = data.sample(frac = 1)

X = data.drop('HeartDisease', axis=1)
Y = data['HeartDisease'] #Target

num_of_rows = int(len(data) * 0.8)

X_train = X[:num_of_rows]
X_test = X[num_of_rows:]

Y_train = Y[:num_of_rows]
Y_test = Y[num_of_rows:]

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 10

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc31 = met.accuracy_score(Y_test, test_Prediction)
test_acc31

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 20

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc32 = met.accuracy_score(Y_test, test_Prediction)
test_acc32

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 50

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc33 = met.accuracy_score(Y_test, test_Prediction)
test_acc33

In [None]:
# Forth Try
data = data.sample(frac = 1)

X = data.drop('HeartDisease', axis=1)
Y = data['HeartDisease'] #Target

num_of_rows = int(len(data) * 0.8)

X_train = X[:num_of_rows]
X_test = X[num_of_rows:]

Y_train = Y[:num_of_rows]
Y_test = Y[num_of_rows:]

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 10

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc41 = met.accuracy_score(Y_test, test_Prediction)
test_acc41

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 20

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc42 = met.accuracy_score(Y_test, test_Prediction)
test_acc42

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 50

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc43 = met.accuracy_score(Y_test, test_Prediction)
test_acc43

In [None]:
# Fifth Try
data = data.sample(frac = 1)

X = data.drop('HeartDisease', axis=1)
Y = data['HeartDisease'] #Target

num_of_rows = int(len(data) * 0.8)

X_train = X[:num_of_rows]
X_test = X[num_of_rows:]

Y_train = Y[:num_of_rows]
Y_test = Y[num_of_rows:]

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 10

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc51 = met.accuracy_score(Y_test, test_Prediction)
test_acc51

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 20

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc52 = met.accuracy_score(Y_test, test_Prediction)
test_acc52

In [None]:
train_Prediction = np.zeros(np.size(Y_train))
test_Prediction = np.zeros(np.size(Y_test))

In [None]:
K = 50

# Making predictions on test dataset
for i in range(0, np.size(Y_test)):
    a = X_test[i]
    pred = Classify(K, X_tarin, Y_train[:,0], a)
    test_Prediction[i] = pred

In [None]:
# Accuracy Score
test_acc53 = met.accuracy_score(Y_test, test_Prediction)
test_acc53

### KNN SKLearn

In [None]:
knn = KNeighborsClassifier(n_neighbors=10)
knn.fit(X, Y)
Y_pred = knn.predict(X_test)
acc_knn = round(knn.score(X, Y) * 100, 2)
acc_knn

In [None]:
knn = KNeighborsClassifier(n_neighbors=20)
knn.fit(X, Y)
Y_pred = knn.predict(X_test)
acc_knn = round(knn.score(X, Y) * 100, 2)
acc_knn

In [None]:
knn = KNeighborsClassifier(n_neighbors=50)
knn.fit(X, Y)
Y_pred = knn.predict(X_test)
acc_knn = round(knn.score(X, Y) * 100, 2)
acc_knn