<a href="https://colab.research.google.com/github/yeshika-b/Salary-Classification-Model--Naive-Bayes-from-scratch/blob/main/ML_Assignment_2_NaiveBayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [None]:
data=pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",names=["age","workclass","fnlwgt","education","education-num","marital-status","occupation","relationship","race","sex","capital-gain","capital-loss","hours-per-week","native-country","salary"])

In [None]:
print(data)


       age          workclass  fnlwgt    education  education-num  \
0       39          State-gov   77516    Bachelors             13   
1       50   Self-emp-not-inc   83311    Bachelors             13   
2       38            Private  215646      HS-grad              9   
3       53            Private  234721         11th              7   
4       28            Private  338409    Bachelors             13   
...    ...                ...     ...          ...            ...   
32556   27            Private  257302   Assoc-acdm             12   
32557   40            Private  154374      HS-grad              9   
32558   58            Private  151910      HS-grad              9   
32559   22            Private  201490      HS-grad              9   
32560   52       Self-emp-inc  287927      HS-grad              9   

            marital-status          occupation    relationship    race  \
0            Never-married        Adm-clerical   Not-in-family   White   
1       Married-civ-spo

## For Missing Values

In [None]:
def replaceNull(data):
  data1=data.copy()
  x=data1.filter(["workclass","occupation","native-country"])
  for feature in x:
    val=data1[feature].mode()[0]
    #print((val))
    data1[feature]=data1[feature].replace(' ?',val)
  return data1

In [None]:
data2=replaceNull(data)

## Splitting Data

In [None]:
def splitData(data):
  data1 = data.sample(frac=1,axis=0).reset_index(drop=True)
  x=int(0.80*len(data1))
  data_train=data1.iloc[:x,:]
  data_test=data1.iloc[x:,:]
  return data_train,data_test


In [None]:
data_train,data_test=splitData(data2)

# **Naive Bayes Classifier**

### Prior Probability

In [None]:
def calculate_prior_prob(data_train):
  p_lessthan50,p_morethan50=data_train["salary"].value_counts('<=50K')
  return p_lessthan50,p_morethan50

In [None]:
p_lessthan50,p_morethan50=data_train["salary"].value_counts('<=50K')

### Likelihood

Divide data into continuous and categorical

In [None]:
#Divide data into continuous and categorical
def divide_data(data):
  data_train=data.copy()
  data_cont=data_train.filter(["age","fnlwgt","education-num","capital-gain","capital-loss","hours-per-week","salary"],axis=1)
  data_categ=data_train.filter(["workclass","education","marital-status","occupation","relationship","race","sex","native-country","salary"],axis=1)
  return data_cont,data_categ



In [None]:
data_cont,data_categ=divide_data(data_train)

In [None]:
def calculate_likelihood_categ(data_categ):
  likelihood_probs={"workclass":{},"education":{},"marital-status":{},"occupation":{},"relationship":{},"race":{},"sex":{},"native-country":{},"salary":{}}
  x=data_categ.drop(["salary"],axis=1)
  y=data_categ["salary"]
  for feature in x:
    for outcome in np.unique(y):
      total_outcome=sum(y==outcome)
      feature_likelihood=x[feature][y[y==outcome].index.values.tolist()].value_counts().to_dict()
      for val,count in feature_likelihood.items():
        likelihood_probs[feature][val+"_"+outcome]=count/total_outcome

  return likelihood_probs



In [None]:
x=data_categ.drop(["salary"],axis=1)
y=data_categ["salary"]
feature_likelihood=x["workclass"][y[y=='<=50K'].index.values.tolist()]

In [None]:
feature_likelihood

Series([], Name: workclass, dtype: object)

In [None]:
def calculate_likelihood_cont(data_cont):
  likelihood_probs={"age":{},"fnlwgt":{},"education-num":{},"capital-gain":{},"capital-loss":{},"hours-per-week":{},"salary":{}}
  x=data_cont.drop(["salary"],axis=1)
  y=data_cont["salary"]
  for feature in x:
    for outcome in np.unique(y):
      feature_mean=x[feature][y[y==outcome].index.values.tolist()].mean()
      feature_std=x[feature][y[y==outcome].index.values.tolist()].std()
      likelihood_probs[feature]['mean_'+outcome]=feature_mean
      likelihood_probs[feature]['std_'+outcome]=feature_std
  return likelihood_probs


In [None]:
data1=data.copy()

In [None]:
x=data1.drop(["salary"],axis=1)
y=data1["salary"]
for feature in x:
  #feature_likelihood=x[feature[feature==' ?'].index.values.tolist()].value_counts()
  z=data1[feature].where(data1[feature] == ' ?').count()
  print(z)

0
1836
0
0
0
0
1843
0
0
0
0
0
0
583


### Predicting Class for Test Example

In [None]:
def predict_class(arr,prior_lessthan50,prior_morethan50,likelihood_categ,likelihood_cont):
  #numerator=prior_lessthan50*likelihood_categ["workclass"][arr[1]]*likelihood_categ["education"][arr[3]]*likelihood_categ["marital-status"][arr[1]]*likelihood_categ["occupation"][arr[1]]*likelihood_categ["relationship"][arr[1]]*likelihood_categ["race"][arr[1]]*likelihood_categ["sex"][arr[1]]*likelihood_categ["native-country"][arr[1]]
  numerator_lessthan=prior_lessthan50
  numerator_morethan=prior_morethan50
  for x in [1,3,5,6,7,8,9,13]:
    print(data.columns[x])
    print(arr[x])
    if((arr[x]+"_ <=50K") in likelihood_categ[data.columns[x]].keys()):
      numerator_lessthan*=likelihood_categ[data.columns[x]][arr[x]+"_ <=50K"]
    else:
      numerator_lessthan=0
      break
    if((arr[x]+"_ >50K") in likelihood_categ[data.columns[x]].keys()):
      numerator_morethan*=likelihood_categ[data.columns[x]][arr[x]+"_ >50K"]
    else:
      numerator_morethan=0
      break

  for x in [0,2,4,10,11,12]:
    mean_lessthan=likelihood_cont[data.columns[x]]['mean_ <=50K']
    std_lessthan=likelihood_cont[data.columns[x]]['std_ <=50K']
    mean_morethan=likelihood_cont[data.columns[x]]['mean_ >50K']
    std_morethan=likelihood_cont[data.columns[x]]['std_ >50K']
    numerator_lessthan*=norm.pdf(arr[x],loc=mean_lessthan,scale=std_lessthan)
    numerator_morethan*=norm.pdf(arr[x],loc=mean_morethan,scale=std_morethan)

  if(numerator_lessthan>=numerator_morethan):
    sal=" <=50K"
  else:
    sal=" >50K"

  return sal


Calculating all quantities before prediction

In [None]:
prior_lessthan50,prior_morethan50=calculate_prior_prob(data_train)
likelihood_categ=calculate_likelihood_categ(data_categ)
likelihood_cont=calculate_likelihood_cont(data_cont)



In [None]:
#arr=[23 ,'Private', 260254, 'HS-grad', 9, 'Never-married' ,'Sales' ,'Own-child','White' ,'Male', 0, 0, 40, 'United-States', '<=50K']
#sal=predict_class(arr,prior_lessthan50,prior_morethan50,likelihood_categ,likelihood_cont)

In [None]:
data_test2=replaceNull(data_test)

In [None]:
data_test2

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
26048,50,Local-gov,164127,HS-grad,9,Never-married,Other-service,Not-in-family,Black,Female,0,0,40,United-States,<=50K
26049,29,Private,31360,Some-college,10,Never-married,Farming-fishing,Own-child,White,Male,0,0,40,United-States,<=50K
26050,23,Private,293565,10th,6,Never-married,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
26051,42,State-gov,102343,Prof-school,15,Married-civ-spouse,Prof-specialty,Husband,Asian-Pac-Islander,Male,0,0,72,India,>50K
26052,37,Federal-gov,160910,HS-grad,9,Married-civ-spouse,Craft-repair,Husband,White,Male,0,0,40,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,61,State-gov,124971,Doctorate,16,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,40,United-States,>50K
32557,46,Private,199316,Some-college,10,Married-civ-spouse,Craft-repair,Other-relative,Asian-Pac-Islander,Male,0,0,40,India,<=50K
32558,45,Private,145637,Some-college,10,Divorced,Adm-clerical,Unmarried,White,Female,0,0,48,United-States,<=50K
32559,25,Private,91709,Assoc-acdm,12,Never-married,Tech-support,Not-in-family,White,Female,0,0,45,United-States,<=50K


## Calculating Performance Metrics

In [None]:
def NBTest(data_test):
  TN,TP,FN,FP=0,0,0,0
  for i,r in data_test.iterrows():
    print(i)
    sal=predict_class(r.values,prior_lessthan50,prior_morethan50,likelihood_categ,likelihood_cont)
    if(sal==r.values[-1]):
      if(sal==' >50K'):
        TP+=1
      else:
        TN+=1
    else:
      if(sal==' >50K'):
        FP+=1
      else:
        FN+=1
  print( TP,TN,FP,FN)
  accuracy=(TP+TN)/(TP+TN+FP+FN)
  precision=TP/(TP+FP)
  recall=TP/(TP+FN)
  f1_score=(2*precision*recall)/(precision+recall)
  return accuracy,precision,recall,f1_score


In [None]:
a,p,r,f=NBTest(data_test2)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
occupation
 Sales
relationship
 Husband
race
 White
sex
 Male
native-country
 United-States
32266
workclass
 Private
education
 Some-college
marital-status
 Never-married
occupation
 Exec-managerial
relationship
 Unmarried
race
 Black
sex
 Female
native-country
 Jamaica
32267
workclass
 Private
education
 Some-college
marital-status
 Never-married
occupation
 Craft-repair
relationship
 Not-in-family
race
 Black
sex
 Male
native-country
 United-States
32268
workclass
 Private
education
 Some-college
marital-status
 Never-married
occupation
 Craft-repair
relationship
 Not-in-family
race
 White
sex
 Male
native-country
 United-States
32269
workclass
 Private
education
 10th
marital-status
 Divorced
occupation
 Machine-op-inspct
relationship
 Not-in-family
race
 White
sex
 Female
native-country
 United-States
32270
workclass
 Self-emp-not-inc
education
 Some-college
marital-status
 Married-civ-spouse
occupation
 Farming-fishi

In [None]:
a,p,r,f

(0.8343313373253493, 0.718132854578097, 0.5111821086261981, 0.597237775289287)

## Smoothing techniques

###Laplace Smoothing

In [None]:
#no. of features
k=data_train.shape[1]-1

In [None]:
N_less=p_lessthan50*data_train.shape[0]
N_more=p_morethan50*data_train.shape[0]

In [None]:
N_less,N_more

(19772.0, 6276.0)

In [None]:

def calculate_likelihood_categ_laplace(data_categ,alpha):
  likelihood_probs={"workclass":{},"education":{},"marital-status":{},"occupation":{},"relationship":{},"race":{},"sex":{},"native-country":{},"salary":{}}
  x=data_categ.drop(["salary"],axis=1)
  y=data_categ["salary"]
  for feature in x:
    for outcome in np.unique(y):
      total_outcome=sum(y==outcome)
      feature_likelihood=x[feature][y[y==outcome].index.values.tolist()].value_counts().to_dict()
      for val,count in feature_likelihood.items():
        likelihood_probs[feature][val+"_"+outcome]=(count+alpha)/(total_outcome+k*alpha)

  return likelihood_probs



In [None]:
def calculate_likelihood_cont(data_cont):
  likelihood_probs={"age":{},"fnlwgt":{},"education-num":{},"capital-gain":{},"capital-loss":{},"hours-per-week":{},"salary":{}}
  x=data_cont.drop(["salary"],axis=1)
  y=data_cont["salary"]
  for feature in x:
    for outcome in np.unique(y):
      feature_mean=x[feature][y[y==outcome].index.values.tolist()].mean()
      feature_std=x[feature][y[y==outcome].index.values.tolist()].std()
      likelihood_probs[feature]['mean_'+outcome]=feature_mean
      likelihood_probs[feature]['std_'+outcome]=feature_std
  return likelihood_probs

In [None]:
#k is number of features,N_less is no of less than tuples
def predict_class_laplace(arr,prior_lessthan50,prior_morethan50,likelihood_categ_laplace,likelihood_cont,alpha):
  numerator_lessthan=prior_lessthan50
  numerator_morethan=prior_morethan50
  for x in [1,3,5,6,7,8,9,13]:
    print(data.columns[x])
    print(arr[x])
    if((arr[x]+"_ <=50K") in likelihood_categ_laplace[data.columns[x]].keys()):
      numerator_lessthan*=likelihood_categ_laplace[data.columns[x]][arr[x]+"_ <=50K"]
    else:
      numerator_lessthan*=(alpha/(N_less+alpha*k))

    if((arr[x]+"_ >50K") in likelihood_categ_laplace[data.columns[x]].keys()):
      numerator_morethan*=likelihood_categ_laplace[data.columns[x]][arr[x]+"_ >50K"]
    else:
      numerator_morethan*=(alpha/(N_more+alpha*k))


  for x in [0,2,4,10,11,12]:
    mean_lessthan=likelihood_cont[data.columns[x]]['mean_ <=50K']
    std_lessthan=likelihood_cont[data.columns[x]]['std_ <=50K']
    mean_morethan=likelihood_cont[data.columns[x]]['mean_ >50K']
    std_morethan=likelihood_cont[data.columns[x]]['std_ >50K']
    numerator_lessthan*=norm.pdf(arr[x],loc=mean_lessthan,scale=std_lessthan)
    numerator_morethan*=norm.pdf(arr[x],loc=mean_morethan,scale=std_morethan)

  if(numerator_lessthan>=numerator_morethan):
    sal=" <=50K"
  else:
    sal=" >50K"

  return sal


In [None]:
def NBTest_laplace(data_test):
  TN,TP,FN,FP=0,0,0,0
  for i,r in data_test.iterrows():
    print(i)
    sal=predict_class_laplace(r.values,prior_lessthan50,prior_morethan50,likelihood_categ,likelihood_cont,1)
    if(sal==r.values[-1]):
      if(sal==' >50K'):
        TP+=1
      else:
        TN+=1
    else:
      if(sal==' >50K'):
        FP+=1
      else:
        FN+=1
  print( TP,TN,FP,FN)
  accuracy=(TP+TN)/(TP+TN+FP+FN)
  precision=TP/(TP+FP)
  recall=TP/(TP+FN)
  f1_score=(2*precision*recall)/(precision+recall)
  return accuracy,precision,recall,f1_score


In [None]:
def getAvgPerformance(data2):
  acc_f,pre_f,rec_f,f1_f=0,0,0,0
  for i in range(10):
    data_train_f,data_test_f=splitData(data2)
    acc,pre,rec,f1=NBTest_laplace(data_test_f)
    acc_f+=acc
    pre_f+=pre
    rec_f+=rec
    f1_f+=f1
  return acc_f/10,pre_f/10,rec_f/10,f1_f/10


In [None]:
a,p,r,f=NBTest_laplace(data_test2)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 Jamaica
32267
workclass
 Private
education
 Some-college
marital-status
 Never-married
occupation
 Craft-repair
relationship
 Not-in-family
race
 Black
sex
 Male
native-country
 United-States
32268
workclass
 Private
education
 Some-college
marital-status
 Never-married
occupation
 Craft-repair
relationship
 Not-in-family
race
 White
sex
 Male
native-country
 United-States
32269
workclass
 Private
education
 10th
marital-status
 Divorced
occupation
 Machine-op-inspct
relationship
 Not-in-family
race
 White
sex
 Female
native-country
 United-States
32270
workclass
 Self-emp-not-inc
education
 Some-college
marital-status
 Married-civ-spouse
occupation
 Farming-fishing
relationship
 Husband
race
 White
sex
 Male
native-country
 United-States
32271
workclass
 Private
education
 11th
marital-status
 Never-married
occupation
 Prof-specialty
relationship
 Not-in-family
race
 White
sex
 Female
native-country
 United-States
32272

In [None]:
a,p,r,f

(0.8163672654690619,
 0.5927601809954751,
 0.7533546325878594,
 0.6634777715250422)

#**Comparing with other models**#

## Logistic Regression

In [None]:
X_LR=data2.drop(["salary"],axis=1)
y_LR=data2["salary"]
y_LR=y_LR.map({' >50K':1, ' <=50K': 0})
X_LR.sex = X_LR.sex.map({' Male': 0, ' Female': 1})
for x in [1,3,5,6,7,8,9,13]:
  col=data_train.columns[x]
  ports = pd.get_dummies(X_LR[col], prefix=col)
  X_LR= X_LR.join(ports)
  X_LR.drop([col], axis=1, inplace=True)





In [None]:
X_LR

Unnamed: 0,age,fnlwgt,education-num,capital-gain,capital-loss,hours-per-week,workclass_ Federal-gov,workclass_ Local-gov,workclass_ Never-worked,workclass_ Private,...,native-country_ Portugal,native-country_ Puerto-Rico,native-country_ Scotland,native-country_ South,native-country_ Taiwan,native-country_ Thailand,native-country_ Trinadad&Tobago,native-country_ United-States,native-country_ Vietnam,native-country_ Yugoslavia
0,39,77516,13,2174,0,40,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,50,83311,13,0,0,13,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,38,215646,9,0,0,40,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
3,53,234721,7,0,0,40,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
4,28,338409,13,0,0,40,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32556,27,257302,12,0,0,38,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
32557,40,154374,9,0,0,40,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
32558,58,151910,9,0,0,40,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
32559,22,201490,9,0,0,20,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0


In [None]:
y_LR

0        0
1        0
2        0
3        0
4        0
        ..
32556    0
32557    1
32558    0
32559    0
32560    1
Name: salary, Length: 32561, dtype: int64

In [None]:
X_LR_train, X_LR_test, y_LR_train, y_LR_test = train_test_split(X_LR, y_LR, test_size=0.33, random_state=42)

In [None]:
clf = LogisticRegression(random_state=0).fit(X_LR_train, y_LR_train)

In [None]:
# X_LR_test=data_test2.drop(["salary"],axis=1)
# y_LR_test=data_test2["salary"]
# X_LR_test.sex = X_LR_test.sex.map({' Male': 0, ' Female': 1})
# for x in [1,3,5,6,7,8,9,13]:
#   col=data_test2.columns[x]
#   ports = pd.get_dummies(X_LR_test[col], prefix=col)
#   X_LR_test= X_LR_test.join(ports)
#   X_LR_test.drop([col], axis=1, inplace=True)

# for x in X_LR.columns:
#   if(x not in X_LR_test.columns):
#     ports=pd.DataFrame(np.zeros([len(X_LR_test),1]),columns=[x])
#     X_LR_test= X_LR_test.join(ports)


In [None]:
y_LR_pred = pd.Series(clf.predict(X_LR_test))

In [None]:
print("Accuracy:", metrics.accuracy_score(y_LR_test, y_LR_pred))
print("Precision:", metrics.precision_score(y_LR_test, y_LR_pred))
print("Recall:", metrics.recall_score(y_LR_test, y_LR_pred))

Accuracy: 0.802252000744463
Precision: 0.7332601536772777
Recall: 0.2619607843137255


In [None]:
def getAvgPerformance_LR(X_LR,y_LR):
  acc,pre,rec,f1=0,0,0,0
  for i in range(10):
    X_LR_train, X_LR_test, y_LR_train, y_LR_test = train_test_split(X_LR, y_LR, test_size = 0.33, random_state = i)
    y_LR_pred = pd.Series(clf.predict(X_LR_test))
    acc+=metrics.accuracy_score(y_LR_test, y_LR_pred)
    pre+=metrics.precision_score(y_LR_test, y_LR_pred)
    rec+=metrics.accuracy_score(y_LR_test, y_LR_pred)
  return acc/10,pre/10,rec/10,2*pre/10*rec/(pre+rec)


In [None]:
getAvgPerformance_LR(X_LR,y_LR)

(0.800428066257212, 0.8727500959196831, 0.800428066257212, 0.8350260449179017)

# KNN

In [None]:
X_KNN=X_LR.copy()
y_KNN=y_LR.copy()


In [None]:
X_KNN_train, X_KNN_test, y_KNN_train, y_KNN_test = train_test_split(X_KNN, y_KNN, test_size = 0.33, random_state = 0)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
K = []
training = []
test = []
scores = {}

for k in range(2, 21):
    clf = KNeighborsClassifier(n_neighbors = k)
    clf.fit(X_KNN_train, y_KNN_train)

    training_score = clf.score(X_KNN_train, y_KNN_train)
    test_score = clf.score(X_KNN_test, y_KNN_test)
    K.append(k)

    training.append(training_score)
    test.append(test_score)
    scores[k] = [training_score, test_score]

In [None]:
for keys, values in scores.items():
    print(keys, ':', values)

2 : [0.8679807471922989, 0.788107202680067]
3 : [0.8628925051570021, 0.7587009119672437]
4 : [0.8341049736419894, 0.7854085241019915]
5 : [0.8328214531285812, 0.7735901730876605]
6 : [0.8213156085262434, 0.7898753024381165]
7 : [0.8200779280311712, 0.7811278615298716]
8 : [0.8141645656658263, 0.7931323283082077]
9 : [0.8135686454274582, 0.7899683603201191]
10 : [0.8107265642906257, 0.7957379490042806]
11 : [0.8109557643823058, 0.793783733482226]
12 : [0.8076552830621132, 0.7979713381723432]
13 : [0.8075177630071052, 0.7971338172343198]
14 : [0.8043548017419206, 0.7970407593523171]
15 : [0.8049048819619528, 0.7973199329983249]
16 : [0.8032088012835206, 0.7979713381723432]
17 : [0.8037130414852166, 0.798715801228364]
18 : [0.8030712812285125, 0.7996463800483901]
19 : [0.8029796011918405, 0.7989019169923692]
20 : [0.8012835205134082, 0.7984366275823562]


In [None]:
knn = KNeighborsClassifier(n_neighbors=2)
knn.fit(X_KNN_train, y_KNN_train)

In [None]:
y_KNN_pred = knn.predict(X_KNN_test)

In [None]:
print("Accuracy:", metrics.accuracy_score(y_KNN_test, y_KNN_pred))
print("Precision:", metrics.precision_score(y_KNN_test, y_KNN_pred))
print("Recall:", metrics.recall_score(y_KNN_test, y_KNN_pred))

Accuracy: 0.788107202680067
Precision: 0.6417112299465241
Recall: 0.2774566473988439


In [None]:
def getAvgPerformance_KNN(X_KNN,y_KNN):
  acc,pre,rec,f1=0,0,0,0
  for i in range(10):
    X_KNN_train, X_KNN_test, y_KNN_train, y_KNN_test = train_test_split(X_KNN, y_KNN, test_size = 0.33, random_state = i)
    y_KNN_pred = knn.predict(X_KNN_test)
    acc+=metrics.accuracy_score(y_KNN_test, y_KNN_pred)
    pre+=metrics.precision_score(y_KNN_test, y_KNN_pred)
    rec+=metrics.accuracy_score(y_KNN_test, y_KNN_pred)
  return acc/10,pre/10,rec/10,2*pre/10*rec/(pre+rec)


In [None]:
getAvgPerformance_KNN(X_KNN,y_KNN)

(0.8360413176996092,
 0.8601440970995154,
 0.8360413176996092,
 0.8479214572609478)