# Decision Tree Model - Features Selection

In [1]:
import pandas as pd
import numpy as np
import statistics as stat
from sklearn.model_selection import KFold
from sklearn import tree
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.linear_model import RidgeCV
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
import random

pd.options.mode.chained_assignment = None 

random.seed(10)
ad =  pd.read_csv('dataset.csv')

### a glimpse at the data

In [2]:
ad.head()

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [3]:
ad.isnull().values.any()

True

### Grouping CDR 2.0 to 1.0


In [4]:
ad.loc[ad["CDR"] == 2,"CDR"] = 1

In [5]:
ad["Male"] = np.where(ad["M/F"]=="M", 1, 0)
ad["Female"] = np.where(ad["M/F"]=="F", 1, 0)

### Splitting the columns into indepdent and dependent variables

In [6]:
X = ad.iloc[:,[3,4,7,8,9,10,11,12,13,14]]
y = ad.iloc[:,[2]]


In [7]:
ad_X_train, ad_X_test, ad_y_train, ad_y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [8]:
med_SES = ad_X_train['SES'].mean()
avg_MMSE = ad_X_train['MMSE'].mean()

In [9]:
ad_X_train.loc[:,'SES'] = ad_X_train.loc[:,'SES'].fillna(med_SES)
ad_X_train.loc[:,'MMSE'] = ad_X_train.loc[:,'MMSE'].fillna(avg_MMSE)

In [10]:
ad_X_test["Group"] = ad_y_test
ad_X_test = ad_X_test.dropna(axis = 0, how='any')

ad_y_test = ad_X_test["Group"]
ad_X_test=ad_X_test.loc[:,ad_X_test.columns!='Group']


### Defining the function to check the accuracy, precision, and recall scores for Decision Tree

In [11]:
def acc_test(cnt,fold):
    clf = tree.DecisionTreeClassifier(criterion="entropy")
    
    
    kf = KFold(n_splits=fold, random_state=None)
    oversample = RandomOverSampler(sampling_strategy='minority')
    acc_score = []
    precise_score = []
    recall = []
    for train_index, test_index in kf.split(ad_X_train):
        X_train, X_test = ad_X_train.iloc[train_index,:], ad_X_train.iloc[test_index,:]
        y_train, y_test = ad_y_train.iloc[train_index,:], ad_y_train.iloc[test_index,:]

        X_train, y_train = oversample.fit_resample(X_train, y_train)
        



        X_test=X_test.loc[:,X_test.columns!='Group']

        if cnt < 9:
            clf1 = tree.DecisionTreeClassifier().fit(X_train,y_train)

            sfs_forward = SequentialFeatureSelector(
            clf1 , n_features_to_select = cnt, direction = "forward").fit(X_train,y_train)


            clf.fit(X_train[X_train.columns[sfs_forward.get_support()]],y_train)

            y_pred = clf.predict(ad_X_test[ad_X_test.columns[sfs_forward.get_support()]])
        else:
            clf.fit(X_train,y_train)
            y_pred = clf.predict(ad_X_test)


        acc = accuracy_score(ad_y_test, y_pred)
        acc_score.append(acc)

        precise = precision_score(ad_y_test, y_pred, average='macro',zero_division=1)
        precise_score.append(precise)

        rec = recall_score(ad_y_test, y_pred, average = 'macro', zero_division = 1)
        recall.append(rec)        
        
    return acc_score, precise_score, recall
        
    

### Selecting One Feature

In [12]:
a5 = acc_test(1,5)
a10 = acc_test(1,10)
a15 = acc_test(1,15)
a20 = acc_test(1,20)
a25 = acc_test(1,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc1 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc1)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre1 =  (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre1)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec1 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5


print("The average of the recall score: ", rec1 )


Accuracy Scores
0.4934579439252336
0.43644859813084114
0.432398753894081
0.43224299065420557
0.4328971962616822
The average of the accuracy score:  0.4454890965732087
*******************************
Precision Scores
0.4525761237946558
0.3701092348220678
0.36576040839884066
0.36562323068864633
0.3670207025890472
The average of the precision score:  0.3842179400586515
*******************************
Recall Scores
0.42406414723487895
0.36911015813454834
0.3653444116858751
0.3640556597873671
0.36566872152238006
The average of the recall score:  0.3776486196730099


### Selecting Two Features

In [13]:
a5 = acc_test(2,5)
a10 = acc_test(2,10)
a15 = acc_test(2,15)
a20 = acc_test(2,20)
a25 = acc_test(2,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc2 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc2)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre2 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5


print("The average of the precision score: ", pre2)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec2 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec2 )

Accuracy Scores
0.7757009345794392
0.7728971962616822
0.7713395638629283
0.7672897196261682
0.7708411214953271
The average of the accuracy score:  0.771613707165109
*******************************
Precision Scores
0.5987192450052431
0.6066505577027904
0.6100884636305124
0.5968845745768614
0.6053370279547322
The average of the precision score:  0.6035359737740279
*******************************
Recall Scores
0.6096444206200303
0.614337085678549
0.6173218380535453
0.6045731707317074
0.6127124095416778
The average of the recall score:  0.611717784925102


### Selecting Three Features

In [14]:
a5 = acc_test(3,5)
a10 = acc_test(3,10)
a15 = acc_test(3,15)
a20 = acc_test(3,20)
a25 = acc_test(3,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc3 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc3)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre3 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5


print("The average of the precision score: ", pre3)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec3 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec3 )

Accuracy Scores
0.8186915887850467
0.808411214953271
0.8049844236760124
0.8060747663551402
0.8157009345794393
The average of the accuracy score:  0.8107725856697818
*******************************
Precision Scores
0.6874490421858842
0.6668602532745806
0.6717866615514053
0.6745687000174041
0.6911113063928659
The average of the precision score:  0.678355192684428
*******************************
Recall Scores
0.6798177432323774
0.669822657017779
0.6643869680455047
0.6652461359778433
0.6800607522558743
The average of the recall score:  0.6718668513058758


### Selecting Four Features

In [15]:
a5 = acc_test(4,5)
a10 = acc_test(4,10)
a15 = acc_test(4,15)
a20 = acc_test(4,20)
a25 = acc_test(4,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc4 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc4)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre4 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5
print("The average of the precision score: ", pre4)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec4 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec4)

Accuracy Scores
0.82803738317757
0.8168224299065421
0.8193146417445483
0.8107476635514018
0.8183177570093457
The average of the accuracy score:  0.8186479750778816
*******************************
Precision Scores
0.7103073799367638
0.6949142490875204
0.6908928816259496
0.6968701692245273
0.6945832127256399
The average of the precision score:  0.6975135785200803
*******************************
Recall Scores
0.7029393370856786
0.6901992316626463
0.6872048006194348
0.6915963101938711
0.6874153488787635
The average of the recall score:  0.6918710056880789


### Selecting Five Features

In [16]:
a5 = acc_test(5,5)
a10 = acc_test(5,10)
a15 = acc_test(5,15)
a20 = acc_test(5,20)
a25 = acc_test(5,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc5 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc5)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre5 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5
print("The average of the precision score: ", pre5)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec5 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5
print("The average of the recall score: ", rec5)

Accuracy Scores
0.8093457943925233
0.8177570093457944
0.8180685358255452
0.8144859813084112
0.8235514018691589
The average of the accuracy score:  0.8166417445482865
*******************************
Precision Scores
0.6701319191118302
0.6903187494789175
0.6992532920807204
0.6911164293527504
0.7043316648906249
The average of the precision score:  0.6910304109829688
*******************************
Recall Scores
0.6636558563387832
0.6843964978111319
0.6929658417463296
0.6825828642901813
0.6956624676136871
The average of the recall score:  0.6838527055600225


### Selecting Six Features

In [17]:
a5 = acc_test(6,5)
a10 = acc_test(6,10)
a15 = acc_test(6,15)
a20 = acc_test(6,20)
a25 = acc_test(6,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc6 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc6)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre6 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre6)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec6 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5
print("The average of the recall score: ", rec6)

Accuracy Scores
0.8448598130841122
0.8252336448598131
0.8199376947040499
0.8247663551401869
0.8239252336448598
The average of the accuracy score:  0.8277445482866044
*******************************
Precision Scores
0.7471513196362356
0.7037445220739474
0.7072113773596124
0.7130918991555069
0.7079214526925348
The average of the precision score:  0.7158241141835674
*******************************
Recall Scores
0.7224291968194407
0.6943804163316358
0.6999195926025195
0.7085913070669169
0.6971732332707943
The average of the recall score:  0.7044987492182614


### Selecting Seven Features

In [18]:
a5 = acc_test(7,5)
a10 = acc_test(7,10)
a15 = acc_test(7,15)
a20 = acc_test(7,20)
a25 = acc_test(7,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc7 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc7 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre7 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre7)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec7 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec7)

Accuracy Scores
0.8523364485981308
0.8336448598130841
0.8292834890965732
0.8303738317757009
0.8302803738317757
The average of the accuracy score:  0.835183800623053
*******************************
Precision Scores
0.7545177832924994
0.730427441024788
0.7195451496379875
0.7175981167426561
0.7262562012619457
The average of the precision score:  0.7296689383919753
*******************************
Recall Scores
0.7202537300098275
0.7144130259983918
0.7071324339617022
0.7051237380505673
0.7112757973733583
The average of the recall score:  0.7116397450787695


### Selecting Eight Features

In [19]:
a5 = acc_test(8,5)
a10 = acc_test(8,10)
a15 = acc_test(8,15)
a20 = acc_test(8,20)
a25 = acc_test(8,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc8 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc8 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre8 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre8 )

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec8 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec8)

Accuracy Scores
0.8411214953271028
0.8345794392523365
0.8305295950155763
0.8327102803738318
0.84
The average of the accuracy score:  0.8357881619937695
*******************************
Precision Scores
0.7451781200127453
0.7305191645045799
0.7208531110303593
0.7258390627523624
0.738022393644029
The average of the precision score:  0.7320823703888152
*******************************
Recall Scores
0.7090815688376664
0.7187058876083267
0.7137868906161589
0.7175098275708032
0.7239837398373984
The average of the recall score:  0.7166135828940707


### Selecting Nine Features

In [20]:
a5 = acc_test(9,5)
a10 = acc_test(9,10)
a15 = acc_test(9,15)
a20 = acc_test(9,20)
a25 = acc_test(9,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc9 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc9 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre9 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre9 )

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec9 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec9)

Accuracy Scores
0.8448598130841122
0.8420560747663551
0.8367601246105919
0.8378504672897196
0.8306542056074766
The average of the accuracy score:  0.8384361370716512
*******************************
Precision Scores
0.7430806861814614
0.7416987794389482
0.7360560795196723
0.7346206963003328
0.7293262991051992
The average of the precision score:  0.7369565081091228
*******************************
Recall Scores
0.7210533369069955
0.7273139462163852
0.7180603948896632
0.7261692575716966
0.7221093540605735
The average of the recall score:  0.7229412579290628


## Scorecard

### Accuracy Score

In [21]:
print("Accuracy Score")
print("Selection One Feature: ", acc1)
print("Selection Two Features: ", acc2)
print("Selection Three Features: ", acc3)
print("Selection Four Features: ", acc4)
print("Selection Five Features: ", acc5)
print("Selection Six Features: ", acc6)
print("Selection Seven Features: ", acc7)
print("Selection Eight Features: ", acc8)
print("Selection Nine Features: ", acc9)


Accuracy Score
Selection One Feature:  0.4454890965732087
Selection Two Features:  0.771613707165109
Selection Three Features:  0.8107725856697818
Selection Four Features:  0.8186479750778816
Selection Five Features:  0.8166417445482865
Selection Six Features:  0.8277445482866044
Selection Seven Features:  0.835183800623053
Selection Eight Features:  0.8357881619937695
Selection Nine Features:  0.8384361370716512


### Precision Score

In [22]:
print("Precision Score")
print("Selection One Feature: ", pre1)
print("Selection Two Features: ", pre2)
print("Selection Three Features: ", pre3)
print("Selection Four Features: ", pre4)
print("Selection Five Features: ", pre5)
print("Selection Six Features: ", pre6)
print("Selection Seven Features: ", pre7)
print("Selection Eight Features: ", pre8)
print("Selection Nine Features: ", pre9)

Precision Score
Selection One Feature:  0.3842179400586515
Selection Two Features:  0.6035359737740279
Selection Three Features:  0.678355192684428
Selection Four Features:  0.6975135785200803
Selection Five Features:  0.6910304109829688
Selection Six Features:  0.7158241141835674
Selection Seven Features:  0.7296689383919753
Selection Eight Features:  0.7320823703888152
Selection Nine Features:  0.7369565081091228


### Recall Score

In [23]:
print("Recall Score")
print("Selection One Feature: ", rec1)
print("Selection Two Features: ", rec2)
print("Selection Three Features: ", rec3)
print("Selection Four Features: ", rec4)
print("Selection Five Features: ", rec5)
print("Selection Six Features: ", rec6)
print("Selection Seven Features: ", rec7)
print("Selection Eight Features: ", rec8)
print("Selection Nine Features: ", rec9)

Recall Score
Selection One Feature:  0.3776486196730099
Selection Two Features:  0.611717784925102
Selection Three Features:  0.6718668513058758
Selection Four Features:  0.6918710056880789
Selection Five Features:  0.6838527055600225
Selection Six Features:  0.7044987492182614
Selection Seven Features:  0.7116397450787695
Selection Eight Features:  0.7166135828940707
Selection Nine Features:  0.7229412579290628


### Selecting the feature(s)

In [24]:

med_SES = stat.median(X['SES'])
avg_MMSE = X['MMSE'].mean()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train.loc[:,'SES'] = X_train.loc[:,'SES'].fillna(med_SES)
X_train.loc[:,'MMSE'] = X_train.loc[:,'MMSE'].fillna(avg_MMSE)

X_test["Group"] = y_test
X_test = X_test.dropna(axis = 0, how='any')

y_test = X_test["Group"]
X_test=X_test.loc[:,X_test.columns!='Group']

clf1 = tree.DecisionTreeClassifier().fit(X_train,y_train)

sfs_forward = SequentialFeatureSelector(
clf1 , n_features_to_select = 6, direction = "forward").fit(X_train,y_train)

X_test.columns[sfs_forward.get_support()]

Index(['Visit', 'MR Delay', 'EDUC', 'SES', 'CDR', 'ASF'], dtype='object')