# Decision Tree Model - Features Selection

In [21]:
import pandas as pd
import numpy as np
import statistics as stat
from sklearn.model_selection import KFold
from sklearn import tree
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.linear_model import RidgeCV
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
import random

pd.options.mode.chained_assignment = None 

random.seed(10)
ad =  pd.read_csv('dataset.csv')

### a glimpse at the data

In [22]:
ad.head()

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [23]:
ad.isnull().values.any()

True

### Grouping CDR 2.0 to 1.0


In [24]:
ad.loc[ad["CDR"] == 2,"CDR"] = 1

In [25]:
ad["Male"] = np.where(ad["M/F"]=="M", 1, 0)
ad["Female"] = np.where(ad["M/F"]=="F", 1, 0)

### Splitting the columns into indepdent and dependent variables

In [26]:
X = ad.iloc[:,[3,4,7,8,9,10,11,12,13,14]]
y = ad.iloc[:,[2]]


In [27]:
ad_X_train, ad_X_test, ad_y_train, ad_y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [28]:
med_SES = ad_X_train['SES'].mean()
avg_MMSE = ad_X_train['MMSE'].mean()

In [29]:
ad_X_train.loc[:,'SES'] = ad_X_train.loc[:,'SES'].fillna(med_SES)
ad_X_train.loc[:,'MMSE'] = ad_X_train.loc[:,'MMSE'].fillna(avg_MMSE)

In [30]:
ad_X_test["Group"] = ad_y_test
ad_X_test = ad_X_test.dropna(axis = 0, how='any')

ad_y_test = ad_X_test["Group"]
ad_X_test=ad_X_test.loc[:,ad_X_test.columns!='Group']


### Defining the function to check the accuracy, precision, and recall scores for Decision Tree

In [31]:
def acc_test(cnt,fold):
    clf = tree.DecisionTreeClassifier(criterion="entropy")
    
    
    kf = KFold(n_splits=fold, random_state=None)
    oversample = RandomOverSampler(sampling_strategy='minority')
    acc_score = []
    precise_score = []
    recall = []
    for train_index, test_index in kf.split(ad_X_train):
        X_train, X_test = ad_X_train.iloc[train_index,:], ad_X_train.iloc[test_index,:]
        y_train, y_test = ad_y_train.iloc[train_index,:], ad_y_train.iloc[test_index,:]

        X_train, y_train = oversample.fit_resample(X_train, y_train)
        



        X_test=X_test.loc[:,X_test.columns!='Group']

        if cnt < 9:
            clf1 = tree.DecisionTreeClassifier().fit(X_train,y_train)

            sfs_forward = SequentialFeatureSelector(
            clf1 , n_features_to_select = cnt, direction = "forward").fit(X_train,y_train)


            clf.fit(X_train[X_train.columns[sfs_forward.get_support()]],y_train)

            y_pred = clf.predict(ad_X_test[ad_X_test.columns[sfs_forward.get_support()]])
        else:
            clf.fit(X_train,y_train)
            y_pred = clf.predict(ad_X_test)


        acc = accuracy_score(ad_y_test, y_pred)
        acc_score.append(acc)

        precise = precision_score(ad_y_test, y_pred, average='macro',zero_division=1)
        precise_score.append(precise)

        rec = recall_score(ad_y_test, y_pred, average = 'macro', zero_division = 1)
        recall.append(rec)        
        
    return acc_score, precise_score, recall
        
    

### Selecting One Feature

In [32]:
a5 = acc_test(1,5)
a10 = acc_test(1,10)
a15 = acc_test(1,15)
a20 = acc_test(1,20)
a25 = acc_test(1,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc1 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc1)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre1 =  (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre1)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec1 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5


print("The average of the recall score: ", rec1 )


Accuracy Scores
0.47102803738317756
0.43738317757009343
0.43302180685358255
0.43177570093457945
0.4325233644859813
The average of the accuracy score:  0.4411464174454829
*******************************
Precision Scores
0.42842524622059763
0.3727714535557151
0.36729884238408317
0.36449613736643105
0.3667380934213491
The average of the precision score:  0.3799459545896352
*******************************
Recall Scores
0.39965603502188873
0.37149111051550077
0.3669317132731767
0.3628651835968909
0.36534351827034756
The average of the recall score:  0.3732575121355609


### Selecting Two Features

In [33]:
a5 = acc_test(2,5)
a10 = acc_test(2,10)
a15 = acc_test(2,15)
a20 = acc_test(2,20)
a25 = acc_test(2,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc2 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc2)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre2 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5


print("The average of the precision score: ", pre2)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec2 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec2 )

Accuracy Scores
0.7757009345794392
0.7728971962616822
0.7713395638629283
0.7672897196261682
0.7708411214953271
The average of the accuracy score:  0.771613707165109
*******************************
Precision Scores
0.5987192450052431
0.6066505577027904
0.6100884636305124
0.5968845745768614
0.6053370279547322
The average of the precision score:  0.6035359737740279
*******************************
Recall Scores
0.6096444206200303
0.614337085678549
0.6173218380535453
0.6045731707317074
0.6127124095416778
The average of the recall score:  0.611717784925102


### Selecting Three Features

In [34]:
a5 = acc_test(3,5)
a10 = acc_test(3,10)
a15 = acc_test(3,15)
a20 = acc_test(3,20)
a25 = acc_test(3,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc3 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc3)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre3 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5


print("The average of the precision score: ", pre3)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec3 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec3 )

Accuracy Scores
0.7682242990654206
0.8214953271028037
0.822429906542056
0.8163551401869159
0.811214953271028
The average of the accuracy score:  0.8079439252336449
*******************************
Precision Scores
0.6345500556221622
0.6915745379030499
0.6938597266685811
0.6831306797353274
0.6802484702532798
The average of the precision score:  0.6766726940364801
*******************************
Recall Scores
0.627155364960243
0.6928683105512373
0.6894428065159772
0.6785490931832395
0.67462074510855
The average of the recall score:  0.6725272640638494


### Selecting Four Features

In [35]:
a5 = acc_test(4,5)
a10 = acc_test(4,10)
a15 = acc_test(4,15)
a20 = acc_test(4,20)
a25 = acc_test(4,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc4 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc4)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre4 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5
print("The average of the precision score: ", pre4)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec4 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec4)

Accuracy Scores
0.805607476635514
0.8102803738317756
0.8130841121495327
0.8238317757009346
0.8082242990654206
The average of the accuracy score:  0.8122056074766355
*******************************
Precision Scores
0.68131814199746
0.6837509470546347
0.6860121794255958
0.7049789063225241
0.6842313635698496
The average of the precision score:  0.6880583076740129
*******************************
Recall Scores
0.6729473778254267
0.6801483069775753
0.6818860001786831
0.6966407576163673
0.6785660680782631
The average of the recall score:  0.682037702135263


### Selecting Five Features

In [36]:
a5 = acc_test(5,5)
a10 = acc_test(5,10)
a15 = acc_test(5,15)
a20 = acc_test(5,20)
a25 = acc_test(5,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc5 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc5)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre5 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5
print("The average of the precision score: ", pre5)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec5 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5
print("The average of the recall score: ", rec5)

Accuracy Scores
0.8299065420560747
0.8383177570093457
0.8218068535825545
0.8130841121495327
0.8295327102803738
The average of the accuracy score:  0.8265295950155762
*******************************
Precision Scores
0.711093969008043
0.7332577548418076
0.7047010946149654
0.6925405996055615
0.7174820845299131
The average of the precision score:  0.7118151005200581
*******************************
Recall Scores
0.6989815062985795
0.7132582864290181
0.7005003126954346
0.6876250781738587
0.7070687036540695
The average of the recall score:  0.7014867774501921


### Selecting Six Features

In [37]:
a5 = acc_test(6,5)
a10 = acc_test(6,10)
a15 = acc_test(6,15)
a20 = acc_test(6,20)
a25 = acc_test(6,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc6 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc6)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre6 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre6)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec6 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5
print("The average of the recall score: ", rec6)

Accuracy Scores
0.8317757009345794
0.8327102803738318
0.8249221183800622
0.8257009345794393
0.8336448598130841
The average of the accuracy score:  0.8297507788161994
*******************************
Precision Scores
0.7133141890579635
0.7003005067484702
0.7088420644762161
0.7182711479846295
0.7281515139036292
The average of the precision score:  0.7137758844341817
*******************************
Recall Scores
0.6918877870097382
0.694096756901635
0.6955627028797761
0.7073204234789601
0.7126811399982131
The average of the recall score:  0.7003097620536645


### Selecting Seven Features

In [38]:
a5 = acc_test(7,5)
a10 = acc_test(7,10)
a15 = acc_test(7,15)
a20 = acc_test(7,20)
a25 = acc_test(7,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc7 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc7 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre7 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre7)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec7 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec7)

Accuracy Scores
0.8317757009345794
0.8401869158878504
0.843613707165109
0.8345794392523365
0.8351401869158879
The average of the accuracy score:  0.8370591900311526
*******************************
Precision Scores
0.7143232210571183
0.7320177109650859
0.7417896181197806
0.7323034891468441
0.7266216849456024
The average of the precision score:  0.7294111448468863
*******************************
Recall Scores
0.701639417493076
0.7126284284820871
0.7182226987105036
0.7122721790404717
0.7036719378182793
The average of the recall score:  0.7096869323088836


### Selecting Eight Features

In [39]:
a5 = acc_test(8,5)
a10 = acc_test(8,10)
a15 = acc_test(8,15)
a20 = acc_test(8,20)
a25 = acc_test(8,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc8 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc8 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre8 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre8 )

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec8 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec8)

Accuracy Scores
0.8373831775700934
0.8317757009345794
0.8348909657320872
0.8387850467289719
0.834018691588785
The average of the accuracy score:  0.8353707165109034
*******************************
Precision Scores
0.7365780838455257
0.7234771429196747
0.7254638042471936
0.7416887388686961
0.7264367780485353
The average of the precision score:  0.7307289095859252
*******************************
Recall Scores
0.7159251317787904
0.7099950862145984
0.7159623574257721
0.7266383007236665
0.7113999821316894
The average of the recall score:  0.7159841716549035


### Selecting Nine Features

In [40]:
a5 = acc_test(9,5)
a10 = acc_test(9,10)
a15 = acc_test(9,15)
a20 = acc_test(9,20)
a25 = acc_test(9,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc9 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc9 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre9 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre9 )

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec9 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec9)

Accuracy Scores
0.8448598130841122
0.8336448598130841
0.8336448598130841
0.8355140186915888
0.8366355140186915
The average of the accuracy score:  0.8368598130841122
*******************************
Precision Scores
0.7411653821343743
0.7299124153334904
0.7294300837819985
0.7359499098864462
0.7343763513441323
The average of the precision score:  0.7341668284960883
*******************************
Recall Scores
0.7116456714017689
0.7142410435093361
0.719632806218172
0.7219668542839275
0.722448851961047
The average of the recall score:  0.7179870454748504


## Scorecard

### Accuracy Score

In [41]:
print("Accuracy Score")
print("Selection One Feature: ", acc1)
print("Selection Two Features: ", acc2)
print("Selection Three Features: ", acc3)
print("Selection Four Features: ", acc4)
print("Selection Five Features: ", acc5)
print("Selection Six Features: ", acc6)
print("Selection Seven Features: ", acc7)
print("Selection Eight Features: ", acc8)
print("Selection Nine Features: ", acc9)


Accuracy Score
Selection One Feature:  0.4411464174454829
Selection Two Features:  0.771613707165109
Selection Three Features:  0.8079439252336449
Selection Four Features:  0.8122056074766355
Selection Five Features:  0.8265295950155762
Selection Six Features:  0.8297507788161994
Selection Seven Features:  0.8370591900311526
Selection Eight Features:  0.8353707165109034
Selection Nine Features:  0.8368598130841122


### Precision Score

In [42]:
print("Precision Score")
print("Selection One Feature: ", pre1)
print("Selection Two Features: ", pre2)
print("Selection Three Features: ", pre3)
print("Selection Four Features: ", pre4)
print("Selection Five Features: ", pre5)
print("Selection Six Features: ", pre6)
print("Selection Seven Features: ", pre7)
print("Selection Eight Features: ", pre8)
print("Selection Nine Features: ", pre9)

Precision Score
Selection One Feature:  0.3799459545896352
Selection Two Features:  0.6035359737740279
Selection Three Features:  0.6766726940364801
Selection Four Features:  0.6880583076740129
Selection Five Features:  0.7118151005200581
Selection Six Features:  0.7137758844341817
Selection Seven Features:  0.7294111448468863
Selection Eight Features:  0.7307289095859252
Selection Nine Features:  0.7341668284960883


### Recall Score

In [43]:
print("Recall Score")
print("Selection One Feature: ", rec1)
print("Selection Two Features: ", rec2)
print("Selection Three Features: ", rec3)
print("Selection Four Features: ", rec4)
print("Selection Five Features: ", rec5)
print("Selection Six Features: ", rec6)
print("Selection Seven Features: ", rec7)
print("Selection Eight Features: ", rec8)
print("Selection Nine Features: ", rec9)

Recall Score
Selection One Feature:  0.3732575121355609
Selection Two Features:  0.611717784925102
Selection Three Features:  0.6725272640638494
Selection Four Features:  0.682037702135263
Selection Five Features:  0.7014867774501921
Selection Six Features:  0.7003097620536645
Selection Seven Features:  0.7096869323088836
Selection Eight Features:  0.7159841716549035
Selection Nine Features:  0.7179870454748504


### Selecting the feature(s)

In [47]:

med_SES = stat.median(X['SES'])
avg_MMSE = X['MMSE'].mean()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train.loc[:,'SES'] = X_train.loc[:,'SES'].fillna(med_SES)
X_train.loc[:,'MMSE'] = X_train.loc[:,'MMSE'].fillna(avg_MMSE)

X_test["Group"] = y_test
X_test = X_test.dropna(axis = 0, how='any')

y_test = X_test["Group"]
X_test=X_test.loc[:,X_test.columns!='Group']

clf1 = tree.DecisionTreeClassifier().fit(X_train,y_train)

sfs_forward = SequentialFeatureSelector(
clf1 , n_features_to_select = 6, direction = "forward").fit(X_train,y_train)

X_test.columns[sfs_forward.get_support()]

Index(['Visit', 'EDUC', 'SES', 'MMSE', 'CDR', 'nWBV'], dtype='object')