# Decision Tree Model

In [19]:
import pandas as pd
import numpy as np
import statistics as stat
from sklearn.model_selection import KFold
from sklearn import tree
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.linear_model import RidgeCV
from imblearn.over_sampling import RandomOverSampler

pd.options.mode.chained_assignment = None 


ad =  pd.read_csv('dataset.csv')

### a glimpse at the data

In [20]:
ad.head()
#https://scikit-learn.org/stable/auto_examples/feature_selection/plot_select_from_model_diabetes.html#sphx-glr-auto-examples-feature-selection-plot-select-from-model-diabetes-py

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [21]:
ad.isnull().values.any()

True

### Grouping CDR 2.0 to 1.0


In [22]:
ad.loc[ad["CDR"] == 2,"CDR"] = 1

### Splitting the columns into indepdent and dependent variables

In [23]:
X = ad.iloc[:,[3,4,7,8,9,10,11,12,13,14]]
y = ad.iloc[:,[2]]


### Defining the function to check the accuracy, precision, and recall scores for Decision Tree

In [24]:
def acc_test(cnt,fold):
    clf = tree.DecisionTreeClassifier()
    
    
    kf = KFold(n_splits=fold, random_state=4, shuffle = True)
    oversample = RandomOverSampler(sampling_strategy='minority')
    med_SES = stat.median(X['SES'])
    avg_MMSE = X['MMSE'].mean()
    acc_score = []
    precise_score = []
    recall = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index,:], X.iloc[test_index,:]
        y_train, y_test = y.iloc[train_index,:], y.iloc[test_index,:]

        X_train, y_train = oversample.fit_resample(X_train, y_train)
        

        # impute missing values for training set
        X_train.loc[:,'SES'] = X_train.loc[:,'SES'].fillna(med_SES)
        X_train.loc[:,'MMSE'] = X_train.loc[:,'MMSE'].fillna(avg_MMSE)


        #Complete case for test set
        X_test["Group"] = y_test
        X_test = X_test.dropna(axis = 0, how='any')

        y_test = X_test["Group"]
        X_test=X_test.loc[:,X_test.columns!='Group']

        if cnt < 9:
            clf1 = tree.DecisionTreeClassifier().fit(X_train,y_train)

            sfs_forward = SequentialFeatureSelector(
            clf1 , n_features_to_select = cnt, direction = "forward").fit(X_train,y_train)


            clf.fit(X_train[X_train.columns[sfs_forward.get_support()]],y_train)

            y_pred = clf.predict(X_test[X_test.columns[sfs_forward.get_support()]])
        else:
            clf.fit(X_train,y_train)
            y_pred = clf.predict(X_test)


        acc = accuracy_score(y_test, y_pred)
        acc_score.append(acc)

        precise = precision_score(y_test, y_pred, average='macro',zero_division=1)
        precise_score.append(precise)

        rec = recall_score(y_test, y_pred, average = 'macro', zero_division = 1)
        recall.append(rec)        
        
    return acc_score, precise_score, recall
        
    

### Selecting One Feature

In [25]:
a5 = acc_test(1,5)
a10 = acc_test(1,10)
a15 = acc_test(1,15)
a20 = acc_test(1,20)
a25 = acc_test(1,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc1 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc1)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre1 =  (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre1)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec1 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5


print("The average of the recall score: ", rec1 )


Accuracy Scores
0.5030773917856113
0.4555299361366474
0.4898317962231006
0.47920859133126936
0.48403729603729606
The average of the accuracy score:  0.482337002302785
*******************************
Precision Scores
0.4659062214924284
0.3929114295290766
0.4042702359369026
0.43832611832611834
0.458973544973545
The average of the precision score:  0.43207751005161416
*******************************
Recall Scores
0.44725823769626827
0.39685772761859717
0.4376512666708745
0.4418001443001443
0.45617003367003367
The average of the recall score:  0.4359474819911836


### Selecting Two Features

In [26]:
a5 = acc_test(2,5)
a10 = acc_test(2,10)
a15 = acc_test(2,15)
a20 = acc_test(2,20)
a25 = acc_test(2,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc2 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc2)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre2 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5


print("The average of the precision score: ", pre2)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec2 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec2 )

Accuracy Scores
0.7819610906440841
0.7884140623624295
0.810491875274484
0.8161005761953904
0.8234871794871795
The average of the accuracy score:  0.8040909567927136
*******************************
Precision Scores
0.5968439673989014
0.6322486287636375
0.6376847390082684
0.6886069486069486
0.6868074888074888
The average of the precision score:  0.6484383545170489
*******************************
Recall Scores
0.5938185462090386
0.6117513565339653
0.6678377396024455
0.6724551374551374
0.6992217412217412
The average of the recall score:  0.6490169042044656


### Selecting Three Features

In [27]:
a5 = acc_test(3,5)
a10 = acc_test(3,10)
a15 = acc_test(3,15)
a20 = acc_test(3,20)
a25 = acc_test(3,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc3 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc3)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre3 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5


print("The average of the precision score: ", pre3)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec3 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec3 )

Accuracy Scores
0.8533073917486362
0.8529691724653274
0.834935441370224
0.8216905314757482
0.8524222444222445
The average of the accuracy score:  0.8430649562964361
*******************************
Precision Scores
0.7282959070699764
0.718451992150673
0.7129150733072301
0.6889808802308802
0.7561350501350501
The average of the precision score:  0.720955780578762
*******************************
Recall Scores
0.7249541641867618
0.729823698845438
0.6907424057424058
0.6831499981499981
0.7237402597402597
The average of the recall score:  0.7104821053329726


### Selecting Four Features

In [28]:
a5 = acc_test(4,5)
a10 = acc_test(4,10)
a15 = acc_test(4,15)
a20 = acc_test(4,20)
a25 = acc_test(4,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc4 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc4)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre4 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5
print("The average of the precision score: ", pre4)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec4 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec4)

Accuracy Scores
0.8470337700083338
0.867151271343821
0.8482889767237594
0.8275
0.8441758241758242
The average of the accuracy score:  0.8468299684503476
*******************************
Precision Scores
0.7186847988944136
0.7791532274267164
0.747071780797271
0.7014186507936507
0.7297777777777777
The average of the precision score:  0.7352212471379659
*******************************
Recall Scores
0.7185349521054105
0.7545807453416149
0.709711341672126
0.6860656473156473
0.7375079365079366
The average of the recall score:  0.7212801245885471


### Selecting Five Features

In [29]:
a5 = acc_test(5,5)
a10 = acc_test(5,10)
a15 = acc_test(5,15)
a20 = acc_test(5,20)
a25 = acc_test(5,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc5 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc5)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre5 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5
print("The average of the precision score: ", pre5)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec5 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5
print("The average of the recall score: ", rec5)

Accuracy Scores
0.8356971789050831
0.8636427429840993
0.8558928414580589
0.8399324905400757
0.8635651015651016
The average of the accuracy score:  0.8517460710904838
*******************************
Precision Scores
0.692861034467205
0.7365843605998404
0.7487779540720717
0.7277961621711622
0.7822136012136012
The average of the precision score:  0.7376466225047761
*******************************
Recall Scores
0.7048152269323746
0.7246848401196228
0.7235199803827255
0.7160690235690236
0.7449206349206349
The average of the recall score:  0.7228019411848762


### Selecting Six Features

In [30]:
a5 = acc_test(6,5)
a10 = acc_test(6,10)
a15 = acc_test(6,15)
a20 = acc_test(6,20)
a25 = acc_test(6,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc6 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc6)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre6 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre6)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec6 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5
print("The average of the recall score: ", rec6)

Accuracy Scores
0.8212986384985544
0.8471215579335024
0.8169038208168643
0.8417412710698314
0.8655644355644356
The average of the accuracy score:  0.8385259447766377
*******************************
Precision Scores
0.6507733284078664
0.705981370150168
0.662594433525806
0.7287382062382062
0.8117763347763348
The average of the precision score:  0.7119727346196763
*******************************
Recall Scores
0.6545364553157422
0.6806611021828413
0.693375165139871
0.7168588818588819
0.7677878787878788
The average of the recall score:  0.7026438966570431


### Selecting Seven Features

In [31]:
a5 = acc_test(7,5)
a10 = acc_test(7,10)
a15 = acc_test(7,15)
a20 = acc_test(7,20)
a25 = acc_test(7,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc7 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc7 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre7 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre7)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec7 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec7)

Accuracy Scores
0.8386613130443276
0.8560373046570989
0.8387422046552482
0.8472314671482628
0.8588911088911089
The average of the accuracy score:  0.8479126796792095
*******************************
Precision Scores
0.6954492337825671
0.7766025227209438
0.6985212935212936
0.745522903022903
0.7753088393088393
The average of the precision score:  0.7382809584713093
*******************************
Recall Scores
0.7023125475145848
0.7209265010351967
0.6992641164209792
0.7173735523735524
0.7496190476190476
The average of the recall score:  0.7178991529926722


### Selecting Eight Features

In [32]:
a5 = acc_test(8,5)
a10 = acc_test(8,10)
a15 = acc_test(8,15)
a20 = acc_test(8,20)
a25 = acc_test(8,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc8 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc8 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre8 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre8 )

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec8 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec8)

Accuracy Scores
0.8500876488368605
0.8463471104038367
0.811590689503733
0.8453220674234606
0.8584075924075925
The average of the accuracy score:  0.8423510217150966
*******************************
Precision Scores
0.7068703242354253
0.7360792653052715
0.6589368039368039
0.7771801346801347
0.769089947089947
The average of the precision score:  0.7296312950495165
*******************************
Recall Scores
0.6909337698302045
0.7516803607020999
0.6836697289638466
0.7145815295815295
0.777079365079365
The average of the recall score:  0.7235889508314092


### Selecting Nine(All) Features

In [33]:
a5 = acc_test(9,5)
a10 = acc_test(9,10)
a15 = acc_test(9,15)
a20 = acc_test(9,20)
a25 = acc_test(9,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc9 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc9 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre9 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre9 )

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec9 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec9)

Accuracy Scores
0.8296033878859189
0.861022978150712
0.8276319718928414
0.8429112917096663
0.8660825840825841
The average of the accuracy score:  0.8454504427443446
*******************************
Precision Scores
0.6782451022833043
0.6974975955417132
0.6736009977676645
0.678531746031746
0.767994708994709
The average of the precision score:  0.6991740301238274
*******************************
Recall Scores
0.6778471327537542
0.6939670997279693
0.7001450220077671
0.7084311521811522
0.7751375661375661
The average of the recall score:  0.7111055945616418


## Scorecard

### Accuracy Score

In [34]:
print("Accuracy Score")
print("Selection One Feature: ", acc1)
print("Selection Two Features: ", acc2)
print("Selection Three Features: ", acc3)
print("Selection Four Features: ", acc4)
print("Selection Five Features: ", acc5)
print("Selection Six Features: ", acc6)
print("Selection Seven Features: ", acc7)
print("Selection Eight Features: ", acc8)
print("Selection Nine Features: ", acc9)


Accuracy Score
Selection One Feature:  0.482337002302785
Selection Two Features:  0.8040909567927136
Selection Three Features:  0.8430649562964361
Selection Four Features:  0.8468299684503476
Selection Five Features:  0.8517460710904838
Selection Six Features:  0.8385259447766377
Selection Seven Features:  0.8479126796792095
Selection Eight Features:  0.8423510217150966
Selection Nine Features:  0.8454504427443446


### Precision Score

In [35]:
print("Precision Score")
print("Selection One Feature: ", pre1)
print("Selection Two Features: ", pre2)
print("Selection Three Features: ", pre3)
print("Selection Four Features: ", pre4)
print("Selection Five Features: ", pre5)
print("Selection Six Features: ", pre6)
print("Selection Seven Features: ", pre7)
print("Selection Eight Features: ", pre8)
print("Selection Nine Features: ", pre9)

Precision Score
Selection One Feature:  0.43207751005161416
Selection Two Features:  0.6484383545170489
Selection Three Features:  0.720955780578762
Selection Four Features:  0.7352212471379659
Selection Five Features:  0.7376466225047761
Selection Six Features:  0.7119727346196763
Selection Seven Features:  0.7382809584713093
Selection Eight Features:  0.7296312950495165
Selection Nine Features:  0.6991740301238274


### Recall Score

In [36]:
print("Recall Score")
print("Selection One Feature: ", rec1)
print("Selection Two Features: ", rec2)
print("Selection Three Features: ", rec3)
print("Selection Four Features: ", rec4)
print("Selection Five Features: ", rec5)
print("Selection Six Features: ", rec6)
print("Selection Seven Features: ", rec7)
print("Selection Eight Features: ", rec8)
print("Selection Nine Features: ", rec9)

Recall Score
Selection One Feature:  0.4359474819911836
Selection Two Features:  0.6490169042044656
Selection Three Features:  0.7104821053329726
Selection Four Features:  0.7212801245885471
Selection Five Features:  0.7228019411848762
Selection Six Features:  0.7026438966570431
Selection Seven Features:  0.7178991529926722
Selection Eight Features:  0.7235889508314092
Selection Nine Features:  0.7111055945616418


### Selecting the feature(s)

In [37]:
from sklearn.model_selection import train_test_split
med_SES = stat.median(X['SES'])
avg_MMSE = X['MMSE'].mean()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train.loc[:,'SES'] = X_train.loc[:,'SES'].fillna(med_SES)
X_train.loc[:,'MMSE'] = X_train.loc[:,'MMSE'].fillna(avg_MMSE)

X_test["Group"] = y_test
X_test = X_test.dropna(axis = 0, how='any')

y_test = X_test["Group"]
X_test=X_test.loc[:,X_test.columns!='Group']

clf1 = tree.DecisionTreeClassifier().fit(X_train,y_train)

sfs_forward = SequentialFeatureSelector(
clf1 , n_features_to_select = 5, direction = "forward").fit(X_train,y_train)

X_test.columns[sfs_forward.get_support()]

Index(['Visit', 'MR Delay', 'SES', 'CDR', 'ASF'], dtype='object')