# Linear Regression Model

In [54]:
import pandas as pd
import numpy as np
import statistics as stat
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SequentialFeatureSelector
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.linear_model import RidgeCV
from imblearn.over_sampling import RandomOverSampler

pd.options.mode.chained_assignment = None 


ad =  pd.read_csv('dataset.csv')

### A glimpse at the data

In [55]:
ad.head()
#https://scikit-learn.org/stable/auto_examples/feature_selection/plot_select_from_model_diabetes.html#sphx-glr-auto-examples-feature-selection-plot-select-from-model-diabetes-py

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [56]:
ad.isnull().values.any()

True

### Grouping CDR 2.0 to 1.0


In [57]:
ad.loc[ad["CDR"] == 2,"CDR"] = 1

In [58]:
#ad = ad.dropna()

### Splitting the columns into indepdent and dependent variables

In [59]:
X = ad.iloc[:,[3,4,7,8,9,10,12,13,14]]
y = ad.iloc[:,[11]]

y=y.astype(str)


### Normalizing the values

In [60]:
def normalize(x):
    return (x-min(x))/((max(x)) - min(x))

In [61]:
X.loc[:,['Visit']]=pd.DataFrame(normalize(X['Visit']))
X.loc[:,['MR Delay']]=pd.DataFrame(normalize(X['MR Delay']))
X.loc[:,['Age']]=pd.DataFrame(normalize(X['Age']))
X.loc[:,['EDUC']]=pd.DataFrame(normalize(X['EDUC']))
X.loc[:,['SES']]=pd.DataFrame(normalize(X['SES']))
X.loc[:,['MMSE']]=pd.DataFrame(normalize(X['MMSE']))
X.loc[:,['eTIV']]=pd.DataFrame(normalize(X['eTIV']))
X.loc[:,['nWBV']]=pd.DataFrame(normalize(X['nWBV']))
X.loc[:,['ASF']]=pd.DataFrame(normalize(X['ASF']))

### Defining the function to check the accuracty, precise, and recall scores for number of features selected for Linear Regression

In [62]:
def acc_test(cnt,fold):
    regressor = LinearRegression()
    
    kf = KFold(n_splits=fold, random_state=None)
    oversample = RandomOverSampler(sampling_strategy='minority')
    med_SES = stat.median(X['SES'])
    avg_MMSE = X['MMSE'].mean()
    acc_score = []
    precise_score = []
    recall = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index,:], X.iloc[test_index,:]
        y_train, y_test = y.iloc[train_index,:], y.iloc[test_index,:]

        X_train, y_train = oversample.fit_resample(X_train, y_train)
        

        # impute missing values for training set
        X_train.loc[:,'SES'] = X_train.loc[:,'SES'].fillna(med_SES)
        X_train.loc[:,'MMSE'] = X_train.loc[:,'MMSE'].fillna(avg_MMSE)

        y_train = y_train.astype(float)
        y_train.loc[:,'CDR'] = y_train.loc[:,'CDR'].fillna(stat.median(y_train['CDR']))
        y_train = y_train.astype(str)

       

        #Complete case for test set
        X_test["CDR"] = y_test
        X_test = X_test.dropna(axis = 0, how='any')

        y_test = X_test["CDR"]
        X_test=X_test.loc[:,X_test.columns!='CDR']

        ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5)).fit(X_train,y_train)

        
        if cnt < 9:
            sfs_forward = SequentialFeatureSelector(
            ridge , n_features_to_select = cnt, direction = "forward").fit(X_train,y_train)

            regressor.fit(X_train[X_train.columns[sfs_forward.get_support()]],y_train)

            y_pred = regressor.predict(X_test[X_test.columns[sfs_forward.get_support()]])
        else:
            regressor.fit(X_train,y_train)
            y_pred = regressor.predict(X_test)
            
        new_y_pred = np.around(y_pred * 2.0) / 2.0
        new_y_pred = new_y_pred.astype(str)
        y_test = y_test.astype(str)

        acc = accuracy_score(y_test, new_y_pred)
        acc_score.append(acc)

        precise = precision_score(y_test, new_y_pred, average='macro',zero_division=1)
        precise_score.append(precise)

        rec = recall_score(y_test, new_y_pred, average = 'macro', zero_division = 1)
        recall.append(rec)

        
    return acc_score, precise_score, recall
    
    
        
        
    

### Selecting One Feature

In [63]:
a5 = acc_test(1,5)
a10 = acc_test(1,10)
a15 = acc_test(1,15)
a20 = acc_test(1,20)
a25 = acc_test(1,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc1 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc1)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre1 =  (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre1)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec1 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5


print("The average of the recall score: ", rec1 )

Accuracy Scores
0.5390651467696261
0.554031954887218
0.5388482074752098
0.5738745666957741
0.5526216006216006
The average of the accuracy score:  0.5516882952898857
*******************************
Precision Scores
0.5965259247096182
0.6318483509954098
0.6746806945491156
0.6474834887334887
0.683061938061938
The average of the precision score:  0.646720079409914
*******************************
Recall Scores
0.6235011348712927
0.5928183718586195
0.5907788290141232
0.5987381137381137
0.6146005291005291
The average of the recall score:  0.6040873957165356


### Selecting Two Features

In [64]:
a5 = acc_test(2,5)
a10 = acc_test(2,10)
a15 = acc_test(2,15)
a20 = acc_test(2,20)
a25 = acc_test(2,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc2 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc2)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre2 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5


print("The average of the precision score: ", pre2)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec2 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec2 )

Accuracy Scores
0.5190462715210922
0.533468045112782
0.5648825324180016
0.5214912280701754
0.5556352536352537
The average of the accuracy score:  0.5389046661514609
*******************************
Precision Scores
0.5710927998476396
0.5999567240191548
0.6179220868952551
0.6135928885928886
0.6467804232804233
The average of the precision score:  0.6098689845270723
*******************************
Recall Scores
0.6102587977995363
0.5457129188321139
0.638417410476234
0.5839782902282902
0.6261216931216931
The average of the recall score:  0.6008978220915735


### Selecting Three Features

In [65]:
a5 = acc_test(3,5)
a10 = acc_test(3,10)
a15 = acc_test(3,15)
a20 = acc_test(3,20)
a25 = acc_test(3,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc3 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc3)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre3 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5


print("The average of the precision score: ", pre3)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec3 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec3 )

Accuracy Scores
0.570693262889444
0.5916827118471856
0.5480404271548437
0.5640340127278982
0.5294616494616494
The average of the accuracy score:  0.5607824128162042
*******************************
Precision Scores
0.5653449208830316
0.6025410467622744
0.6000998881674836
0.5897783697783698
0.6135939615939616
The average of the precision score:  0.5942716374370243
*******************************
Recall Scores
0.6218319177906507
0.5971221236623714
0.6056423424070483
0.6024023892773893
0.5989538239538239
The average of the recall score:  0.6051905194182566


### Selecting Four Features

In [66]:
a5 = acc_test(4,5)
a10 = acc_test(4,10)
a15 = acc_test(4,15)
a20 = acc_test(4,20)
a25 = acc_test(4,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc4 =  (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc4)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre4 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5
print("The average of the precision score: ", pre4)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec4 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec4)

Accuracy Scores
0.5596754662265405
0.564209002235318
0.5483429951690821
0.5555867303061576
0.5535207015207015
The average of the accuracy score:  0.55626697909156
*******************************
Precision Scores
0.5575563146610769
0.5544406856759798
0.6252082684925823
0.5683989297739298
0.593581178081178
The average of the precision score:  0.5798370753369494
*******************************
Recall Scores
0.6363532036691442
0.5866563742128758
0.6178716871363931
0.6088193288193288
0.6081955266955267
The average of the recall score:  0.6115792241066537


### Selecting Five Features

In [67]:
a5 = acc_test(5,5)
a10 = acc_test(5,10)
a15 = acc_test(5,15)
a20 = acc_test(5,20)
a25 = acc_test(5,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc5 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc5)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre5 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5
print("The average of the precision score: ", pre5)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec5 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5
print("The average of the recall score: ", rec5)

Accuracy Scores
0.5529654684686554
0.5651770473480999
0.5500172896008136
0.5419478182636077
0.5368589188589189
The average of the accuracy score:  0.5493933085080192
*******************************
Precision Scores
0.5385799524480184
0.5666046659396201
0.619512395737886
0.5994018944018944
0.5987127687127687
The average of the precision score:  0.5845623354480376
*******************************
Recall Scores
0.6463950566676535
0.5932503918796488
0.64043514764103
0.5886281773781774
0.6048815536315536
The average of the recall score:  0.6147180654396127


### Selecting Six Features

In [68]:
a5 = acc_test(6,5)
a10 = acc_test(6,10)
a15 = acc_test(6,15)
a20 = acc_test(6,20)
a25 = acc_test(6,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc6 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc6)

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre6 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre6)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec6 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5
print("The average of the recall score: ", rec6)

Accuracy Scores
0.5669959464144435
0.5588061369640317
0.538090007627765
0.5457179784869414
0.5318594738594739
The average of the accuracy score:  0.5482939086705311
*******************************
Precision Scores
0.5317100598828681
0.5413201432164028
0.6150929825439629
0.6021392958892958
0.5909701779701779
The average of the precision score:  0.5762465319005414
*******************************
Recall Scores
0.6790220673635308
0.6320247170556768
0.6293582125935067
0.5936959452584453
0.5958299663299663
The average of the recall score:  0.6259861817202251


### Selecting Seven Features

In [69]:
a5 = acc_test(7,5)
a10 = acc_test(7,10)
a15 = acc_test(7,15)
a20 = acc_test(7,20)
a25 = acc_test(7,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc7 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc7 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre7 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre7)

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec7 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec7)

Accuracy Scores
0.5509579855339147
0.5281472261735419
0.5367388761759471
0.5294350837500992
0.5419940059940059
The average of the accuracy score:  0.5374546355255018
*******************************
Precision Scores
0.48401812049157494
0.5019355825019248
0.5985278456111789
0.5601969095719096
0.5823520923520924
The average of the precision score:  0.5454061101057361
*******************************
Recall Scores
0.6708608949641759
0.5690146315243064
0.6024858012358012
0.5894712347837348
0.5924031986531987
The average of the recall score:  0.6048471522322434


### Selecting Eight Features

In [70]:
a5 = acc_test(8,5)
a10 = acc_test(8,10)
a15 = acc_test(8,15)
a20 = acc_test(8,20)
a25 = acc_test(8,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc8 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc8 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre8 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre8 )

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec8 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec8)

Accuracy Scores
0.5266493865791155
0.5162409825238773
0.5228563437579455
0.5258395682834537
0.5028338328338329
The average of the accuracy score:  0.518884022795645
*******************************
Precision Scores
0.463699739022932
0.4948809036680109
0.5998302639530709
0.5543877765752766
0.5614493746993747
The average of the precision score:  0.5348496115837331
*******************************
Recall Scores
0.6794658170214313
0.6430738260420923
0.5968480974363327
0.5817290579790579
0.5754415584415584
The average of the recall score:  0.6153116713840946


### Selecting nine (all) features

In [71]:
a5 = acc_test(9,5)
a10 = acc_test(9,10)
a15 = acc_test(9,15)
a20 = acc_test(9,20)
a25 = acc_test(9,25)

a5_acc = stat.mean(a5[0])
a10_acc = stat.mean(a10[0])
a15_acc = stat.mean(a15[0])
a20_acc = stat.mean(a20[0])
a25_acc = stat.mean(a25[0])

print("Accuracy Scores")
print(a5_acc)
print(a10_acc)
print(a15_acc)
print(a20_acc)
print(a25_acc)

acc9 = (a5_acc + a10_acc + a15_acc + a20_acc + a25_acc) / 5

print("The average of the accuracy score: ", acc9 )

print("*******************************")
print("Precision Scores")
a5_prec = stat.mean(a5[1])
a10_prec = stat.mean(a10[1])
a15_prec = stat.mean(a15[1])
a20_prec = stat.mean(a20[1])
a25_prec = stat.mean(a25[1])

print(a5_prec)
print(a10_prec)
print(a15_prec)
print(a20_prec)
print(a25_prec)

pre9 = (a5_prec + a10_prec + a15_prec + a20_prec + a25_prec) / 5

print("The average of the precision score: ", pre9 )

print("*******************************")
print("Recall Scores")
a5_rec = stat.mean(a5[2])
a10_rec = stat.mean(a10[2])
a15_rec = stat.mean(a15[2])
a20_rec = stat.mean(a20[2])
a25_rec = stat.mean(a25[2])

print(a5_rec)
print(a10_rec)
print(a15_rec)
print(a20_rec)
print(a25_rec)

rec9 = (a5_rec + a10_rec + a15_rec + a20_rec + a25_rec) / 5

print("The average of the recall score: ", rec9)

Accuracy Scores
0.511209462909791
0.49174875533428164
0.519256038647343
0.5227317681458549
0.4999180819180819
The average of the accuracy score:  0.5089728213910705
*******************************
Precision Scores
0.42542501197989
0.4679024376879885
0.5846354311705189
0.5657124703999704
0.5358921356421357
The average of the precision score:  0.5159134973761007
*******************************
Recall Scores
0.6538328469028462
0.5575311819816464
0.5955445752504576
0.5835745504495504
0.5613494468494469
The average of the recall score:  0.5903665202867895


## Scorecard

### Accuracy Score

In [72]:
print("Accuracy Score")
print("Selection One Feature: ", acc1)
print("Selection Two Features: ", acc2)
print("Selection Three Features: ", acc3)
print("Selection Four Features: ", acc4)
print("Selection Five Features: ", acc5)
print("Selection Six Features: ", acc6)
print("Selection Seven Features: ", acc7)
print("Selection Eight Features: ", acc8)
print("Selection Nine Features: ", acc9)


Accuracy Score
Selection One Feature:  0.5516882952898857
Selection Two Features:  0.5389046661514609
Selection Three Features:  0.5607824128162042
Selection Four Features:  0.55626697909156
Selection Five Features:  0.5493933085080192
Selection Six Features:  0.5482939086705311
Selection Seven Features:  0.5374546355255018
Selection Eight Features:  0.518884022795645
Selection Nine Features:  0.5089728213910705


### Precision Score

In [73]:
print("Precision Score")
print("Selection One Feature: ", pre1)
print("Selection Two Features: ", pre2)
print("Selection Three Features: ", pre3)
print("Selection Four Features: ", pre4)
print("Selection Five Features: ", pre5)
print("Selection Six Features: ", pre6)
print("Selection Seven Features: ", pre7)
print("Selection Eight Features: ", pre8)
print("Selection Nine Features: ", pre9)

Precision Score
Selection One Feature:  0.646720079409914
Selection Two Features:  0.6098689845270723
Selection Three Features:  0.5942716374370243
Selection Four Features:  0.5798370753369494
Selection Five Features:  0.5845623354480376
Selection Six Features:  0.5762465319005414
Selection Seven Features:  0.5454061101057361
Selection Eight Features:  0.5348496115837331
Selection Nine Features:  0.5159134973761007


### Recall Score

In [74]:
print("Recall Score")
print("Selection One Feature: ", rec1)
print("Selection Two Features: ", rec2)
print("Selection Three Features: ", rec3)
print("Selection Four Features: ", rec4)
print("Selection Five Features: ", rec5)
print("Selection Six Features: ", rec6)
print("Selection Seven Features: ", rec7)
print("Selection Eight Features: ", rec8)
print("Selection Nine Features: ", rec9)

Recall Score
Selection One Feature:  0.6040873957165356
Selection Two Features:  0.6008978220915735
Selection Three Features:  0.6051905194182566
Selection Four Features:  0.6115792241066537
Selection Five Features:  0.6147180654396127
Selection Six Features:  0.6259861817202251
Selection Seven Features:  0.6048471522322434
Selection Eight Features:  0.6153116713840946
Selection Nine Features:  0.5903665202867895


### Selecing the feature(s)

In [76]:
from sklearn.model_selection import train_test_split
med_SES = stat.median(X['SES'])
avg_MMSE = X['MMSE'].mean()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train.loc[:,'SES'] = X_train.loc[:,'SES'].fillna(med_SES)
X_train.loc[:,'MMSE'] = X_train.loc[:,'MMSE'].fillna(avg_MMSE)

y_train = y_train.astype(float)
y_train.loc[:,'CDR'] = y_train.loc[:,'CDR'].fillna(stat.median(y_train['CDR']))
y_train = y_train.astype(str)

ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5)).fit(X_train,y_train)

sfs_forward = SequentialFeatureSelector(
ridge , n_features_to_select = 4, direction = "forward").fit(X_train,y_train)

X_test.columns[sfs_forward.get_support()]

Index(['MR Delay', 'Age', 'MMSE', 'nWBV'], dtype='object')