In [1]:
import numpy as np
import csv
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
from sklearn.metrics import auc
from sklearn import linear_model
from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform
from sklearn.metrics.pairwise import sigmoid_kernel, rbf_kernel, polynomial_kernel
from sklearn import neighbors
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict


## 1. Load Data

In [2]:
# Load the features
X = np.loadtxt(open("AD_MRI_DATA/1-CN_AD/1.3-MRI_Volume.csv","rb"),delimiter=",",skiprows=0)
X = np.array(X, dtype=float)
print("X:" + str(X.shape))

# Load the labels
with open('AD_MRI_DATA/1-CN_AD/1-Phenotype.csv','r') as csvfile:
    reader = csv.reader(csvfile)
    c1 = [row[1]for row in reader]
    
del c1[0]
y = np.array(c1, dtype=int)
print("y:" + str(y.shape))


X:(134, 109)
y:(134,)


## 2. Lasso with Classifiers

### 2.1 Lasso

In [3]:
model = linear_model.LassoCV()
#alphas=[1,0.1,0.01,0.005,0.001,0.0001]
model.fit(X, y)

print("alpha: " + str(model.alpha_))
#print(lasso.coef_)
3
selector = SelectFromModel(estimator = model, prefit = True)
selector.get_support()
X_selected = selector.transform(X)
print("X_selected: " + str(X_selected.shape))

alpha: 19.014840181341103
X_selected: (134, 18)


### 2.2 Classifiers

In [4]:
for i in range(5):
    # SVM
    if i == 0:
        clf = SVC(C=1, random_state=42, probability=True)
        print("SVM")
        
        
    # KNN
    elif i == 1:
        clf = neighbors.KNeighborsClassifier(n_neighbors=2)
        print("KNN")
        
    # Gaussian Naive Bayes
    elif i == 2:
        clf = GaussianNB()
        print("Gaussian Naive Bayes")
        
    # Decision Tree
    elif i == 3:
        clf = tree.DecisionTreeClassifier()
        print("Decision Tree")
        
    # Random Forest
    else:
        clf = RandomForestClassifier()
        print("Random Forest")
    
    # Model Evaluation
    y_pred = cross_val_predict(clf, X_selected, y, cv=5)
    auc = cross_val_score(clf, X_selected, y, cv=5, scoring="roc_auc")
    print(confusion_matrix(y, y_pred,labels=[1,0]))
    print(classification_report(y, y_pred))
    auc_avg = (auc[0]+auc[1]+auc[2]+auc[3]+auc[4])/5
    print("auc: "+str(auc_avg)+"\n")


SVM
[[34 12]
 [ 1 87]]
              precision    recall  f1-score   support

           0       0.88      0.99      0.93        88
           1       0.97      0.74      0.84        46

    accuracy                           0.90       134
   macro avg       0.93      0.86      0.88       134
weighted avg       0.91      0.90      0.90       134

auc: 0.9801597676107481

KNN
[[28 18]
 [ 0 88]]
              precision    recall  f1-score   support

           0       0.83      1.00      0.91        88
           1       1.00      0.61      0.76        46

    accuracy                           0.87       134
   macro avg       0.92      0.80      0.83       134
weighted avg       0.89      0.87      0.86       134

auc: 0.9169208424110386

Gaussian Naive Bayes
[[45  1]
 [ 4 84]]
              precision    recall  f1-score   support

           0       0.99      0.95      0.97        88
           1       0.92      0.98      0.95        46

    accuracy                           0.96   

### 2.3 Multi-core SVM

In [5]:
sig_mat = sigmoid_kernel(X_selected)
rbf_mat = rbf_kernel(X_selected)
poly_mat = polynomial_kernel(X_selected)

best_score = 0
best_w1 = 0
best_w2 = 0
best_w3 = 0

for i in range(11):
    for j in range(11):
        w1 = i * 0.1
        w2 = j * 0.1
        w3 = 1 -w1-w2
        if w1<-0.01 or w2<-0.01 or w3<-0.01:
            break
        train_kernel = w1 * sig_mat + w2 * rbf_mat + w3 * poly_mat
        clf = SVC(kernel='precomputed', probability=True)
        acc = cross_val_score(clf, train_kernel, y, cv=5, scoring="accuracy")
        acc_avg = (acc[0] + acc[1] + acc[2])/3
        if acc_avg > best_score:
            best_w1 = w1
            best_w2 = w2
            best_w3 = w3
            best_score = acc_avg
        
print("w1:"+str(best_w1), "w2:"+str(best_w2), "w3:"+str(best_w3))
print("score:"+str(best_score))

w1:0.0 w2:0.0 w3:1.0
score:0.9629629629629629


In [6]:
train_kernel = best_w1 * sig_mat + best_w2 * rbf_mat + best_w3 * poly_mat
y_pred = cross_val_predict(clf, train_kernel, y, cv=5)
auc = cross_val_score(clf, train_kernel, y, cv=5, scoring="roc_auc")
print(confusion_matrix(y, y_pred,labels=[1,0]))
print(classification_report(y, y_pred))
auc_avg = (auc[0]+auc[1]+auc[2]+auc[3]+auc[4])/5
print("auc: "+str(auc_avg)+"\n")

[[43  3]
 [ 3 85]]
              precision    recall  f1-score   support

           0       0.97      0.97      0.97        88
           1       0.93      0.93      0.93        46

    accuracy                           0.96       134
   macro avg       0.95      0.95      0.95       134
weighted avg       0.96      0.96      0.96       134

auc: 0.9923602033405954



## 3. ElasticNet with Classifiers

### 3.1 ElasticNet

In [7]:
model = ElasticNetCV(random_state=0)
#,alphas=[1,0.1,0.01,0.005,0.001,0.0001]
model.fit(X, y)

print("alpha: " + str(model.alpha_))
#print(els.coef_)

selector = SelectFromModel(estimator = model, prefit = True)
selector.get_support()
X_selected = selector.transform(X)
print("X_selected: " + str(X_selected.shape))

alpha: 38.02968036268219
X_selected: (134, 19)


### 3.2 Classifiers

In [8]:
for i in range(5):
    # SVM
    if i == 0:
        clf = SVC(C=1, random_state=42, probability=True)
        print("SVM")
        
        
    # KNN
    elif i == 1:
        clf = neighbors.KNeighborsClassifier(n_neighbors=2)
        print("KNN")
        
    # Gaussian Naive Bayes
    elif i == 2:
        clf = GaussianNB()
        print("Gaussian Naive Bayes")
        
    # Decision Tree
    elif i == 3:
        clf = tree.DecisionTreeClassifier()
        print("Decision Tree")
        
    # Random Forest
    else:
        clf = RandomForestClassifier()
        print("Random Forest")
    
    # Model Evaluation
    y_pred = cross_val_predict(clf, X_selected, y, cv=5)
    auc = cross_val_score(clf, X_selected, y, cv=5, scoring="roc_auc")
    print(confusion_matrix(y, y_pred,labels=[1,0]))
    print(classification_report(y, y_pred))
    auc_avg = (auc[0]+auc[1]+auc[2]+auc[3]+auc[4])/5
    print("auc: "+str(auc_avg)+"\n")

SVM
[[26 20]
 [ 5 83]]
              precision    recall  f1-score   support

           0       0.81      0.94      0.87        88
           1       0.84      0.57      0.68        46

    accuracy                           0.81       134
   macro avg       0.82      0.75      0.77       134
weighted avg       0.82      0.81      0.80       134

auc: 0.9234713144517066

KNN
[[14 32]
 [ 5 83]]
              precision    recall  f1-score   support

           0       0.72      0.94      0.82        88
           1       0.74      0.30      0.43        46

    accuracy                           0.72       134
   macro avg       0.73      0.62      0.62       134
weighted avg       0.73      0.72      0.68       134

auc: 0.7747785039941902

Gaussian Naive Bayes
[[46  0]
 [ 4 84]]
              precision    recall  f1-score   support

           0       1.00      0.95      0.98        88
           1       0.92      1.00      0.96        46

    accuracy                           0.97   

### 3.3 Multi-core SVM

In [9]:
sig_mat = sigmoid_kernel(X_selected)
rbf_mat = rbf_kernel(X_selected)
poly_mat = polynomial_kernel(X_selected)

best_score = 0
best_w1 = 0
best_w2 = 0
best_w3 = 0

for i in range(11):
    for j in range(11):
        w1 = i * 0.1
        w2 = j * 0.1
        w3 = 1 -w1-w2
        if w1<-0.01 or w2<-0.01 or w3<-0.01:
            break
        train_kernel = w1 * sig_mat + w2 * rbf_mat + w3 * poly_mat
        clf = SVC(kernel='precomputed', probability=True)
        acc = cross_val_score(clf, train_kernel, y, cv=5, scoring="accuracy")
        acc_avg = (acc[0] + acc[1] + acc[2])/3
        if acc_avg > best_score:
            best_w1 = w1
            best_w2 = w2
            best_w3 = w3
            best_score = acc_avg
        
print("w1:"+str(best_w1), "w2:"+str(best_w2), "w3:"+str(best_w3))
print("score:"+str(best_score))

w1:0.0 w2:0.0 w3:1.0
score:0.9629629629629629


In [10]:
train_kernel = best_w1 * sig_mat + best_w2 * rbf_mat + best_w3 * poly_mat
y_pred = cross_val_predict(clf, train_kernel, y, cv=5)
auc = cross_val_score(clf, train_kernel, y, cv=5, scoring="roc_auc")
print(confusion_matrix(y, y_pred,labels=[1,0]))
print(classification_report(y, y_pred))
auc_avg = (auc[0]+auc[1]+auc[2]+auc[3]+auc[4])/5
print("auc: "+str(auc_avg)+"\n")

[[41  5]
 [ 3 85]]
              precision    recall  f1-score   support

           0       0.94      0.97      0.96        88
           1       0.93      0.89      0.91        46

    accuracy                           0.94       134
   macro avg       0.94      0.93      0.93       134
weighted avg       0.94      0.94      0.94       134

auc: 0.9922294843863471

