In [1]:
import numpy as np
import csv
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
from sklearn.metrics import auc
from sklearn import linear_model
from sklearn.linear_model import ElasticNetCV
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform
from sklearn.metrics.pairwise import sigmoid_kernel, rbf_kernel, polynomial_kernel
from sklearn import neighbors
from sklearn import tree
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import preprocessing
from sklearn.model_selection import KFold


## 1. Data Preprocessing

In [2]:
# Load features
X = np.loadtxt(open("AD_MRI_DATA/1-CN_AD/1.1-MRI_CorticalThickness.csv","rb"),delimiter=",",skiprows=0)
X = np.array(X, dtype=float)
print("X:" + str(X.shape))

# Load labels
with open('AD_MRI_DATA/1-CN_AD/1-Phenotype.csv','r') as csvfile:
    reader = csv.reader(csvfile)
    c1 = [row[1]for row in reader]
    
del c1[0]
y = np.array(c1, dtype=int)
print("y:" + str(y.shape))


X:(134, 136)
y:(134,)


In [18]:
# Transform features by scaling each feature to a given range.
min_max_scaler = preprocessing.MinMaxScaler()
X = min_max_scaler.fit_transform(X)

# Split data into random train and test subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size = 0.2, random_state=42
)

print("X_train: " + str(X_train.shape))
print("X_test: " + str(X_test.shape))
print("y_train: " + str(y_train.shape))
print("y_test: " + str(y_test.shape))


X_train: (107, 136)
X_test: (27, 136)
y_train: (107,)
y_test: (27,)


## 2. Lasso feature extraction + classifiers

### 2.1 Lasso

In [19]:
model = linear_model.LassoCV()
#alphas=[1,0.1,0.01,0.005,0.001,0.0001]
model.fit(X, y)

print("alpha: " + str(model.alpha_))
#print(lasso.coef_)

selector = SelectFromModel(estimator = model, prefit = True)
selector.get_support()
X_selected = selector.transform(X_train)
print("X_selected: " + str(X_selected.shape))
X_test_sel = selector.transform(X_test)
print("X_test_sel: " + str(X_test_sel.shape))

alpha: 0.002004268249900883
X_selected: (107, 59)
X_test_sel: (27, 59)


### 2.2 Classifiers

In [15]:
for i in range(5):
    # SVM
    if i == 0:
        clf = SVC(C = 1, random_state=42, probability=True)
        print("SVM")
        
    # KNN
    elif i == 1:
        clf = neighbors.KNeighborsClassifier(n_neighbors=2)
        print("KNN")
        
    # Gaussian Naive Bayes
    elif i == 2:
        clf = GaussianNB()
        print("Gaussian Naive Bayes")
        
    # Decision Tree
    elif i == 3:
        clf = tree.DecisionTreeClassifier()
        print("Decision Tree")
        
    # Random Forest
    else:
        clf = RandomForestClassifier()
        print("Random Forest")
    
    # Model Evaluation
    clf.fit(X_selected, y_train)
    y_pred = clf.predict(X_test_sel)
    print('confusion matrix:')
    print(confusion_matrix(y_test, y_pred,labels=[1,0]))
    print(classification_report(y_test, y_pred))
    
    y_pred_prob = clf.predict_proba(X_test_sel)[:,1]
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
    auc(fpr, tpr)


SVM
confusion matrix:
[[10  0]
 [ 0 17]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        10

    accuracy                           1.00        27
   macro avg       1.00      1.00      1.00        27
weighted avg       1.00      1.00      1.00        27

KNN
confusion matrix:
[[ 5  5]
 [ 0 17]]
              precision    recall  f1-score   support

           0       0.77      1.00      0.87        17
           1       1.00      0.50      0.67        10

    accuracy                           0.81        27
   macro avg       0.89      0.75      0.77        27
weighted avg       0.86      0.81      0.80        27

Gaussian Naive Bayes
confusion matrix:
[[ 9  1]
 [ 3 14]]
              precision    recall  f1-score   support

           0       0.93      0.82      0.87        17
           1       0.75      0.90      0.82        10

    accuracy                           0.8

### 2.3 Multi-core SVM

In [8]:

sig_mat = sigmoid_kernel(X_selected)
rbf_mat = rbf_kernel(X_selected)
poly_mat = polynomial_kernel(X_selected)

best_score = 0
best_w1 = 0
best_w2 = 0
best_w3 = 0

for i in range(11):
    for j in range(11):
        w1 = i * 0.1
        w2 = j * 0.1
        w3 = 1 -w1-w2
        if w1<-0.01 or w2<-0.01 or w3<-0.01:
            break
        train_kernel = w1 * sig_mat + w2 * rbf_mat + w3 * poly_mat
        clf = SVC(kernel='precomputed', probability=True)
        acc = cross_val_score(clf, train_kernel, y, cv=5, scoring="accuracy")
        acc_avg = np.sum(acc) / 5
        if acc_avg > best_score:
            best_w1 = w1
            best_w2 = w2
            best_w3 = w3
            best_score = acc_avg
        
print("w1:"+str(best_w1), "w2:"+str(best_w2), "w3:"+str(best_w3))
print("score:"+str(best_score))

w1:0.0 w2:0.0 w3:1.0
score:0.9245014245014245


In [9]:
train_kernel = best_w1 * sig_mat + best_w2 * rbf_mat + best_w3 * poly_mat
y_pred = cross_val_predict(clf, train_kernel, y, cv=5)
auc1 = cross_val_score(clf, train_kernel, y, cv=5, scoring="roc_auc")
print(confusion_matrix(y, y_pred,labels=[1,0]))
print(classification_report(y, y_pred))
acc_avg = np.sum(acc) / 5
print("auc: "+str(auc_avg)+"\n")

[[37  9]
 [ 1 87]]
              precision    recall  f1-score   support

           0       0.91      0.99      0.95        88
           1       0.97      0.80      0.88        46

    accuracy                           0.93       134
   macro avg       0.94      0.90      0.91       134
weighted avg       0.93      0.93      0.92       134

auc: 0.9277051561365287



In [17]:
train_kernel.shape

(134, 134)

## 3. ElasticNet with Classifiers

### 3.1 ElasticNet

In [9]:
model = ElasticNetCV(random_state=0)
#,alphas=[1,0.1,0.01,0.005,0.001,0.0001]
model.fit(X, y)

print("alpha: " + str(model.alpha_))
#print(els.coef_)

selector = SelectFromModel(estimator = model, prefit = True)
selector.get_support()
X_selected = selector.transform(X)
print("X_selected: " + str(X_selected.shape))

alpha: 0.0018311152016526732
X_selected: (134, 43)


### 3.2 Classifiers

In [10]:
for i in range(5):
    # SVM
    if i == 0:
        clf = SVC(C=1, random_state=42, probability=True)
        print("SVM")
        
        
    # KNN
    elif i == 1:
        clf = neighbors.KNeighborsClassifier(n_neighbors=2)
        print("KNN")
        
    # Naive Gaussian Bayes
    elif i == 2:
        clf = GaussianNB()
        print("Naive Gaussian Bayes")
        
    # Decision Tree
    elif i == 3:
        clf = tree.DecisionTreeClassifier()
        print("Decision Tree")
        
    # Random Forest
    else:
        clf = RandomForestClassifier()
        print("Random Forest")
    
    # Model Evaluation
    y_pred = cross_val_predict(clf, X_selected, y, cv=5)
    auc1 = cross_val_score(clf, X_selected, y, cv=5, scoring="roc_auc")
    print(confusion_matrix(y, y_pred,labels=[1,0]))
    print(classification_report(y, y_pred))
    auc_avg = (auc[0]+auc[1]+auc[2]+auc[3]+auc[4])/5
    print("auc: "+str(auc_avg)+"\n")

SVM
[[29 17]
 [ 4 84]]
              precision    recall  f1-score   support

           0       0.83      0.95      0.89        88
           1       0.88      0.63      0.73        46

    accuracy                           0.84       134
   macro avg       0.86      0.79      0.81       134
weighted avg       0.85      0.84      0.84       134

auc: 0.9435729847494553

KNN
[[29 17]
 [ 2 86]]
              precision    recall  f1-score   support

           0       0.83      0.98      0.90        88
           1       0.94      0.63      0.75        46

    accuracy                           0.86       134
   macro avg       0.89      0.80      0.83       134
weighted avg       0.87      0.86      0.85       134

auc: 0.9064996368917939

高斯贝叶斯
[[36 10]
 [ 8 80]]
              precision    recall  f1-score   support

           0       0.89      0.91      0.90        88
           1       0.82      0.78      0.80        46

    accuracy                           0.87       134
   macr

### 3.3 Multi-core SVM

In [11]:
sig_mat = sigmoid_kernel(X_selected)
rbf_mat = rbf_kernel(X_selected)
poly_mat = polynomial_kernel(X_selected)

best_score = 0
best_w1 = 0
best_w2 = 0
best_w3 = 0

for i in range(11):
    for j in range(11):
        w1 = i * 0.1
        w2 = j * 0.1
        w3 = 1 -w1-w2
        if w1<-0.01 or w2<-0.01 or w3<-0.01:
            break
        train_kernel = w1 * sig_mat + w2 * rbf_mat + w3 * poly_mat
        clf = SVC(kernel='precomputed', probability=True)
        acc = cross_val_score(clf, train_kernel, y, cv=5, scoring="accuracy")
        acc_avg = np.sum(acc) / 5
        if acc_avg > best_score:
            best_w1 = w1
            best_w2 = w2
            best_w3 = w3
            best_score = acc_avg
        
print("w1:"+str(best_w1), "w2:"+str(best_w2), "w3:"+str(best_w3))
print("score:"+str(best_score))

w1:0.0 w2:0.0 w3:1.0
score:1.0


In [12]:
train_kernel = best_w1 * sig_mat + best_w2 * rbf_mat + best_w3 * poly_mat
y_pred = cross_val_predict(clf, train_kernel, y, cv=5)
auc1 = cross_val_score(clf, train_kernel, y, cv=5, scoring="roc_auc")
print(confusion_matrix(y, y_pred,labels=[1,0]))
print(classification_report(y, y_pred))
acc_avg = np.sum(acc) / 5
print("auc: "+str(auc_avg)+"\n")

[[44  2]
 [ 0 88]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99        88
           1       1.00      0.96      0.98        46

    accuracy                           0.99       134
   macro avg       0.99      0.98      0.98       134
weighted avg       0.99      0.99      0.98       134

auc: 0.9986928104575163

