In [2]:
!pip install --upgrade nilearn

from nilearn.datasets import fetch_abide_pcp
# Fetch the full data and update phenotypic data and cross_validation
abide = fetch_abide_pcp(derivatives = ['rois_cc200'], pipeline = 'cpac', quality_checked = False)

y = abide.phenotypic['DX_GROUP']


Downloading data from https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Phenotypic_V1_0b_preprocessed1.csv ...


Error while fetching file Phenotypic_V1_0b_preprocessed1.csv; dataset fetching aborted.

ProxyError: HTTPSConnectionPool(host='s3.amazonaws.com', port=443): Max retries exceeded with url: /fcp-indi/data/Projects/ABIDE_Initiative/Phenotypic_V1_0b_preprocessed1.csv (Caused by ProxyError('Cannot connect to proxy.', OSError('Tunnel connection failed: 407 Proxy Authentication Required')))

=====================================
Logistic Regression:
=====================================

In [None]:
measure = 'correlation'
C = [0.001, 0.01, 0.1]
lr_params = {'C': C}

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.linear_model import LogisticRegression

skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 10)
lr = LogisticRegression()

y[y==2] = 0

from nilearn.connectome import ConnectivityMeasure
from nilearn.connectome import sym_matrix_to_vec

conn_est = ConnectivityMeasure(kind = measure)
conn_matrices = conn_est.fit_transform(abide.rois_cc200)
X = sym_matrix_to_vec(conn_matrices)

gcv = GridSearchCV(lr, lr_params, n_jobs = -1, cv = skf, verbose = 1)
gcv.fit(X, y)
best_estimators = gcv.best_estimator_
best_scores = gcv.best_score_


print("best_estimators:",best_estimators)
print("best_scores:",best_scores)

'''
best_estimators: [LogisticRegression(C=0.01, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)]
best_scores: [0.6908212560386473]
'''

In [None]:
from sklearn.model_selection import cross_val_score
import numpy as np
#best parameter for each measure
lr = LogisticRegression(C = 0.01)

#recall == sensitivity
recall = cross_val_score(lr, X, y, scoring = 'recall',cv = skf, verbose = 1)
precision = cross_val_score(lr, X, y, scoring = 'precision',cv = skf, verbose = 1)
    
cross_recall = np.mean(recall)
cross_precision = np.mean(precision)
   
print("cross_recall:",cross_recall)
print("cross_precision:",cross_precision)  

'''
cross_recall: [0.6673267326732674]
cross_precision: [0.6904341921519341]
'''

==============================================
Ridge
==============================================

In [None]:
measure = 'correlation'
alpha = [100,1000,10000]
rc_params = {'alpha': alpha}

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.linear_model import RidgeClassifier


skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 10)
rc = RidgeClassifier()

y[y==2] = 0

from nilearn.connectome import ConnectivityMeasure
from nilearn.connectome import sym_matrix_to_vec

conn_est = ConnectivityMeasure(kind = measure)
conn_matrices = conn_est.fit_transform(abide.rois_cc200)
X = sym_matrix_to_vec(conn_matrices)

gcv = GridSearchCV(rc, rc_params, n_jobs = -1, cv = skf, verbose = 1)
gcv.fit(X, y)
best_estimators = gcv.best_estimator_
best_scores = gcv.best_score_


print("best_estimators:",best_estimators)
print("best_scores:",best_scores)

'''
best_estimators: [RidgeClassifier(alpha=1000, class_weight=None, copy_X=True,
        fit_intercept=True, max_iter=None, normalize=False,
        random_state=None, solver='auto', tol=0.001)]
best_scores: [0.6927536231884058]
'''


In [None]:
from sklearn.model_selection import cross_val_score
import numpy as np
#best parameter for each measure
lr = RidgeClassifier(alpha=1000)

#recall == sensitivity
recall = cross_val_score(lr, X, y, scoring = 'recall',cv = skf, verbose = 1)
precision = cross_val_score(lr, X, y, scoring = 'precision',cv = skf, verbose = 1)
    
cross_recall = np.mean(recall)
cross_precision = np.mean(precision)
  
print("cross_recall:",cross_recall)
print("cross_precision:",cross_precision)

'''
cross_recall: [0.6594059405940593]
cross_precision: [0.6960889851629156]
'''

===========================================
linearSVC l2
===========================================

In [2]:
measure = 'correlation'
C = [0.0001, 0.001, 0.01]
svc_params = {'C': C}

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.svm import LinearSVC

skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 10)
svc_l2 = LinearSVC()

y[y==2] = 0

from nilearn.connectome import ConnectivityMeasure
from nilearn.connectome import sym_matrix_to_vec

conn_est = ConnectivityMeasure(kind = measure)
conn_matrices = conn_est.fit_transform(abide.rois_cc200)
X = sym_matrix_to_vec(conn_matrices)

gcv = GridSearchCV(svc_l2, svc_params, n_jobs = -1, cv = skf, verbose = 1)
gcv.fit(X, y)
best_estimators = gcv.best_estimator_
best_scores = gcv.best_score_

print("best_estimators:",best_estimators)
print("best_scores:",best_scores)

'''
best_estimators: [LinearSVC(C=0.001, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)]
best_scores: [0.6859903381642513]
'''

  return f(*args, **kwds)


Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  3.3min finished


best_estimators: LinearSVC(C=0.001, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)
best_scores: 0.6859903381642513


"\nbest_estimators: [LinearSVC(C=0.001, class_weight=None, dual=True, fit_intercept=True,\n     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n     verbose=0)]\nbest_scores: [0.6859903381642513]\n"

In [3]:
from sklearn.model_selection import cross_val_score
import numpy as np
#best parameter for each measure
lr = LinearSVC(C = 0.001)

#recall == sensitivity
recall = cross_val_score(lr, X, y, scoring = 'recall',cv = skf, verbose = 1)
precision = cross_val_score(lr, X, y, scoring = 'precision',cv = skf, verbose = 1)
    
cross_recall = np.mean(recall)
cross_precision = np.mean(precision)

print("cross_recall:",cross_recall)
print("cross_precision:",cross_precision)

'''
cross_recall: [0.6613861386138613]
cross_precision: [0.6856377307555428]
'''

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  1.0min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


cross_recall: 0.6613861386138613
cross_precision: 0.6856377307555428


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  1.0min finished


'\ncross_recall: [0.6613861386138613]\ncross_precision: [0.6856377307555428]\n'

=====================================
svm rbf
=====================================

In [4]:
measure = 'correlation'
C = [1,10,100]
svc_params = {'C': C}

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.svm import SVC

skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 10)
svc_rbf = SVC(gamma='auto')

y[y==2] = 0

from nilearn.connectome import ConnectivityMeasure
from nilearn.connectome import sym_matrix_to_vec

conn_est = ConnectivityMeasure(kind = measure)
conn_matrices = conn_est.fit_transform(abide.rois_cc200)
X = sym_matrix_to_vec(conn_matrices)

gcv = GridSearchCV(svc_rbf, svc_params, n_jobs = -1, cv = skf, verbose = 1)
gcv.fit(X, y)
best_estimators = gcv.best_estimator_
best_scores = gcv.best_score_

print("best_estimators:",best_estimators)
print("best_scores:",best_scores)

'''
best_estimators: SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
best_scores: 0.6811594202898551
'''

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  7.3min finished


best_estimators: SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
best_scores: 0.6811594202898551


"\nbest_estimators: SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,\n  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n  max_iter=-1, probability=False, random_state=None, shrinking=True,\n  tol=0.001, verbose=False)\nbest_scores: 0.6811594202898551\n"

In [5]:
from sklearn.model_selection import cross_val_score
import numpy as np
#best parameter for each measure
lr = SVC(gamma='auto',C=10)

#recall == sensitivity
recall = cross_val_score(lr, X, y, scoring = 'recall',cv = skf, verbose = 1)
precision = cross_val_score(lr, X, y, scoring = 'precision',cv = skf, verbose = 1)
    
cross_recall = np.mean(recall)
cross_precision = np.mean(precision)

print("cross_recall:",cross_recall)
print("cross_precision:",cross_precision)

'''
cross_recall: 0.6257425742574256
cross_precision: 0.693116442730704
'''

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  2.4min finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


cross_recall: 0.6257425742574256
cross_precision: 0.693116442730704


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  2.4min finished


'\ncross_recall: [0.6613861386138613]\ncross_precision: [0.6856377307555428]\n'