In [None]:
import numpy as np
import pandas as pd
import os
import base_functions as bf
import pickle
from sklearn import svm
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV
from sklearn import metrics

from nilearn import connectome, plotting
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the pickled connectivity dataset and unpack the entries used below.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
dtfile = './data/dataset1_connectivity.pkl'
with open(dtfile, 'rb') as f:  # context manager closes the handle after loading
    D = pickle.load(f)
all_session_subjects = D['all_session_subjects']
all_session_conn_vec = D['all_session_conn_vec']
all_session = D['all_session']
info_DF = D['info_DF']

In [None]:
## Settings: change `care_sess` (and the label selection below) to get feature
## patterns for partial sleep or after recovery sleep.
care_sess = 0  # first session, i.e., the morning after sleep manipulation

# dep_labels coding: 0->normal sleep; 1->partial sleep deprivation; 2->sleep deprivation
session_subjects = all_session_subjects[care_sess]
dep_labels = bf.get_subject_info(info_DF, session_subjects, ['deprive_labels'])

# Keep only the normal-sleep (0) and sleep-deprivation (2) subjects.
idx = np.logical_or(dep_labels==0, dep_labels==2)

# Features: Fisher r-to-z of the selected connectivity vectors, NaNs set to 0.
X = np.arctanh(all_session_conn_vec[care_sess][idx])
X[np.isnan(X)] = 0

# Targets: recode the two remaining labels to 0 (smaller label) and 1 (other).
# The minimum is deliberately re-evaluated for the second assignment: after the
# first step the smallest value in Y is 0, so everything else becomes 1.
Y = dep_labels[idx]
smallest_label = Y.min()
Y[Y==smallest_label] = 0
Y[Y!=Y.min()] = 1

# Subject identifiers of the rows that were kept, in the original order.
subjects = [session_subjects[pos] for pos, keep in enumerate(idx) if keep]

In [None]:
from sklearn.utils import resample

# Load the previously trained classifier and its patterns.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open('./data/trained_model.pkl', 'rb') as f:  # closes the handle after loading
    D = pickle.load(f)
trained_clf = D['trained_clf']
patterns = D['patterns']
# Regularisation strength of the trained classifier, reused for the bootstrap fits.
best_C = trained_clf.C

In [None]:
## bootstrap analysis
# Refit a linear SVM (C fixed to best_C, not re-tuned per resample) on stratified
# bootstrap resamples of (X, Y); collect the raw weights (clf.coef_) and the
# output of bf.weight_transform for every fit.

boot_num = 1000
boot_seed = 0            # fixed seed so the bootstrap draws are reproducible
checkpoint_every = 100   # write intermediate results every N iterations
boot_out_file = './data/Bootstrap1000_parameters.pkl'

# One RandomState shared across iterations: each call to resample advances it,
# so every iteration gets a different (but reproducible) draw.
boot_rng = np.random.RandomState(boot_seed)

all_boot_coef_vals = []
all_boot_patterns = []
for ibt in range(boot_num):
    print(ibt)
    bX, bY = resample(X, Y, replace=True, stratify=Y, random_state=boot_rng)

    clf = svm.SVC(kernel='linear', probability=False, C=best_C)
    clf.fit(bX, bY)
    coef_vals = clf.coef_
    # Use a loop-local name: assigning to `patterns` here would overwrite the
    # trained-model patterns that are saved later under the 'patterns' key.
    boot_pattern = bf.weight_transform(bX, coef_vals)
    all_boot_coef_vals.append(coef_vals)
    all_boot_patterns.append(boot_pattern)

    # Checkpoint periodically (and always after the last iteration) instead of
    # re-pickling the whole, growing result on every single iteration.
    if (ibt + 1) % checkpoint_every == 0 or ibt == boot_num - 1:
        with open(boot_out_file, 'wb') as f:
            pickle.dump({'all_boot_coef_vals':all_boot_coef_vals,
                         'all_boot_patterns':all_boot_patterns,
                         },
                        f)

In [None]:
# estimate p values
from scipy import stats  # `stats.norm` is used below but was not imported anywhere
from statsmodels.stats.multitest import multipletests

# Bootstrap z-value per feature: mean over resamples divided by the standard
# deviation over resamples.
mpattern = np.mean(all_boot_patterns, axis=0, keepdims=True)
vpattern = np.std(all_boot_patterns, axis=0, keepdims=True)
boot_zvals = mpattern / vpattern
boot_zvals = boot_zvals.reshape(-1)

# Two-sided p-values from the standard normal survival function, then
# Benjamini-Hochberg FDR correction at alpha = 0.05.
pvals = stats.norm.sf(np.abs(boot_zvals))*2
H, pvals_fdr, _, _ = multipletests(pvals, alpha=0.05, method='fdr_bh')
# Displayed cell output: number of features with FDR-corrected p < 0.05.
pvals_fdr[pvals_fdr<0.05].shape

In [None]:
# Save the bootstrap statistics together with the pattern vectors.
# NOTE(review): 'patterns' is presumably meant to hold the trained-model patterns
# loaded from trained_model.pkl -- confirm `patterns` has not been reassigned
# between loading and this cell.
with open('./data/D1Morning_sig_patterns.pkl', 'wb') as f:  # ensures flush + close
    pickle.dump({'boot_zvals':boot_zvals,
                 'pvals':pvals,
                 'pvals_fdr':pvals_fdr,
                 'boot_patterns':mpattern.reshape(-1),
                 'patterns':patterns.reshape(-1)},
                f)