### Import Libraries

In [1]:
import numpy as np
import pandas as pd
import BPt as bp
import os

from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)




###  Define helper functions and path of data

In [2]:
path = "../ABCD 3.0/"
read_file = lambda file_name: pd.read_csv(file_name, delimiter='\t', skiprows=[1], index_col='subjectkey', low_memory=False)
baseline = lambda df: df[df['eventname']=='baseline_year_1_arm_1']
followup = lambda df: df[df['eventname']=='1_year_follow_up_y_arm_1']

###  Covariate Files

In [4]:
### Calculating BMI
ant01 = followup(read_file(path+'abcd_ant01.txt'))[['anthroweightcalc','anthroheightcalc']]
m = ant01['anthroweightcalc']
h2 = ant01['anthroheightcalc']**2
ant01['bmi'] = (m/h2)*703
ant01 = ant01[['bmi']]
print('ant01 shape:', ant01.shape)

ant01 shape: (4951, 1)


In [5]:
### Puberty Score
ppdms01 =baseline(read_file(path+'abcd_ppdms01.txt'))
ppdms01 = ppdms01.replace(999.0, 0)
ppdms01 = ppdms01.replace(np.nan, 0)
ppdms01['pubertal_score'] = ppdms01.apply(lambda x : x['pds_1_p'] + x['pds_2_p'] + x['pds_3_p'] + x['pds_m4_p'] + x['pds_m5_p']  if (x['pubertal_sex_p']==1.0) else x['pds_1_p'] + x['pds_2_p'] + x['pds_3_p'] + x['pds_f4_p'] + x['pds_f5b_p'], axis=1, result_type='reduce')
ppdms01 = ppdms01[['pubertal_score']].astype('int')
ppdms01 = ppdms01.replace(0, np.nan)
print('ppdms01 shape:', ppdms01.shape)

ppdms01 shape: (11875, 1)


In [6]:
### Medical
medsy01 = baseline(read_file(path+'medsy01.txt'))
a = pd.Series(medsy01.columns)
cols = a[a.str.contains('_24')].values
medsy01 = medsy01[cols]
medsy01 = medsy01.replace(999.0, 0)
medsy01 = medsy01.fillna(0)
f = lambda x: 1 if x>=1 else 0
cols = medsy01.columns
a = pd.Series(medsy01.columns)
rx_cols = a[a.str.contains('rx')].values
otc_cols = a[a.str.contains('otc')].values
medsy01['rx_24']=medsy01[rx_cols].sum(axis=1).apply(f)
medsy01['otc_24']=medsy01[otc_cols].sum(axis=1).apply(f)
medsy01['caff_24'] = medsy01['caff_24'].astype(int)
medsy01 = medsy01[['rx_24', 'otc_24', 'caff_24']]
print('medsy01 shape:', medsy01.shape)

medsy01 shape: (11875, 3)


In [7]:
### Socioecomonic Factors
pdem02 = baseline(read_file(path+'pdem02.txt'))
cols = ['demo_brthdat_v2','demo_ed_v2',
        'demo_race_a_p___10','demo_race_a_p___11','demo_race_a_p___12',
        'demo_race_a_p___13','demo_race_a_p___14','demo_race_a_p___15',
        'demo_race_a_p___16','demo_race_a_p___17','demo_race_a_p___18',
        'demo_race_a_p___19','demo_race_a_p___20','demo_race_a_p___21',
        'demo_race_a_p___22','demo_race_a_p___23', 
        'demo_prnt_marital_v2','demo_prnt_ed_v2','demo_prnt_income_v2',
        'demo_prnt_prtnr_v2','demo_prtnr_ed_v2','demo_comb_income_v2']
pdem02['race_white'] = pdem02['demo_race_a_p___10']
pdem02['race_mixed'] = pdem02[['demo_race_a_p___11','demo_race_a_p___12','demo_race_a_p___13',
                               'demo_race_a_p___14','demo_race_a_p___15','demo_race_a_p___16',
                               'demo_race_a_p___17','demo_race_a_p___18','demo_race_a_p___19',
                               'demo_race_a_p___20','demo_race_a_p___21','demo_race_a_p___22',
                               'demo_race_a_p___23']].sum(axis=1)
pdem02['race_mixed'] = pdem02['race_mixed'].apply(f)
pdem02['demo_prnt_ed_v2'] = pdem02['demo_prnt_ed_v2'].replace(999, 0) 
pdem02['demo_prnt_ed_v2'] = pdem02['demo_prnt_ed_v2'].replace(777, 0)
pdem02['demo_prnt_ed_v2'] = pdem02['demo_prnt_ed_v2'].replace(np.nan, 0)
pdem02['demo_prtnr_ed_v2'] = pdem02['demo_prtnr_ed_v2'].replace(999, 0) 
pdem02['demo_prtnr_ed_v2'] = pdem02['demo_prtnr_ed_v2'].replace(777, 0)
pdem02['demo_prtnr_ed_v2'] = pdem02['demo_prtnr_ed_v2'].replace(np.nan, 0)
pdem02['parent_edu_max'] = pdem02[['demo_prnt_ed_v2','demo_prtnr_ed_v2']].max(axis=1)
pdem02['parent_edu_max'] = pdem02['parent_edu_max'].replace(0, np.nan)
pdem02 = pdem02[['demo_brthdat_v2','demo_ed_v2','race_white','race_mixed',
                 'demo_prnt_marital_v2','parent_edu_max','demo_prnt_prtnr_v2',
                 'demo_comb_income_v2']]
pdem02 = pdem02.replace(999.0, np.nan)
pdem02 = pdem02.replace(777.0, np.nan)
print('pdem02 shape:', pdem02.shape)

pdem02 shape: (11875, 8)


In [8]:
###  Sleeping, Screen time, and Family Environment
sds01 = baseline(read_file(path+'abcd_sds01.txt'))[['sleepdisturb1_p']]
sds01 = sds01.reset_index().drop_duplicates().set_index('subjectkey')
print('sds01 shape:', sds01.shape)

stq01 = baseline(read_file(path+'stq01.txt'))[['screentime2_p_hours']]
print('stq01 shape:', stq01.shape)

fes02 = baseline(read_file(path+'fes02.txt'))[['fam_enviro1_p','fam_enviro2r_p', 'fam_enviro3_p',
                                                'fam_enviro4r_p','fam_enviro5_p', 'fam_enviro6_p',                                                
                                                'fam_enviro7r_p','fam_enviro8_p', 'fam_enviro9r_p']]
fes02['fam_enviro_sum'] = fes02.sum(axis=1)
fes02 = fes02[['fam_enviro_sum']].astype('int')
fes02 = fes02.reset_index().drop_duplicates().set_index('subjectkey')
print('fes02 shape:', fes02.shape)

sds01 shape: (11875, 1)
stq01 shape: (11875, 1)
fes02 shape: (11875, 1)


In [9]:
### Gender
lt01 = baseline(read_file(path + 'abcd_lt01.txt'))[['gender']]
f = lambda x: 1 if x=='F' else x
m = lambda x: 0 if x=='M' else x
lt01['gender'] = lt01['gender'].apply(m).apply(f)
print('lt01 shape:', lt01.shape)

lt01 shape: (11875, 1)


In [10]:
### Family Depression
fhxp102 = baseline(read_file(path+'fhxp102.txt'))[['fam_history_q6a_depression', 'fam_history_q6d_depression']]
fhxp102 = fhxp102.replace(np.nan, 0)
fhxp102 = fhxp102.replace(999.0, np.nan)
fhxp102 = fhxp102.reset_index().drop_duplicates().set_index('subjectkey')
fhxp102['fam_history_depression'] = np.logical_or(fhxp102['fam_history_q6a_depression'], fhxp102['fam_history_q6d_depression'])
fhxp102 = fhxp102.drop(['fam_history_q6a_depression', 'fam_history_q6d_depression'], axis=1).astype('int')
print('fhxp102 shape:', fhxp102.shape)

fhxp102 shape: (11875, 1)


In [11]:
### original cov
covariates = pd.concat([fhxp102,lt01,fes02,stq01,sds01,pdem02,medsy01,ppdms01,ant01],axis=1)

covariates = covariates.astype('float', errors='ignore')
print('original covariates shape:', covariates.shape)

original covariates shape: (11875, 18)


In [12]:
##  Life events

# gish_y_ss_m_sum_nm & pps_y_ss_bother_sum_nm help boost up 8% of variance in bpm but same in cbcl
mhy = followup(read_file(path+'abcd_mhy02.txt'))
a = pd.Series(mhy.columns)
cols = a[a.str.contains('affected_*')].values
mhy = mhy[['ple_y_ss_affected_bad_sum','ple_y_ss_affected_good_sum',
           'pps_y_ss_bother_sum_nm','gish_y_ss_m_sum_nm'
          ]]
mhy.dropna(axis=1, how='all')
print('mhy shape:', mhy.shape)

mhy shape: (11235, 4)


In [13]:
##  Parent Acceptance
sscey01= baseline(read_file(path+'abcd_sscey01.txt'))
a = pd.Series(sscey01.columns)

cols = a[a.str.contains('_ss_')].values
sscey01= sscey01[['srpf_y_ss_iiss','crpbi_y_ss_caregiver','psb_y_ss_mean','fes_y_ss_fc_pr','pmq_y_ss_mean',]]
sscey01.dropna(axis=1, how='all')
print('sscey01 shape:', sscey01.shape)
sscey01

sscey01 shape: (11878, 5)


Unnamed: 0_level_0,srpf_y_ss_iiss,crpbi_y_ss_caregiver,psb_y_ss_mean,fes_y_ss_fc_pr,pmq_y_ss_mean
subjectkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NDAR_INV005V6D2C,14.0,,2.000000,0.0,4.6
NDAR_INV00BD7VDC,13.0,2.4,1.666667,0.0,4.4
NDAR_INV00LJVZK2,13.0,2.4,1.666667,2.0,4.8
NDAR_INV014RTM1V,14.0,3.0,2.000000,0.0,5.0
NDAR_INV0182J779,15.0,2.8,2.000000,0.0,4.2
...,...,...,...,...,...
NDAR_INVZWWDT1TG,13.0,3.0,2.000000,1.0,4.8
NDAR_INVZXL47HRG,15.0,3.0,2.000000,0.0,4.8
NDAR_INVZYTK0K1Y,15.0,2.4,1.333333,3.0,4.2
NDAR_INVZZ4XNM65,14.0,2.4,1.666667,3.0,4.8


In [14]:
## Parent_monitor
pmq = baseline(read_file(path+'pmq01.txt'))
a = pd.Series(pmq.columns)
cols = a[a.str.contains('parent_monitor')].values
pmq = pmq[cols]
print('Parent_monitor shape:', pmq.shape)
pmq

Parent_monitor shape: (11875, 5)


Unnamed: 0_level_0,parent_monitor_q1_y,parent_monitor_q2_y,parent_monitor_q3_y,parent_monitor_q4_y,parent_monitor_q5_y
subjectkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
NDAR_INV0A87RKWD,5.0,5.0,5.0,5.0,5.0
NDAR_INV0AYJMFMW,5.0,3.0,4.0,5.0,5.0
NDAR_INV0BEHBEJ3,5.0,4.0,5.0,4.0,5.0
NDAR_INV0BEPJHU1,4.0,5.0,5.0,1.0,3.0
NDAR_INV0CCVJ39W,5.0,5.0,5.0,5.0,5.0
...,...,...,...,...,...
NDAR_INVZZPKBDAC,5.0,5.0,5.0,3.0,4.0
NDAR_INVZZZP87KR,5.0,5.0,5.0,4.0,5.0
NDAR_INVF0C17HWX,5.0,4.0,4.0,3.0,3.0
NDAR_INV9EVRB30H,5.0,5.0,5.0,5.0,5.0


In [15]:
### Adult Self Report
asrs01 =baseline(read_file(path+'abcd_asrs01.txt'))[['asr_scr_attention_r','asr_scr_external_r','asr_scr_internal_r']]
print('asrs01 shape:', asrs01.shape)

asrs01 shape: (11878, 3)


In [16]:
extra=pd.concat([asrs01,sscey01,mhy],axis=1).dropna()
print('extra shape:', extra.shape)

extra shape: (10381, 12)


In [17]:
final=pd.concat([extra,covariates],axis=1).dropna()
final = final.astype('float', errors='ignore')
print('final shape:', final.shape)

final shape: (4179, 30)


### Loading Structural MRI

In [18]:
def load_from_rds(names, eventname='baseline_year_1_arm_1'):
    
    data = pd.read_csv('D:\BLUEMOON\\nda_data\\nda_rds_201.csv',
                       usecols=['src_subject_id', 'eventname'] + names,
                       na_values=['777', 999, '999', 777])
    
    data = data.loc[data[data['eventname'] == eventname].index]
    data = data.set_index('src_subject_id')
    data = data.drop('eventname', axis=1)
    
    return data

In [19]:
all_cols = list(pd.read_csv('D:\BLUEMOON\\nda_data\\nda_rds_201.csv', nrows=0))
all_cols[:10]

['subjectid',
 'src_subject_id',
 'eventname',
 'anthro_1_height_in',
 'anthro_2_height_in',
 'anthro_3_height_in',
 'anthro_height_calc',
 'anthro_weight_cast',
 'anthro_weight_a_location',
 'anthro_weight1_lb']

In [20]:
measures = ['smri_thick','smri_area','smri_vol_subcort']
                 
parcs = ['.destrieux', '_subcort.aseg']

data_cols = [col for col in all_cols
             if any([ct for ct in measures if ct in col])
             and any([p for p in parcs if p in col])] 
len(data_cols)


346

In [21]:
# Load the actual data from the saved csv
df = load_from_rds(data_cols)
df

Unnamed: 0_level_0,smri_thick_cort.destrieux_g.and.s.frontomargin.lh,smri_thick_cort.destrieux_g.and.s.occipital.inf.lh,smri_thick_cort.destrieux_g.and.s.paracentral.lh,smri_thick_cort.destrieux_g.and.s.subcentral.lh,smri_thick_cort.destrieux_g.and.s.transv.frontopol.lh,smri_thick_cort.destrieux_g.and.s.cingul.ant.lh,smri_thick_cort.destrieux_g.and.s.cingul.mid.ant.lh,smri_thick_cort.destrieux_g.and.s.cingul.mid.post.lh,smri_thick_cort.destrieux_g.cingul.post.dorsal.lh,smri_thick_cort.destrieux_g.cingul.post.ventral.lh,...,smri_vol_subcort.aseg_cc.mid.posterior,smri_vol_subcort.aseg_cc.central,smri_vol_subcort.aseg_cc.mid.anterior,smri_vol_subcort.aseg_cc.anterior,smri_vol_subcort.aseg_wholebrain,smri_vol_subcort.aseg_latventricles,smri_vol_subcort.aseg_allventricles,smri_vol_subcort.aseg_intracranialvolume,smri_vol_subcort.aseg_supratentorialvolume,smri_vol_subcort.aseg_subcorticalgrayvolume
src_subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NDAR_INV003RTV85,2.643,2.597,2.682,3.016,2.776,3.012,2.894,2.874,2.865,2.350,...,368.5,403.0,396.9,546.9,1.099494e+06,4693.2,6299.4,1.354788e+06,9.738411e+05,54112.0
NDAR_INV005V6D2C,,,,,,,,,,,...,,,,,,,,,,
NDAR_INV007W6H7B,2.798,2.635,2.620,2.963,3.038,2.948,2.966,2.728,3.263,1.882,...,352.1,371.1,336.7,684.0,1.444690e+06,13426.2,18810.3,1.703982e+06,1.290405e+06,71188.0
NDAR_INV00BD7VDC,2.570,3.008,2.771,3.116,2.753,3.137,3.222,3.062,3.315,3.065,...,365.3,357.6,432.3,720.6,1.421171e+06,8375.3,11828.6,1.679526e+06,1.283405e+06,61985.0
NDAR_INV00CY2MDM,2.589,2.495,2.732,2.982,2.979,2.953,2.732,2.819,2.908,2.967,...,463.8,414.5,398.6,824.5,1.186497e+06,19138.9,21191.9,1.561216e+06,1.072113e+06,61855.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NDAR_INVZZNX6W2P,2.604,2.839,2.642,3.017,2.990,3.119,3.014,2.871,3.240,2.254,...,476.3,338.0,366.9,761.9,1.139532e+06,11129.1,14259.9,1.480336e+06,1.001272e+06,59550.0
NDAR_INVZZPKBDAC,2.665,2.915,2.661,3.114,2.968,3.167,3.058,2.976,3.355,2.168,...,357.2,429.8,367.8,609.3,1.134203e+06,2855.1,4925.1,1.470497e+06,9.897016e+05,61090.0
NDAR_INVZZZ2ALR6,2.517,2.743,2.607,3.210,2.847,2.954,2.965,2.846,3.211,2.741,...,499.8,436.3,472.6,855.6,1.301402e+06,8278.4,10434.1,1.455727e+06,1.172208e+06,64413.0
NDAR_INVZZZNB0XC,2.806,2.835,2.678,3.344,2.975,3.134,3.425,3.251,3.288,2.535,...,385.8,419.5,424.1,691.2,1.150473e+06,6483.5,8978.0,1.480286e+06,1.040864e+06,55505.0


In [22]:
data = data.drop_cols(exclusions=['.ventricle', '_csf', '.white.matter'], scope='_subcort.aseg')

NameError: name 'data' is not defined

In [None]:
# Cast from a dataframe to BPt Dataset class
data = bp.Dataset(df)
    
# Obsificate subject ID for public example
#data.index = list(range(len(data)))

# Set optional verbosity of
data.verbose = 1

In [None]:
### Filter subjects through freesurfer qc
fsqc = baseline(read_file(path + 'freesqc01.txt'))[['fsqc_qc']]
structural=pd.concat([fsqc,data],axis=1)
print('final shape:', structural.shape)
structural['fsqc_qc'] = structural['fsqc_qc'].fillna(0)
structural = structural[structural['fsqc_qc'] == 1].drop(['fsqc_qc'], axis=1)
structural = structural.astype('float', errors='ignore')
print('structural post freesurfer qc shape:', structural.shape)

final shape: (11879, 338)
structural post freesurfer qc shape: (11265, 337)


### Loading DTI data

In [88]:
### Select Keyword(s) from DTI
measures = ['dmri_dti.fa.wm',
            #'dmri_dti.md.wm'
        #'dmri_dti.full.fa_fibe'
           ]
                 
parcs = ['.destrieux']

data_cols = [col for col in all_cols
             if any([ct for ct in measures if ct in col])
             #and any([p for p in parcs if p in col])
             ] 
len(data_cols)

222

In [89]:
data_cols

['dmri_dti.fa.wm_cort.destrieux_g.and.s.frontomargin.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.and.s.occipital.inf.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.and.s.paracentral.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.and.s.subcentral.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.and.s.transv.frontopol.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.and.s.cingul.ant.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.and.s.cingul.mid.ant.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.and.s.cingul.mid.post.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.cingul.post.dorsal.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.cingul.post.ventral.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.cuneus.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.front.inf.opercular.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.front.inf.orbital.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.front.inf.triangul.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.front.middle.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.front.sup.lh',
 'dmri_dti.fa.wm_cort.destrieux_g.ins.lg.and.s.cent.ins.lh',
 'dmri_dti.fa.wm_cort.destrieux_

In [83]:
fiber = load_from_rds(data_cols)
fiber= bp.Dataset(fiber)
fiber.verbose = 1
print('fiber shape:', fiber.shape)

fiber shape: (11875, 42)


In [90]:
dti = load_from_rds(data_cols)
dti= bp.Dataset(dti)
dti.verbose = 1
print('dti shape:', dti.shape)

dti shape: (11875, 222)


In [91]:
dti_combo=pd.concat([dti,fiber],axis=1)

In [92]:
dti_combo

Unnamed: 0_level_0,dmri_dti.fa.wm_cort.desikan_bankssts.lh,dmri_dti.fa.wm_cort.desikan_bankssts.rh,dmri_dti.fa.wm_cort.desikan_caudalanteriorcingulate.lh,dmri_dti.fa.wm_cort.desikan_caudalanteriorcingulate.rh,dmri_dti.fa.wm_cort.desikan_caudalmiddlefrontal.lh,dmri_dti.fa.wm_cort.desikan_caudalmiddlefrontal.rh,dmri_dti.fa.wm_cort.desikan_cuneus.lh,dmri_dti.fa.wm_cort.desikan_cuneus.rh,dmri_dti.fa.wm_cort.desikan_entorhinal.lh,dmri_dti.fa.wm_cort.desikan_entorhinal.rh,...,dmri_dti.full.fa_fiber.at_scs.lh,dmri_dti.full.fa_fiber.at_scs.rh,dmri_dti.full.fa_fiber.at_sifc.lh,dmri_dti.full.fa_fiber.at_sifc.rh,dmri_dti.full.fa_fiber.at_slf.lh,dmri_dti.full.fa_fiber.at_slf.rh,dmri_dti.full.fa_fiber.at_tslf.lh,dmri_dti.full.fa_fiber.at_tslf.rh,dmri_dti.full.fa_fiber.at_unc.lh,dmri_dti.full.fa_fiber.at_unc.rh
src_subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NDAR_INV003RTV85,0.340157,0.349212,0.447083,0.461398,0.340449,0.311749,0.259963,0.240798,0.243204,0.268007,...,0.458440,0.501362,0.427600,0.404682,0.532783,0.504443,0.550447,0.506624,0.452455,0.443220
NDAR_INV005V6D2C,,,,,,,,,,,...,,,,,,,,,,
NDAR_INV007W6H7B,0.388828,0.312500,0.415262,0.433088,0.293610,0.260048,0.273741,0.274300,0.334242,0.294380,...,0.480223,0.537668,0.390823,0.282790,0.535807,0.516950,0.550986,0.528167,0.489879,0.406760
NDAR_INV00BD7VDC,0.383464,0.375570,0.365933,0.324137,0.362220,0.343888,0.283605,0.241835,0.216066,0.226709,...,0.419150,0.428401,0.424115,0.415025,0.493263,0.485780,0.512133,0.501521,0.496612,0.469323
NDAR_INV00CY2MDM,0.360118,0.304463,0.439397,0.425005,0.347682,0.340680,0.265267,0.245714,0.284462,0.240159,...,0.447563,0.459576,0.444766,0.420438,0.508993,0.509723,0.528875,0.522597,0.480603,0.460824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NDAR_INVZZNX6W2P,0.352371,0.377136,0.514657,0.430468,0.396920,0.371269,0.281377,0.255469,0.280433,0.265119,...,0.470292,0.483663,0.406330,0.392644,0.498407,0.494570,0.505710,0.489329,0.429824,0.426006
NDAR_INVZZPKBDAC,0.366505,0.367588,0.387581,0.424887,0.328114,0.332038,0.214938,0.245684,0.221041,0.238769,...,0.421950,0.453794,0.364654,0.322565,0.466707,0.478108,0.484152,0.495683,0.437808,0.409998
NDAR_INVZZZ2ALR6,0.348939,0.403456,0.454100,0.529742,0.308415,0.292644,0.263650,0.267204,0.318626,0.295896,...,0.513671,0.539241,0.348594,0.337829,0.491605,0.463953,0.503071,0.470570,0.420897,0.427835
NDAR_INVZZZNB0XC,0.320196,0.376880,0.515554,0.441372,0.371629,0.289887,0.229527,0.233831,0.277044,0.300704,...,0.414326,0.436636,0.428477,0.396604,0.488379,0.495322,0.502859,0.504556,0.446710,0.433479


### Loading Resting State fMRI

In [40]:
measures = ['tfmri_nback_all_2.back_',]
                 
parcs = ['.destrieux']

data_cols = [col for col in all_cols
             if any([ct for ct in measures if ct in col])
             and any([p for p in parcs if p in col])
                    ] 
nback = load_from_rds(data_cols)
nback = bp.Dataset(nback)
nback .verbose = 1
print('nback shape:', nback.shape)


nback shape: (11875, 296)


In [41]:
measures = ['rsfmri_var_cort','rsfmri_var_subcort']
                 
parcs = ['gordon']

data_cols = [col for col in all_cols
             if any([ct for ct in measures if ct in col])
             and any([p for p in parcs if p in col])
                    ] 
rsfc = load_from_rds(data_cols)
rsfc = bp.Dataset(rsfc)
rsfc .verbose = 1
print('rsfc shape:', rsfc.shape)

rsfc shape: (11875, 333)


In [42]:
### QC section from the paper
mrirstv02 = baseline(read_file(path+'abcd_mrirstv02.txt'))[['rsfmri_var_meanmotion', 'rsfmri_var_ntpoints']]
print('mrirstv02 shape:', mrirstv02.shape)

betnet02 = baseline(read_file(path+'abcd_betnet02.txt')).iloc[:,21:-2]
features = []
for col in betnet02.columns:
    elements = col.split('_')
    if 'n' in elements:
        continue
    else:
        features.append(col)
betnet02 = betnet02[features]
print('betnet02 shape:', betnet02.shape)

subset = ['aglh', 'agrh', 'hplh', 'hprh', 'aalh', 'aarh', 'ptlh', 'ptrh', 'cdelh', 'cderh']
mrirscor02 = baseline(read_file(path + 'mrirscor02.txt'))
features = []
for col in list(mrirscor02.columns):
    elements = col.split('_')
    if('none' in elements):
        continue
    for region in subset:
        if(region in elements):
            features.append(col)
mrirscor02 = mrirscor02[features]

d1 = mrirstv02.merge(betnet02, on='subjectkey', how='outer', validate='1:1')
d2 = d1.merge(mrirscor02, on='subjectkey', how='outer', validate='1:1')
print('functional union shape:', d2.shape)
d3 = d2.merge(fsqc, on='subjectkey', how='inner', validate='1:1')
d3['fsqc_qc'] = d3['fsqc_qc'].fillna(0)
functional = d3
print('final shape:', functional.shape)

functional = functional[functional['fsqc_qc'] == 1].drop(['fsqc_qc'], axis=1)
print('functional post freesurfer qc shape:', functional.shape)

exclude_subjects = set()
#fmriqc01 = baseline(read_file(path+'fmriqc01.txt'))[['fmri_postqc_b0warp', 'fmri_postqc_imgqual', 'fmri_postqc_cutoff']]
#SK = set(fmriqc01.index.values)
#imputer = SimpleImputer(strategy='constant')
#fmriqc01[:] = imputer.fit_transform(fmriqc01)
#sk = set(fmriqc01[(fmriqc01['fmri_postqc_b0warp']<=1.5) & (fmriqc01['fmri_postqc_imgqual']<=1.5) & (fmriqc01['fmri_postqc_cutoff']<=1.5)].index.values)
#excluded_subjects = SK - sk
#exclude_subjects = exclude_subjects.union(excluded_subjects)

mrirstv02 = baseline(read_file(path+'abcd_mrirstv02.txt'))[['rsfmri_var_meanmotion', 'rsfmri_var_ntpoints']]
SK = set(mrirstv02.index.values)
sk = set(mrirstv02[mrirstv02['rsfmri_var_ntpoints']>375].index.values)
excluded_subjects = SK - sk
exclude_subjects = exclude_subjects.union(excluded_subjects)

indexes_to_keep = list(set(functional.index.values) - exclude_subjects)
functional = functional.loc[indexes_to_keep]
functional = functional.astype('float', errors='ignore')
print('functional post other qc filtering shape:', functional.shape)

mrirstv02 shape: (11309, 2)
betnet02 shape: (11309, 144)
functional union shape: (11347, 266)


NameError: name 'fsqc' is not defined

In [None]:
amy_r=pd.read_csv(path+'Right_Amy_ROI350_8_min_subs_gordon_parcel_signals.csv').set_index('src_subject_id')
amy_l=pd.read_csv(path+'Left_amy_ROI341_8_min_subs_gordon_parcel_signals.csv').set_index('src_subject_id')
amy=amy_r.merge(amy_l, on='src_subject_id', how='inner', validate='1:1')
amy.index.names = ["subjectkey"]
amy.index=['NDAR_'+sub.split('sub-NDAR')[-1] for sub in amy.index]
amy.index.names = ["subjectkey"]
amy

Unnamed: 0_level_0,ROI_1_x,ROI_2_x,ROI_3_x,ROI_4_x,ROI_5_x,ROI_6_x,ROI_7_x,ROI_8_x,ROI_9_x,ROI_10_x,...,ROI_343_y,ROI_344_y,ROI_345_y,ROI_346_y,ROI_347_y,ROI_348_y,ROI_349_y,ROI_350_y,ROI_351_y,ROI_352_y
subjectkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NDAR_INV003RTV85,0.139387,-0.133080,0.033838,-0.056374,0.000346,0.033346,-0.071059,-0.015134,0.217674,0.110767,...,0.276991,-0.014092,-0.007936,0.010850,0.136591,-0.117602,0.296113,0.406991,0.043604,0.208844
NDAR_INV007W6H7B,0.062801,-0.328568,0.180361,0.140085,-0.180430,0.058728,0.023009,-0.356795,0.000375,-0.036100,...,0.127283,-0.203762,0.094289,0.196047,0.053988,0.113364,0.278264,0.496836,-0.201546,0.026713
NDAR_INV00BD7VDC,0.091722,-0.011405,0.340403,0.041326,0.001664,0.170327,0.206908,-0.028080,-0.070533,0.059862,...,-0.318248,-0.508219,-0.237959,0.217917,0.159019,-0.115625,0.087089,0.720698,-0.323583,0.141851
NDAR_INV00HEV6HB,0.146349,0.022672,0.047482,-0.038345,-0.221187,0.176130,-0.102057,-0.110172,0.003761,0.118278,...,-0.234590,-0.040273,-0.041952,0.047391,0.027372,-0.112265,0.173832,0.565325,0.093353,-0.032319
NDAR_INV00J52GPG,-0.186456,0.033474,-0.043153,-0.401150,-0.211270,-0.408146,-0.399043,0.149138,0.029946,0.158684,...,-0.053659,0.244700,-0.087495,-0.181084,0.308361,-0.003262,0.387755,0.455609,-0.125023,0.166350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NDAR_INVZZL0VA2F,-0.006232,0.020346,-0.032550,-0.040501,-0.247395,-0.123546,-0.064687,-0.259296,-0.061981,-0.163031,...,0.211564,-0.125380,0.096187,0.055928,0.325503,0.391787,0.444704,0.398533,0.093618,0.228458
NDAR_INVZZLZCKAY,-0.208031,0.206014,0.140165,-0.058937,0.130904,-0.141844,-0.011817,-0.022315,-0.035458,-0.192558,...,0.140731,-0.005659,0.141873,0.014645,0.136544,-0.051722,0.442052,0.360468,0.212219,0.030078
NDAR_INVZZNX6W2P,0.064826,0.310015,-0.068653,-0.251647,0.178675,-0.197334,-0.264822,0.038369,0.101586,-0.098347,...,-0.046214,-0.279065,-0.127643,-0.174893,-0.080007,-0.225866,0.464864,0.658814,-0.170457,-0.432946
NDAR_INVZZZ2ALR6,0.187734,-0.003934,-0.046376,0.034443,-0.043897,-0.081355,-0.292977,0.030635,-0.039109,0.196512,...,0.070763,0.207157,0.093033,0.074020,-0.036331,0.119637,0.570515,0.629203,0.183270,0.154867


### ML

In [43]:
# Loading Target variable

targets_to_load = ['cbcl_scr_syn_internal_r']
target_file=path+'abcd_cbcls01.txt'
#targets_to_load = ['bpm_y_scr_internal_r']
#target_file=path+'abcd_bpmt01.txt'
target =pd.read_csv(target_file,delimiter = "\t",skiprows=[1],
                    usecols=['src_subject_id'] + targets_to_load,
                    index_col='src_subject_id', na_values=['777', 999, '999', 777]
                   )
target = target[~target.index.duplicated(keep='first')]
target

Unnamed: 0_level_0,cbcl_scr_syn_internal_r
src_subject_id,Unnamed: 1_level_1
NDAR_INV59BE4FA2,0.0
NDAR_INV9EVRB30H,8.0
NDAR_INVF0C17HWX,1.0
NDAR_INV0A87RKWD,0.0
NDAR_INV0AU5R8NA,3.0
...,...
NDAR_INVZTA7JACL,9.0
NDAR_INVZVYCX57J,9.0
NDAR_INVZXC2YRV3,2.0
NDAR_INVZYLV9BMB,6.0


In [93]:
### Concatenate dataframes for brain features and generate an overall dataframe with target
brain=pd.concat([
    #rsfc,
                #structural,
                dti_combo
                ]
                ,axis=1)
overall=pd.concat([
                    #final,
                    brain,
                    #nback,
                    target],axis=1).dropna()

In [94]:
data = bp.Dataset(overall,
                  targets=targets_to_load,
                  #non_inputs='rel_family_id'
                 )

data = data.drop_subjects_by_nan(scope='target')



# Split data with family stratification
data = data.dropna()

#data = data.ordinalize(scope='rel_family_id')

#family_strat = bp.CVStrategy(groups='rel_family_id')
data

Setting NaN threshold to: 0.5


Unnamed: 0_level_0,dmri_dti.fa.wm_cort.desikan_bankssts.lh,dmri_dti.fa.wm_cort.desikan_bankssts.rh,dmri_dti.fa.wm_cort.desikan_caudalanteriorcingulate.lh,dmri_dti.fa.wm_cort.desikan_caudalanteriorcingulate.rh,dmri_dti.fa.wm_cort.desikan_caudalmiddlefrontal.lh,dmri_dti.fa.wm_cort.desikan_caudalmiddlefrontal.rh,dmri_dti.fa.wm_cort.desikan_cuneus.lh,dmri_dti.fa.wm_cort.desikan_cuneus.rh,dmri_dti.fa.wm_cort.desikan_entorhinal.lh,dmri_dti.fa.wm_cort.desikan_entorhinal.rh,...,dmri_dti.full.fa_fiber.at_scs.lh,dmri_dti.full.fa_fiber.at_scs.rh,dmri_dti.full.fa_fiber.at_sifc.lh,dmri_dti.full.fa_fiber.at_sifc.rh,dmri_dti.full.fa_fiber.at_slf.lh,dmri_dti.full.fa_fiber.at_slf.rh,dmri_dti.full.fa_fiber.at_tslf.lh,dmri_dti.full.fa_fiber.at_tslf.rh,dmri_dti.full.fa_fiber.at_unc.lh,dmri_dti.full.fa_fiber.at_unc.rh
src_subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NDAR_INV003RTV85,0.340157,0.349212,0.447083,0.461398,0.340449,0.311749,0.259963,0.240798,0.243204,0.268007,...,0.458440,0.501362,0.427600,0.404682,0.532783,0.504443,0.550447,0.506624,0.452455,0.443220
NDAR_INV007W6H7B,0.388828,0.312500,0.415262,0.433088,0.293610,0.260048,0.273741,0.274300,0.334242,0.294380,...,0.480223,0.537668,0.390823,0.282790,0.535807,0.516950,0.550986,0.528167,0.489879,0.406760
NDAR_INV00BD7VDC,0.383464,0.375570,0.365933,0.324137,0.362220,0.343888,0.283605,0.241835,0.216066,0.226709,...,0.419150,0.428401,0.424115,0.415025,0.493263,0.485780,0.512133,0.501521,0.496612,0.469323
NDAR_INV00CY2MDM,0.360118,0.304463,0.439397,0.425005,0.347682,0.340680,0.265267,0.245714,0.284462,0.240159,...,0.447563,0.459576,0.444766,0.420438,0.508993,0.509723,0.528875,0.522597,0.480603,0.460824
NDAR_INV00HEV6HB,0.361244,0.377172,0.485102,0.456524,0.344771,0.352633,0.225064,0.226996,0.210805,0.201679,...,0.415918,0.429651,0.410429,0.393633,0.517234,0.514664,0.529951,0.527461,0.445652,0.401750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NDAR_INVZZNX6W2P,0.352371,0.377136,0.514657,0.430468,0.396920,0.371269,0.281377,0.255469,0.280433,0.265119,...,0.470292,0.483663,0.406330,0.392644,0.498407,0.494570,0.505710,0.489329,0.429824,0.426006
NDAR_INVZZPKBDAC,0.366505,0.367588,0.387581,0.424887,0.328114,0.332038,0.214938,0.245684,0.221041,0.238769,...,0.421950,0.453794,0.364654,0.322565,0.466707,0.478108,0.484152,0.495683,0.437808,0.409998
NDAR_INVZZZ2ALR6,0.348939,0.403456,0.454100,0.529742,0.308415,0.292644,0.263650,0.267204,0.318626,0.295896,...,0.513671,0.539241,0.348594,0.337829,0.491605,0.463953,0.503071,0.470570,0.420897,0.427835
NDAR_INVZZZNB0XC,0.320196,0.376880,0.515554,0.441372,0.371629,0.289887,0.229527,0.233831,0.277044,0.300704,...,0.414326,0.436636,0.428477,0.396604,0.488379,0.495322,0.502859,0.504556,0.446710,0.433479

Unnamed: 0_level_0,cbcl_scr_syn_internal_r
src_subject_id,Unnamed: 1_level_1
NDAR_INV003RTV85,1.0
NDAR_INV007W6H7B,8.0
NDAR_INV00BD7VDC,5.0
NDAR_INV00CY2MDM,5.0
NDAR_INV00HEV6HB,5.0
...,...
NDAR_INVZZNX6W2P,4.0
NDAR_INVZZPKBDAC,1.0
NDAR_INVZZZ2ALR6,5.0
NDAR_INVZZZNB0XC,2.0


In [95]:
## Train-test split

data = data.set_test_split(size=.25, random_state=2)
data

Performing test split on: 10291 subjects.
random_state: 2
Test split size: 0.25

Performed train/test split
Train size: 7718
Test size:  2573


Unnamed: 0_level_0,dmri_dti.fa.wm_cort.desikan_bankssts.lh,dmri_dti.fa.wm_cort.desikan_bankssts.rh,dmri_dti.fa.wm_cort.desikan_caudalanteriorcingulate.lh,dmri_dti.fa.wm_cort.desikan_caudalanteriorcingulate.rh,dmri_dti.fa.wm_cort.desikan_caudalmiddlefrontal.lh,dmri_dti.fa.wm_cort.desikan_caudalmiddlefrontal.rh,dmri_dti.fa.wm_cort.desikan_cuneus.lh,dmri_dti.fa.wm_cort.desikan_cuneus.rh,dmri_dti.fa.wm_cort.desikan_entorhinal.lh,dmri_dti.fa.wm_cort.desikan_entorhinal.rh,...,dmri_dti.full.fa_fiber.at_scs.lh,dmri_dti.full.fa_fiber.at_scs.rh,dmri_dti.full.fa_fiber.at_sifc.lh,dmri_dti.full.fa_fiber.at_sifc.rh,dmri_dti.full.fa_fiber.at_slf.lh,dmri_dti.full.fa_fiber.at_slf.rh,dmri_dti.full.fa_fiber.at_tslf.lh,dmri_dti.full.fa_fiber.at_tslf.rh,dmri_dti.full.fa_fiber.at_unc.lh,dmri_dti.full.fa_fiber.at_unc.rh
src_subject_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NDAR_INV003RTV85,0.340157,0.349212,0.447083,0.461398,0.340449,0.311749,0.259963,0.240798,0.243204,0.268007,...,0.458440,0.501362,0.427600,0.404682,0.532783,0.504443,0.550447,0.506624,0.452455,0.443220
NDAR_INV007W6H7B,0.388828,0.312500,0.415262,0.433088,0.293610,0.260048,0.273741,0.274300,0.334242,0.294380,...,0.480223,0.537668,0.390823,0.282790,0.535807,0.516950,0.550986,0.528167,0.489879,0.406760
NDAR_INV00BD7VDC,0.383464,0.375570,0.365933,0.324137,0.362220,0.343888,0.283605,0.241835,0.216066,0.226709,...,0.419150,0.428401,0.424115,0.415025,0.493263,0.485780,0.512133,0.501521,0.496612,0.469323
NDAR_INV00CY2MDM,0.360118,0.304463,0.439397,0.425005,0.347682,0.340680,0.265267,0.245714,0.284462,0.240159,...,0.447563,0.459576,0.444766,0.420438,0.508993,0.509723,0.528875,0.522597,0.480603,0.460824
NDAR_INV00HEV6HB,0.361244,0.377172,0.485102,0.456524,0.344771,0.352633,0.225064,0.226996,0.210805,0.201679,...,0.415918,0.429651,0.410429,0.393633,0.517234,0.514664,0.529951,0.527461,0.445652,0.401750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NDAR_INVZZNX6W2P,0.352371,0.377136,0.514657,0.430468,0.396920,0.371269,0.281377,0.255469,0.280433,0.265119,...,0.470292,0.483663,0.406330,0.392644,0.498407,0.494570,0.505710,0.489329,0.429824,0.426006
NDAR_INVZZPKBDAC,0.366505,0.367588,0.387581,0.424887,0.328114,0.332038,0.214938,0.245684,0.221041,0.238769,...,0.421950,0.453794,0.364654,0.322565,0.466707,0.478108,0.484152,0.495683,0.437808,0.409998
NDAR_INVZZZ2ALR6,0.348939,0.403456,0.454100,0.529742,0.308415,0.292644,0.263650,0.267204,0.318626,0.295896,...,0.513671,0.539241,0.348594,0.337829,0.491605,0.463953,0.503071,0.470570,0.420897,0.427835
NDAR_INVZZZNB0XC,0.320196,0.376880,0.515554,0.441372,0.371629,0.289887,0.229527,0.233831,0.277044,0.300704,...,0.414326,0.436636,0.428477,0.396604,0.488379,0.495322,0.502859,0.504556,0.446710,0.433479

Unnamed: 0_level_0,cbcl_scr_syn_internal_r
src_subject_id,Unnamed: 1_level_1
NDAR_INV003RTV85,1.0
NDAR_INV007W6H7B,8.0
NDAR_INV00BD7VDC,5.0
NDAR_INV00CY2MDM,5.0
NDAR_INV00HEV6HB,5.0
...,...
NDAR_INVZZNX6W2P,4.0
NDAR_INVZZPKBDAC,1.0
NDAR_INVZZZ2ALR6,5.0
NDAR_INVZZZNB0XC,2.0


In [101]:
# Let's define a Pipeline, first by creating a series of based objects

# Standard Scaling
scaler = bp.Scaler('standard', scope='float')

# Ridge Regression model
ridge = bp.Model('ridge', params=1)
elastic = bp.Model('elastic', params=1)
lgbm = bp.Model('lgbm',params=1)
svm= bp.Model('svm',params=1)
hgb= bp.Model('elastic',params=1) 

# Parameter search strategy,
# using different evolution and a custom nested
# CV strategy.


search = bp.ParamSearch(search_type='RandomSearch',
                        n_iter=100,
                        cv=bp.CV(splits=.5,
                                 n_repeats=5))

pipe = bp.Pipeline([scaler,elastic], param_search=search)
pipe

Pipeline(param_search=ParamSearch(cv=CV(cv_strategy=CVStrategy(), n_repeats=5,
                                        splits=0.5),
                                  n_iter=100),
         steps=[Scaler(obj='standard'), Model(obj='elastic', params=1)])

In [102]:
# We can store some commonly used parameters in this
# ProblemSpec object. Though note, problem_type and scorer can both be automatically
# detected and set.
spec = bp.ProblemSpec(problem_type='regression',
                      scorer=['r2', 'explained_variance', 'neg_mean_squared_error'],
                      random_state=51,
                      n_jobs=8,
                      #scope='wm'
                      )

In [103]:
### CBCL
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      progress_bar=False,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)

results

Predicting target = cbcl_scr_syn_internal_r
Using problem_type = regression
Using scope = all (defining a total of 264 features).
Evaluating 7718 total data points.

Training Set: (5788, 264)
Validation Set: (1930, 264)
Fit fold in 42.2 seconds.
r2: -0.0047
explained_variance: 0.0005
neg_mean_squared_error: -34.02

Training Set: (5788, 264)
Validation Set: (1930, 264)
Fit fold in 22.0 seconds.
r2: 0.0021
explained_variance: 0.0033
neg_mean_squared_error: -28.40

Training Set: (5789, 264)
Validation Set: (1929, 264)
Fit fold in 21.6 seconds.
r2: 0.0009
explained_variance: 0.0010
neg_mean_squared_error: -29.97

Training Set: (5789, 264)
Validation Set: (1929, 264)
Fit fold in 20.8 seconds.
r2: -0.0002
explained_variance: 0.0009
neg_mean_squared_error: -29.46



BPtEvaluator
------------
r2: -0.0005 ± 0.0026
explained_variance: 0.0014 ± 0.0011
neg_mean_squared_error: -30.46 ± 2.13

Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_', 'cv']

Avaliable Methods: ['get_X_transform_df', 'get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']

Evaluated With:
target: cbcl_scr_syn_internal_r
problem_type: regression
scope: all
subjects: train
random_state: 51
n_jobs: 8


In [104]:
## Feature Importance of the model
fis = results.get_fis(mean=True)
fis.sort_values()
len(fis)

46

In [105]:
fis = results.get_fis(mean=True)
fis.sort_values()

dmri_dti.fa.wm_cort.destrieux_s.parieto.occipital.rh         -0.033369
dmri_dti.fa.wm_cort.desikan_lingual.rh                       -0.032042
dmri_dti.fa.wm_cort.destrieux_g.temp.sup.g.t.transv.lh       -0.014728
dmri_dti.fa.wm_cort.destrieux_s.temporal.inf.rh              -0.011947
dmri_dti.fa.wm_cort.destrieux_s.oc.sup.and.transversal.rh    -0.008488
dmri_dti.full.fa_fiber.at_pscs.rh                            -0.007589
dmri_dti.fa.wm_cort.destrieux_pole.occipital.lh              -0.006392
dmri_dti.fa.wm_cort.destrieux_s.orbital.med.olfact.rh        -0.006107
dmri_dti.fa.wm_cort.destrieux_g.oc.temp.med.lingual.rh       -0.005880
dmri_dti.fa.wm_cort.destrieux_s.suborbital.lh                -0.005465
dmri_dti.full.fa_fiber.at_fscs.lh                            -0.003534
dmri_dti.fa.wm_cort.desikan_posteriorcingulate.rh            -0.002838
dmri_dti.full.fa_fiber.at_pscs.lh                            -0.002749
dmri_dti.full.fa_fiber.at_scs.lh                             -0.000963
dmri_d

In [106]:
feat_selector = bp.FeatSelector('selector', params=1)



# This param search is responsible for optimizing the selected features from feat_selector

# We create a nested elastic net model to optimize - no particular CV should be okay
random_search = bp.ParamSearch('RandomSearch', n_iter=100)
elastic_search = bp.Model('elastic', params=1,
                          param_search=random_search)

# Put it all together in a pipeline
fs_pipe = bp.Pipeline([scaler, feat_selector, elastic_search])

In [107]:
results = bp.evaluate(pipeline=fs_pipe, dataset=data,progress_bar=False,
                      problem_spec=spec)
results

Predicting target = cbcl_scr_syn_internal_r
Using problem_type = regression
Using scope = all (defining a total of 264 features).
Evaluating 7718 total data points.

Training Set: (6174, 264)
Validation Set: (1544, 264)
Fit fold in 16.9 seconds.
r2: -0.0049
explained_variance: -0.0008
neg_mean_squared_error: -34.01

Training Set: (6174, 264)
Validation Set: (1544, 264)
Fit fold in 10.9 seconds.
r2: 0.0001
explained_variance: 0.0005
neg_mean_squared_error: -30.61

Training Set: (6174, 264)
Validation Set: (1544, 264)
Fit fold in 9.8 seconds.
r2: 0.0039
explained_variance: 0.0042
neg_mean_squared_error: -28.53

Training Set: (6175, 264)
Validation Set: (1543, 264)
Fit fold in 9.6 seconds.
r2: 0.0006
explained_variance: 0.0013
neg_mean_squared_error: -30.82

Training Set: (6175, 264)
Validation Set: (1543, 264)
Fit fold in 9.6 seconds.
r2: -0.0015
explained_variance: 0.0006
neg_mean_squared_error: -28.34



BPtEvaluator
------------
r2: -0.0004 ± 0.0029
explained_variance: 0.0012 ± 0.0017
neg_mean_squared_error: -30.46 ± 2.05

Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_', 'cv']

Avaliable Methods: ['get_X_transform_df', 'get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']

Evaluated With:
target: cbcl_scr_syn_internal_r
problem_type: regression
scope: all
subjects: train
random_state: 51
n_jobs: 8


In [108]:
fis = results.get_fis(mean=True)
fis.sort_values()


dmri_dti.fa.wm_cort.desikan_lingual.rh                       -0.066392
dmri_dti.fa.wm_cort.destrieux_s.parieto.occipital.rh         -0.060035
dmri_dti.fa.wm_cort.destrieux_g.oc.temp.med.lingual.rh       -0.008373
dmri_dti.full.fa_fiber.at_scs.rh                             -0.007804
dmri_dti.fa.wm_cort.desikan_transversetemporal.lh            -0.005930
dmri_dti.fa.wm_cort.destrieux_s.oc.temp.med.and.lingual.rh   -0.003627
dmri_dti.full.fa_fiber.at_fscs.rh                            -0.000954
dmri_dti.fa.wm_cort.destrieux_g.cuneus.lh                    -0.000527
dmri_dti.full.fa_fiber.at_pscs.lh                            -0.000021
dmri_dti.fa.wm_cort.destrieux_g.temporal.inf.rh               0.000005
dmri_dti.fa.wm_cort.destrieux_s.front.inf.lh                  0.000357
dmri_dti.fa.wm_cort.desikan_superiorfrontal.lh                0.001502
dmri_dti.fa.wm_cort.destrieux_lat.fis.ant.vertical.lh         0.001559
dmri_dti.fa.wm_cort.destrieux_g.precentral.rh                 0.002800
dmri_d

In [None]:
### lgbm with cov bpm
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      progress_bar=False,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)
results

Predicting target = cbcl_scr_syn_internal_r
Using problem_type = regression
Using scope = all (defining a total of 30 features).
Evaluating 3134 total data points.

Training Set: (2350, 30)
Validation Set: (784, 30)


Exception ignored in: <function tqdm.__del__ at 0x000002AA7F9F7950>
Traceback (most recent call last):
  File "C:\Users\simon\AppData\Roaming\Python\Python37\site-packages\tqdm\std.py", line 1147, in __del__
    self.close()
  File "C:\Users\simon\AppData\Roaming\Python\Python37\site-packages\tqdm\notebook.py", line 286, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


Fit fold in 3.6 seconds.
r2: 0.1825
explained_variance: 0.1830
neg_mean_squared_error: -20.61

Training Set: (2350, 30)
Validation Set: (784, 30)
Fit fold in 3.5 seconds.
r2: 0.2701
explained_variance: 0.2702
neg_mean_squared_error: -20.90

Training Set: (2351, 30)
Validation Set: (783, 30)
Fit fold in 3.4 seconds.
r2: 0.2072
explained_variance: 0.2079
neg_mean_squared_error: -22.09

Training Set: (2351, 30)
Validation Set: (783, 30)
Fit fold in 3.4 seconds.
r2: 0.2591
explained_variance: 0.2591
neg_mean_squared_error: -21.92



BPtEvaluator
------------
r2: 0.2297 ± 0.0361
explained_variance: 0.2300 ± 0.0359
neg_mean_squared_error: -21.38 ± 0.6371

Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_', 'cv']

Avaliable Methods: ['get_X_transform_df', 'get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']

Evaluated With:
target: cbcl_scr_syn_internal_r
problem_type: regression
scope: all
subjects: train
random_state: 51
n_jobs: 8


In [None]:
### elastic internalizing with all covariates and prs
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)
results

Predicting target = cbcl_scr_syn_internal_r
Using problem_type = regression
Using scope = all (defining a total of 30 features).
Evaluating 3134 total data points.


ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html

In [None]:
### elastic internalizing all no parental history
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)
results

Predicting target = cbcl_scr_syn_internal_t
Using problem_type = regression
Using scope = all (defining a total of 397 features).
Evaluating 5318 total data points.


Folds:   0%|          | 0/4 [00:00<?, ?it/s]


Train size: 3988 - Val size: 1330
Fit fold in 36.397 seconds.
r2: 0.12831103085801654
explained_variance: 0.12843002393999303
neg_mean_squared_error: -96.85926592195611

Train size: 3988 - Val size: 1330
Fit fold in 30.490 seconds.
r2: 0.12113014133082545
explained_variance: 0.12197106981899819
neg_mean_squared_error: -97.71013698309838

Train size: 3989 - Val size: 1329
Fit fold in 32.028 seconds.
r2: 0.13021923413321335
explained_variance: 0.13083577468798568
neg_mean_squared_error: -93.88792892396408

Train size: 3989 - Val size: 1329
Fit fold in 32.488 seconds.
r2: 0.13257665860509493
explained_variance: 0.1325851668477036
neg_mean_squared_error: -96.2235039844999



BPtEvaluator
------------
mean_scores = {'r2': 0.12805926623178757, 'explained_variance': 0.12845550882367013, 'neg_mean_squared_error': -96.17020895337961}
std_scores = {'r2': 0.004276343429548157, 'explained_variance': 0.004023938980181508, 'neg_mean_squared_error': 1.4193150051757732}

Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_']

Avaliable Methods: ['get_X_transform_df', 'get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']

Evaluated with:
ProblemSpec(n_jobs=8, problem_type='regression', random_state=51,
            scorer={'explained_variance': make_scorer(explained_variance_score),
                    'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False),
                    'r2': make_scorer(r2_score)},
            subjects='train', target='cbcl_scr_syn_internal_t')

In [None]:
### elastic internalizing cov only no parental history
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)
results

Predicting target = cbcl_scr_syn_internal_t
Using problem_type = regression
Using scope = all (defining a total of 17 features).
Evaluating 7268 total data points.


Folds:   0%|          | 0/4 [00:00<?, ?it/s]


Train size: 5451 - Val size: 1817
Fit fold in 21.666 seconds.
r2: 0.1183595450352094
explained_variance: 0.12008179610234138
neg_mean_squared_error: -99.98286465477453

Train size: 5451 - Val size: 1817
Fit fold in 17.366 seconds.
r2: 0.10368315598619171
explained_variance: 0.10888452731162612
neg_mean_squared_error: -97.38104602059458

Train size: 5451 - Val size: 1817
Fit fold in 17.388 seconds.
r2: 0.0874825263087573
explained_variance: 0.08847115529255534
neg_mean_squared_error: -101.46310869291649

Train size: 5451 - Val size: 1817
Fit fold in 16.753 seconds.
r2: 0.1251391714344876
explained_variance: 0.1289352556887453
neg_mean_squared_error: -96.77727095680798



BPtEvaluator
------------
mean_scores = {'r2': 0.1086660996911615, 'explained_variance': 0.11159318359881704, 'neg_mean_squared_error': -98.9010725812734}
std_scores = {'r2': 0.01448187215550371, 'explained_variance': 0.015122574338426684, 'neg_mean_squared_error': 1.9075715722420588}

Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_']

Avaliable Methods: ['get_X_transform_df', 'get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']

Evaluated with:
ProblemSpec(n_jobs=8, problem_type='regression', random_state=51,
            scorer={'explained_variance': make_scorer(explained_variance_score),
                    'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False),
                    'r2': make_scorer(r2_score)},
            subjects='train', target='cbcl_scr_syn_internal_t')

In [None]:
### elastic internalizing cov only
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)
results

Predicting target = cbcl_scr_syn_internal_t
Using problem_type = regression
Using scope = all (defining a total of 18 features).
Evaluating 7268 total data points.


Folds:   0%|          | 0/4 [00:00<?, ?it/s]


Train size: 5451 - Val size: 1817
Fit fold in 22.650 seconds.
r2: 0.14655284770043975
explained_variance: 0.14782473828950482
neg_mean_squared_error: -96.7855894518559

Train size: 5451 - Val size: 1817
Fit fold in 17.963 seconds.
r2: 0.1160290894934084
explained_variance: 0.12023257576647028
neg_mean_squared_error: -96.03971239837946

Train size: 5451 - Val size: 1817
Fit fold in 17.451 seconds.
r2: 0.11150450625816988
explained_variance: 0.11246574842529611
neg_mean_squared_error: -98.7920970872242

Train size: 5451 - Val size: 1817
Fit fold in 18.581 seconds.
r2: 0.14804737001912605
explained_variance: 0.15164488890809125
neg_mean_squared_error: -94.24316168002957



BPtEvaluator
------------
mean_scores = {'r2': 0.13053345336778602, 'explained_variance': 0.13304198784734061, 'neg_mean_squared_error': -96.46514015437228}
std_scores = {'r2': 0.016851080608940656, 'explained_variance': 0.016971006931839873, 'neg_mean_squared_error': 1.630612878308093}

Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_']

Avaliable Methods: ['get_X_transform_df', 'get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']

Evaluated with:
ProblemSpec(n_jobs=8, problem_type='regression', random_state=51,
            scorer={'explained_variance': make_scorer(explained_variance_score),
                    'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False),
                    'r2': make_scorer(r2_score)},
            subjects='train', target='cbcl_scr_syn_internal_t')

In [None]:
### elastic internalizing cov only
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)
results

Predicting target = cbcl_scr_syn_internal_t
Using problem_type = regression
Using scope = all (defining a total of 18 features).
Evaluating 7268 total data points.


Folds:   0%|          | 0/4 [00:00<?, ?it/s]


Train size: 5451 - Val size: 1817
Fit fold in 22.522 seconds.
r2: 0.1468642952234971
explained_variance: 0.14813183991452594
neg_mean_squared_error: -96.75026959400506

Train size: 5451 - Val size: 1817
Fit fold in 17.998 seconds.
r2: 0.1160290894934084
explained_variance: 0.12023257576647028
neg_mean_squared_error: -96.03971239837946

Train size: 5451 - Val size: 1817
Fit fold in 17.885 seconds.
r2: 0.11150450625816988
explained_variance: 0.11246574842529611
neg_mean_squared_error: -98.7920970872242

Train size: 5451 - Val size: 1817
Fit fold in 18.335 seconds.
r2: 0.14804737001912605
explained_variance: 0.15164488890809125
neg_mean_squared_error: -94.24316168002957



BPtEvaluator
------------
mean_scores = {'r2': 0.13061131524855035, 'explained_variance': 0.1331187632535959, 'neg_mean_squared_error': -96.45631018990957}
std_scores = {'r2': 0.016925475043535292, 'explained_variance': 0.017038270540495278, 'neg_mean_squared_error': 1.6289484804326015}

Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_']

Avaliable Methods: ['get_X_transform_df', 'get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']

Evaluated with:
ProblemSpec(n_jobs=8, problem_type='regression', random_state=51,
            scorer={'explained_variance': make_scorer(explained_variance_score),
                    'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False),
                    'r2': make_scorer(r2_score)},
            subjects='train', target='cbcl_scr_syn_internal_t')

In [None]:
### elastic internalizing dti only
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)
results

Predicting target = cbcl_scr_syn_internal_t
Using problem_type = regression
Using scope = all (defining a total of 89 features).
Evaluating 6576 total data points.


Folds:   0%|          | 0/4 [00:00<?, ?it/s]


Train size: 4932 - Val size: 1644
Fit fold in 28.432 seconds.
r2: 0.11508706138607572
explained_variance: 0.11701070088958998
neg_mean_squared_error: -94.86345045504217

Train size: 4932 - Val size: 1644
Fit fold in 24.502 seconds.
r2: 0.10680526480064689
explained_variance: 0.1068889340270559
neg_mean_squared_error: -103.41746951802959

Train size: 4932 - Val size: 1644
Fit fold in 23.822 seconds.
r2: 0.1324180240785926
explained_variance: 0.13242644555444272
neg_mean_squared_error: -96.2421318962984

Train size: 4932 - Val size: 1644
Fit fold in 23.541 seconds.
r2: 0.13465339783919894
explained_variance: 0.1359619773819586
neg_mean_squared_error: -100.2449139597064



BPtEvaluator
------------
mean_scores = {'r2': 0.12224093702612854, 'explained_variance': 0.1230720144632618, 'neg_mean_squared_error': -98.69199145726915}
std_scores = {'r2': 0.011694872787201306, 'explained_variance': 0.01175040573478336, 'neg_mean_squared_error': 3.3690224574614063}

Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_']

Avaliable Methods: ['get_X_transform_df', 'get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']

Evaluated with:
ProblemSpec(n_jobs=8, problem_type='regression', random_state=51,
            scorer={'explained_variance': make_scorer(explained_variance_score),
                    'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False),
                    'r2': make_scorer(r2_score)},
            subjects='train', target='cbcl_scr_syn_internal_t')

In [None]:
### elastic internalizing dti
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)
results

Predicting target = cbcl_scr_syn_internal_t
Using problem_type = regression
Using scope = all (defining a total of 468 features).
Evaluating 5201 total data points.


Folds:   0%|          | 0/4 [00:00<?, ?it/s]


Train size: 3900 - Val size: 1301
Fit fold in 35.734 seconds.
r2: 0.1061702149764937
explained_variance: 0.10621314023632722
neg_mean_squared_error: -94.35693289387672

Train size: 3901 - Val size: 1300
Fit fold in 32.509 seconds.
r2: 0.12665506397440818
explained_variance: 0.1269798088524331
neg_mean_squared_error: -97.50976362086429

Train size: 3901 - Val size: 1300
Fit fold in 32.162 seconds.
r2: 0.12881473999724302
explained_variance: 0.12993691704209953
neg_mean_squared_error: -94.30894684816529

Train size: 3901 - Val size: 1300
Fit fold in 32.283 seconds.
r2: 0.13186716127986986
explained_variance: 0.1324105551689032
neg_mean_squared_error: -95.50904483923601



BPtEvaluator
------------
mean_scores = {'r2': 0.12337679505700369, 'explained_variance': 0.12388510532494076, 'neg_mean_squared_error': -95.42117205053559}
std_scores = {'r2': 0.010105332861084842, 'explained_variance': 0.010382476343798103, 'neg_mean_squared_error': 1.2980356560969097}

Saved Attributes: ['estimators', 'preds', 'timing', 'train_subjects', 'val_subjects', 'feat_names', 'ps', 'mean_scores', 'std_scores', 'weighted_mean_scores', 'scores', 'fis_', 'coef_']

Avaliable Methods: ['get_X_transform_df', 'get_preds_dfs', 'get_fis', 'get_coef_', 'permutation_importance']

Evaluated with:
ProblemSpec(n_jobs=8, problem_type='regression', random_state=51,
            scorer={'explained_variance': make_scorer(explained_variance_score),
                    'neg_mean_squared_error': make_scorer(mean_squared_error, greater_is_better=False),
                    'r2': make_scorer(r2_score)},
            subjects='train', target='cbcl_scr_syn_internal_t')

In [None]:
### elastic internalizing
results = bp.evaluate(pipeline=pipe,
                      dataset=data,
                      problem_spec=spec,
                      subjects='train', # This line is important! It says we only want to evaluate on the train set
                      cv=4)
results

NameError: name 'bp' is not defined

In [None]:
pip install vtk

Note: you may need to restart the kernel to use updated packages.




  


In [None]:
from enigmatoolbox.datasets import load_summary_stats

# Load summary statistics for ENIGMA-Epilepsy
sum_stats = load_summary_stats('epilepsy')

# Get case-control subcortical volume and cortical thickness tables
SV = sum_stats['SubVol_case_vs_controls_ltle']
CT = sum_stats['CortThick_case_vs_controls_ltle']

# Extract Cohen's d values
SV_d = SV['d_icv']
CT_d = CT['d_icv']

16

In [None]:
stop_success[0:9]

Unnamed: 0,Features,Mean,StD,corr/SSRT,Beta.Mean
0,subcort.aseg_accumbens.area.lh,0.014775,0.185177,-0.07,-0.109016
1,subcort.aseg_accumbens.area.rh,0.017513,0.191345,-0.073586,-0.077661
2,subcort.aseg_amygdala.lh,0.014086,0.157135,-0.071502,-0.028379
3,subcort.aseg_amygdala.rh,0.020511,0.166054,-0.106774,-2.034516
4,subcort.aseg_caudate.lh,0.0215,0.162266,-0.105964,-0.248061
5,subcort.aseg_hippocampus.lh,0.004826,0.121976,-0.056222,0.361653
6,subcort.aseg_pallidum.lh,-0.001257,0.130407,-0.063536,-0.738379
7,subcort.aseg_putamen.lh,0.008222,0.138045,-0.126512,-4.913341
8,subcort.aseg_putamen.rh,0.025021,0.143475,-0.122096,-3.15112


16

In [None]:
stop_fail[0:15]

Unnamed: 0,Features,Mean,StD,corr/SSRT,Beta.Mean
0,subcort.aseg_accumbens.area.lh,-0.025442,0.210583,0.013935,1.474339
1,subcort.aseg_accumbens.area.rh,-0.019668,0.20722,0.00906,0.248416
2,subcort.aseg_amygdala.lh,-0.020487,0.167504,-0.021734,0.162025
3,subcort.aseg_amygdala.rh,-0.014237,0.177322,-0.034298,0.17331
4,subcort.aseg_brain.stem,0.05168,0.158235,-0.043105,-1.081468
5,subcort.aseg_caudate.lh,0.009734,0.173849,-0.014795,-1.50754
6,subcort.aseg_caudate.rh,0.012214,0.178521,-0.013993,-0.228852
7,subcort.aseg_cerebellum.cortex.lh,0.074778,0.247166,-0.019125,-1.882696
8,subcort.aseg_cerebellum.cortex.rh,0.058933,0.245482,-0.009997,-0.291043
9,subcort.aseg_pallidum.rh,0.005347,0.137829,0.020647,0.371621


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74])