# 1. Create parcellation for RNN models:

- Input: Denoised timeseries data (nii.gz)
- Output: one csv with parcellations as columns, and time points as rows, per subject per run
- Behavioral data are added for convenience later
- Atlas: Schaefer 2018, 17-network or 7-network parcellation
- N_ROI: 100

*Yiyu Wang 2022 November*




In [1]:
import numpy as np
import pandas as pd
import glob
import os
import nibabel as nib
from scipy.stats import zscore

# 1. Parcellation

In [3]:
# Toggle for the parcellation loop further down in the notebook.
make_parcellation = True

# Which task data to load: 'Novel' or 'Mem'.
task = 'Mem'

# Per-task settings: (CSV listing the included subjects, run numbers to process).
_TASK_CONFIG = {
    'Novel': ('/work/abslab/AVFP/Preproc_Scripts/included_AVFP_novel_subjects.csv', [3, 4, 5]),
    'Mem': ('/work/abslab/AVFP/Preproc_Scripts/included_AVFP_memory_subjects.csv', [1, 2]),
}

# Fail fast on a typo in `task` instead of hitting a NameError on
# `included_data` / `run_list` several cells later.
if task not in _TASK_CONFIG:
    raise ValueError(f"Unknown task {task!r}; expected one of {sorted(_TASK_CONFIG)}")

_included_csv, _task_runs = _TASK_CONFIG[task]
included_data = pd.read_csv(_included_csv, header=None)
run_list = list(_task_runs)

# The first (header-less) column holds the subject IDs; keep them as strings
# because they are spliced into file names below.
subIDs = included_data[0].astype('str').tolist()


data_dir = '/work/abslab/Yiyu/AVFP_analysis/denoised_visreg/'

print(subIDs)
total_subs = len(subIDs)
print(f"total subs = {total_subs}")

# Repetition time used to convert video onsets into TR indices
# (presumably in seconds, matching the onset files -- TODO confirm).
TR_LENGTH = 0.8

['100', '103', '104', '105', '106', '107', '108', '110', '111', '112', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '132', '133', '134', '135', '136', '137', '139', '140', '142', '143', '144', '145', '146', '147', '149', '150', '151', '152', '153', '154', '157', '158', '159', '160', '161', '162', '163', '164', '166', '169', '170', '171', '172', '174', '176', '177', '179', '180', '181', '182', '183', '184', '185']
total subs = 70


In [4]:
# create whole brain atlas and labels
#
# Builds `masker`, a NiftiLabelsMasker that averages the voxel time series
# within each atlas parcel. Two atlas sources are supported:
#   'network17' -> pre-saved Schaefer 2018 17-network NIfTI + TSV labels on disk
#   'network7'  -> Schaefer 2018 7-network atlas fetched through nilearn

from nilearn import plotting, image, input_data
N_ROI = 100
which_atlas = 'network7'

if which_atlas == 'network17':
    # make parcellation masker from pre-saved atlas nifti and atlas labels tsv:
    roi_dir = '/work/abslab/Yiyu/masks/'

    atlas_img = image.load_img(roi_dir + f'tpl-MNI152NLin6Asym_res-02_atlas-Schaefer2018_desc-{N_ROI}Parcels17Networks_dseg.nii.gz')
    atlas_data = image.get_data(atlas_img)

    atlas_info = pd.read_csv(roi_dir + f'tpl-MNI152NLin6Asym_atlas-Schaefer2018_desc-{N_ROI}Parcels17Networks_dseg.tsv', delimiter='\t')
    atlas_info.set_index('index', inplace=True)
    # Same table with the parcel index exposed as a regular 'new_label' column.
    my_atlas = atlas_info.reset_index().rename(columns={'index':'new_label'})
    wholebrain_data = atlas_data
    max_label = my_atlas.new_label.max()

    # Save a copy of the atlas image and its name/label table next to the
    # other masks.
    wholebrain_img = image.new_img_like(atlas_img, wholebrain_data, affine=atlas_img.affine)
    wholebrain_img.to_filename(f'/work/abslab/Yiyu/dnn/masks/wholebrain_atlas_{N_ROI}.nii.gz')

    my_atlas[['name','new_label']].to_csv(f'/work/abslab/Yiyu/dnn/masks/wholebrain_atlas-{which_atlas}_{N_ROI}.csv', index=False, header=True)

    # NOTE: the original line read `labels_img=labels_img=wholebrain_img`,
    # which is a SyntaxError and prevented the whole cell from running.
    masker = input_data.NiftiLabelsMasker(
        labels_img=wholebrain_img,
        detrend=True,
        standardize=True,
        verbose=1)

    # plot:
    plotting.view_img_on_surf(
        wholebrain_img,
        threshold=1,
        vol_to_surf_kwargs={"n_samples": 1, "radius": 0.0,
                            "interpolation": "nearest"},
        title='ROIs',
        vmax=np.max(wholebrain_data),
        symmetric_cmap=False,
        cmap="gist_ncar")

elif which_atlas == 'network7':
    from nilearn import datasets
    atlas = datasets.fetch_atlas_schaefer_2018(n_rois=N_ROI,yeo_networks=7, resolution_mm=2)
    masker = input_data.NiftiLabelsMasker(
        labels_img=atlas['maps'],
        labels=atlas['labels'],
        detrend=True,
        standardize=True,
        memory='nilearn_cache',
        verbose=1)

else:
    # Without this branch an unknown atlas name silently leaves `masker`
    # undefined (or stale from a previous run of the cell).
    raise ValueError(f"Unknown which_atlas {which_atlas!r}; expected 'network17' or 'network7'")




In [10]:
# fetch onset files:
def GetSubjectOnset(this_sub, run, task):
    """Load one subject's behavioural log and return the trials of one run.

    The log is looked up among the ``*.txt`` files of the task's logfile
    directory by the substring ``'sub_' + this_sub`` in the file path. It is
    read as a space-delimited, header-less table; only the first 18 columns
    are kept and named, and a ``subID`` column is appended.

    Parameters
    ----------
    this_sub : str
        Subject ID as it appears in the log file name.
    run : int
        Run number; compared against the ``run_number`` column.
    task : str
        ``'Novel'`` or ``'Mem'``; selects the logfile directory.

    Returns
    -------
    pandas.DataFrame
        Rows of the log whose ``run_number`` equals ``run``. Missing ratings
        are NOT removed here.

    Raises
    ------
    ValueError
        If ``task`` is not a known task name.
    FileNotFoundError
        If no log file matches the subject.
    """
    logfile_dirs = {
        'Novel': '/work/abslab/AVFP/logfiles/AffVidsNovel_logfiles',
        'Mem': '/work/abslab/AVFP/logfiles/AffVidsMem_logfiles',
    }
    if task not in logfile_dirs:
        # Previously this fell through and crashed with UnboundLocalError.
        raise ValueError(f"Unknown task {task!r}; expected one of {sorted(logfile_dirs)}")
    logfiles_dir = logfile_dirs[task]

    # Sorted so that the file picked below does not depend on directory order.
    onset_files = sorted(glob.glob(logfiles_dir + "/*.txt"))

    # column names of onset files:
    col_names = ['video_name','video_number','video_category','novel_familiar','run_number',
                 'video_onset','video_offset','video_duration_method1','video_duration_method2',
                 'fear_rating_onset','fear_rating','fear_rating_RT',
                 'arousal_rating_onset','arousal_rating','arousal_rating_RT',
                 'valence_rating_onset','valence_rating','valence_rating_RT']

    # Load onsets, for both runs (one file holds every run of the subject):
    matching_files = [i for i in onset_files if 'sub_' + this_sub in i]
    if not matching_files:
        # Previously an opaque IndexError from `[...][0]`.
        raise FileNotFoundError(
            f"No onset log containing 'sub_{this_sub}' found in {logfiles_dir}")
    onset_file = matching_files[0]

    onset_data = pd.read_csv(onset_file, delimiter=' ', header=None)
    onset_data = onset_data.iloc[:, 0:len(col_names)]  # remove extra cols
    onset_data.columns = col_names
    onset_data['subID'] = this_sub

    # keep only the requested run
    onset_data = onset_data.loc[onset_data.run_number == run]

    return onset_data
 
    
# concatenate the subject information to parcellation df:
def AddSubjectInfoToDf(parcellated_data_df, run_df, hemo_dynamic_lag = 5,
                       tr_length=None, video_duration=20):
    """Append per-TR behavioural labels to a parcellated time series.

    Five columns are appended after the existing ones: ``video_name``,
    ``video_category``, ``fear``, ``valence`` and ``arousal``. Every row
    starts at 0; for each trial in ``run_df`` the rows
    ``[start_tr, start_tr + duration)`` are overwritten with that trial's
    values, where ``start_tr = int(video_onset / tr_length) + hemo_dynamic_lag``.
    Windows running past the last row are clipped by positional slicing.

    Parameters
    ----------
    parcellated_data_df : pandas.DataFrame
        One row per TR, one column per parcel. Not modified.
    run_df : pandas.DataFrame
        One row per trial with columns ``video_onset``, ``video_name``,
        ``video_category``, ``fear_rating``, ``valence_rating`` and
        ``arousal_rating``.
    hemo_dynamic_lag : int, default 5
        Number of TRs added to each onset (named for the haemodynamic lag).
    tr_length : float, optional
        Repetition time, in the same units as ``video_onset``. Defaults to
        the notebook-level ``TR_LENGTH``.
    video_duration : float, default 20
        Fixed labelled duration per video, in the same units as
        ``video_onset``. A fixed value is used for the RNN rather than
        ``video_duration_method2``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the five label columns appended (object dtype).
    """
    if tr_length is None:
        tr_length = TR_LENGTH

    # appended column -> field of the trial row that fills it
    label_sources = {
        'video_name': 'video_name',
        'video_category': 'video_category',
        'fear': 'fear_rating',
        'valence': 'valence_rating',
        'arousal': 'arousal_rating',
    }
    label_cols = list(label_sources)

    labelled_df = parcellated_data_df.reindex(
        columns=[*parcellated_data_df.columns.tolist(), *label_cols], fill_value=0)
    # reindex creates the new columns as int64. Writing strings (or NaN
    # ratings) into them relies on silent upcasting, which pandas has
    # deprecated; object dtype keeps the 0 filler and accepts any value.
    labelled_df = labelled_df.astype({col: object for col in label_cols})
    col_positions = {col: labelled_df.columns.get_loc(col) for col in label_cols}

    # The labelled window length is the same for every trial.
    video_duration_in_trs = int(video_duration / tr_length)

    for _, trial in run_df.iterrows():
        start_tr = int(trial['video_onset'] / tr_length) + hemo_dynamic_lag
        end_tr = start_tr + video_duration_in_trs
        for col, source_field in label_sources.items():
            labelled_df.iloc[start_tr:end_tr, col_positions[col]] = trial[source_field]

    return labelled_df

In [12]:
# parcellate:
# For every included subject and run: average the denoised 4D image within
# each atlas parcel, attach the behavioural labels, and write one CSV
# (rows = TRs, columns = parcels + labels).
if make_parcellation:
    save_dir = f'/work/abslab/Yiyu/dnn/AVFP_parcellation/wholebrain_schaeffer_atlas-{which_atlas}_{N_ROI}/'
    # to_csv does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)

    # Column names must come from the same atlas the masker was built from.
    # `atlas_info` only exists for the 17-network atlas; using it for the
    # 7-network atlas is a NameError on a fresh kernel, or silently wrong
    # labels when it is left over from an earlier run.
    if which_atlas == 'network17':
        roi_names = atlas_info['name'].tolist()
    else:
        # Labels may be bytes or str depending on the nilearn version, and some
        # versions list a leading 'Background' entry with no data column.
        roi_names = [lbl.decode() if isinstance(lbl, bytes) else str(lbl)
                     for lbl in atlas['labels']]
        roi_names = [name for name in roi_names if name != 'Background']

    for subject in subIDs:
        print(subject)
        for run in run_list:
            file_name = data_dir + f'{subject}/sub-{subject}_run-{run}_AVFP_denoised_novideoregs.nii.gz'
            denoised_img = nib.load(file_name)
            parcellated_data = masker.fit_transform(denoised_img)

            parcellated_data_df = pd.DataFrame(parcellated_data, columns=roi_names)

            # add behavioral information to all TRs, adjusted for HRF: video name, fear rating, valence rating, arousal rating
            run_df = GetSubjectOnset(subject, run, task)
            parcellated_data_df = AddSubjectInfoToDf(parcellated_data_df, run_df)

            parcellated_data_df.to_csv(
                os.path.join(save_dir, f'par-{subject}_run-{run}_schaefer_{N_ROI}.csv'),
                index=False)

100
103
104
105
106
107
108
110
111
112
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
132
133
134
135
136
137
139
140
142
143
144
145
146
147
149
150
151
152
153
154
157
158
159
160
161
162
163
164
166
169
170
171
172
174
176
177
179
180
181
182
183
184
185


In [None]:
# get a quick visual of the last subject/run processed by the loop above.
# The original referenced `parcelatted_data`, a misspelled and undefined name;
# the correctly spelled `parcellated_data` is a NumPy array with no `.head()`,
# so the labelled DataFrame is the object to preview.
parcellated_data_df.head()