In [4]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
from glob import glob 
from os.path import join as opj
import os

In [5]:
def cleaning(df):
    '''
    Derive the analysis columns from the raw behavioral timing table.

    The frame is modified in place and also returned. Columns added:

    - ``n_pic``: the part of ``npic`` before the first ``'_'``.
    - ``TR``: ``onset`` rounded down to an integer.
    - ``pair``: first word of the first ``'/'``-separated field of ``condition``.
    - ``destination``: first word of the first comma-separated item in the
      second ``'/'``-separated field of ``condition``.
    - ``valid``: second comma-separated item mapped ``0 -> True``,
      ``1 -> False``, anything else -> ``None``.
    - ``catch``: ``True`` where a fourth comma-separated item is present.
    - ``n_int``: ``npic`` parsed as a number (``NaN`` when it does not parse).
    - ``segment``: ``'same'`` (``n_int`` <= 25), ``'similar'`` (<= 75),
      ``'different'`` (<= 100), otherwise ``None``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain string columns ``npic`` and ``condition`` and a numeric
        ``onset`` column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the columns above added.
    '''

    # Select columns by label ([0]) instead of [[0]].squeeze(): squeeze()
    # collapses a single-row frame to a scalar, which has no .str accessor.
    df['n_pic'] = df['npic'].str.split('_', expand=True)[0]
    df['TR'] = np.floor(df['onset']).astype('int')

    cond_parts = df['condition'].str.split('/', expand=True)
    df['pair'] = cond_parts[0].str.extract(r'(\w+)', expand=False)

    # reindex guarantees columns 0..3 exist even when no row carries that many
    # comma-separated items; missing ones are filled with NaN.
    detail = cond_parts[1].str.split(',', expand=True).reindex(columns=range(4))
    df['destination'] = detail[0].str.extract(r'(\w+)', expand=False)
    df['valid'] = pd.to_numeric(detail[1], errors='coerce').apply(
        lambda x: {0: True, 1: False}.get(x, None)
    )
    df['catch'] = detail[3].notnull()

    def segment(x):
        # NaN fails every comparison and therefore falls through to None.
        if x <= 25:
            return 'same'
        elif x <= 75:
            return 'similar'
        elif x <= 100:
            return 'different'
        else:
            return None

    # NOTE(review): this parses the full ``npic`` string, not ``n_pic``; values
    # containing '_' become NaN and get no segment. Confirm this is intended.
    df['n_int'] = pd.to_numeric(df['npic'], errors='coerce')
    df['segment'] = df['n_int'].apply(segment)

    return df

def cleaning2(df):
    '''
    Reduce the cleaned behavioral table to one row per distinct trial/TR.

    Catch rows and rows without a segment are discarded, the columns that
    are no longer needed are removed, exact duplicate rows are collapsed,
    and ``within_trial_TR`` (dense rank of ``TR`` inside each
    sub/round/trial group, starting at 1) is added. ``round`` and ``trial``
    are returned as integers.
    '''
    unused_columns = ['onset', 'design_onset', 'design_end', 'n_pic',
                      'npic', 'condition', 'n_int', 'catch']

    keep_rows = (df['catch'] == False) & df['segment'].notnull()
    deduped = df.loc[keep_rows].drop(columns=unused_columns).drop_duplicates()

    tr_rank = deduped.groupby(['sub', 'round', 'trial'])['TR'].rank(method='dense')
    deduped['within_trial_TR'] = tr_rank.astype('int')

    return deduped.astype({'round': 'int', 'trial': 'int'})

def cleaning3(fmri_df):
    '''
    Normalize an fMRI table so it can be merged with the behavioral table.

    - renames the ``'Unnamed: 0'`` column to ``TR``;
    - adds ``round``: the first run of digits in ``run``, as an integer;
    - replaces ``sub`` with the first run of digits it contains, as an integer;
    - drops the ``run`` and ``roi`` columns.

    The input frame is left untouched (the work is done on the copy returned
    by ``rename``), so calling this twice on the same frame is safe.

    Parameters
    ----------
    fmri_df : pandas.DataFrame
        Must contain ``'Unnamed: 0'``, string columns ``run`` and ``sub``,
        and a ``roi`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the changes listed above.
    '''
    cleaned = fmri_df.rename(columns={'Unnamed: 0': 'TR'})
    # expand=False yields a Series directly; no squeeze() is needed (squeeze()
    # would turn a single-row column into a scalar without a .str accessor).
    cleaned['round'] = cleaned['run'].str.extract(r'(\d+)', expand=False).astype('int')
    cleaned['sub'] = cleaned['sub'].str.extract(r'(\d+)', expand=False).astype('int')
    return cleaned.drop(columns=['run', 'roi'])

def pairwise_correlation(curr_tr_df, n_property_cols=9):
    '''
    Correlate every pair of rows of ``curr_tr_df`` across its feature columns.

    The first ``n_property_cols`` columns are treated as descriptive
    properties; all remaining columns are cast to float and used as the
    feature vector of each row.

    Parameters
    ----------
    curr_tr_df : pandas.DataFrame
        One row per observation; property columns first, features after.
    n_property_cols : int, default 9
        Number of leading property columns.

    Returns
    -------
    pandas.DataFrame
        Long format, one row per pair from the upper triangle of the
        correlation matrix (the diagonal is included). Columns: ``x`` and
        ``y`` (index labels of the two rows), ``cor``, then every property
        column of row ``x`` suffixed ``_x`` and of row ``y`` suffixed ``_y``.
    '''
    properties = curr_tr_df.iloc[:, :n_property_cols]
    features = curr_tr_df.iloc[:, n_property_cols:].astype(float)

    # calculate correlation for every row combination (rows become columns)
    corr_df = features.T.corr()
    # keep only the upper triangle; the builtin bool replaces the removed
    # np.bool alias
    upper = np.triu(np.ones(corr_df.shape)).astype(bool)
    # reorganize into long format; dropna() makes the removal of the masked
    # lower-triangle cells explicit instead of relying on stack() doing it
    corr_df = corr_df.where(upper).stack().dropna().reset_index()
    corr_df.columns = ['x', 'y', 'cor']

    # attach the properties of both members of each pair with explicit suffixes
    overall_df = (
        corr_df
        .merge(properties.add_suffix('_x'), right_index=True, left_on='x', how='left')
        .merge(properties.add_suffix('_y'), right_index=True, left_on='y', how='left')
    )

    return overall_df

def per_tr_calculation(df, roi_name=None):
    '''
    Run ``pairwise_correlation`` once per within-trial TR and stack the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``within_trial_TR`` column; the rows sharing each
        distinct value are passed to ``pairwise_correlation`` together.
    roi_name : optional
        Label written to the ``roi`` column of the result. When omitted, the
        module-level variable ``roi`` is used, which is what this function
        always did before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-TR outputs with an added ``roi`` column.
    '''
    if roi_name is None:
        # Backward-compatible fallback to the global set by the calling loop.
        roi_name = roi

    outputs = [
        pairwise_correlation(df.loc[df['within_trial_TR'] == curr_tr])
        for curr_tr in df['within_trial_TR'].unique()
    ]

    output_df = pd.concat(outputs)
    output_df['roi'] = roi_name

    return output_df

def save_file(subnum, output_df, file_name):
    '''
    Write ``output_df`` as CSV (without the index) into the subject's folder.

    The folder is ``<output_dir>/sub-MONSTERA<subnum>``, where ``output_dir``
    is a module-level variable; the folder is created when missing.

    Parameters
    ----------
    subnum : subject identifier formatted into the folder name.
    output_df : pandas.DataFrame to write.
    file_name : name of the CSV file inside the subject folder.
    '''
    sub_out_dir = opj(output_dir, 'sub-MONSTERA{}'.format(subnum))
    # exist_ok=True replaces the separate isdir() check, so there is no gap
    # between checking for the folder and creating it.
    os.makedirs(sub_out_dir, exist_ok=True)

    out_file = opj(sub_out_dir, file_name)
    output_df.to_csv(out_file, index=False)

def summarize(df):
    '''
    Average pairwise correlations by pair type, validity type and TR.

    Pairs whose two members come from the same round are discarded. Each
    remaining pair gets two labels:

    - ``type``: ``'across'`` when ``pair_x`` and ``pair_y`` differ;
      ``'within'`` when the pair matches but the destinations differ;
      ``'same'`` when both pair and destination match.
    - ``valid``: ``'valid-invalid'`` when ``valid_x`` and ``valid_y`` differ;
      ``'valid-valid'`` when they agree and ``valid_x`` is True;
      ``'invalid-invalid'`` when they agree and ``valid_x`` is False;
      ``'0'`` when none of these applies.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``round_x``, ``round_y``, ``pair_x``, ``pair_y``,
        ``destination_x``, ``destination_y``, ``valid_x``, ``valid_y``,
        ``within_trial_TR_x`` and ``cor``. The input is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``type``, ``valid``, ``cor`` (mean correlation) and
        ``within_trial_TR``, one row per observed combination.
    '''
    # remove same round correlations; copy so the label columns are written
    # to an independent frame rather than to a slice of the caller's data
    df = df.loc[df['round_x'] != df['round_y']].copy()

    # define trial type (np.select keeps the first condition that is True)
    type_conditions = [
        (df['pair_x'] != df['pair_y']),
        (df['destination_x'] != df['destination_y']),
        (df['destination_x'] == df['destination_y']),
    ]
    type_labels = ['across', 'within', 'same']
    # An explicit string default keeps a '0' label for unmatched rows. The
    # implicit default is the integer 0, which recent NumPy releases refuse
    # to combine with string choices.
    df['type'] = np.select(type_conditions, type_labels, default='0')

    # define valid type
    valid_conditions = [
        (df['valid_x'] != df['valid_y']),
        (df['valid_x'] == True),
        (df['valid_x'] == False),
    ]
    valid_labels = ['valid-invalid', 'valid-valid', 'invalid-invalid']
    df['valid'] = np.select(valid_conditions, valid_labels, default='0')

    # mean correlations
    summary = df.groupby(['type', 'valid', 'within_trial_TR_x'])['cor'].mean().reset_index()
    summary['within_trial_TR'] = summary['within_trial_TR_x']
    summary = summary.drop(columns=['within_trial_TR_x'])

    return summary

In [6]:
# Maps each ROI file-name prefix (used to glob the fMRI CSVs) to the short
# label written to the `roi` column of the outputs.
rois_dict = {
    'ca23dg-body_thre_0.5_masked':'ca23dg-body',
    'ca1-body_thre_0.5_masked':'ca1-body',
    'ca23dg_thre_0.5_masked':'ca23dg',
    'ca1_thre_0.5_masked':'ca1', 
    'evc_2_epi_thre_0.5_masked':'evc', 
    'ppa_mni_2_epi_thre_0.5_masked':'ppa'
}

# NOTE(review): absolute, machine-specific paths; consider one configurable
# base directory so the notebook runs elsewhere.
fMRI_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/fMRI/"
all_subs = os.listdir(fMRI_dir)

output_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/python/"
processed_subs = os.listdir(output_dir)

# Subjects that have fMRI CSVs but no output folder yet. sub-MONSTERA14 is
# always excluded; a set difference (rather than list.remove) means nothing
# is raised when that subject is absent or already processed.
todo_subs = sorted(set(all_subs) - set(processed_subs) - {'sub-MONSTERA14'})

behav_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/behavior/"
sub_dir = os.listdir(behav_dir)

# The last two characters of each folder name are used as the subject number.
behav_subnums = [x[-2:] for x in sub_dir]
todo_subnums = [x[-2:] for x in todo_subs]

# Keep only subjects that also have behavioral data; sorted so the processing
# order is the same on every run.
todo_subnums = sorted(set(behav_subnums) & set(todo_subnums))

In [9]:
# List the entries already present in output_dir; these are excluded from todo_subs.
print(processed_subs)

['sub-MONSTERA31', 'sub-MONSTERA12', 'sub-MONSTERA08', 'sub-MONSTERA23', 'sub-MONSTERA25', 'sub-MONSTERA13', 'sub-MONSTERA26', 'sub-MONSTERA07', 'sub-MONSTERA17', 'sub-MONSTERA16', 'sub-MONSTERA15', 'sub-MONSTERA19', 'sub-MONSTERA20', 'sub-MONSTERA37', 'sub-MONSTERA36', 'sub-MONSTERA33', 'sub-MONSTERA09', 'sub-MONSTERA35', 'sub-MONSTERA32', 'sub-MONSTERA22', 'sub-MONSTERA24', 'sub-MONSTERA10', 'sub-MONSTERA18', 'sub-MONSTERA29', 'sub-MONSTERA06', 'sub-MONSTERA21', 'sub-MONSTERA27', 'sub-MONSTERA28', 'sub-MONSTERA11']


In [8]:
# Number of subjects still to process; when this is 0 the processing loop has nothing to iterate over.
len(todo_subnums)

0

In [7]:
# Per-subject driver: build the behavioral table once, then for each ROI merge
# it with the fMRI table and compute pairwise correlations, both on the
# unsmoothed data and on a rolling mean of it.
# NOTE(review): every save_file call below is commented out and the ROI loop
# ends with `break`, so as written nothing is written to disk and only the
# first entry of rois_dict is processed per subject. This looks like a
# debugging state; confirm before relying on it.
for subnum in todo_subnums:
    print('---{}---'.format(subnum))
    
    # Read all timing files of this subject into one frame and derive the
    # trial-level columns (cleaning), then reduce to one row per TR (cleaning2).
    behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
    behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))
    
    # pd.concat raises ValueError when behav_files is empty.
    org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
    behav_df_tmp = cleaning(org_behav_df)
    behav_df = cleaning2(behav_df_tmp)
    
    fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))
    # `roi` is a plain loop variable here, but per_tr_calculation reads it as a
    # module-level global when labelling its output.
    for roi_file_name, roi in rois_dict.items():
        print(roi_file_name)
        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
        
        # correlations on the data as loaded (no rolling mean); the left merge
        # keeps every behavioral row
        df = behav_df.merge(fmri_df, on=['sub', 'round', 'TR'], how='left')
        output_df = per_tr_calculation(df)
        #save_file(subnum, output_df, 'sub-MONSTERA{}_norolling_{}.csv'.format(subnum, roi))
        
        summary_df = summarize(output_df)
        #save_file(subnum, summary_df, 'sub-MONSTERA{}_norolling_{}_summary.csv'.format(subnum, roi))
        
        # correlations on a centered 3-row rolling mean computed within each
        # (sub, round) group
        rolling_df = fmri_df.groupby(['sub','round']).rolling(3, center = True, method = 'table').mean()
        # NOTE(review): presumably removes the grouping columns duplicated in
        # the rolling result and the original row-index level ('level_2')
        # after reset_index; whether those columns exist depends on the
        # pandas version, so verify.
        rolling_df = rolling_df.drop(columns= ['sub','round']).reset_index().drop(columns= 'level_2')
        df = behav_df.merge(rolling_df, on=['sub', 'round', 'TR'], how='left')
        output_df = per_tr_calculation(df)
        #save_file(subnum, output_df, 'sub-MONSTERA{}_rolling3_{}.csv'.format(subnum, roi))
        
        summary_df = summarize(output_df)
        #save_file(subnum, summary_df, 'sub-MONSTERA{}_rolling3_{}_summary.csv'.format(subnum, roi))
        break

---09---
ca23dg-body_thre_0.5_masked


In [8]:
# Manual walk-through of per_tr_calculation. `df` is not defined in this cell;
# presumably it is the merged frame left in the kernel by the processing loop.
outputs = []
trs = df['within_trial_TR'].unique()

In [16]:
# Run only the first within-trial TR through pairwise_correlation (the loop
# breaks after one iteration) so the intermediate frames can be inspected.
for curr_tr in trs:
    print(curr_tr)
    curr_tr_df = df.loc[df['within_trial_TR'] == curr_tr]
    curr_tr_output = pairwise_correlation(curr_tr_df)
    outputs.append(curr_tr_output)
    break

1


In [19]:
# Inspect the rows selected for the current within-trial TR.
curr_tr_df

Unnamed: 0,sub,round,trial,TR,pair,destination,valid,segment,within_trial_TR,0,...,103,104,105,106,107,108,109,110,111,112
0,9,9,3,73,pair4_south,entrance,True,same,1,0.518210,...,-0.461881,-0.821217,-0.009233,-0.293390,-1.345732,-0.455121,0.491168,-0.906832,0.662693,0.103059
24,9,9,4,103,pair2_north,pole,True,same,1,-0.557125,...,-1.280117,0.034079,-0.175521,-0.552651,0.557634,0.353203,0.394860,-0.755425,0.038868,-0.178959
48,9,9,5,133,pair4_south,entrance,True,same,1,-0.317038,...,0.622422,0.813524,0.419770,0.693380,0.341525,0.044736,-0.287099,0.061546,-0.084701,-0.390041
72,9,9,6,163,pair2_north,pole,True,same,1,-0.069573,...,0.085622,0.088497,-0.632427,-0.624725,0.378804,0.000230,-0.283490,-0.562156,-0.203746,-0.375220
96,9,9,7,193,pair4_south,map,True,same,1,0.764725,...,0.602259,-0.017476,0.088813,0.302120,-0.982497,-0.072598,0.438216,-0.579906,0.790680,0.185732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2760,9,8,8,237,pair4_south,entrance,True,same,1,0.073726,...,-0.732864,-0.776224,-1.369052,-0.630596,-0.237702,-0.361690,-0.350008,-0.455051,0.182339,-0.365726
2784,9,8,11,309,pair4_south,entrance,False,same,1,-0.444932,...,-0.771131,-0.674327,0.420796,-0.486502,0.840830,0.535524,0.155928,0.166933,-0.246883,0.149418
2808,9,8,12,339,pair2_north,window,True,same,1,-0.516202,...,0.295186,-1.091799,-0.693039,-0.634675,-0.592067,0.014666,-0.201174,-0.839629,0.519569,-0.260950
2832,9,8,13,369,pair2_north,window,False,same,1,1.172527,...,-0.408189,-0.324663,-0.699182,-0.633651,-0.917683,-0.380010,0.573508,0.566889,-0.313395,-0.134581


In [20]:
# Transpose so each column is one row of curr_tr_df, skip the first 9 rows of
# the transposed frame (the property columns) and cast the rest to float.
curr_tr_df.T.iloc[9:].astype(float)

Unnamed: 0,0,24,48,72,96,120,144,168,192,216,...,2640,2664,2688,2712,2736,2760,2784,2808,2832,2856
0,0.518210,-0.557125,-0.317038,-0.069573,0.764725,0.035163,-0.687721,-0.056536,-0.321833,0.321294,...,-0.247656,0.679995,0.906095,-0.105899,-0.277200,0.073726,-0.444932,-0.516202,1.172527,-0.377394
1,-0.313539,0.617016,0.076280,-0.547941,-0.411885,0.289737,-0.524639,-0.644331,0.099315,-0.390180,...,0.495320,-0.014727,-0.380680,0.091954,0.688572,0.798150,-0.385865,0.470942,-0.391231,-0.144955
2,-0.150686,-0.099812,0.125056,-0.048817,-1.554681,0.276073,0.703302,0.303745,-0.295495,0.036739,...,0.929032,0.338689,-0.696447,0.298437,0.901908,-0.056171,-0.076538,-0.607672,-0.696150,-0.402706
3,-0.302785,0.502287,-0.114655,0.219965,-0.965877,-0.413443,-1.432097,-0.298671,0.497406,0.475733,...,1.565423,-0.449228,-0.165449,-0.134996,0.360656,0.720695,-0.398448,0.433329,0.502584,-0.233787
4,-0.325259,0.333260,-0.623211,-0.064407,-0.285645,0.843864,1.198741,-0.262120,0.432167,-0.520373,...,0.480273,0.196127,-0.239723,0.408416,0.393270,-0.590890,-0.337012,0.562256,0.073487,-0.173300
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108,-0.455121,0.353203,0.044736,0.000230,-0.072598,0.294839,-0.022212,-0.069968,0.405574,-0.341622,...,-0.083063,0.190363,-0.221084,-0.010687,0.500922,-0.361690,0.535524,0.014666,-0.380010,-0.592890
109,0.491168,0.394860,-0.287099,-0.283490,0.438216,-0.671820,0.254311,-1.183308,-0.136032,-0.187056,...,-0.594334,-0.657377,0.183774,-0.334196,0.132307,-0.350008,0.155928,-0.201174,0.573508,-0.084157
110,-0.906832,-0.755425,0.061546,-0.562156,-0.579906,-0.367714,-0.019951,0.465776,-0.324288,0.087255,...,0.094430,0.540572,-0.344207,-0.135022,-0.260857,-0.455051,0.166933,-0.839629,0.566889,-0.252734
111,0.662693,0.038868,-0.084701,-0.203746,0.790680,-0.711993,0.632217,0.802076,-0.246225,-0.435792,...,0.960814,0.113779,-0.084311,-0.122071,0.300684,0.182339,-0.246883,0.519569,-0.313395,0.126863


In [37]:
# correlation between every pair of rows of curr_tr_df (features start at column 9)
corr_df = curr_tr_df.T.iloc[9:].astype(float).corr()
# keep only the strict upper triangle of the correlation matrix (k = 1 also
# masks the diagonal); the builtin bool replaces the removed np.bool alias
corr_df = corr_df.where(np.triu(np.ones(corr_df.shape), k = 1).astype(bool))

In [38]:
# Preview the masked correlation matrix (masked cells show as NaN).
corr_df.head()

Unnamed: 0,0,24,48,72,96,120,144,168,192,216,...,2640,2664,2688,2712,2736,2760,2784,2808,2832,2856
0,,-0.001161,0.026577,-0.140345,0.077217,0.032713,-0.079503,0.119816,0.275461,-0.086823,...,-0.014014,-0.055752,0.027075,0.085446,0.006711,-0.129766,-0.081285,0.022387,0.187792,-0.067677
24,,,-0.047901,-0.146379,0.009935,0.079824,-0.088758,-0.036812,0.129389,0.074192,...,0.099493,-0.167879,-0.053592,0.022627,0.060989,-0.03998,0.203755,-0.071761,0.007382,-0.144673
48,,,,0.009743,-0.129226,0.021476,-0.136392,0.016295,0.053655,-0.129342,...,0.051052,0.041051,-0.238184,0.01855,0.020834,0.079705,-0.121368,-0.082898,-0.197785,0.175693
72,,,,,0.125541,-0.208638,-0.055358,0.011991,-0.224301,-0.119121,...,0.038809,0.223893,0.016238,0.037555,-0.066008,-0.003666,0.011158,0.050497,0.162511,0.037507
96,,,,,,-0.058697,0.167063,0.115987,-0.094612,0.018854,...,0.029457,0.06218,0.075711,-0.027597,0.012978,0.038053,0.02546,0.043493,0.166357,-0.124398


In [39]:
# Reshape the matrix into long format (one row per cell).
# NOTE(review): this overwrites corr_df with a transformed version of itself,
# so re-running the cell without re-running the previous one reshapes the
# already-reshaped frame.
corr_df = corr_df.stack().reset_index()

In [40]:
# Inspect the long-format correlations.
corr_df

Unnamed: 0,level_0,level_1,0
0,0,24,-0.001161
1,0,48,0.026577
2,0,72,-0.140345
3,0,96,0.077217
4,0,120,0.032713
...,...,...,...
7135,2784,2832,-0.057541
7136,2784,2856,-0.215120
7137,2808,2832,0.174338
7138,2808,2856,0.119831


In [43]:
# Inspect the property columns.
# NOTE(review): in the visible notebook `properties` is only assigned at
# module level in a later cell, so running the cells top to bottom on a fresh
# kernel would presumably raise NameError here; confirm and reorder if so.
properties

Unnamed: 0,sub,round,trial,TR,pair,destination,valid,segment,within_trial_TR
0,9,9,3,73,pair4_south,entrance,True,same,1
24,9,9,4,103,pair2_north,pole,True,same,1
48,9,9,5,133,pair4_south,entrance,True,same,1
72,9,9,6,163,pair2_north,pole,True,same,1
96,9,9,7,193,pair4_south,map,True,same,1
...,...,...,...,...,...,...,...,...,...
2760,9,8,8,237,pair4_south,entrance,True,same,1
2784,9,8,11,309,pair4_south,entrance,False,same,1
2808,9,8,12,339,pair2_north,window,True,same,1
2832,9,8,13,369,pair2_north,window,False,same,1


In [64]:
# The first 9 columns of curr_tr_df are the descriptive properties of each row.
properties = curr_tr_df.iloc[:, :9]
# rename columns
corr_df.columns = ['x', 'y', 'cor']
# Attach the properties of the row referenced by `x`; the property names do
# not clash with x/y/cor, so they are added without any suffix at this step.
overall_df = corr_df.merge(properties, 
                           right_index=True, 
                           left_on = 'x', 
                           how='left')

In [65]:
# Show the first pair after attaching the properties of `x`.
print(overall_df.iloc[0])

x                            0
y                           24
cor                -0.00116066
sub                          9
round                        9
trial                        3
TR                          73
pair               pair4_south
destination           entrance
valid                     True
segment                   same
within_trial_TR              1
Name: 0, dtype: object


In [66]:

# Attach the properties of the row referenced by `y`. The property names now
# clash with those added for `x`, so merge's default suffixes rename them to
# *_x and *_y.
# NOTE(review): overall_df is overwritten with a merge of itself, so this cell
# is not idempotent; re-run the previous cell first.
overall_df = overall_df.merge(properties,
                              right_index=True,
                              left_on = 'y',
                              how='left')

In [67]:
# Show the first pair with the properties of both `x` and `y` attached.
print(overall_df.iloc[0])

x                              0
y                             24
cor                  -0.00116066
sub_x                          9
round_x                        9
trial_x                        3
TR_x                          73
pair_x               pair4_south
destination_x           entrance
valid_x                     True
segment_x                   same
within_trial_TR_x              1
sub_y                          9
round_y                        9
trial_y                        4
TR_y                         103
pair_y               pair2_north
destination_y               pole
valid_y                     True
segment_y                   same
within_trial_TR_y              1
Name: 0, dtype: object


In [11]:
def pairwise_correlation(curr_tr_df, n_property_cols=9):
    '''
    Correlate every pair of rows of ``curr_tr_df`` across its feature columns.

    The first ``n_property_cols`` columns are treated as descriptive
    properties; all remaining columns are cast to float and used as the
    feature vector of each row.

    Parameters
    ----------
    curr_tr_df : pandas.DataFrame
        One row per observation; property columns first, features after.
    n_property_cols : int, default 9
        Number of leading property columns.

    Returns
    -------
    pandas.DataFrame
        Long format, one row per pair from the upper triangle of the
        correlation matrix (the diagonal is included). Columns: ``x`` and
        ``y`` (index labels of the two rows), ``cor``, then every property
        column of row ``x`` suffixed ``_x`` and of row ``y`` suffixed ``_y``.
    '''
    properties = curr_tr_df.iloc[:, :n_property_cols]
    features = curr_tr_df.iloc[:, n_property_cols:].astype(float)

    # calculate correlation for every row combination (rows become columns)
    corr_df = features.T.corr()
    # keep only the upper triangle; the builtin bool replaces the removed
    # np.bool alias
    upper = np.triu(np.ones(corr_df.shape)).astype(bool)
    # reorganize into long format; dropna() makes the removal of the masked
    # lower-triangle cells explicit instead of relying on stack() doing it
    corr_df = corr_df.where(upper).stack().dropna().reset_index()
    corr_df.columns = ['x', 'y', 'cor']

    # merge() only applies `suffixes` to column names present on both sides,
    # so a suffix requested on the first merge is never applied and the
    # x-side properties stay unsuffixed. Suffixing the property frames before
    # merging always produces the *_x / *_y names that summarize reads.
    overall_df = (
        corr_df
        .merge(properties.add_suffix('_x'), right_index=True, left_on='x', how='left')
        .merge(properties.add_suffix('_y'), right_index=True, left_on='y', how='left')
    )

    return overall_df

def per_tr_calculation(df, roi_name=None):
    '''
    Run ``pairwise_correlation`` once per within-trial TR and stack the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``within_trial_TR`` column; the rows sharing each
        distinct value are passed to ``pairwise_correlation`` together.
    roi_name : optional
        Label written to the ``roi`` column of the result. When omitted, the
        module-level variable ``roi`` is used, which is what this function
        always did before the parameter existed.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-TR outputs with an added ``roi`` column.
    '''
    if roi_name is None:
        # Backward-compatible fallback to the global set by the calling loop.
        roi_name = roi

    outputs = [
        pairwise_correlation(df.loc[df['within_trial_TR'] == curr_tr])
        for curr_tr in df['within_trial_TR'].unique()
    ]

    output_df = pd.concat(outputs)
    output_df['roi'] = roi_name

    return output_df