In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
from glob import glob 
from os.path import join as opj
import os
import re
from scipy.stats import ttest_rel
import seaborn as sns

def cleaning(df):
    '''
    Clean up the raw behavioural timing table and derive condition columns.

    The frame is modified in place and also returned. Columns added:

    n_pic        first '_'-separated token of ``npic``
    n_int        ``npic`` parsed as a number (NaN when it is not numeric)
    TR           ``onset`` rounded down to an integer
    pair         first word of the part of ``condition`` before '/'
    destination  first word of the first comma-separated field after '/'
    valid        second comma-separated field after '/': 0 -> True, 1 -> False,
                 anything else -> None
    catch        True where a fourth comma-separated field is present
    segment      label derived from ``n_int`` ('same', 'early-similar',
                 'late-similar', 'different'; None above 100 or when NaN)

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``npic``, ``onset`` and ``condition``.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df`` with the columns above added.
    '''

    df['n_pic'] = df['npic'].str.split('_', expand=True)[0]
    df['n_int'] = pd.to_numeric(df['npic'], errors='coerce')
    df['TR'] = df['onset'].apply(np.floor).astype('int')

    # Select columns as Series directly: DataFrame.squeeze() would collapse a
    # one-row frame to a scalar and break the .str accessor.
    cond_parts = df['condition'].str.split('/', expand=True)
    df['pair'] = cond_parts[0].str.extract(r'(\w+)', expand=False)

    # Guarantee fields 0..3 exist even when no row carries all of them, so a
    # table without any fourth field yields catch == False instead of a KeyError.
    dest_fields = cond_parts[1].str.split(',', expand=True).reindex(columns=range(4))
    df['destination'] = dest_fields[0].str.extract(r'(\w+)', expand=False)
    df['valid'] = pd.to_numeric(dest_fields[1], errors='coerce').apply(lambda x: {0: True, 1: False}.get(x, None))
    df['catch'] = dest_fields[3].notnull()

    def segment(x):
        # NaN fails every comparison and therefore falls through to None.
        if x <= 25:
            return 'same'
        elif x <= 50:
            return 'early-similar'
        elif x <= 75:
            return 'late-similar'
        elif x <= 100:
            return 'different'
        else:
            return None

    df['segment'] = df['n_int'].apply(segment)

    return df

def cleaning2(df):
    '''
    Reduce the picture-level table to one row per TR of each trial.

    Catch rows and rows without a segment label are discarded, the
    picture-level columns are dropped and the remaining duplicate rows are
    collapsed. ``within_trial_TR`` is the dense rank of ``TR`` inside each
    (sub, round, trial) group; ``round`` and ``trial`` are cast to int.
    '''
    picture_level_cols = ['onset', 'design_onset', 'design_end', 'n_pic', 'npic', 'condition', 'n_int', 'catch']

    keep_rows = df['catch'].eq(False) & df['segment'].notnull()
    per_tr = df.loc[keep_rows].drop(columns=picture_level_cols).drop_duplicates()

    tr_rank = per_tr.groupby(['sub', 'round', 'trial'])['TR'].rank(method='dense')

    # assign() replaces existing columns in place and appends new ones at the end
    return per_tr.assign(
        within_trial_TR=tr_rank.astype('int'),
        round=per_tr['round'].astype('int'),
        trial=per_tr['trial'].astype('int'),
    )

def cleaning3(fmri_df):
    '''
    Quick cleaning of an fMRI dataframe.

    Renames the 'Unnamed: 0' column to 'TR', replaces ``sub`` by the first run
    of digits found in it, adds ``round`` as the first run of digits found in
    ``run`` (both as int) and drops the ``run`` and ``roi`` columns.

    The input frame is left untouched; a new frame is returned.

    Parameters
    ----------
    fmri_df : pandas.DataFrame
        Must contain the string columns ``run`` and ``sub`` and a ``roi`` column.

    Returns
    -------
    pandas.DataFrame
    '''
    # rename() without inplace returns a new frame, so the caller's data is not mutated
    cleaned = fmri_df.rename(columns={'Unnamed: 0': 'TR'})
    # expand=False yields a Series; no squeeze(), which would turn a one-row
    # column into a scalar and break the .str accessor
    cleaned['round'] = cleaned['run'].str.extract(r'(\d+)', expand=False).astype('int')
    cleaned['sub'] = cleaned['sub'].str.extract(r'(\d+)', expand=False).astype('int')
    return cleaned.drop(columns=['run', 'roi'])

def pairwise_correlation(curr_tr_df, n_properties=9):
    '''
    Correlate the data columns of every pair of rows in ``curr_tr_df``.

    Parameters
    ----------
    curr_tr_df : pandas.DataFrame
        The first ``n_properties`` columns are treated as row properties; all
        remaining columns must be convertible to float and form the pattern
        that is correlated between rows.
    n_properties : int, default 9
        Number of leading property columns.

    Returns
    -------
    pandas.DataFrame
        Long format with columns ``x`` and ``y`` (row labels of the pair),
        ``cor`` (their correlation) followed by the property columns of row
        ``x`` and row ``y`` (suffixed ``_x`` / ``_y`` by the second merge).
        Only the upper triangle including the diagonal is kept, and pairs
        whose correlation is NaN are dropped.
    '''
    properties = curr_tr_df.iloc[:, :n_properties]
    # transpose so that every original row becomes a column, then correlate columns
    corr_df = curr_tr_df.T.iloc[n_properties:].astype(float).corr()
    # keep only the upper triangle; plain bool because the np.bool alias was removed from NumPy
    upper_triangle = np.triu(np.ones(corr_df.shape)).astype(bool)
    corr_df = corr_df.where(upper_triangle)
    # long format; explicit dropna() so the masked lower triangle is removed
    # whether or not stack() drops NaN on its own
    corr_df = corr_df.stack().dropna().reset_index()
    corr_df.columns = ['x', 'y', 'cor']
    overall_df = corr_df.merge(properties, right_index=True, left_on = 'x', how='left').merge(properties, right_index=True, left_on = 'y', how='left')

    return overall_df

def per_tr_calculation(df, roi=None):
    '''
    Run ``pairwise_correlation`` separately for every ``within_trial_TR`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``within_trial_TR`` column and the layout expected by
        ``pairwise_correlation``.
    roi : optional
        Value written to the ``roi`` column of the result. When omitted, the
        notebook-level variable ``roi`` is used (the original behaviour); pass
        it explicitly to avoid depending on leftover kernel state.

    Returns
    -------
    pandas.DataFrame
        The per-TR results concatenated, with an added ``roi`` column.

    Raises
    ------
    NameError
        If ``roi`` is not given and no notebook-level ``roi`` exists.
    '''
    if roi is None:
        # legacy fallback: look the label up in the module/notebook namespace
        try:
            roi = globals()['roi']
        except KeyError:
            raise NameError("name 'roi' is not defined") from None

    outputs = [
        pairwise_correlation(df.loc[df['within_trial_TR'] == curr_tr])
        for curr_tr in df['within_trial_TR'].unique()
    ]

    output_df = pd.concat(outputs)
    output_df['roi'] = roi

    return output_df

def save_file(subnum, output_df, file_name):
    '''
    Write ``output_df`` as CSV (without the index) to
    ``<output_dir>/sub-MONSTERA<subnum>/<file_name>``.

    The subject directory is created when missing. ``output_dir`` is read from
    the notebook namespace.
    '''
    sub_out_dir = opj(output_dir, 'sub-MONSTERA{}'.format(subnum))
    # exist_ok avoids the check-then-create race of isdir() followed by makedirs()
    os.makedirs(sub_out_dir, exist_ok=True)

    out_file = opj(sub_out_dir, file_name)
    output_df.to_csv(out_file, index=False)

In [2]:
    
# Maps the prefix of each ROI's fMRI csv file name to the short label stored in
# the `roi` column of the results. Insertion order is the processing order.
rois_dict = dict([
    ('ca1-body_thre_0.5_masked', 'ca1-body'),
    ('ca23dg-body_thre_0.5_masked', 'ca23dg-body'),
    # currently disabled:
    # ('ca23dg_thre_0.5_masked', 'ca23dg'),
    # ('ca1_thre_0.5_masked', 'ca1'),
    ('evc_2_epi_thre_0.5_masked', 'evc'),
    ('ppa_mni_2_epi_thre_0.5_masked', 'ppa'),
])

# Input / output locations used by the cells below.
preprocess_dir = '/projects/kuhl_lab/wanjiag/MONSTERA/derivatives/preprocess'
behav_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/behavior/"
fMRI_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/fMRI/"
output_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/python/"

In [3]:

# Subject folders found under the preprocessing directory (pattern ends in '/',
# so only directories match).
f_list = glob(os.path.join(preprocess_dir, '*sub-MONSTERA*/'))
# Folder name = last path component. Unlike slicing by the length of the common
# path, this also works with zero or one match and with a non-normalised
# preprocess_dir.
subs = sorted(os.path.basename(os.path.normpath(f)) for f in f_list)
print(subs)

# Subjects excluded from all analyses below.
bad = ['sub-MONSTERA01', 'sub-MONSTERA02', 'sub-MONSTERA03', 'sub-MONSTERA04', 'sub-MONSTERA05',
        'sub-MONSTERA13', 'sub-MONSTERA14', 'sub-MONSTERA20', 'sub-MONSTERA23', 'sub-MONSTERA24', 'sub-MONSTERA27', 
        'sub-MONSTERA30', 'sub-MONSTERA34']

todo_subs = sorted(set(subs) - set(bad))
print(todo_subs)

['sub-MONSTERA01', 'sub-MONSTERA02', 'sub-MONSTERA03', 'sub-MONSTERA04', 'sub-MONSTERA05', 'sub-MONSTERA06', 'sub-MONSTERA07', 'sub-MONSTERA08', 'sub-MONSTERA09', 'sub-MONSTERA10', 'sub-MONSTERA11', 'sub-MONSTERA12', 'sub-MONSTERA13', 'sub-MONSTERA14', 'sub-MONSTERA15', 'sub-MONSTERA16', 'sub-MONSTERA17', 'sub-MONSTERA18', 'sub-MONSTERA19', 'sub-MONSTERA20', 'sub-MONSTERA21', 'sub-MONSTERA22', 'sub-MONSTERA23', 'sub-MONSTERA24', 'sub-MONSTERA25', 'sub-MONSTERA26', 'sub-MONSTERA27', 'sub-MONSTERA28', 'sub-MONSTERA29', 'sub-MONSTERA31', 'sub-MONSTERA32', 'sub-MONSTERA33', 'sub-MONSTERA35', 'sub-MONSTERA36', 'sub-MONSTERA37', 'sub-MONSTERA38', 'sub-MONSTERA39', 'sub-MONSTERA40', 'sub-MONSTERA41', 'sub-MONSTERA42', 'sub-MONSTERA43', 'sub-MONSTERA44', 'sub-MONSTERA45', 'sub-MONSTERA46', 'sub-MONSTERA47', 'sub-MONSTERA48', 'sub-MONSTERA49', 'sub-MONSTERA50', 'sub-MONSTERA51', 'sub-MONSTERA52', 'sub-MONSTERA53']
['sub-MONSTERA06', 'sub-MONSTERA07', 'sub-MONSTERA08', 'sub-MONSTERA09', 'sub-MON

In [4]:
# Number of subjects left after removing the excluded ones
len(todo_subs)

40

In [5]:
# Load the post-scan summary table (one row per route and subject, see the display below)
postscan_summary = pd.read_csv('/projects/kuhl_lab/wanjiag/MONSTERA/derivatives/scripts/R-analysis/csv_files/postscan_summary.csv')

In [6]:
# Drop the summary statistics that are not used below.
# NOTE(review): this cell overwrites its own input, so running it a second time
# raises KeyError (columns already gone) - re-run the read_csv cell first.
postscan_summary = postscan_summary.drop(columns = ['m','max','min','median','range','n'])


In [7]:
# Display the remaining columns of the summary table
postscan_summary


Unnamed: 0,route,sub,mode
0,pair1_east,6,6
1,pair1_east,8,7
2,pair1_east,10,8
3,pair1_east,12,8
4,pair1_east,16,9
...,...,...,...
75,pair4_south,45,9
76,pair4_south,47,9
77,pair4_south,49,9
78,pair4_south,51,9


In [8]:
# Rows whose `mode` is 6 or lower
postscan_summary.loc[postscan_summary['mode'] <=6]


Unnamed: 0,route,sub,mode
0,pair1_east,6,6
11,pair1_east,38,6
16,pair1_east,48,6
61,pair4_south,11,6
63,pair4_south,17,6


## Between MoI and destination (not including MoI or destination)
Average the patterns first, then correlate within each trial.

In [23]:
# NOTE(review): in the cells visible here, `df` is only assigned inside the
# analysis loops further down, so on a fresh kernel (Restart & Run All) this
# cell would raise NameError - confirm whether it should move below them.
df['segment']

0            same
1            same
2            same
3            same
4            same
          ...    
2875    different
2876    different
2877    different
2878    different
2879    different
Name: segment, Length: 2880, dtype: object

In [None]:
# For every subject and ROI: average the 'same'-segment TRs of each trial, average
# the six within-trial TRs that follow `ceiling` (= mode + 1), and correlate the two
# mean patterns trial by trial.
# Frames are collected in a list and concatenated once after the loop instead of
# growing `results` with pd.concat on every iteration (quadratic copying).
result_frames = []
for sub in todo_subs:
    
    subnum = re.findall(r'\d+', sub)[0]
    print('---{}---'.format(subnum))
    
    behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
    behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))
    
    org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
    behav_df_tmp = cleaning(org_behav_df)
    
    behav_df = cleaning2(behav_df_tmp)
    
    # attach the subject's per-route `mode`; rows without a match get NaN
    behav_df = behav_df.merge(postscan_summary.rename(columns={"route": "pair"}), on=['sub', 'pair'], how='left')
    behav_df['ceiling'] = behav_df['mode'] + 1
    behav_df['floor'] = behav_df['mode'] - 1
    
    fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))
    
    for roi_file_name, roi in rois_dict.items():
        print(roi_file_name)
        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
                
        #calculating rolling data (centred 3-row mean within each sub/round)
        rolling_df = fmri_df.groupby(['sub','round']).rolling(window = 3, min_periods = 2, center = True, method = 'table').mean()
        rolling_df = rolling_df.drop(columns= ['sub','round']).reset_index().drop(columns= 'level_2')
              
        df = behav_df.merge(rolling_df, on=['sub', 'round', 'TR'], how='left')
        same_df = df.loc[(df['segment']=='same')].reset_index(drop = True)
        # numeric_only=True: the string `segment` column cannot be averaged; older pandas
        # dropped it silently, pandas >= 2.0 raises without this flag
        same_avg_df = same_df.groupby(['sub','round','trial','pair','destination','valid']).mean(numeric_only=True).drop(columns=['within_trial_TR','TR','mode','ceiling','floor']).reset_index()
        
        # within-trial TRs ceiling+1 ... ceiling+6
        # NOTE(review): the window is 6 TRs long while the csv written for this analysis is
        # named '3s_post_MoI...' - confirm the intended window length.
        moi_df = df.loc[(df['within_trial_TR']<=df['ceiling']+6) & (df['within_trial_TR']>df['ceiling'])].reset_index(drop = True)
        moi_avg_df = moi_df.groupby(['sub','round','trial','pair','destination','valid']).mean(numeric_only=True).drop(columns=['within_trial_TR','TR','mode','ceiling','floor']).reset_index()
        
        # keep only the columns whose name parses as a number (the pattern columns)
        numeric_moi_df = moi_avg_df[moi_avg_df.columns[pd.to_numeric(moi_avg_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
        numeric_same_df = same_avg_df[same_avg_df.columns[pd.to_numeric(same_avg_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)

        result_df = moi_avg_df[['sub','round','trial','pair','destination','valid']].copy().reset_index(drop = True)
        # NOTE(review): rows are paired by position; this assumes moi_avg_df and same_avg_df
        # hold exactly the same trials in the same order - confirm.
        result_df['cor'] = numeric_moi_df.corrwith(numeric_same_df, axis = 1) 

        result_df['roi'] = roi

        result_frames.append(result_df)

results = pd.concat(result_frames).reset_index(drop = True) if result_frames else pd.DataFrame()

In [27]:
# Write whatever `results` currently holds; run this right after the loop cell above,
# because several later cells rebuild `results` for other analyses.
results.to_csv('/projects/kuhl_lab/wanjiag/MONSTERA/derivatives/scripts/R-analysis/csv_files/3s_post_MoI_within_trials_avg_then_correlations.csv', index=False)



## Correlate each timepoint with the MoI or destination separately

In [None]:
# For every subject, ROI and trial: correlate each 'same'-segment TR of the trial with
# the trial's mean pattern over the within-trial TRs floor..ceiling (mode - 1 .. mode + 1).
# Frames are collected in a list and concatenated once after the loop; the original
# pd.concat per trial copied the growing `results` frame on every iteration.
result_frames = []
for sub in todo_subs:
    subnum = re.findall(r'\d+', sub)[0]
    print('---{}---'.format(subnum))
    
    behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
    behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))
    
    org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
    behav_df_tmp = cleaning(org_behav_df)
    
    behav_df = cleaning2(behav_df_tmp)
    
    # attach the subject's per-route `mode`; rows without a match get NaN
    behav_df = behav_df.merge(postscan_summary.rename(columns={"route": "pair"}), on=['sub', 'pair'], how='left')
    behav_df['ceiling'] = behav_df['mode'] + 1
    behav_df['floor'] = behav_df['mode'] - 1
    
    fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))
    
    for roi_file_name, roi in rois_dict.items():
        print(roi_file_name)
        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
        
        #calculating rolling data (centred 3-row mean within each sub/round)
        rolling_df = fmri_df.groupby(['sub','round']).rolling(window = 3, min_periods = 2, center = True, method = 'table').mean()
        rolling_df = rolling_df.drop(columns= ['sub','round']).reset_index().drop(columns= 'level_2')

        df = behav_df.merge(rolling_df, on=['sub', 'round', 'TR'], how='left')
        same_df = df.loc[(df['segment']=='same')].reset_index(drop = True)
        
        # within-trial TRs floor ... ceiling (inclusive)
        moi_df = df.loc[(df['within_trial_TR']<=df['ceiling']) & (df['within_trial_TR']>=df['floor'])].reset_index(drop = True)
        # numeric_only=True: the string `segment` column cannot be averaged; older pandas
        # dropped it silently, pandas >= 2.0 raises without this flag
        moi_avg_df = moi_df.groupby(['sub','round','trial','pair','destination','valid']).mean(numeric_only=True).drop(columns=['within_trial_TR','TR','mode','ceiling','floor']).reset_index()

        for row in range(moi_avg_df.shape[0]):
            curr_trial_moi = moi_avg_df.iloc[row]
            # 'same'-segment rows belonging to the same sub / round / trial
            curr_trial = same_df.loc[(same_df['sub']==curr_trial_moi['sub']) & (same_df['trial']==curr_trial_moi['trial']) & (same_df['round']==curr_trial_moi['round'])]

            # keep only the columns whose name parses as a number (the pattern columns)
            numeric_df = curr_trial[curr_trial.columns[pd.to_numeric(curr_trial.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
            curr_trial_moi = moi_avg_df.iloc[[row],:]
            numeric_moi_df = curr_trial_moi[curr_trial_moi.columns[pd.to_numeric(curr_trial_moi.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)

            result_df = curr_trial[['sub','round','trial','pair','destination','valid','within_trial_TR']].copy().reset_index(drop = True)
            # each TR row is correlated with the single averaged pattern of this trial
            result_df['cor'] = numeric_df.corrwith(numeric_moi_df.iloc[0], axis = 1) 

            result_df['roi'] = roi

            result_frames.append(result_df)

results = pd.concat(result_frames).reset_index(drop = True) if result_frames else pd.DataFrame()

In [29]:
# Write whatever `results` currently holds; run this right after the loop cell above,
# because other cells rebuild `results` for different analyses.
results.to_csv('/projects/kuhl_lab/wanjiag/MONSTERA/derivatives/scripts/R-analysis/csv_files/MoI_within_trials_correlations.csv', index=False)

In [None]:
# For every subject, ROI and trial: correlate each 'same'-segment TR of the trial with
# the trial's destination template (mean pattern over the destination picture's TR and
# the following TR).
# Frames are collected in a list and concatenated once after the loop; the original
# pd.concat per trial copied the growing `results` frame on every iteration.
result_frames = []
for sub in todo_subs:
    subnum = re.findall(r'\d+', sub)[0]
    print('---{}---'.format(subnum))
    
    behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
    behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))
    
    org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
    behav_df_tmp = cleaning(org_behav_df)
    
    # rows whose picture label contains "destination", duplicated with TR + 1 so that
    # each destination covers two consecutive TRs
    dest_behav_df = behav_df_tmp[behav_df_tmp["npic"].str.contains("destination")]
    dest_behav_df = dest_behav_df.drop(columns=['onset', 'design_onset', 'design_end', 'n_pic', 'npic', 'condition', 'n_int', 'catch', 'segment'])
    extra = dest_behav_df.copy()
    extra['TR'] = extra['TR']+1
    dest_behav_df = pd.concat([dest_behav_df, extra])
    
    behav_df = cleaning2(behav_df_tmp)
    
    fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))
    
    for roi_file_name, roi in rois_dict.items():
        print(roi_file_name)
        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
        
        #calculating rolling data (centred 3-row mean within each sub/round)
        rolling_df = fmri_df.groupby(['sub','round']).rolling(window = 3, min_periods = 2, center = True, method = 'table').mean()
        rolling_df = rolling_df.drop(columns= ['sub','round']).reset_index().drop(columns= 'level_2')
        
        # problem with rolling function, so calculating the last sec manually
        # NOTE(review): TR values 450 and 451 are hard-coded - presumably the last two
        # volumes of every run; confirm against the run length.
        last_two_sec = fmri_df.loc[(fmri_df['TR']==450) | (fmri_df['TR']==451)]
        last_two_sec_avg = last_two_sec.groupby(['sub','round']).mean().reset_index()
        last_two_sec_avg['TR'] = 451
        
        # Combine together to get fmri data (rolling rows containing NaN are discarded)
        fmri_df = pd.concat([rolling_df.dropna(), last_two_sec_avg]).reset_index(drop = True)
        
        # template for each destination for each trial
        # numeric_only=True keeps the mean working on pandas >= 2.0 if a non-numeric column is present
        dest_df = dest_behav_df.merge(fmri_df, on=['sub', 'round', 'TR'], how='left').groupby(['sub', 'round', 'trial', 'pair', 'destination', 'valid']).mean(numeric_only=True).reset_index().drop(columns = 'TR')
        
        df = behav_df.merge(fmri_df, on=['sub', 'round', 'TR'], how='left')
        same_df = df.loc[(df['segment']=='same')].reset_index().drop(columns = 'TR')
        
        for row in range(dest_df.shape[0]):
            curr_trial_dest = dest_df.iloc[row]
            # 'same'-segment rows belonging to the same sub / round / trial
            curr_trial = same_df.loc[(same_df['sub']==curr_trial_dest['sub']) & (same_df['trial']==curr_trial_dest['trial']) & (same_df['round']==curr_trial_dest['round'])]
        
            # keep only the columns whose name parses as a number (the pattern columns)
            numeric_df = curr_trial[curr_trial.columns[pd.to_numeric(curr_trial.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
            curr_trial_dest = dest_df.iloc[[row],:]
            numeric_dest_df = curr_trial_dest[curr_trial_dest.columns[pd.to_numeric(curr_trial_dest.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
        
            result_df = curr_trial[['sub','round','trial','pair','destination','valid','within_trial_TR']].copy().reset_index(drop = True)
            # each TR row is correlated with the single destination template of this trial
            result_df['cor'] = numeric_df.corrwith(numeric_dest_df.iloc[0], axis = 1) 
            
            result_df['roi'] = roi
            
            result_frames.append(result_df)

results = pd.concat(result_frames).reset_index(drop = True) if result_frames else pd.DataFrame()

In [21]:
# Write whatever `results` currently holds; run this right after the loop cell above,
# because other cells rebuild `results` for different analyses.
results.to_csv('/projects/kuhl_lab/wanjiag/MONSTERA/derivatives/scripts/R-analysis/csv_files/destinations_within_trials_correlations.csv', index=False)

## Average within the same segment, then correlate with the MoI or destination

In [None]:
# For every subject and ROI: average the 'same'-segment TRs of each trial, build the
# trial's destination template (mean over the destination picture's TR and the next TR)
# and correlate the two mean patterns trial by trial.
# Frames are collected in a list and concatenated once after the loop instead of
# growing `results` with pd.concat on every iteration.
result_frames = []
for sub in todo_subs:
    
    subnum = re.findall(r'\d+', sub)[0]
    print('---{}---'.format(subnum))
    
    behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
    behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))
    
    org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
    behav_df_tmp = cleaning(org_behav_df)
    
    # rows whose picture label contains "destination", duplicated with TR + 1 so that
    # each destination covers two consecutive TRs
    dest_behav_df = behav_df_tmp[behav_df_tmp["npic"].str.contains("destination")]
    dest_behav_df = dest_behav_df.drop(columns=['onset', 'design_onset', 'design_end', 'n_pic', 'npic', 'condition', 'n_int', 'catch', 'segment'])
    extra = dest_behav_df.copy()
    extra['TR'] = extra['TR']+1
    dest_behav_df = pd.concat([dest_behav_df, extra])
    
    behav_df = cleaning2(behav_df_tmp)
    
    fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))
    
    for roi_file_name, roi in rois_dict.items():
        print(roi_file_name)
        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
                
        #calculating rolling data (centred 3-row mean within each sub/round)
        rolling_df = fmri_df.groupby(['sub','round']).rolling(window = 3, min_periods = 2, center = True, method = 'table').mean()
        rolling_df = rolling_df.drop(columns= ['sub','round']).reset_index().drop(columns= 'level_2')
        
        # problem with rolling function, so calculating the last sec manually
        # NOTE(review): TR values 450 and 451 are hard-coded - presumably the last two
        # volumes of every run; confirm against the run length.
        last_two_sec = fmri_df.loc[(fmri_df['TR']==450) | (fmri_df['TR']==451)]
        last_two_sec_avg = last_two_sec.groupby(['sub','round']).mean().reset_index()
        last_two_sec_avg['TR'] = 451
        
        # Combine together to get fmri data (rolling rows containing NaN are discarded)
        fmri_df = pd.concat([rolling_df.dropna(), last_two_sec_avg]).reset_index(drop = True)
        
        # same segment
        df = behav_df.merge(fmri_df, on=['sub', 'round', 'TR'], how='left')
        same_df = df.loc[(df['segment']=='same')].reset_index(drop = True).drop(columns = 'TR')
        # numeric_only=True: the string `segment` column cannot be averaged; older pandas
        # dropped it silently, pandas >= 2.0 raises without this flag
        same_avg_df = same_df.groupby(['sub','round','trial','pair','destination','valid']).mean(numeric_only=True).reset_index().drop(columns = 'within_trial_TR')
               
        # each destination for each trial
        dest_df = dest_behav_df.merge(fmri_df, on=['sub', 'round', 'TR'], how='left')
        '''
        last_sec_trial = df.loc[df['within_trial_TR'] ==24]
        
        if(subnum == '47'):
            print('fixing problematic trial')
            problem_trial = df.loc[(df['round']==1) & (df['trial']==1)].loc[df['within_trial_TR']==23]
            print(problem_trial)
            last_sec_trial = pd.concat([last_sec_trial, problem_trial])
        if(last_sec_trial.shape[0] != 120):
            print('===============subject with extra within_trial_TR===============')
        dest_avg_df = pd.concat([dest_df, last_sec_trial]).groupby(['sub','round','trial','pair','destination','valid']).mean().reset_index().drop(columns = 'TR')
        '''
        dest_avg_df = dest_df.groupby(['sub','round','trial','pair','destination','valid']).mean(numeric_only=True).reset_index().drop(columns = 'TR')

        
        # keep only the columns whose name parses as a number (the pattern columns)
        numeric_dest_df = dest_avg_df[dest_avg_df.columns[pd.to_numeric(dest_avg_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
        numeric_same_df = same_avg_df[same_avg_df.columns[pd.to_numeric(same_avg_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)

        result_df = dest_avg_df[['sub','round','trial','pair','destination','valid']].copy().reset_index(drop = True)
        # NOTE(review): rows are paired by position; this assumes dest_avg_df and same_avg_df
        # hold exactly the same trials in the same order - confirm.
        result_df['cor'] = numeric_dest_df.corrwith(numeric_same_df, axis = 1) 

        result_df['roi'] = roi

        result_frames.append(result_df)

results = pd.concat(result_frames).reset_index(drop = True) if result_frames else pd.DataFrame()

In [12]:
# Write whatever `results` currently holds; run this right after the loop cell above,
# because other cells rebuild `results` for different analyses.
results.to_csv('/projects/kuhl_lab/wanjiag/MONSTERA/derivatives/scripts/R-analysis/csv_files/destinations_within_trials_avg_then_correlations.csv', index=False)

In [None]:
# For every subject and ROI: average the 'same'-segment TRs of each trial, average the
# within-trial TRs floor..ceiling (mode - 1 .. mode + 1), and correlate the two mean
# patterns trial by trial.
# Frames are collected in a list and concatenated once after the loop instead of
# growing `results` with pd.concat on every iteration (quadratic copying).
result_frames = []
for sub in todo_subs:
    
    subnum = re.findall(r'\d+', sub)[0]
    print('---{}---'.format(subnum))
    
    behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
    behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))
    
    org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
    behav_df_tmp = cleaning(org_behav_df)
    
    behav_df = cleaning2(behav_df_tmp)
    
    # attach the subject's per-route `mode`; rows without a match get NaN
    behav_df = behav_df.merge(postscan_summary.rename(columns={"route": "pair"}), on=['sub', 'pair'], how='left')
    behav_df['ceiling'] = behav_df['mode'] + 1
    behav_df['floor'] = behav_df['mode'] - 1
    
    fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))
    
    for roi_file_name, roi in rois_dict.items():
        print(roi_file_name)
        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
                
        #calculating rolling data (centred 3-row mean within each sub/round)
        rolling_df = fmri_df.groupby(['sub','round']).rolling(window = 3, min_periods = 2, center = True, method = 'table').mean()
        rolling_df = rolling_df.drop(columns= ['sub','round']).reset_index().drop(columns= 'level_2')
              
        df = behav_df.merge(rolling_df, on=['sub', 'round', 'TR'], how='left')
        same_df = df.loc[(df['segment']=='same')].reset_index(drop = True)
        # numeric_only=True: the string `segment` column cannot be averaged; older pandas
        # dropped it silently, pandas >= 2.0 raises without this flag
        same_avg_df = same_df.groupby(['sub','round','trial','pair','destination','valid']).mean(numeric_only=True).drop(columns=['within_trial_TR','TR','mode','ceiling','floor']).reset_index()
        
        # within-trial TRs floor ... ceiling (inclusive)
        moi_df = df.loc[(df['within_trial_TR']<=df['ceiling']) & (df['within_trial_TR']>=df['floor'])].reset_index(drop = True)
        moi_avg_df = moi_df.groupby(['sub','round','trial','pair','destination','valid']).mean(numeric_only=True).drop(columns=['within_trial_TR','TR','mode','ceiling','floor']).reset_index()
        
        # keep only the columns whose name parses as a number (the pattern columns)
        numeric_moi_df = moi_avg_df[moi_avg_df.columns[pd.to_numeric(moi_avg_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
        numeric_same_df = same_avg_df[same_avg_df.columns[pd.to_numeric(same_avg_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)

        result_df = moi_avg_df[['sub','round','trial','pair','destination','valid']].copy().reset_index(drop = True)
        # NOTE(review): rows are paired by position; this assumes moi_avg_df and same_avg_df
        # hold exactly the same trials in the same order - confirm.
        result_df['cor'] = numeric_moi_df.corrwith(numeric_same_df, axis = 1) 

        result_df['roi'] = roi

        result_frames.append(result_df)

results = pd.concat(result_frames).reset_index(drop = True) if result_frames else pd.DataFrame()

In [16]:
# Write whatever `results` currently holds; run this right after the loop cell above,
# because other cells rebuild `results` for different analyses.
results.to_csv('/projects/kuhl_lab/wanjiag/MONSTERA/derivatives/scripts/R-analysis/csv_files/MoI_within_trials_avg_then_correlations.csv', index=False)