In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
from glob import glob 
from os.path import join as opj
import os

In [2]:
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import LeaveOneGroupOut 
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import scipy.stats as stats 

In [3]:
def cleaning(df):
    '''
    clearning up files for conditions
    '''
    
    df['n_pic'] = df['npic'].str.split('_', expand=True)[[0]]
    df['TR'] = df['onset'].apply(np.floor).astype('int')
    
    tmp = df['condition'].str.split('/', expand=True)
    
    df['pair'] = tmp[[0]].squeeze().str.extract('(\w+)')
    
    tmp1 = tmp[[1]].squeeze().str.split(',', expand=True)
    df['destination'] = tmp1[[0]].squeeze().str.extract('(\w+)')
    df['valid'] = pd.to_numeric(tmp1[[1]].squeeze(), errors='coerce').apply(lambda x: {0: True, 1: False}.get(x, None))
    df['catch'] = tmp1[[3]].squeeze().notnull()
    
    def segment(x):
        if x <= 25:
            return 'same'
        elif x <= 75:
            return 'similar'
        elif x <= 100:
            return 'different'
        else:
            return None

    df['n_int'] = pd.to_numeric(df['npic'], errors='coerce')
    df['segment'] = df['n_int'].apply(segment)
    
    return df

def cleaning2(df):
    '''
    remove duplicated lines for multiple pictures
    only save one line per second
    '''
    
    df = df.loc[df['catch'] == False]
    df = df.loc[df['segment'].notnull()]
    df = df.drop(columns=['onset', 'design_onset', 'design_end', 'n_pic', 'npic', 'condition', 'n_int', 'catch'])
    
    df = df.drop_duplicates()
    df['within_trial_TR'] = df.groupby(['sub','round','trial'])['TR'].rank(method = 'dense').astype('int')
    
    df['round'] = df['round'].astype('int')
    df['trial'] = df['trial'].astype('int')
    
    if subnum == 47:
        df = df.loc[df['within_trial_TR']!=25]
        
        problem_trial = df.loc[(df['round']==1)&(df['trial']==1)]
        added_sec = pd.DataFrame([[47,1,1,50,'pair2_north','pole','False','similar', 'na']], columns = problem_trial.columns)
        problem_trial = pd.concat([problem_trial, added_sec])
        problem_trial['within_trial_TR'] = problem_trial.groupby(['sub','round','trial'])['TR'].rank(method = 'dense').astype('int')
        problem_trial = problem_trial.sort_values(by=['within_trial_TR'])
        
        print(df.shape)
        df = df.loc[(df['round']!=1)|(df['trial']!=1)]
        print(df.shape)
        df = pd.concat([df, problem_trial])

    return df

def cleaning3(fmri_df):
    '''
    quick cleaning fMRI dataframe
    '''
    fmri_df.rename(columns={'Unnamed: 0':'TR'}, inplace=True)
    fmri_df['round'] = fmri_df['run'].squeeze().str.extract('(\d+)').astype('int')
    fmri_df['sub'] = fmri_df['sub'].squeeze().str.extract('(\d+)').astype('int')
    fmri_df = fmri_df.drop(columns=['run', 'roi'])
    return fmri_df



In [169]:
rois_dict = {
    'ca1-body_thre_0.5_masked':'ca1-body',
    'ca23dg-body_thre_0.5_masked':'ca23dg-body',
    'ppa_mni_2_epi_thre_0.5_masked':'ppa',
    'evc_2_epi_thre_0.5_masked':'evc'
}

'''
'lpc_2_epi_thre_0.5_masked':'lpc'
'pfc_2_epi_thre_0.5_masked':'pfc',
'ca23dg_thre_0.5_masked':'ca23dg',

'ofc_orbital_2_epi_thre_0.5_masked':'ofc',
'rsc_cingul-Post-dorsal_2_epi_thre_0.5_masked': 'rsc',



'''

behav_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/behavior/"
fMRI_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/fMRI/"
all_subs = os.listdir(fMRI_dir)

bads = ['01', '02', '03', '04', '05', '13', '14', '20', '23', '24', '27', '30', '34']
for bad in bads:
    all_subs = [x for x in all_subs if bad not in x ]

all_subs.sort()
subnums = [x[-2:] for x in all_subs]

In [170]:
len(subnums)

40

In [171]:
postscan_summary = pd.read_csv('/projects/kuhl_lab/wanjiag/MONSTERA/derivatives/scripts/R-analysis/csv_files/postscan_summary.csv')
postscan_summary = postscan_summary.drop(columns = ['m','max','min','median','range','n'])

In [172]:
predictions = []
info = []
for subnum in subnums:
    
    behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
    behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))
    
    org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
    behav_df_tmp = cleaning(org_behav_df)
    
    dest_behav_df = behav_df_tmp[behav_df_tmp["npic"].str.contains("destination")]
    dest_behav_df = dest_behav_df.drop(columns=['onset', 'design_onset', 'design_end', 'n_pic', 'npic', 'condition', 'n_int', 'catch', 'segment'])
    extra = dest_behav_df.copy()
    extra['TR'] = extra['TR']+1
    dest_behav_df = pd.concat([dest_behav_df, extra])
    
    fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))
    
    for roi_file_name, roi in rois_dict.items():
        print(roi_file_name)
        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
                
        #calculating rolling data
        rolling_df = fmri_df.groupby(['sub','round']).rolling(window = 3, min_periods = 2, center = True, method = 'table').mean()
        rolling_df = rolling_df.drop(columns= ['sub','round']).reset_index().drop(columns= 'level_2')
              
        # problem with rolling function, so calculating the last sec manually
        last_two_sec = fmri_df.loc[(fmri_df['TR']==450) | (fmri_df['TR']==451)]
        last_two_sec_avg = last_two_sec.groupby(['sub','round']).mean().reset_index()
        last_two_sec_avg['TR'] = 451
        
        # Combine together to get fmri data
        fmri_df = pd.concat([rolling_df.dropna(), last_two_sec_avg]).reset_index(drop = True)
        
        # avg for each destination for each trial
        dest_df = dest_behav_df.merge(fmri_df, on=['sub', 'round', 'TR'], how='left').groupby(['sub', 'round', 'trial', 'pair', 'destination', 'valid']).mean().reset_index().drop(columns = 'TR') 
            
        # no round 2 for sub29
        if subnum == '29':
            dest_df = dest_df.loc[dest_df['round'] != 2]
            
        print(len(pd.to_numeric(dest_df.columns, errors='coerce').to_series().notnull()))
        
        for pair in dest_df.pair.unique():
            
            curr_df = dest_df.loc[dest_df.pair == pair].reset_index(drop = True)
        
            X = curr_df[curr_df.columns[pd.to_numeric(curr_df.columns, errors='coerce').to_series().notnull()]]
            #numeric_same_df = same_avg_df[same_avg_df.columns[pd.to_numeric(same_avg_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
            y = curr_df.destination
            groups = curr_df['round']

            scalar = StandardScaler()
            lr = LogisticRegression(penalty = 'l2', dual = True, solver = 'liblinear', max_iter=1000, C=0.001, random_state = 315)
            #svc = LinearSVC(penalty = 'l2', C=0.001, random_state = 315)
            pipeline = Pipeline([('transformer', scalar), ('estimator', lr)])

            logo = LeaveOneGroupOut()
            results = cross_val_score(pipeline, X, y, cv = logo, groups = groups, scoring = 'accuracy')
            
            info.append([subnum, roi])
            predictions.append(results)
            



96




ca23dg-body_thre_0.5_masked
107
ppa_mni_2_epi_thre_0.5_masked




422
evc_2_epi_thre_0.5_masked




1112
ca1-body_thre_0.5_masked




147
ca23dg-body_thre_0.5_masked




84
ppa_mni_2_epi_thre_0.5_masked




409




evc_2_epi_thre_0.5_masked
726


In [173]:
len(predictions)

320

In [174]:
df_pred = pd.DataFrame(predictions)
df_pred['mean'] = df_pred.mean(axis=1)

In [175]:
df_info = pd.DataFrame(info, columns = ['sub','roi'])

In [176]:
df_pred['sub'] = df_info['sub']
df_pred['roi'] = df_info['roi']

In [177]:
df_pred_sub = df_pred.groupby(['sub','roi'])['mean'].mean().reset_index()

In [178]:
df_pred_sub.groupby(['roi']).mean()

Unnamed: 0_level_0,mean
roi,Unnamed: 1_level_1
ca1-body,0.521667
ca23dg-body,0.486667
evc,0.727639
ppa,0.513611


In [179]:
t_statistic, p_value = stats.ttest_1samp(a=df_pred_sub.loc[df_pred_sub['roi']=='ca1-body']['mean'], popmean=0.5) 
print(t_statistic , p_value)

2.531474213138302 0.015500763884311436


In [180]:
t_statistic, p_value = stats.ttest_1samp(a=df_pred_sub.loc[df_pred_sub['roi']=='ca23dg-body']['mean'], popmean=0.5) 
print(t_statistic , p_value)

-1.6126615453042397 0.11488208966508487


In [181]:
t_statistic, p_value = stats.ttest_1samp(a=df_pred_sub.loc[df_pred_sub['roi']=='ppa']['mean'], popmean=0.5) 
print(t_statistic , p_value)

1.349742890964635 0.18488131077136627


In [182]:
t_statistic, p_value = stats.ttest_1samp(a=df_pred_sub.loc[df_pred_sub['roi']=='evc']['mean'], popmean=0.5) 
print(t_statistic , p_value)

14.38464487612447 3.503428358293055e-17


In [None]:
predictions = []
info = []
for subnum in subnums:
    
    behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
    behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))
    
    org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
    behav_df_tmp = cleaning(org_behav_df)
    
    behav_df = cleaning2(behav_df_tmp)
    
    behav_df = behav_df.merge(postscan_summary.rename(columns={"route": "pair"}), on=['sub', 'pair'], how='left')
    behav_df['ceiling'] = behav_df['mode'] + 1
    behav_df['floor'] = behav_df['mode'] - 1
    
    fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))
    
    for roi_file_name, roi in rois_dict.items():
        print(roi_file_name)
        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
                
        #calculating rolling data
        rolling_df = fmri_df.groupby(['sub','round']).rolling(window = 3, min_periods = 2, center = True, method = 'table').mean()
        rolling_df = rolling_df.drop(columns= ['sub','round']).reset_index().drop(columns= 'level_2')
              
        df = behav_df.merge(rolling_df, on=['sub', 'round', 'TR'], how='left')
        #same_df = df.loc[(df['segment']=='same')].reset_index(drop = True)
        #same_avg_df = same_df.groupby(['sub','round','trial','pair','destination','valid']).mean().drop(columns=['within_trial_TR','TR','mode','ceiling','floor']).reset_index()
        
        moi_df = df.loc[(df['within_trial_TR']<=df['ceiling']) & (df['within_trial_TR']>=df['floor'])].reset_index(drop = True)
        moi_avg_df = moi_df.groupby(['sub','round','trial','pair','destination','valid']).mean().drop(columns=['within_trial_TR','TR','mode','ceiling','floor']).reset_index()
        
        # no round 2 for sub29
        if subnum == '29':
            moi_avg_df = moi_avg_df.loc[moi_avg_df['round'] != 2]
        
        print(len(pd.to_numeric(moi_avg_df.columns, errors='coerce').to_series().notnull()))
        
        for pair in moi_avg_df.pair.unique():
            
            curr_df = moi_avg_df.loc[moi_avg_df.pair == pair].reset_index(drop = True)
        
            X = curr_df[curr_df.columns[pd.to_numeric(curr_df.columns, errors='coerce').to_series().notnull()]]
            #numeric_same_df = same_avg_df[same_avg_df.columns[pd.to_numeric(same_avg_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
            y = curr_df.destination
            groups = curr_df['round']

            scalar = StandardScaler()
            lr = LogisticRegression(penalty = 'l2', dual = True, solver = 'liblinear', max_iter=1000, C=0.001, random_state = 315)
            #svc = LinearSVC(penalty = 'l2', C=0.001, random_state = 315)
            pipeline = Pipeline([('transformer', scalar), ('estimator', lr)])

            logo = LeaveOneGroupOut()
            results = cross_val_score(pipeline, X, y, cv = logo, groups = groups, scoring = 'accuracy')
            print(results.mean())
            predictions.append(results)
            info.append([subnum, roi])

In [151]:
df_pred = pd.DataFrame(predictions)
df_pred['mean'] = df_pred.mean(axis=1)

In [152]:
df_info = pd.DataFrame(info, columns = ['sub','roi'])
df_pred['sub'] = df_info['sub']
df_pred['roi'] = df_info['roi']

In [153]:
df_pred_sub = df_pred.groupby(['sub','roi'])['mean'].mean().reset_index()

In [154]:
df_pred_sub.groupby(['roi']).mean()

Unnamed: 0_level_0,mean
roi,Unnamed: 1_level_1
ca1-body,0.509074
ca23dg-body,0.512847


In [155]:
t_statistic, p_value = stats.ttest_1samp(a=df_pred_sub.loc[df_pred_sub['roi']=='ca1-body']['mean'], popmean=0.5) 
print(t_statistic , p_value)

1.0532249619075826 0.2987197745476368


In [156]:
t_statistic, p_value = stats.ttest_1samp(a=df_pred_sub.loc[df_pred_sub['roi']=='ca23dg-body']['mean'], popmean=0.5) 
print(t_statistic , p_value)

1.4358618384797643 0.15901456386478183


## Average all same segment

In [None]:
predictions = []
info = []

for roi_file_name, roi in rois_dict.items():
    print(roi_file_name)
    for subnum in subnums:
        print('---{}---'.format(subnum))

        behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
        behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))

        org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
        behav_df_tmp = cleaning(org_behav_df)
        behav_df = cleaning2(behav_df_tmp)
        
        behav_df = behav_df.merge(postscan_summary.rename(columns={"route": "pair"}), on=['sub', 'pair'], how='left')
        behav_df['ceiling'] = behav_df['mode'] + 1
        behav_df['floor'] = behav_df['mode'] - 1

        fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))

        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
        
        #calculating rolling data
        rolling_df = fmri_df.groupby(['sub','round']).rolling(window = 3, min_periods = 2, center = True, method = 'table').mean()
        rolling_df = rolling_df.drop(columns= ['sub','round']).reset_index().drop(columns= 'level_2')
        
        # calculating no rolling data
        df = behav_df.merge(rolling_df, on=['sub', 'round', 'TR'], how='left')
        
        same_df = df.loc[df.segment == 'same'].drop(columns = ['TR','within_trial_TR'])
        same_df = same_df.groupby(['sub','round','trial','pair','destination','valid']).mean().reset_index()
        
        # no round 2 for sub29
        if subnum == '29':
            same_df = same_df.loc[same_df['round'] != 2]
        
        for pair in same_df.pair.unique():
            curr_df = same_df.loc[same_df.pair == pair]
            curr_destination = curr_df.destination.unique()[0]
            curr_df['cue_destination'] = np.where(curr_df.valid, curr_df.destination == curr_destination, curr_df.destination != curr_destination).astype(int)
            
            print(len(pd.to_numeric(curr_df.columns, errors='coerce').to_series().notnull()))
            X = curr_df[curr_df.columns[pd.to_numeric(curr_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
            y = curr_df.cue_destination.reset_index(drop = True)
            groups = curr_df['round'].reset_index(drop = True)

            scalar = StandardScaler()
            #lr = LogisticRegression(penalty = 'l2', dual = True, solver = 'liblinear', max_iter=1000, C=0.001, random_state = 315)
            svc = LinearSVC(penalty = 'l2', C=0.001, random_state = 315)
            pipeline = Pipeline([('transformer', scalar), ('estimator', svc)])
            
            logo = LeaveOneGroupOut()
            results = cross_val_score(pipeline, X, y, cv = logo, groups = groups, scoring = 'accuracy')
            print(results)
            predictions.append(results)
            info.append([subnum, roi])

In [191]:
df_pred = pd.DataFrame(predictions)

In [192]:
df_pred['mean'] = df_pred.mean(axis=1)

In [194]:
df_info = pd.DataFrame(info, columns = ['sub','roi'])
df_pred['sub'] = df_info['sub']
df_pred['roi'] = df_info['roi']

In [197]:
df_pred_sub = df_pred.groupby(['sub','roi'])['mean'].mean().reset_index()
df_pred_sub.groupby(['roi']).mean()

Unnamed: 0_level_0,mean
roi,Unnamed: 1_level_1
ca1-body,0.496944
ca23dg-body,0.508657
evc,0.515833
ppa,0.500648


In [198]:
t_statistic, p_value = stats.ttest_1samp(a=df_pred_sub.loc[df_pred_sub['roi']=='ca1-body']['mean'], popmean=0.5) 
print(t_statistic , p_value)

-0.37479529448772514 0.709843151050119


## Second by second

In [13]:
predictions = []

for roi_file_name, roi in rois_dict.items():
    print(roi_file_name)
    for subnum in subnums:
        print('---{}---'.format(subnum))

        behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
        behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))

        org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
        behav_df_tmp = cleaning(org_behav_df)
        behav_df = cleaning2(behav_df_tmp)

        fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))

        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
        
        # calculating no rolling data
        df = behav_df.merge(fmri_df, on=['sub', 'round', 'TR'], how='left')
        
        for i in range(6,7):
            sec_df = df.loc[df.within_trial_TR == i].drop(columns = ['segment','TR','within_trial_TR'])
            # no round 2 for sub29
            if subnum == '29':
                sec_df = sec_df.loc[sec_df['round'] != 2]
                
            for pair in sec_df.pair.unique():
                curr_df = sec_df.loc[sec_df.pair == pair]
                curr_destination = curr_df.destination.unique()[0]
                #curr_df['cue_destination'] = np.where(curr_df.destination == curr_destination, 1, 0)
                curr_df['cue_destination'] = np.where(curr_df.valid, curr_df.destination == curr_destination, curr_df.destination != curr_destination).astype(int)
                if i == 7:
                    print(len(pd.to_numeric(curr_df.columns, errors='coerce').to_series().notnull()))
                X = curr_df[curr_df.columns[pd.to_numeric(curr_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
                y = curr_df.cue_destination.reset_index(drop = True)
                groups = curr_df['round'].reset_index(drop = True)

                scalar = StandardScaler()
                lr = LogisticRegression(penalty = 'l2', dual = True, solver = 'liblinear', max_iter=1000, C=0.001, random_state = 315)
                svc = LinearSVC(penalty = 'l2', C=0.001, random_state = 315)
                pipeline = Pipeline([('transformer', scalar), ('estimator', svc)])

                logo = LeaveOneGroupOut()
                results = cross_val_score(pipeline, X, y, cv = logo, groups = groups, scoring = 'accuracy')
                
                info = list(results)
                
                predictions.append([subnum, i, pair] + info)
    break

lpc_2_epi_thre_0.5_masked
---06---
---07---
---08---
---09---
---10---
---11---
---12---
---15---
---16---
---17---
---18---
---19---
---21---
---22---
---25---
---26---
---28---
---29---
---31---
---32---
---33---
---35---
---36---
---37---
---38---
---39---
---40---
---41---
---42---
---43---
---44---
---45---
---46---
---47---
---48---
---49---
---50---
---51---
---52---
---53---


In [14]:
predictions_df = pd.DataFrame(predictions, columns=['subnum', 'within_trial_TR', 'pair', 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

In [15]:
predictions_df['mean'] = predictions_df[predictions_df.columns[pd.to_numeric(predictions_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True).mean(axis=1)

In [16]:
predictions_df

Unnamed: 0,subnum,within_trial_TR,pair,1,2,3,4,5,6,7,8,9,10,mean
0,06,6,pair3_west,0.625,0.375,0.625,0.375,0.750,0.500,0.750,0.750,0.375,0.750,0.5875
1,06,6,pair1_east,0.375,0.875,0.500,0.500,0.875,0.500,0.625,0.375,0.625,0.375,0.5625
2,07,6,pair4_south,0.625,0.625,0.500,0.625,0.250,0.750,0.500,0.500,0.625,0.625,0.5625
3,07,6,pair2_north,0.375,0.500,0.500,0.500,0.500,0.500,0.375,0.375,0.625,0.500,0.4750
4,08,6,pair3_west,0.625,0.375,0.750,0.500,0.375,0.625,0.500,0.125,0.500,0.500,0.4875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,51,6,pair4_south,0.250,0.625,0.500,0.750,0.875,0.500,0.625,0.500,0.375,0.625,0.5625
76,52,6,pair3_west,0.750,0.375,0.500,0.625,0.375,0.625,0.625,0.000,0.875,0.750,0.5500
77,52,6,pair1_east,0.500,0.375,0.500,0.500,0.375,0.500,0.500,0.375,0.625,0.500,0.4750
78,53,6,pair4_south,0.625,0.500,0.625,0.500,0.250,0.125,0.250,0.625,0.750,0.500,0.4750


In [17]:
predictions_df.groupby(['within_trial_TR'])['mean'].mean()


within_trial_TR
6    0.507535
Name: mean, dtype: float64

In [10]:
predictions_df.groupby(['within_trial_TR'])['mean'].mean()



within_trial_TR
1    0.499774
2    0.505434
3    0.493889
4    0.502274
5    0.510451
Name: mean, dtype: float64

In [12]:
all_prediction_df = predictions_df




In [1]:
all_prediction_df

NameError: name 'all_prediction_df' is not defined

In [3]:
(0.499774+0.505434+0.493889+0.502274+0.510451+0.507535)/6

0.5032261666666668

## Decoding between pairs

In [None]:
predictions = []
for subnum in subnums:
    print('---{}---'.format(subnum))
    
    behav_file_dir = opj(behav_dir, 'sub{}'.format(subnum))
    behav_files = glob(opj(behav_file_dir, 'sub*_scan*_timing_*'))
    
    org_behav_df = pd.concat((pd.read_csv(f) for f in behav_files), ignore_index=True)
    behav_df_tmp = cleaning(org_behav_df)
    behav_df = cleaning2(behav_df_tmp)
    
    fmri_file_dir = opj(fMRI_dir, 'sub-MONSTERA{}'.format(subnum))
    for roi_file_name, roi in rois_dict.items():
        print(roi_file_name)
        fmri_files = glob(opj(fmri_file_dir, '{}*'.format(roi_file_name)))
        fmri_files.sort()
        
        fmri_df = pd.concat((pd.read_csv(f) for f in fmri_files), ignore_index=True)
        fmri_df = cleaning3(fmri_df)
        
        # calculating no rolling data
        df = behav_df.merge(fmri_df, on=['sub', 'round', 'TR'], how='left')
        
        same_df = df.loc[df.within_trial_TR == 3].drop(columns = ['TR','within_trial_TR', 'segment'])

        # no round 2 for sub29
        if subnum == '29':
            same_df = same_df.loc[same_df['round'] != 2]

        curr_pair = same_df.pair.unique()[0]
        same_df['cue_pair'] = np.where(same_df.pair == curr_pair, 0, 1)

        print(len(pd.to_numeric(same_df.columns, errors='coerce').to_series().notnull()))
        X = same_df[same_df.columns[pd.to_numeric(same_df.columns, errors='coerce').to_series().notnull()]].reset_index(drop = True)
        y = same_df.cue_pair.reset_index(drop = True)
        groups = same_df['round'].reset_index(drop = True)
        
        scalar = StandardScaler()
        lr = LogisticRegression(penalty = 'l2', max_iter=1000, C=200.0, random_state = 315)
        svc = LinearSVC(penalty = 'l2', C=0.001, random_state = 315)
        pipeline = Pipeline([('transformer', scalar), ('estimator', svc)])

        logo = LeaveOneGroupOut()
        results = cross_val_score(pipeline, X, y, cv = logo, groups = groups, scoring = 'accuracy')
        print(results)
        predictions.append(results)
        break

In [24]:
df_pred = pd.DataFrame(predictions)
df_pred['mean'] = df_pred.mean(axis=1)

In [25]:
df_pred['mean'].mean()

0.5542013888888888

In [26]:
t_statistic, p_value = stats.ttest_1samp(a=df_pred['mean'], popmean=0.5) 
print(t_statistic , p_value)

4.249505230776209 0.0001289575579796593
