In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
from glob import glob 
from os.path import join as opj
import os

In [2]:
#from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import LeaveOneGroupOut 
from sklearn.metrics import confusion_matrix

In [3]:
def cleaning(df):
    '''
    Derive timing and condition columns on a behavioural events table.

    The frame is modified in place and the same object is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs three columns:
        * ``npic``      - strings; the text before the first ``_`` becomes ``n_pic``
        * ``onset``     - numbers; floored to the integer column ``TR``
        * ``condition`` - strings split as ``<pair>/<f0>,<f1>,<f2>,<f3>`` where
          ``f0`` is the destination, ``f1`` the validity code and the mere
          presence of ``f3`` marks a catch row. Trailing fields may be absent.

    Returns
    -------
    pandas.DataFrame
        ``df`` with ``n_pic``, ``TR``, ``pair``, ``destination``, ``valid``,
        ``catch``, ``n_int`` and ``segment`` added.
    '''

    # Plain column selection (no ``.squeeze()``): squeezing a one-row frame
    # yields a scalar, which has no ``.str`` accessor.
    df['n_pic'] = df['npic'].str.split('_', expand=True)[0]
    df['TR'] = df['onset'].apply(np.floor).astype('int')

    # Column 0 holds the text before the '/', column 1 the text after it.
    halves = df['condition'].str.split('/', expand=True)
    df['pair'] = halves[0].str.extract(r'(\w+)', expand=False)

    # Pad to four comma fields so that the optional ones (validity code,
    # catch marker) read as missing instead of raising KeyError when no row
    # in this table carries them.
    fields = halves[1].str.split(',', expand=True).reindex(columns=range(4))
    df['destination'] = fields[0].str.extract(r'(\w+)', expand=False)

    # Code 0 -> True, code 1 -> False, anything else (including NaN) -> None.
    valid_codes = {0: True, 1: False}
    df['valid'] = pd.to_numeric(fields[1], errors='coerce').apply(
        lambda code: valid_codes.get(code, None)
    )
    df['catch'] = fields[3].notnull()

    def segment(x):
        # NaN fails every comparison and therefore falls through to None.
        if x <= 25:
            return 'same'
        elif x <= 75:
            return 'similar'
        elif x <= 100:
            return 'different'
        else:
            return None

    # NOTE(review): this parses the raw ``npic`` string, not the ``n_pic``
    # prefix extracted above; entries that are not plain numbers coerce to
    # NaN and get no segment. Confirm that this is the intended behaviour.
    df['n_int'] = pd.to_numeric(df['npic'], errors='coerce')
    df['segment'] = df['n_int'].apply(segment)

    return df

def cleaning2(df):
    '''
    Reduce the cleaned events table to its distinct rows and number the TRs
    inside each trial.

    Rows whose ``segment`` is null are discarded, the per-picture columns are
    dropped, and exact duplicate rows are collapsed. ``within_trial_TR`` is
    the dense rank of ``TR`` within each (``sub``, ``round``, ``trial``)
    group. ``round`` and ``trial`` are cast to int. The input frame itself is
    not modified; a new frame is returned.
    '''
    per_picture_columns = [
        'onset', 'design_onset', 'design_end', 'n_pic',
        'npic', 'condition', 'n_int', 'catch',
    ]
    has_segment = df['segment'].notnull()
    distinct_rows = (
        df.loc[has_segment]
        .drop(columns=per_picture_columns)
        .drop_duplicates()
    )

    # Rank is computed before the int casts below, on the grouping values as given.
    tr_rank = distinct_rows.groupby(['sub', 'round', 'trial'])['TR'].rank(method='dense')
    distinct_rows['within_trial_TR'] = tr_rank.astype('int')

    for column in ('round', 'trial'):
        distinct_rows[column] = distinct_rows[column].astype('int')

    return distinct_rows

def cleaning3(fmri_df):
    '''
    Tidy an fMRI table: name the TR column and turn the run and subject
    labels into integers.

    Side effect: the ``'Unnamed: 0'`` -> ``'TR'`` rename, the new ``round``
    column and the integer ``sub`` column are written onto the frame that was
    passed in. The returned frame is a new object without ``run`` and ``roi``.

    Parameters
    ----------
    fmri_df : pandas.DataFrame
        Needs string columns ``run`` and ``sub`` that each contain a run of
        digits, plus a ``roi`` column.

    Returns
    -------
    pandas.DataFrame
        Copy of the updated frame with ``run`` and ``roi`` removed.
    '''
    fmri_df.rename(columns={'Unnamed: 0':'TR'}, inplace=True)

    # No ``.squeeze()`` here: on a one-row column it returns a bare string,
    # which has no ``.str`` accessor. The first run of digits is the number.
    fmri_df['round'] = fmri_df['run'].str.extract(r'(\d+)', expand=False).astype('int')
    fmri_df['sub'] = fmri_df['sub'].str.extract(r'(\d+)', expand=False).astype('int')

    return fmri_df.drop(columns=['run', 'roi'])



In [4]:
# Long ROI identifiers mapped to the short labels used for them.
rois_dict = {
    'ofc_orbital_2_epi_thre_0.5_masked': 'ofc',
    'rsc_cingul-Post-dorsal_2_epi_thre_0.5_masked': 'rsc',
    'ppa_mni_2_epi_thre_0.5_masked': 'ppa',
}

behav_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/behavior/"
fMRI_dir = "/home/wanjiag/projects/MONSTERA/derivatives/csv_files/fMRI/"

# Two-character subject numbers to leave out.
bads = ['01', '02', '03', '04', '05', '13', '14', '20', '23', '24', '27', '30', '34']

# The subject number is the last two characters of each directory entry (see
# `subnums`), so exclusions are matched against that same suffix. A substring
# test would also drop any entry that merely contains one of these digit
# pairs somewhere else in its name.
all_subs = sorted(x for x in os.listdir(fMRI_dir) if x[-2:] not in bads)
subnums = [x[-2:] for x in all_subs]

In [18]:
# Join the pieces of `m` into one array and show the mean of all its values.
# (`m` is created in a cell outside this excerpt.)
np.hstack(m).mean()

0.4928467000835422