# Figure S7: Neural Activity during video on
## First Level Analysis



*Yiyu Wang 2025 May*



In [1]:
import os


import glob
import nibabel as nib
import numpy as np
import pandas as pd
import copy

import nilearn
from nilearn import datasets, plotting, image
from nilearn.image import smooth_img, resample_to_img, load_img, concat_imgs, mean_img, resample_img
from nilearn import plotting
from nilearn.masking import apply_mask
from nilearn.input_data import NiftiMasker
from nilearn.glm.first_level import FirstLevelModel
from nilearn.glm.second_level import SecondLevelModel
from scipy.stats import norm


import gzip

import seaborn as sns
import matplotlib.pyplot as plt

from os.path import join



In [2]:
# Print the installed nilearn version so the cell output records which release was used.
print(nilearn.__version__)

0.10.2


In [3]:
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
# Preprocessed functional images live at a machine-specific absolute path.
# Alternatives kept for reference:
#   local:   data_dir = base_dir + 'transformed_data_2mm/'
#   cluster: base_dir = '/scratch/wang.yiyu/SocialAbstraction/'
data_dir = '/Users/yiyuwang/Downloads/social_prediction_transformed_data_2mm/'

# The remaining locations are relative to the notebook's working directory.
logfiles_dir = 'Data/logfiles/'
confounds_dir = 'Data/confounds/'
mask_dir = 'masks/'
figures_dir = 'figures/'

# ---------------------------------------------------------------------------
# Participants: first (header-less) column of the inclusion list
# ---------------------------------------------------------------------------
subjects_list = pd.read_csv('Data/included_SocialPred_subjects.csv', header=None)[0].values.tolist()
sample_n = len(subjects_list)
print("subjects in this analysis:")
print(subjects_list)
print(f"**** n = {sample_n} *****" )

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
vmax = 12          # NOTE(review): presumably a colour-scale limit for plots; not used in the cells shown here
TR = .001          # NOTE(review): looks like a 1 ms time step rather than the repetition time; confirm
N_TR = 675         # number of volumes; N_TR * TR_Length is used as the end of a run when parsing logfiles
TR_Length = 0.8    # passed to FirstLevelModel as t_r
TR_IN_MS = int(TR_Length/TR)

# ---------------------------------------------------------------------------
# Mask and nuisance regressors
# ---------------------------------------------------------------------------
# Load the gray-matter mask (no resampling is done here).
gm_mask_img = nib.load(mask_dir + 'gm_mask_icbm152_brain.nii.gz')

# Columns pulled from each confounds table; outlier columns are appended per run.
confounds_of_interest = [
    'csf', 'white_matter',
    'trans_x', 'trans_y', 'trans_z',
    'rot_x', 'rot_y', 'rot_z',
    'framewise_displacement',
]

subjects in this analysis:
[152, 179, 154, 158, 173, 153, 159, 174, 162, 145, 143, 181, 144, 169, 146, 167, 161, 182, 147, 166, 160, 185, 170, 176, 151, 157, 171, 177, 150, 156]
**** n = 30 *****


In [4]:
# Column names of the behavioural log files, in file order. The position of a
# name in this array is used to pick the matching field out of a space-split
# log line.
logfile_headers = np.array([
    # trial identity
    'obs_video_name', 'fd_video_name', 'video_number', 'trial_condition', 'run_number', 'run_condition',
    # first (observed) video
    'obs_video_onset', 'obs_video_offset', 'obs_video_duration_method1', 'obs_video_duration_method2',
    # prediction response
    'prediction', 'prediction_x', 'prediction_y', 'prediction_RT', 'prediction_onset',
    # feedback video
    'fb_video_onset', 'fb_video_offset', 'fb_video_duration_method1', 'fb_video_duration_method2',
    # surprise rating
    'surprise', 'surprise_RT', 'surprise_onset',
])

In [5]:
def AddSteadyStateOutliers(columns_of_interest, all_columns):
    """Return a copy of ``columns_of_interest`` extended with outlier columns.

    Parameters
    ----------
    columns_of_interest : list of str
        Base confound names. The input list is left untouched.
    all_columns : iterable of str
        Every column name available in the confounds table.

    Returns
    -------
    list of str
        The base names followed by each name in ``all_columns`` that contains
        the substring ``'outlier'``, in the order they appear.
    """
    outlier_names = [name for name in all_columns if 'outlier' in name]
    selected = copy.deepcopy(columns_of_interest)
    selected.extend(outlier_names)
    return selected



def CreateConfoundMatrix(confound_file_path, 
                         confounds_of_interest, s, run):
    """Load a confounds table and return the selected nuisance regressors.

    Parameters
    ----------
    confound_file_path : str
        Path to a tab-separated confounds file.
    confounds_of_interest : list of str
        Base confound column names. Every column whose name contains
        ``'outlier'`` is added to this selection via ``AddSteadyStateOutliers``.
    s, run
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        The selected columns with missing values replaced by 0.

    Raises
    ------
    KeyError
        If a requested column is not present in the file.
    """
    confounds = pd.read_csv(confound_file_path, sep='\t')
    selected_columns = AddSteadyStateOutliers(confounds_of_interest, confounds.columns)

    missing = [name for name in selected_columns if name not in confounds.columns]
    if missing:
        raise KeyError(f'confound columns {missing} not found in {confound_file_path}')

    # fillna returns a new frame. Writing through `.values` is unreliable: with
    # mixed dtypes it edits a temporary copy (NaNs silently survive), and with
    # pandas copy-on-write the array is read-only.
    return confounds[selected_columns].fillna(0)


# Video key table. get_subjective_prior reads its `vid_num`, `Social_correct`
# and `Pattern_correct` columns.
# NOTE(review): absolute, machine-specific path; this cell fails on any other machine.
video_key = pd.read_csv('/Users/yiyuwang/Dropbox/Projects/NEU_projects/SocialPrediction/Results/SocialPrediction_video_key.csv')


def get_subjective_prior(prediction, vn, video_key = video_key):
    """Classify a prediction as matching the Pattern answer, the Social answer, or neither.

    Parameters
    ----------
    prediction
        The participant's response for the trial.
    vn
        Video number; matched against ``video_key.vid_num``.
    video_key : pandas.DataFrame
        Table with ``vid_num``, ``Social_correct`` and ``Pattern_correct``
        columns. Defaults to the module-level table as it was when this
        function was defined.

    Returns
    -------
    tuple
        ``(1, 'Pattern')``, ``(2, 'Social')`` or ``(0, 'Neither')``.

    Notes
    -----
    ``Pattern_correct`` values 7, 8 and 6 each stand for two acceptable
    pattern answers (4 or 2, 3 or 2, 4 or 1). For those codes the Pattern
    match is tested before the Social match. For any other value the Social
    match is tested first.
    """
    key_row = video_key[video_key.vid_num == vn]
    social_prior = key_row['Social_correct'].values[0]
    pattern_prior = key_row['Pattern_correct'].values[0]

    # Pattern codes that accept two answers, checked in the original order.
    two_answer_codes = ((7, (4, 2)), (8, (3, 2)), (6, (4, 1)))
    accepted = None
    for code, answers in two_answer_codes:
        if pattern_prior == code:
            accepted = answers
            break

    if accepted is not None:
        first_answer, second_answer = accepted
        if prediction == first_answer or prediction == second_answer:
            return 1, 'Pattern'
        if prediction == social_prior:
            return 2, 'Social'
        return 0, 'Neither'

    # Single-answer pattern: here the Social match takes precedence.
    if prediction == social_prior:
        return 2, 'Social'
    if prediction == pattern_prior:
        return 1, 'Pattern'
    return 0, 'Neither'





In [6]:
def get_condition(cn):
    """Translate a numeric condition code into its label.

    Parameters
    ----------
    cn
        Condition number: 1 for 'Pattern', 2 for 'Social'.

    Returns
    -------
    str
        ``'Pattern'`` or ``'Social'``.

    Raises
    ------
    ValueError
        For any other code. Previously this case printed a message and then
        failed with UnboundLocalError on the return statement.
    """
    if cn == 1:
        return 'Pattern'
    if cn == 2:
        return 'Social'
    raise ValueError(f'no such Condition number! (got {cn!r})')

def parse_task_lines(lines, headers):
    """Yield GLM event rows for every trial line of a behavioural log.

    Parameters
    ----------
    lines : sequence of str
        Space-delimited trial lines. Blank lines are ignored.
    headers : array-like of str
        Column names in file order; used to locate each field by position.

    Yields
    ------
    list
        ``[onset, duration, trial_type, run]``, six rows per trial in this
        order: 'cue_VideoOn', 'fb_VideoOn', 'rest' (between the prediction
        rating and the feedback video), 'rest' (between the surprise rating
        and the next trial), 'surprise_onset', 'pred_onset'.

    Raises
    ------
    ValueError
        If a required column name is not present in ``headers``.

    Notes
    -----
    Reads the module-level ``N_TR`` and ``TR_Length`` for the end-of-run time.
    """
    headers = np.asarray(headers)

    def column_index(name):
        # Take the first match explicitly: int() on a 1-d array is deprecated
        # since NumPy 1.25, and an empty match should fail with a clear message.
        matches = np.flatnonzero(headers == name)
        if matches.size == 0:
            raise ValueError(f"column '{name}' not found in headers")
        return int(matches[0])

    # Resolve column positions once rather than on every line.
    idx = {name: column_index(name) for name in (
        'obs_video_onset', 'obs_video_offset',
        'fb_video_onset', 'fb_video_offset',
        'run_number', 'prediction_onset', 'surprise_onset')}

    # A trailing newline at the end of the file would otherwise be parsed as a trial.
    lines = [line for line in lines if line.strip()]

    # The 20th and 40th trials are treated as the last trial of a run.
    last_trial_indices = (19, 39)

    for (i, line) in enumerate(lines):
        cols = line.split(' ')

        video_onset = float(cols[idx['obs_video_onset']])
        video_offset = float(cols[idx['obs_video_offset']])
        video_duration = video_offset - video_onset

        fb_video_onset = float(cols[idx['fb_video_onset']])
        fb_video_offset = float(cols[idx['fb_video_offset']])
        fb_video_duration = fb_video_offset - fb_video_onset

        run = int(cols[idx['run_number']])

        prediction_onset = float(cols[idx['prediction_onset']])
        surprise_onset = float(cols[idx['surprise_onset']])

        # Rest between the prediction rating and the feedback video.
        # NOTE(review): the +4 is presumably the rating-window length in seconds; confirm.
        rest_onset1 = prediction_onset + 4
        rest_duration1 = fb_video_onset - rest_onset1

        # Rest between the surprise rating and the next trial (or the end of the run).
        rest_onset2 = surprise_onset + 4
        if i in last_trial_indices or i + 1 >= len(lines):
            # Last trial of the run, or no following line: rest lasts until the end of the scan.
            next_line_onset = N_TR * TR_Length
        else:
            next_line_onset = float(lines[i + 1].split(' ')[idx['obs_video_onset']])
        rest_duration2 = next_line_onset - rest_onset2
        print("trial number: ", i+1)
        print("Interstimuli rest duration 1: ", rest_duration1)
        print("Intertrial rest duration 2: ", rest_duration2)

        yield [video_onset, video_duration, 'cue_VideoOn', run]
        yield [fb_video_onset,fb_video_duration, 'fb_VideoOn', run]
        yield [rest_onset1, rest_duration1, 'rest', run]
        yield [rest_onset2, rest_duration2, 'rest', run]
        yield [surprise_onset, 1, 'surprise_onset', run]
        yield [prediction_onset, 1, 'pred_onset', run]



def create_events_dataframe(task_csv, run):
    """Build the events table for one run from a behavioural log file.

    Parameters
    ----------
    task_csv : str
        Path to the log file; every line is handed to ``parse_task_lines``
        together with ``logfile_headers``.
    run : int
        Run number whose events are kept.

    Returns
    -------
    pandas.DataFrame
        Columns ``onset``, ``duration`` and ``trial_type`` for the requested
        run (the ``run`` column is dropped after filtering).
    """
    with open(task_csv, 'r') as log_file:
        raw_lines = log_file.readlines()

    event_rows = list(parse_task_lines(raw_lines, logfile_headers))
    all_events = pd.DataFrame(event_rows, columns=['onset','duration','trial_type','run'])

    run_events = all_events[all_events['run']==run]
    return run_events.drop(columns=['run'])

In [7]:
from nilearn.glm.first_level import make_first_level_design_matrix
from nilearn.glm.first_level import compute_regressor
import pickle

# Build (or reload) one design matrix per subject and run.
# When create_design_matrix is True, a FirstLevelModel is fitted on every run
# and the design matrix it generates is saved as PNG and CSV. Otherwise the
# CSVs from a previous execution are read back.
model_name = 'model_VideoOn'
res_dir = f'Results/{model_name}/1stLvl'

create_design_matrix = True
if create_design_matrix: 
    design_matrices = []
    sub_run_list = []
    for s in subjects_list:
        print(f'running subject {s}')
        sub_output_dir = res_dir + f'/{s}/'
        os.makedirs(sub_output_dir, exist_ok=True)

        # Behavioural log for this subject (first match of the glob pattern).
        task_file = glob.glob(logfiles_dir + f"/*{s}*edited.txt")
        if not task_file:
            raise FileNotFoundError(f'no edited logfile found for subject {s} in {logfiles_dir}')
        task_csv = task_file[0]

        for run in [1,2]:
            sub_run_list.append(f'sub-{s}_run-{run}')
            events = create_events_dataframe(task_csv, run)

            # Nuisance regressors for this run.
            confounds_str = f'sub-{s}_task-socialpred_run-{run}_desc-confounds_timeseries.tsv'
            cov = CreateConfoundMatrix(confounds_dir + confounds_str, confounds_of_interest, s, run)

            fmri_glm = FirstLevelModel(t_r=TR_Length,
                        noise_model='ar3',
                        standardize=True,
                        hrf_model='spm',
                        drift_model='cosine',
                        high_pass=.012, mask_img=gm_mask_img,smoothing_fwhm=6)

            func_str = f'sub-{s}_socialpred_run{run}.nii.gz'
            func_path = data_dir + func_str
            fmri_img = nib.load(func_path)
            fmri_glm = fmri_glm.fit(fmri_img, events, confounds=cov)

            # Save the design matrix of this run.
            design_matrix = fmri_glm.design_matrices_[0]
            design_matrices.append(design_matrix)
            print(design_matrix.columns)
            plotting.plot_design_matrix(design_matrix, output_file=join(sub_output_dir, f'design_matrix_run{run}.png'))
            design_matrix.to_csv(join(sub_output_dir, f'sub-{s}_run-{run}_design_matrix.csv'))

    # Save all design matrices in one file inside res_dir. The previous
    # `res_dir + 'design_matrices.pkl'` had no path separator, so the file
    # landed next to the folder as '1stLvldesign_matrices.pkl'.
    with open(join(res_dir, 'design_matrices.pkl'), 'wb') as f:
        pickle.dump(design_matrices, f)

else: 
    design_matrices = []
    for s in subjects_list:
        sub_output_dir = res_dir + f'/{s}/'
        for run in [1,2]:
            design_matrices.append(pd.read_csv(join(sub_output_dir, f'sub-{s}_run-{run}_design_matrix.csv'), index_col=0))

                

running subject 152
trial number:  1
Interstimuli rest duration 1:  2.0150000000000006
Intertrial rest duration 2:  1.0330000000000013
trial number:  2
Interstimuli rest duration 1:  1.0150000000000006
Intertrial rest duration 2:  3.0390000000000015
trial number:  3
Interstimuli rest duration 1:  1.0130000000000052
Intertrial rest duration 2:  2.037000000000006
trial number:  4
Interstimuli rest duration 1:  1.0160000000000053
Intertrial rest duration 2:  1.051000000000002
trial number:  5
Interstimuli rest duration 1:  2.016999999999996
Intertrial rest duration 2:  3.046999999999997
trial number:  6
Interstimuli rest duration 1:  1.0200000000000102
Intertrial rest duration 2:  2.0319999999999823
trial number:  7
Interstimuli rest duration 1:  2.0459999999999923
Intertrial rest duration 2:  1.0439999999999827
trial number:  8
Interstimuli rest duration 1:  2.007000000000005
Intertrial rest duration 2:  3.032999999999987
trial number:  9
Interstimuli rest duration 1:  2.009999999999991


In [8]:


# Per-subject GLM on the two runs concatenated in time.
# The per-run design matrices saved earlier are reloaded, stacked into one
# matrix per subject, fitted against the concatenated images, and a beta map
# and a z map are written for each task regressor.
dm_dir = f'Results/{model_name}/1stLvl'

# Stored in subject order with two entries (run 1, run 2) per subject.
design_matrices = []
for s in subjects_list:
    sub_output_dir = dm_dir + f'/{s}/'
    for run in [1,2]:
        design_matrices.append(pd.read_csv(join(sub_output_dir, f'sub-{s}_run-{run}_design_matrix.csv'), index_col=0))


design_index = 0

res_dir = f'Results/{model_name}/1stLvl'
os.makedirs(res_dir, exist_ok=True)
column_names = ['video_on']

# Task regressors whose maps are saved. They are selected by name so that a
# missing condition cannot cause a confound or drift column to be saved under
# a task label, which a positional `range(5)` would allow.
task_regressors = ['cue_VideoOn', 'fb_VideoOn', 'pred_onset', 'rest', 'surprise_onset']

for s in subjects_list: 

    sub_output_dir = res_dir + f'/{s}/'
    print(f'running subject {s}')
    os.makedirs(sub_output_dir, exist_ok=True)

    run1 = design_matrices[design_index]
    run2 = design_matrices[design_index + 1]

    # Run indicator: 1 for run-1 rows; run-2 rows get 0 from the fillna below.
    run1['run_regressor'] = 1

    # Columns present in only one run get a run prefix so they stay
    # run-specific. Shared columns are stacked into a single regressor.
    run1_unique_cols = set(run1.columns) - set(run2.columns)
    run2_unique_cols = set(run2.columns) - set(run1.columns)

    # Names listed in column_names are never prefixed.
    run1_unique_cols = run1_unique_cols - set(column_names)
    run2_unique_cols = run2_unique_cols - set(column_names)

    run1_renamed = run1.rename(columns={col: f'run1_{col}' for col in run1_unique_cols})
    run2_renamed = run2.rename(columns={col: f'run2_{col}' for col in run2_unique_cols})

    # Stack the runs; a column absent from one run is 0 for that run's rows.
    concatenated_design = pd.concat([run1_renamed, run2_renamed], ignore_index=True).fillna(0)

    # Concatenate the functional images in the same run order.
    run1_img = nib.load(data_dir + f'sub-{s}_socialpred_run1.nii.gz')
    run2_img = nib.load(data_dir + f'sub-{s}_socialpred_run2.nii.gz')
    concatenated_imgs = concat_imgs([run1_img, run2_img])

    # NOTE(review): with a precomputed design matrix, hrf_model / drift_model /
    # high_pass presumably have no effect; confirm against the nilearn docs.
    concat_glm = FirstLevelModel(t_r=TR_Length,
                    noise_model='ar1',
                    standardize=True,
                    hrf_model=None,
                    drift_model='cosine',
                    high_pass=.01, mask_img=gm_mask_img,smoothing_fwhm=6)

    concat_glm.fit(concatenated_imgs, design_matrices=concatenated_design)

    plotting.plot_design_matrix(concat_glm.design_matrices_[0], output_file=join(sub_output_dir, f'design_matrix_concatenated.png'))
    concatenated_design.to_csv(join(sub_output_dir, f'sub-{s}_design_matrix_concatenated.csv'))

    # One unit contrast vector per design column.
    contrast_matrix = np.eye(concatenated_design.shape[1])
    design_columns = list(concatenated_design.columns)

    # Save the parameter estimate and z map of every task regressor.
    for regressor in task_regressors:
        if regressor not in design_columns:
            raise ValueError(f"regressor '{regressor}' missing from the design matrix of subject {s}")
        contrast_vector = contrast_matrix[design_columns.index(regressor)]

        print(f'saving regressor for video {regressor}')
        for output_type, file_label in (('effect_size', 'beta'), ('z_score', 'z_score')):
            stat_img = concat_glm.compute_contrast(contrast_vector, output_type=output_type)
            nii_file_path = sub_output_dir + f'sub-{s}_{file_label}_video-{regressor}_gm_masked.nii.gz'
            nib.save(stat_img, nii_file_path)

    design_index = design_index + 2
    




running subject 152
saving regressor for video cue_VideoOn
saving regressor for video fb_VideoOn
saving regressor for video pred_onset
saving regressor for video rest
saving regressor for video surprise_onset
running subject 179
saving regressor for video cue_VideoOn
saving regressor for video fb_VideoOn
saving regressor for video pred_onset
saving regressor for video rest
saving regressor for video surprise_onset
running subject 154
saving regressor for video cue_VideoOn
saving regressor for video fb_VideoOn
saving regressor for video pred_onset
saving regressor for video rest
saving regressor for video surprise_onset
running subject 158
saving regressor for video cue_VideoOn
saving regressor for video fb_VideoOn
saving regressor for video pred_onset
saving regressor for video rest
saving regressor for video surprise_onset
running subject 173
saving regressor for video cue_VideoOn
saving regressor for video fb_VideoOn
saving regressor for video pred_onset
saving regressor for video re