In [4]:
import warnings
import sys 
if not sys.warnoptions:
    warnings.filterwarnings("ignore", category=DeprecationWarning)
import os 
import random
import numpy as np
import time
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns 
from itertools import combinations
import time

import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
from rpy2.robjects import Formula

from utils import *


In [21]:
eyetracking_dir = './data/pupil/'

def get_pupil_subs():
    eyetracking_subs = []
    for s in os.listdir(eyetracking_dir):
        if s[0] != ".":
            eyetracking_subs.append(int(s[1:]))

    learn_subs = []
    prepost_subs = []
    
    for s in eyetracking_subs:
        datapath_learn = f'pupil_preproc_learn_s{subnum}.csv'
        datapath_prepost = f'pupil_preproc_prepost_s{subnum}.csv'
        
        subfile_learn = f'{eyetracking_dir}/s{s}/{datapath_learn}'
        if os.path.isfile(subfile_learn):
            learn_subs.append(s)
            
        subfile_prepost = f'{eyetracking_dir}/s{s}/{datapath_prepost}'
        if os.path.isfile(subfile_prepost):
            prepost_subs.append(s)
    
    return sorted(learn_subs), sorted(prepost_subs)



# gets a dataframe containing whether each trial (0-indexed) in a specific run was encoded (subject response provided)
def get_encoded_df(subid, run):
    behav_file = f'labels/catStats_sub{subid}.csv'
    behav_data = pd.read_csv(behav_file, sep='\t')
    behav_data_run = behav_data[behav_data.block == run].reset_index(drop=True)[["blocktrial", "block", "encoded"]]
    behav_data_run = behav_data_run.rename(columns={"blocktrial": "trial"})
    return behav_data_run


# load pupil data from a specific subject
def load_sub_pupil(subnum, with_encoded=False):
    subdata = []

    datapath_learn = f'pupil_preproc_learn_s{subnum}.csv'
    datapath_prepost = f'pupil_preproc_prepost_s{subnum}.csv'
    
    subfile_learn = f'{eyetracking_dir}/s{subnum}/{datapath_learn}'
    if os.path.isfile(subfile_learn):
        subdata.append(pd.read_csv(subfile_learn).drop(columns=["Unnamed: 0"]))
    
    subfile_prepost = f'{eyetracking_dir}/s{subnum}/{datapath_prepost}'
    if os.path.isfile(subfile_prepost):
        subdata.append(pd.read_csv(subfile_prepost).drop(columns=["Unnamed: 0"]))
    
    subdata = pd.concat(subdata)
    
    if with_encoded:
        encoded_dfs = []
        for run in [1, 2, 3, 4, 5]:
            encoded_df = get_encoded_df(subnum, run)
            encoded_df["trial"] = encoded_df["trial"].apply(lambda x: x+1) # convert 0-indexed trials to 1-indexed, as used in pupil
            encoded_dfs.append(encoded_df)
        encoded_dfs = pd.concat(encoded_dfs)

        subdata = pd.merge(subdata, encoded_dfs, on=["trial", "block"], how="left")

    return subdata


# read the proportion of NA timepoints from the raw data, as generated from preprocessing pipeline
def get_propna(subnum):
    propna_file = f'{eyetracking_dir}/s{subnum}/raw_propna_learn_s{subnum}.txt'
    propna_prepost_file = f'{eyetracking_dir}/s{subnum}/raw_propna_prepost_s{subnum}.txt'
    
    with open(propna_file, 'r') as file:
        content = file.read()
        propna = [float(s) for s in content.split(' ')]
        
    with open(propna_prepost_file, 'r') as file:
        content = file.read()
        propna_prepost = [float(s) for s in content.split(' ')]
        
    return [propna_prepost[0], np.mean(propna)]



In [22]:
learn_subs, prepost_subs = get_pupil_subs()

NameError: name 'datapath_learn' is not defined