In [7]:
import pickle as pk
import pandas as pd
import numpy as np
import glob
import argparse
import sys
from sentence_transformers import SentenceTransformer
import torch
from transformers import BertTokenizer, BertModel
from gensim.models import KeyedVectors
import os.path
from transformers import AutoModel, AutoTokenizer 
from transformers import AutoModelForSequenceClassification
from transformers import TFAutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
from scipy.special import softmax
import csv
import urllib.request
from transformers import RobertaModel, RobertaTokenizer
from transformers import AutoTokenizer, AutoModelForMaskedLM
pd.options.mode.chained_assignment = None
from ast import literal_eval
import nibabel as nib
import scipy.io
import nibabel as nib

## Creating .mat files from .csv files

### Assertion Test checks if participant name is mapped to the right file

In [8]:
# Directory that is globbed for the per-participant stimulus-order CSVs.
csv_dir = '/home/varshini/projects/def-afyshe-ab/varshini/glucks/data/stim_order_160/'
csvs = sorted(glob.glob(csv_dir+'*.csv'))

# Participant IDs, kept as two groups and then combined (ASD first, then control).
control_p = ['P054','P057','P064','P065','P067','P068','P072','P073','P075','P076','P080','P081']
ASD_p = ['P050','P055','P056','P058','P059','P060','P066','P069','P070','P071','P078','P079']
all_participants = ASD_p + control_p

# Map participant ID -> path of that participant's CSV.
# The loop variable is deliberately NOT called `csv`: that name is the
# standard-library module imported at the top of the notebook, and rebinding
# it to a path string would break any later use of the module.
csv_dict = {}
for csv_path in csvs:
    # The ID is taken from the four characters right before the '.csv' suffix.
    participant_name = csv_path[-8:-4]

    if participant_name in all_participants:
        csv_dict[participant_name] = csv_path

# Every expected participant must have a CSV; report the missing IDs here
# rather than as a bare KeyError when csv_dict is indexed later on.
missing_participants = sorted(set(all_participants) - set(csv_dict))
assert not missing_participants, 'No CSV found in {} for: {}'.format(csv_dir, missing_participants)

#assertion test
assert csv_dict['P080'][-8:-4] == 'P080'

### Assertion Test to check if the stimuli are sorted properly for all participants

In [9]:
# Spot-check the alphabetical ordering for one participant by printing the
# stimuli found at 0-based positions 0 and 42 after sorting.
part = 'P066'
stim_columns = ['stim','SentType','StimOnset','run','Nifti','nifti_list','category','exemplar']
df = pd.read_csv(csv_dict[part])[stim_columns]

# Sort rows alphabetically by stimulus text and renumber the index from 0.
df = df.sort_values(by='stim', ignore_index=True)
names = np.array(df['stim'].values.tolist())
for position in (0, 42):
    print(part, names[position])


P066 Some birds are eagles
P066 Some flowers are tulips


In [28]:
#create mat files
# One .mat file per participant holding three arrays, all in alphabetical
# stimulus order: 'names' (stimulus text), 'onsets' (StimOnset column) and
# 'durations' (all zeros).

# The output directory does not depend on the participant, so it is set once
# outside the loop and created up front if it does not exist yet.
mat_write_dir = '/home/varshini/scratch/data/data_glucksberg/mat_files_alphabetic/'
os.makedirs(mat_write_dir, exist_ok=True)

for participant in all_participants:
    df = pd.read_csv(csv_dict[participant])
    # Selecting the columns also fails fast if the CSV lacks any of them.
    df = df[['stim','SentType','StimOnset','run','Nifti','nifti_list','category','exemplar']]
    df = df.sort_values(by='stim',ignore_index = True)

    # Arrays are built from Python lists exactly as before so that the layout
    # of the saved variables is unchanged.
    names = np.array(df['stim'].values.tolist())
    onsets = np.array(df['StimOnset'].values.tolist())
    durations = np.zeros(names.shape, dtype=float)

    #assertion test for all participants
    # Positions 0 and 42 must hold the same sentences for everyone; the message
    # names the participant so a failure inside the loop can be traced.
    assert names[0] == 'Some birds are eagles', \
        '{}: unexpected first stimulus {!r}'.format(participant, names[0])
    assert names[42] == 'Some flowers are tulips', \
        '{}: unexpected stimulus at index 42 {!r}'.format(participant, names[42])

    scipy.io.savemat(mat_write_dir + participant + '.mat', mdict={'durations': durations, 'names':names, 'onsets':onsets})

### Checking the correctness of the onset values written to the mat files (recomputed from the sorted CSVs)

In [26]:
def _sorted_frame_and_onsets(participant_id):
    """Load one participant's CSV sorted by 'stim' and pull out its onsets.

    Reads the path stored in ``csv_dict[participant_id]``, sorts the rows by
    the 'stim' column with a fresh 0-based index, and returns the sorted
    frame together with its 'StimOnset' column as a NumPy array.
    """
    frame = pd.read_csv(csv_dict[participant_id]).sort_values(by='stim', ignore_index=True)
    return frame, np.array(frame['StimOnset'].values.tolist())


# Each check compares a slice of the sorted onsets with reference values,
# to three decimal places.

# P081: sorted positions 0-3
df_81, onsets_81 = _sorted_frame_and_onsets('P081')
np.testing.assert_almost_equal(onsets_81[:4], [262.186,20.677,48.679,39.184], decimal=3)

# P055: sorted positions 18-21
df_55, onsets_55 = _sorted_frame_and_onsets('P055')
np.testing.assert_almost_equal(onsets_55[18:22], [621.937,410.5,419.011,287.533], decimal=3)

# P072: sorted positions 100-103
df_72, onsets_72 = _sorted_frame_and_onsets('P072')
np.testing.assert_almost_equal(onsets_72[100:104], [622.372,721.382,708.382,596.872], decimal=3)

# P060: sorted positions 73-76
df_60, onsets_60 = _sorted_frame_and_onsets('P060')
np.testing.assert_almost_equal(onsets_60[73:77], [716.876,139.896,663.875,784.88], decimal=3)

### Checking the Beta Headers for two participants

#### P050

In [24]:
# Check that two beta images of this participant carry the expected stimulus
# sentence and beta number in their header description.
participant = 'P050'
beta_dir = '/home/varshini/scratch/data/data_glucksberg/betas_alphabetical_ordered/'
beta_path = beta_dir + participant + '/1st_Level_ROI_alph/'

# SPM output numbering starts at 1, so 0-based stimulus positions 0 and 42
# correspond to beta_0001 and beta_0043.

# First stimulus -> beta_0001
beta_file_1 = beta_path + 'beta_0001.nii'
beta1_img = nib.load(beta_file_1)
beta1_descrip = str(beta1_img.header['descrip'])
assert 'Some birds are eagles' in beta1_descrip
assert '0001' in beta1_descrip

# Stimulus at position 42 -> beta_0043
beta_file_43 = beta_path + 'beta_0043.nii'
beta43_img = nib.load(beta_file_43)
beta43_descrip = str(beta43_img.header['descrip'])
assert 'Some flowers are tulips' in beta43_descrip
assert '0043' in beta43_descrip

#### P055

In [25]:
# Check that two beta images of this participant carry the expected stimulus
# sentence and beta number in their header description.
participant = 'P055'
beta_dir = '/home/varshini/scratch/data/data_glucksberg/betas_alphabetical_ordered/'
beta_path = beta_dir + participant + '/1st_Level_ROI_alph/'

# SPM output numbering starts at 1, so 0-based stimulus positions 0 and 42
# correspond to beta_0001 and beta_0043.

# First stimulus -> beta_0001
beta_file_1 = beta_path + 'beta_0001.nii'
beta1_img = nib.load(beta_file_1)
beta1_descrip = str(beta1_img.header['descrip'])
assert 'Some birds are eagles' in beta1_descrip
assert '0001' in beta1_descrip

# Stimulus at position 42 -> beta_0043
beta_file_43 = beta_path + 'beta_0043.nii'
beta43_img = nib.load(beta_file_43)
beta43_descrip = str(beta43_img.header['descrip'])
assert 'Some flowers are tulips' in beta43_descrip
assert '0043' in beta43_descrip