# Data review

In [2]:
import pandas as pd
import numpy as np

First, check whether the number of events for each code matches between the EEG `.vmrk` file and the PsychoPy CSV file.

In [3]:
# Paths to the PsychoPy CSV and the EEG .vmrk file for this condition.
psychopy_file = 'data_files/Cond1Full_Mattsen_Study3_2021_Jun_25_1057.csv'
eeg_file = 'data_files/Condition1Full.vmrk'

csv = pd.read_csv(psychopy_file)

# The first 12 lines of the .vmrk file are skipped and the remaining rows are
# read without a header row; of the five named fields only the event code and
# the time are kept.
marker_columns = ['type', 'evtcode', 'time', 'tmp1', 'tmp2']
eeg = pd.read_csv(
    eeg_file,
    skiprows=12,
    header=None,
    names=marker_columns,
    usecols=['evtcode', 'time'],
)

Clean up the formatting and dtype of the event codes read from the EEG file

In [4]:
# Reduce each event description to its numeric code and store it as int64.
# Assumes descriptions end in the numeric code, e.g. 'S  1' -- TODO confirm
# against the .vmrk file.
#
# The trailing digits are extracted from the string form of the column rather
# than splitting on whitespace, for two reasons:
#   * the cell stays safe to re-run: once the column is already int64 the
#     `.str` accessor used by a whitespace split raises, whereas converting
#     to str first makes the operation idempotent;
#   * codes written without a space after the prefix (e.g. 'S100') have no
#     second whitespace-separated token, but still end in their digits.
eeg['evtcode'] = (
    eeg['evtcode']
    .astype(str)
    .str.extract(r'(\d+)\s*$', expand=False)
    .astype('int64')
)

Now combine the per-code event counts from both files into one dataframe so they can be compared side by side

In [5]:
# Count the events per code in each source, then place the two count series
# side by side (aligned on the code value) so mismatches are visible per row.
psychopy_counts = csv.groupby(['code'])['sound'].count()
eeg_counts = eeg.groupby(['evtcode'])['evtcode'].count()
pd.concat([psychopy_counts, eeg_counts], axis=1)

Unnamed: 0,sound,evtcode
1,1167,1167
2,135,135
3,1167,1167
4,135,135
11,1167,1167
12,135,135
13,1167,1167
14,135,135
98,540,540
99,12,11


## Now look at the actual sounds used

In [6]:
# Load the Group 1 and Group 2 CSV files into separate dataframes.
group1, group2 = (
    pd.read_csv(path)
    for path in (
        'data_files/Group1Final_Mattsen_Study3_2021_Jul_27_1352.csv',
        'data_files/Group2Final_Mattsen_Study3_2021_Jul_27_1446.csv',
    )
)

### Group 1: sounds for each code

In [9]:
# For every event code of interest, collect the sorted unique sound names
# found in the Group 1 file, then show them as one column per code. Columns
# with fewer sounds are padded with NaN by the column-wise concat.
codes = [1, 2, 3, 4, 11, 12, 13, 14]
names = [
    np.sort(group1.loc[group1['code'] == code, 'name'].unique())
    for code in codes
]
pd.concat(
    [pd.DataFrame({f'Code_{code}': sounds}) for code, sounds in zip(codes, names)],
    axis=1,
)

Unnamed: 0,Code_1,Code_2,Code_3,Code_4,Code_11,Code_12,Code_13,Code_14
0,Central_50ms,Central_100ms,Central_50ms,Central_100ms,Central_50ms,Central_100ms,Central_50ms,Central_100ms
1,left_22.5deg_50ms,left_22.5deg_100ms,left_22.5deg_50ms,left_22.5deg_100ms,left_22.5deg_50ms,left_22.5deg_100ms,left_22.5deg_50ms,left_22.5deg_100ms
2,left_45deg_50ms,left_45deg_100ms,right_22.5deg_50ms,right_22.5deg_100ms,left_45deg_50ms,left_45deg_100ms,right_22.5deg_50ms,right_22.5deg_100ms
3,left_67deg_50ms,left_67deg_100ms,,,left_67deg_50ms,left_67deg_100ms,,
4,left_90deg_50ms,left_90deg_100ms,,,left_90deg_50ms,left_90deg_100ms,,
5,right_22.5deg_50ms,right_22.5deg_100ms,,,right_22.5deg_50ms,right_22.5deg_100ms,,
6,right_45deg_50ms,right_45deg_100ms,,,right_45deg_50ms,right_45deg_100ms,,
7,right_67deg_50ms,right_67deg_100ms,,,right_67deg_50ms,right_67deg_100ms,,
8,right_90deg_50ms,right_90deg_100ms,,,right_90deg_50ms,right_90deg_100ms,,


### Group 2: sounds for each code

In [10]:
# For every event code of interest, collect the sorted unique sound names
# found in the Group 2 file, then show them as one column per code. Columns
# with fewer sounds are padded with NaN by the column-wise concat.
codes = [5, 6, 7, 8, 25, 26, 27, 28]
names = [
    np.sort(group2.loc[group2['code'] == code, 'name'].unique())
    for code in codes
]
pd.concat(
    [pd.DataFrame({f'Code_{code}': sounds}) for code, sounds in zip(codes, names)],
    axis=1,
)

Unnamed: 0,Code_5,Code_6,Code_7,Code_8,Code_25,Code_26,Code_27,Code_28
0,Central_50ms,Central_100ms,Central_50ms,Central_100ms,Central_50ms,Central_100ms,Central_50ms,Central_100ms
1,left_22.5deg_50ms,left_22.5deg_100ms,left_22.5deg_50ms,left_22.5deg_100ms,left_22.5deg_50ms,left_22.5deg_100ms,left_22.5deg_50ms,left_22.5deg_100ms
2,right_22.5deg_50ms,right_22.5deg_100ms,left_45deg_50ms,left_45deg_100ms,right_22.5deg_50ms,right_22.5deg_100ms,left_45deg_50ms,left_45deg_100ms
3,,,left_67deg_50ms,left_67deg_100ms,,,left_67deg_50ms,left_67deg_100ms
4,,,left_90deg_50ms,left_90deg_100ms,,,left_90deg_50ms,left_90deg_100ms
5,,,right_22.5deg_50ms,right_22.5deg_100ms,,,right_22.5deg_50ms,right_22.5deg_100ms
6,,,right_45deg_50ms,right_45deg_100ms,,,right_45deg_50ms,right_45deg_100ms
7,,,right_67deg_50ms,right_67deg_100ms,,,right_67deg_50ms,right_67deg_100ms
8,,,right_90deg_50ms,right_90deg_100ms,,,right_90deg_50ms,right_90deg_100ms


In [11]:
# List the distinct event codes that occur in the Group 2 file.
np.unique(group2['code'])

array([ 5,  6,  7,  8, 25, 26, 27, 28, 98, 99])

In [12]:
# All stimulus file names; every per-sound list below follows this order.
sound_set = [
    'Central_100ms.wav',
    'Central_50ms.wav',
    'left_22.5deg_100ms.wav',
    'left_22.5deg_50ms.wav',
    'left_45deg_100ms.wav',
    'left_45deg_50ms.wav',
    'left_67deg_100ms.wav',
    'left_67deg_50ms.wav',
    'left_90deg_100ms.wav',
    'left_90deg_50ms.wav',
    'right_22.5deg_100ms.wav',
    'right_22.5deg_50ms.wav',
    'right_45deg_100ms.wav',
    'right_45deg_50ms.wav',
    'right_67deg_100ms.wav',
    'right_67deg_50ms.wav',
    'right_90deg_100ms.wav',
    'right_90deg_50ms.wav',
]
sound_files = ['stimuli/{}'.format(name) for name in sound_set]

# A sound whose name contains '50ms' is labelled 'std'; all others are 'dev'.
classes = ['std' if '50ms' in name else 'dev' for name in sound_set]

# (std, dev) presentation count per sound for each condition key.
class_counts = {
    'hvc': (145, 15),
    'lvc': (261, 27),
    'vlvc': (435, 45),
}
# Angle tags whose sounds get a count of zero in each condition.
excluded_angles = {
    'hvc': (),
    'lvc': ('67deg', '90deg'),
    'vlvc': ('45deg', '67deg', '90deg'),
}

counts = {
    condition: [
        0 if any(tag in name for tag in excluded_angles[condition])
        else (n_std if label == 'std' else n_dev)
        for name, label in zip(sound_set, classes)
    ]
    for condition, (n_std, n_dev) in class_counts.items()
}


In [13]:
# Total number of presentations across all sounds for the 'vlvc' key.
sum(counts['vlvc'])

1440

In [14]:
# Share of 'dev' presentations for the 'hvc' counts (15 dev vs. 145 std).
15/(145+15)

0.09375

In [15]:
# Share of 'dev' presentations for the 'lvc' counts (27 dev vs. 261 std).
27/(261+27)

0.09375

In [16]:
# Share of 'dev' presentations for the 'vlvc' counts (45 dev vs. 435 std).
45/(435+45)


0.09375