## dataset generation

In [None]:
# split event trial EEGs

In [7]:
import mne
import numpy as np
from tqdm import tqdm

In [8]:
#use mne to import

raw_eeg_p01 = "/home/archeron/dev/data/OpenMIIR-RawEEG_v1/P01-raw.fif"
subject = 'P01'
raw_fif = mne.io.Raw(raw_eeg_p01, preload=True, verbose=True)

Opening raw data file /home/archeron/dev/data/OpenMIIR-RawEEG_v1/P01-raw.fif...
Isotrak not found
    Read a total of 1 projection items:
        Average EEG reference (1 x 64)  idle
    Range : 0 ... 2478165 =      0.000 ...  4840.166 secs
Ready.
Reading 0 ... 2478165  =      0.000 ...  4840.166 secs...


In [9]:
# get numpy array
raw_arr = raw_fif.get_data()

In [10]:
# get events object with stim markers
event_trials = mne.find_events(raw_fif, stim_channel='STI 014', shortest_event=0)

540 events found
Event IDs: [  11   12   13   14   21   22   23   24   31   32   33   34   41   42
   43   44  111  112  113  114  121  122  123  124  131  132  133  134
  141  142  143  144  211  212  213  214  221  222  223  224  231  232
  233  234  241  242  243  244 1000 1111 2001]


In [12]:
def decode_event_id(event_id):
    """Split a trial event id into ``(stimulus_id, condition)``.

    For ids below 1000 the last decimal digit is the condition and the
    remaining digits are the stimulus id (e.g. 121 -> (12, 1)).  Any other
    id is handed back unchanged as a single value, so callers must not
    unpack the result in that case.
    """
    if not event_id < 1000:
        return event_id
    # int() truncates the true division (121 / 10 == 12.1) so the stimulus id
    # stays an integer and can be used as a lookup key.
    return int(event_id / 10), event_id % 10

In [13]:
STIMULUS_IDS = [1, 2, 3, 4, 11, 12, 13, 14, 21, 22, 23, 24]
AUDIO_ONSET_ID = 1000  # event id that follows a trial label when the audio starts

dataset = []   # one slice of raw_arr (all channels) per extracted trial
stimlist = []  # matching ids, eg '121_520_8140_P01'

n_events = len(event_trials)

# Wrapping the events (not the enumerate object) lets tqdm know the total.
for i, event in enumerate(tqdm(event_trials)):
    etime = event[0]
    etype = event[2]

    if etype >= 1000:  # not a "stimulus_id + condition" trial label
        continue

    stimulus_id, condition = decode_event_id(etype)
    if stimulus_id not in STIMULUS_IDS:
        continue

    if condition < 3:
        # Conditions 1 and 2: the trial label is expected to be followed by
        # an audio-onset marker.  The choice here is to NOT take the EEG that
        # corresponds to the cue beats, so the window starts at the audio
        # onset and runs up to the sample before the event after it.
        # To include the cue-beat EEG instead, use `trial_start = etime`.
        if i + 2 >= n_events:
            continue  # no following events left to delimit this trial
        next_event = event_trials[i + 1]
        if next_event[2] != AUDIO_ONSET_ID:
            # Without an audio onset there is no valid window; skip the trial
            # instead of silently reusing the previous trial's bounds.
            continue
        trial_start = next_event[0]
        trial_end = event_trials[i + 2][0] - 1
    else:
        # Conditions 3 and 4: window runs from the trial label itself up to
        # the sample before the next event.
        if i + 1 >= n_events:
            continue  # last event in the recording: nothing marks the end
        trial_start = etime
        trial_end = event_trials[i + 1][0] - 1

    # NOTE(review): the slice starts at `trial_start - 1`, i.e. it treats the
    # event sample numbers as 1-based.  If mne reports 0-based sample numbers
    # (offset by raw.first_samp) this window begins one sample early; confirm
    # before changing, since the saved ids below encode these bounds.
    temp_arr = raw_arr[:, trial_start - 1:trial_end]
    dataset.append(temp_arr)

    # eg '121_520_8140_P01'
    array_id = '_'.join([str(etype), str(trial_start), str(trial_end), subject])
    stimlist.append(array_id)

    print(event)
    # only for visual confirmation of algo/process
    print(temp_arr[:2,:5])
    print('=========================================')

540it [00:00, 4076.38it/s]

[512   0 121]
[[-0.00222756 -0.00222925 -0.00223471 -0.00223609 -0.00223771]
 [ 0.00100819  0.00100865  0.00100244  0.00099759  0.00099365]]
[8141    0  122]
[[-0.00230046 -0.00229584 -0.0022904  -0.00228718 -0.00228743]
 [ 0.00091015  0.00091353  0.00091831  0.00092222  0.00092922]]
[16517     0   123]
[[-0.00235656 -0.00234906 -0.00234006 -0.00232678 -0.00231925]
 [ 0.00087194  0.00088115  0.00088878  0.00090075  0.00091097]]
[25446     0    31]
[[-0.00240384 -0.00240465 -0.00240081 -0.00239121 -0.00238515]
 [ 0.00077209  0.0007765   0.00078653  0.00079631  0.0007974 ]]
[34388     0    32]
[[-0.00237428 -0.00237384 -0.00236318 -0.00235325 -0.00234731]
 [ 0.00082134  0.00082122  0.00083062  0.0008404   0.00084675]]
[43873     0    33]
[[-0.00235446 -0.00236981 -0.00237987 -0.00238453 -0.0023785 ]
 [ 0.00087762  0.00085075  0.0008364   0.00083378  0.0008415 ]]
[53915     0   231]
[[-0.00240359 -0.00240262 -0.00240125 -0.0024005  -0.00239475]
 [ 0.00082434  0.00082822  0.00083147  0.000




In [None]:
# some redundant confirmation of algo, before saving, cross-checking with above output

In [16]:
dataset[5][:2,:5]

array([[-0.00235446, -0.00236981, -0.00237987, -0.00238453, -0.0023785 ],
       [ 0.00087762,  0.00085075,  0.0008364 ,  0.00083378,  0.0008415 ]])

In [17]:
stimlist[5]

'33_43873_50032_P01'

In [None]:
# # note 
# changing filepath to
# '/home/archeron/dev/data/eeg_to_music_data/'

# from 
# "/home/archeron/dev/repos/eeg_to_music/data/"

# copy of file at previous file location still exists

In [18]:
# Destination path without extension (the load cell further down appends ".npz").
save_filepath = "/home/archeron/dev/repos/eeg_to_music/data/" + subject + "_extracted"
# Store every trial array in one .npz archive, keyed by its id string
# (stimlist and dataset are filled in lockstep, so they pair up one-to-one).
np.savez(save_filepath, **dict(zip(stimlist, dataset)))

## Load dataset from npz

In [1]:
! ls -lht /home/archeron/dev/data/eeg_to_music_data/

total 812M
drwxrwxr-x 2 archeron archeron 4.0K May 14 23:03 processed_wavs
drwxrwxr-x 2 archeron archeron 4.0K May 14 20:35 samples
drwxrwxr-x 2 archeron archeron 4.0K May 14 20:35 checkpoint
drwxrwxr-x 4 archeron archeron 4.0K May 14 15:35 eeg_audio
drwxr-xr-x 3 archeron archeron 4.0K May  6 19:11 LJSpeech-1.1
-rw-rw-r-- 1 archeron archeron 812M Apr 25 23:07 P01_extracted.npz


In [8]:
! ls -lht /home/archeron/dev/repos/eeg_to_music/data/

total 1.2G
-rw-rw-r-- 1 archeron archeron 812M Apr 25 23:07 P01_extracted.npz
-rw-rw-r-- 1 archeron archeron  71M Apr 25 21:53 first_runnnn.npz
-rw-rw-r-- 1 archeron archeron 315M Apr 25 20:10 first_run.json


In [11]:
! head -n 3 /home/archeron/dev/repos/eeg_to_music/data/first_run.json

{
    "stimulus_id": 12,
    "eeg_array": [


In [13]:
import numpy as np

In [None]:
# # note 
# changing filepath to
# '/home/archeron/dev/data/eeg_to_music_data/'

# from 
# "/home/archeron/dev/repos/eeg_to_music/data/"

# copy of file at previous file location still exists at 14/5/21 20:15

In [14]:
subject = "P01"
save_filepath = "/home/archeron/dev/data/eeg_to_music_data/" + subject + "_extracted"

loaded file size notes:

RAM
~ 5.03G before load
~ 5.05G after load

P01_extracted.npz filesize = 812M


In [15]:
# load the array

filepath_for_load = save_filepath + ".npz"
loaded_data = np.load(filepath_for_load)

In [17]:
type(loaded_data)

numpy.lib.npyio.NpzFile

In [16]:
type(loaded_data['33_43873_50032_P01'])

numpy.ndarray

In [5]:
(loaded_data['33_43873_50032_P01'])[:2,:5]

array([[-0.00235446, -0.00236981, -0.00237987, -0.00238453, -0.0023785 ],
       [ 0.00087762,  0.00085075,  0.0008364 ,  0.00083378,  0.0008415 ]])

In [38]:
loaded_data.files

['121_520_8140_P01',
 '122_8147_16516_P01',
 '123_16517_21565_P01',
 '31_25451_34387_P01',
 '32_34394_43872_P01',
 '33_43873_50032_P01',
 '231_53921_62597_P01',
 '232_62604_71860_P01',
 '233_71861_77788_P01',
 '131_81674_90082_P01',
 '132_90089_99226_P01',
 '133_99227_105044_P01',
 '141_108938_119459_P01',
 '142_119467_130600_P01',
 '143_130601_138413_P01',
 '111_142313_153103_P01',
 '112_153110_164414_P01',
 '113_164415_172398_P01',
 '11_176289_186557_P01',
 '12_186564_197571_P01',
 '13_197572_205257_P01',
 '21_209144_216757_P01',
 '22_216763_224987_P01',
 '23_224988_229892_P01',
 '41_233786_244045_P01',
 '42_244053_254878_P01',
 '43_254879_262384_P01',
 '211_266269_274148_P01',
 '212_274154_282780_P01',
 '213_282781_288086_P01',
 '221_291986_303834_P01',
 '222_303842_316433_P01',
 '223_316434_325704_P01',
 '241_329592_337469_P01',
 '242_337477_346058_P01',
 '243_346059_351321_P01',
 '221_363615_375462_P01',
 '222_375468_388061_P01',
 '223_388062_397333_P01',
 '41_401226_411485_P01',


In [44]:
print(type(loaded_data))
print(len(loaded_data.files))
print(type(loaded_data.files))
print((loaded_data.files)[10])

<class 'numpy.lib.npyio.NpzFile'>
240
<class 'list'>
132_90089_99226_P01


In [12]:
for cal in loaded_data:
    print(cal)
    break

121_520_8140_P01


In [53]:
# Iterate over the entries of the npz archive and show each array's shape.
# `loaded_data.files` is the list of entry names (see the cell above), and
# indexing the archive by name returns that entry's array.
for key in loaded_data.files:
    print(key)
    print(loaded_data[key].shape)

121_520_8140_P01
(69, 7621)
122_8147_16516_P01
(69, 8370)
123_16517_21565_P01
(69, 5049)
31_25451_34387_P01
(69, 8937)
32_34394_43872_P01
(69, 9479)
33_43873_50032_P01
(69, 6160)
231_53921_62597_P01
(69, 8677)
232_62604_71860_P01
(69, 9257)
233_71861_77788_P01
(69, 5928)
131_81674_90082_P01
(69, 8409)
132_90089_99226_P01
(69, 9138)
133_99227_105044_P01
(69, 5818)
141_108938_119459_P01
(69, 10522)
142_119467_130600_P01
(69, 11134)
143_130601_138413_P01
(69, 7813)
111_142313_153103_P01
(69, 10791)
112_153110_164414_P01
(69, 11305)
113_164415_172398_P01
(69, 7984)
11_176289_186557_P01
(69, 10269)
12_186564_197571_P01
(69, 11008)
13_197572_205257_P01
(69, 7686)
21_209144_216757_P01
(69, 7614)
22_216763_224987_P01
(69, 8225)
23_224988_229892_P01
(69, 4905)
41_233786_244045_P01
(69, 10260)
42_244053_254878_P01
(69, 10826)
43_254879_262384_P01
(69, 7506)
211_266269_274148_P01
(69, 7880)
212_274154_282780_P01
(69, 8627)
213_282781_288086_P01
(69, 5306)
221_291986_303834_P01
(69, 11849)
222_303

In [54]:
loaded_data['144_1906760_1906800_P01']

array([[-3.77093041e-04, -3.76249292e-04, -3.75749317e-04, ...,
        -3.73843046e-04, -3.74843053e-04, -3.71936803e-04],
       [-1.30496632e-03, -1.30274756e-03, -1.30624759e-03, ...,
        -1.30549764e-03, -1.30656005e-03, -1.30224756e-03],
       [ 7.72223576e-03,  7.72514195e-03,  7.72704836e-03, ...,
         7.73554808e-03,  7.73254829e-03,  7.73326681e-03],
       ...,
       [-9.80195065e-03, -9.79982537e-03, -9.80166939e-03, ...,
        -9.79904400e-03, -9.80695092e-03, -9.81110648e-03],
       [-5.08355319e-03, -5.08317833e-03, -5.08167797e-03, ...,
        -5.08505308e-03, -5.08939677e-03, -5.09086547e-03],
       [ 0.00000000e+00,  1.44000000e+02,  0.00000000e+00, ...,
         0.00000000e+00,  0.00000000e+00,  0.00000000e+00]])

In [61]:
loaded_data['144_1906760_1906800_P01'].astype(np.float32)

array([[-3.7709303e-04, -3.7624929e-04, -3.7574931e-04, ...,
        -3.7384304e-04, -3.7484305e-04, -3.7193680e-04],
       [-1.3049663e-03, -1.3027475e-03, -1.3062475e-03, ...,
        -1.3054976e-03, -1.3065600e-03, -1.3022475e-03],
       [ 7.7222358e-03,  7.7251419e-03,  7.7270484e-03, ...,
         7.7355481e-03,  7.7325483e-03,  7.7332668e-03],
       ...,
       [-9.8019503e-03, -9.7998250e-03, -9.8016690e-03, ...,
        -9.7990436e-03, -9.8069506e-03, -9.8111061e-03],
       [-5.0835530e-03, -5.0831782e-03, -5.0816778e-03, ...,
        -5.0850529e-03, -5.0893966e-03, -5.0908653e-03],
       [ 0.0000000e+00,  1.4400000e+02,  0.0000000e+00, ...,
         0.0000000e+00,  0.0000000e+00,  0.0000000e+00]], dtype=float32)

In [40]:
x = [x for x in range(1, 11)]

In [41]:
x

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [45]:
xx = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]

In [51]:
np.stack([np.pad(x, (0, 5), mode='constant', constant_values=0) for x in xx])

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  0,  0,  0,  0],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  0,  0,  0,  0],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  0,  0,  0,  0],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  0,  0,  0,  0]])

In [49]:
np.pad(xx, (0, 5), mode='constant', constant_values=0)

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  0,  0,  0,  0],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  0,  0,  0,  0],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  0,  0,  0,  0],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0]])

In [9]:
# redundant/convenience copy of decode_event_id, so that I don't need to re-run the dataset generation cells above

def decode_event_id(event_id):
    """Decode a trial event id into its stimulus id and condition.

    Ids below 1000 yield ``(stimulus_id, condition)``: the condition is the
    last decimal digit, the stimulus id is everything before it
    (eg. 113 -> (11, 3)).  Ids of 1000 and above are returned as-is, as a
    single value rather than a tuple.
    """
    if not event_id < 1000:
        return event_id

    # Plain division would give 12.1 for 121; int() keeps the stimulus id an
    # integer so it does not cause a key error when used for lookups.
    stimulus_id = int(event_id / 10)
    condition = event_id % 10
    return stimulus_id, condition

In [10]:
# Take an archive key apart; its layout is
# '<etype>_<trial_start>_<trial_end>_<subject>'.
data_id = '113_164415_172398_P01'
etype, trial_start, trial_end, subject = data_id.split('_')

# The pieces are still strings here; decode_event_id needs the integer id.
stimulus_id, condition = decode_event_id(int(etype))

print(etype, trial_start, trial_end, subject, stimulus_id, condition)

113 164415 172398 P01 11 3


In [None]:
# misc file manipulation/processing

In [48]:
! ls /home/archeron/dev/data/eeg_to_music_data/eeg_audio/

original_audio	processed


In [55]:
int('01')

1

In [12]:
# wav files

! ls /home/archeron/dev/data/eeg_to_music_data/eeg_audio/processed

'S01_Chim Chim Cheree_lyrics_processed.wav'
'S02_Take Me Out To The Ballgame_lyrics_processed.wav'
'S03_Jingle Bells_lyrics_processed.wav'
'S04_Mary Had A Little Lamb_lyrics_processed.wav'
'S11_Chim Chim Cheree_no lyrics_processed.wav'
'S12_Take Me Out To The Ballgame_no lyrics_processed.wav'
'S13_Jingle Bells_no lyrics_processed.wav'
'S14_Mary Had A Little Lamb_no lyrics_processed.wav'
 S21_EmperorWaltz_processed.wav
'S22_Harry Potter Theme_processed.wav'
'S23_Star Wars Theme_processed.wav'
'S24_Eine kleine Nachtmusic_processed.wav'


In [14]:
wav_files = ['S01_Chim Chim Cheree_lyrics_processed.wav',
'S02_Take Me Out To The Ballgame_lyrics_processed.wav',
'S03_Jingle Bells_lyrics_processed.wav',
'S04_Mary Had A Little Lamb_lyrics_processed.wav',
'S11_Chim Chim Cheree_no lyrics_processed.wav',
'S12_Take Me Out To The Ballgame_no lyrics_processed.wav',
'S13_Jingle Bells_no lyrics_processed.wav',
'S14_Mary Had A Little Lamb_no lyrics_processed.wav',
'S21_EmperorWaltz_processed.wav',
'S22_Harry Potter Theme_processed.wav',
'S23_Star Wars Theme_processed.wav',
'S24_Eine kleine Nachtmusic_processed.wav']


In [15]:
wav_files = [file.replace(' ', '_') for file in wav_files]

In [16]:
wav_files

['S01_Chim_Chim_Cheree_lyrics_processed.wav',
 'S02_Take_Me_Out_To_The_Ballgame_lyrics_processed.wav',
 'S03_Jingle_Bells_lyrics_processed.wav',
 'S04_Mary_Had_A_Little_Lamb_lyrics_processed.wav',
 'S11_Chim_Chim_Cheree_no_lyrics_processed.wav',
 'S12_Take_Me_Out_To_The_Ballgame_no_lyrics_processed.wav',
 'S13_Jingle_Bells_no_lyrics_processed.wav',
 'S14_Mary_Had_A_Little_Lamb_no_lyrics_processed.wav',
 'S21_EmperorWaltz_processed.wav',
 'S22_Harry_Potter_Theme_processed.wav',
 'S23_Star_Wars_Theme_processed.wav',
 'S24_Eine_kleine_Nachtmusic_processed.wav']

In [18]:
STIMULUS_IDS = [1, 2, 3, 4, 11, 12, 13, 14, 21, 22, 23, 24]

In [21]:
stim_event_dict = dict(zip(STIMULUS_IDS, wav_files))

In [22]:
stim_event_dict

{1: 'S01_Chim_Chim_Cheree_lyrics_processed.wav',
 2: 'S02_Take_Me_Out_To_The_Ballgame_lyrics_processed.wav',
 3: 'S03_Jingle_Bells_lyrics_processed.wav',
 4: 'S04_Mary_Had_A_Little_Lamb_lyrics_processed.wav',
 11: 'S11_Chim_Chim_Cheree_no_lyrics_processed.wav',
 12: 'S12_Take_Me_Out_To_The_Ballgame_no_lyrics_processed.wav',
 13: 'S13_Jingle_Bells_no_lyrics_processed.wav',
 14: 'S14_Mary_Had_A_Little_Lamb_no_lyrics_processed.wav',
 21: 'S21_EmperorWaltz_processed.wav',
 22: 'S22_Harry_Potter_Theme_processed.wav',
 23: 'S23_Star_Wars_Theme_processed.wav',
 24: 'S24_Eine_kleine_Nachtmusic_processed.wav'}

next

load npz file,

process the file id

get the stim id

match with the event dict ^ to get the wav file path

In [45]:
sfxgdftim_event_dict = {1: 'S01_Chim_Chim_Cheree_lyrics_processed.wav',
    2: 'S02_Take_Me_Out_To_The_Ballgame_lyrics_processed.wav',
    3: 'S03_Jingle_Bells_lyrics_processed.wav',
    4: 'S04_Mary_Had_A_Little_Lamb_lyrics_processed.wav',
    11: 'S11_Chim_Chim_Cheree_no_lyrics_processed.wav',
    12: 'S12_Take_Me_Out_To_The_Ballgame_no_lyrics_processed.wav',
    13: 'S13_Jingle_Bells_no_lyrics_processed.wav',
    14: 'S14_Mary_Had_A_Little_Lamb_no_lyrics_processed.wav',
    21: 'S21_EmperorWaltz_processed.wav',
    22: 'S22_Harry_Potter_Theme_processed.wav',
    23: 'S23_Star_Wars_Theme_processed.wav',
    24: 'S24_Eine_kleine_Nachtmusic_processed.wav'
}

In [46]:
sfxgdftim_event_dict[13]

'S13_Jingle_Bells_no_lyrics_processed.wav'

In [49]:
import os

In [None]:
datapath = '/home/archeron/dev/data/eeg_to_music_data'
os

In [None]:
! ls

In [None]:
eeg_array = 

In [None]:
wav_name = 

In [1]:
import mne
import numpy as np
import sys
import xlrd
import os

In [2]:
STIMULI_META_XLSX_PATH = "/home/archeron/dev/repos/eeg_to_music/openmiir-master/meta/Stimuli_Meta.v1.xlsx"
BEATS_PATH_ROOT = "/home/archeron/dev/repos/eeg_to_music/openmiir-master/meta/beats.v1" 

STIMULUS_IDS = [1, 2, 3, 4, 11, 12, 13, 14, 21, 22, 23, 24]
DEFAULT_VERSION = 1

stimuli_version = 1

In [None]:
# read raw 
raw_fif_path_p01 = "/home/archeron/dev/data/OpenMIIR-RawEEG_v1/P01-raw.fif"
raw_eeg = mne.io.Raw(raw_fif_path_p01, preload=True, verbose=True)

In [None]:
## main function definitions cell


# helper func for printing event counts
def print_event_type_counts(event_types, decoder=None):
    """Print one ``label: count`` line per distinct value in ``event_types``.

    The distinct values and how often each occurs come from ``np.unique``.
    When ``decoder`` is given, it is called on each distinct value and its
    result is printed as the label instead of the raw value.
    """
    unique_types, occurrences = np.unique(event_types, return_counts=True)
    for event_type, count in zip(unique_types, occurrences):
        label = event_type if decoder is None else decoder(event_type)
        print('{}: {}'.format(label, count))

def decode_event_id(event_id):
    """Return ``(stimulus_id, condition)`` for trial ids, else the id itself.

    Trial ids are those below 1000: their last decimal digit is the
    condition and the leading digits are the stimulus id
    (eg. 121 -> (12, 1)).  Ids of 1000 and above come back unchanged as a
    single value, not a tuple.
    """
    # int() truncates the true division (121 / 10 == 12.1) so that the
    # stimulus id is an integer usable as a dictionary key.
    return (int(event_id / 10), event_id % 10) if event_id < 1000 else event_id

# Base id of the feedback ("keystroke") events.  The event table in this
# notebook lists 'Feedback No' as 2000 and 'Feedback Yes' as 2001.
KEYSTROKE_BASE_ID = 2000


def get_event_string(event_id):
    """Return a human-readable label for an event id.

    Event ids < 1000 are trial labels, with the last digit indicating the
    condition
        1 : 'perception',
        2 : 'cued imag',
        3 : 'imag fix cross',
        4 : 'imagination',
    and the remaining digits referring to the stimulus id.  Ids of 1000 and
    above are looked up in a fixed table of marker names.

    Raises:
        KeyError: if ``event_id`` is >= 1000 and not one of the known markers.
    """
    if event_id < 1000:
        stimulus_id, condition = decode_event_id(event_id)
        return 'stimulus {}, condition {}'.format(stimulus_id, condition)

    marker_names = {
        1000: 'audio onset',
        1111: 'noise',
        KEYSTROKE_BASE_ID: 'imagination failed',
        KEYSTROKE_BASE_ID + 1: 'imagination okay',
    }
    return marker_names[event_id]

def default_beat_event_id_generator(stimulus_id, condition, cue, beat_count):
    """Build a beat event id that encodes where the beat belongs.

    The id is ``100000 + stimulus_id * 1000 + condition * 100 + c + beat_count``
    where ``c`` is 0 when ``cue`` is truthy and 10 otherwise.
    """
    cue_term = 0 if cue else 10
    return 100000 + stimulus_id * 1000 + condition * 100 + cue_term + beat_count

def simple_beat_event_id_generator(stimulus_id, condition, cue, beat_count):
    """Return the same id, 10000, for every beat; all arguments are ignored."""
    single_beat_id = 10000
    return single_beat_id

def generate_beat_events(trial_events,                  # base events as stored in raw fif files
                         include_cue_beats=True,        # generate events for cue beats as well?
                         use_audio_onset=True,          # use the more precise audio onset marker (code 1000) if present
                         exclude_stimulus_ids=(),
                         exclude_condition_ids=(),
                         beat_event_id_generator=default_beat_event_id_generator,
                         sr=512.0,                      # sample rate, correct value important to compute event frames
                         verbose=True,
                         version=None):
    """Derive one event per beat from the trial events of a recording.

    For every trial label (event id < 1000) in ``trial_events`` the beat
    times of its stimulus are converted to sample positions relative to the
    trial start, and one ``[sample, 0, event_id]`` row is emitted per beat.

    Args:
        trial_events: sequence of ``[sample, _, event_id]`` rows.
        include_cue_beats: also emit events for the cue beats of trials with
            condition < 3.
        use_audio_onset: for condition < 3, start the trial at the following
            event if that event has id 1000, instead of at the trial label.
        exclude_stimulus_ids: stimulus ids whose trials are skipped.
        exclude_condition_ids: conditions whose trials are skipped.
        beat_event_id_generator: callable
            ``(stimulus_id, condition, cue, beat_count) -> event id``.
        sr: sample rate used to turn beat times into sample offsets.
        verbose: print progress information while generating.
        version: forwarded to the stimuli metadata loaders.

    Returns:
        Integer numpy array built from the list of ``[sample, 0, event_id]``
        rows (empty if no beat events were generated).
    """

    ## prepare return value
    beat_events = []

    ## get stimuli meta information
    meta = load_stimuli_metadata_map(version=version)
    beats = load_stimuli_metadata_map('beats', verbose=verbose, version=version)

    if include_cue_beats:
        # Same version/verbosity as the regular beats, for consistency.
        cue_beats = load_stimuli_metadata_map('cue_beats', verbose=verbose, version=version)

        ## determine the number of cue beats
        num_cue_beats = dict()
        for stimulus_id in STIMULUS_IDS:
            num_cue_beats[stimulus_id] = \
                meta[stimulus_id]['beats_per_bar'] * meta[stimulus_id]['cue_bars']
        if verbose:
            print("num_cue_beats:  ", num_cue_beats)

    ## helper function to add a single beat event
    def add_beat_event(etime, stimulus_id, condition, beat_count, cue=False):
        etype = beat_event_id_generator(stimulus_id, condition, cue, beat_count)
        beat_events.append([etime, 0, etype])
        if verbose:
            print("beat_events[-1]:  ", beat_events[-1])

    ## helper function to add a batch of beat events
    def add_beat_events(etimes, stimulus_id, condition, cue=False):
        beats_per_bar = meta[stimulus_id]['beats_per_bar']
        for beat_index, etime in enumerate(etimes):
            # beat_count cycles 1..beats_per_bar within each bar
            beat_count = (beat_index % beats_per_bar) + 1
            add_beat_event(etime, stimulus_id, condition, beat_count, cue)

    num_trial_events = len(trial_events)

    for i, event in enumerate(trial_events):
        etype = event[2]
        etime = event[0]

        if verbose:
            print("etype at etime:")
            print('{:4d} at {:8d}'.format(etype, etime))

        if etype >= 1000: # stimulus_id + condition
            continue

        stimulus_id, condition = decode_event_id(etype)
        if verbose:
            print("stimulus_id, condition:  ", stimulus_id, condition)

        if stimulus_id in exclude_stimulus_ids or condition in exclude_condition_ids:
            continue  # skip excluded

        trial_start = etime # default: use trial onset
        # Note: conditions 3 and 4 have no audio cues.  The bounds check keeps
        # a trial label that is the very last event from raising IndexError;
        # it then simply falls back to the trial onset.
        if use_audio_onset and condition < 3 and i + 1 < num_trial_events:
            next_event = trial_events[i + 1]
            if next_event[2] == 1000: # only use if audio onset
                trial_start = next_event[0]

        if verbose:
            print('Trial starts at {}'.format(trial_start))

        if condition < 3: # cued
            # the regular beats start after the cue, so shift them by its length
            offset = sr * meta[stimulus_id]['length_of_cue']

            if include_cue_beats:
                cue_beat_times = trial_start + np.floor(sr * cue_beats[stimulus_id])
                cue_beat_times = cue_beat_times[:num_cue_beats[stimulus_id]]  # truncate at num_cue_beats
                cue_beat_times = np.asarray(cue_beat_times, dtype=int)
                if verbose:
                    print("cue_beat_times:    ", cue_beat_times)
                add_beat_events(cue_beat_times, stimulus_id, condition, cue=True)
        else:
            offset = 0 # no cue

        beat_times = trial_start + offset + np.floor(sr * beats[stimulus_id])
        beat_times = np.asarray(beat_times, dtype=int)
        if verbose:
            print("beat_times    ", beat_times[:5], '...')
        add_beat_events(beat_times, stimulus_id, condition)

    beat_events = np.asarray(beat_events, dtype=int)

    return beat_events

def load_stimuli_metadata_map(key=None, data_root=None, verbose=None, version=None):
    """Return stimulus metadata as a dict keyed by stimulus id.

    Args:
        key: which piece of metadata to return.
            ``None``       -> the full metadata dict of every stimulus,
            ``'beats'``    -> the beat times of every stimulus,
            ``'cue_beats'``-> the cue beat times of every stimulus,
            anything else  -> that single metadata field of every stimulus.
        data_root: forwarded to the underlying loaders.
        verbose: forwarded to the underlying loaders.
        version: metadata version; ``DEFAULT_VERSION`` when ``None``.

    Returns:
        dict mapping each id in ``STIMULUS_IDS`` to the requested value
        (for ``key=None``, whatever ``load_stimuli_metadata`` returns).
    """
    if version is None:
        version = DEFAULT_VERSION

    # Beat times live in per-stimulus text files rather than the spreadsheet;
    # 'cue_beats' is the same lookup with the cue flag set.
    if key in ('beats', 'cue_beats'):
        load_cue = key == 'cue_beats'
        beat_times_by_stimulus = dict()
        for stimulus_id in STIMULUS_IDS:
            beat_times_by_stimulus[stimulus_id] = load_beat_times(stimulus_id,
                                                                  cue=load_cue,
                                                                  data_root=data_root,
                                                                  verbose=verbose,
                                                                  version=version)
        return beat_times_by_stimulus

    meta = load_stimuli_metadata(data_root, version=version, verbose=verbose)

    if key is None:
        return meta  # return everything

    return {stimulus_id: meta[stimulus_id][key] for stimulus_id in STIMULUS_IDS}

def load_stimuli_metadata(data_root=None, version=None, verbose=None):
    """Read the stimuli metadata spreadsheet into a dict keyed by stimulus id.

    The workbook at ``STIMULI_META_XLSX_PATH`` is opened with xlrd and rows
    1 to 12 of its first sheet are read, one stimulus per row.  Text fields
    (label, audio file, cue file) are stored ASCII-encoded; the cue file name
    is the audio file name with '.wav' replaced by '_cue.wav'.

    Args:
        data_root: accepted but not used; the path comes from the constant.
        version: metadata version, ``DEFAULT_VERSION`` when ``None``.  For
            version 2 each stimulus' 'bpm' is overwritten with its 'cue_bpm'.
        verbose: print the path of the spreadsheet that was loaded.

    Returns:
        dict mapping stimulus id -> dict of metadata fields.
    """
    version = DEFAULT_VERSION if version is None else version
    data_root = "" if data_root is None else data_root  # not used below

    xlsx_filepath = STIMULI_META_XLSX_PATH
    workbook = xlrd.open_workbook(xlsx_filepath, encoding_override="cp1252")
    sheet = workbook.sheet_by_index(0)

    if verbose:
        print('Loading stimulus metadata from {}'.format(xlsx_filepath))

    def cell_value(row, col):
        return sheet.cell(row, col).value

    meta = dict()
    for row in range(1, 13):
        stimulus_id = int(cell_value(row, 0))
        entry = {
            'id' : stimulus_id,
            'label' : cell_value(row, 1).encode('ascii'),
            'audio_file' : cell_value(row, 2).encode('ascii'),
            'cue_file' : cell_value(row, 2).replace('.wav', '_cue.wav').encode('ascii'),
            'length_with_cue' : cell_value(row, 3),
            'length_of_cue' : cell_value(row, 4),
            'length_without_cue' : cell_value(row, 5),
            'length_of_cue_only' : cell_value(row, 6),
            'cue_bpm' : int(cell_value(row, 7)),
            'beats_per_bar' : int(cell_value(row, 8)),
            'num_bars' : int(cell_value(row, 14)),
            'cue_bars' : int(cell_value(row, 15)),
            'bpm' : int(cell_value(row, 16)),
            'approx_bar_length' : cell_value(row, 11),
        }

        if version == 2:
            entry['bpm'] = entry['cue_bpm'] # use cue bpm

        meta[stimulus_id] = entry

    return meta


def load_beat_times(stimulus_id, cue=False, data_root=None, verbose=None, version=None):
    """Read the beat times of one stimulus from its text file.

    The file is ``<BEATS_PATH_ROOT>/<stimulus_id>_beats.txt``, or
    ``<stimulus_id>_cue_beats.txt`` when ``cue`` is true.  Each line holds
    one float; lines starting with '#' and blank lines are ignored.

    Args:
        stimulus_id: id used to build the file name.
        cue: read the cue beats file instead of the regular beats file.
        data_root: accepted for signature compatibility; the directory is
            taken from ``BEATS_PATH_ROOT``.
        verbose: print how many beat times were read and from where.
        version: accepted for signature compatibility; not used here.

    Returns:
        1-D numpy array with the beat times in file order.

    Raises:
        ValueError: if a non-comment, non-blank line is not a valid float.
    """
    filename_template = '{}_cue_beats.txt' if cue else '{}_beats.txt'
    beats_filepath = os.path.join(BEATS_PATH_ROOT,
                                  filename_template.format(stimulus_id))

    beats = []
    with open(beats_filepath, 'r') as f:
        for line in f:
            entry = line.strip()
            # Blank lines (e.g. a trailing newline) carry no value; feeding
            # them to float() would raise, so skip them like comments.
            if not entry or entry.startswith('#'):
                continue
            beats.append(float(entry))
    beats = np.asarray(beats)

    if verbose:
        print('Read {} beat times from {}'.format(len(beats), beats_filepath))

    return beats


In [None]:
event_dict = {
    'noise': 1111,
    'Chim Chim Cheree with lyrics, perception': 11,
    'Take Me Out To The Ballgame with lyrics, perception': 21,
    'Jingle Bells with lyrics, perception': 31,
    'Mary Had A Little Lamb with lyrics, perception': 41,
    'Chim Chim Cheree without lyrics, perception': 111,
    'Take Me Out To The Ballgame without lyrics, perception': 121,
    'Jingle Bells without lyrics, perception': 131,
    'Mary Had A Little Lamb without lyrics, perception': 141,
    'Emperor Waltz, perception': 211,
    'Harry Potter Theme, perception': 221,
    'Star Wars Theme, perception': 231,
    'Eine kleine Nachtmusik, perception': 241,
    'Chim Chim Cheree with lyrics, cued_imagination': 12,
    'Take Me Out To The Ballgame with lyrics, cued_imagination': 22,
    'Jingle Bells with lyrics, cued_imagination': 32,
    'Mary Had A Little Lamb with lyrics, cued_imagination': 42,
    'Chim Chim Cheree without lyrics, cued_imagination': 112,
    'Take Me Out To The Ballgame without lyrics, cued_imagination': 122,
    'Jingle Bells without lyrics, cued_imagination': 132,
    'Mary Had A Little Lamb without lyrics, cued_imagination': 142,
    'Emperor Waltz, cued_imagination': 212,
    'Harry Potter Theme, cued_imagination': 222,
    'Star Wars Theme, cued_imagination': 232,
    'Eine kleine Nachtmusik, cued_imagination': 242,
    'Chim Chim Cheree with lyrics, uncued_imagination': 13,
    'Take Me Out To The Ballgame with lyrics, uncued_imagination': 23,
    'Jingle Bells with lyrics, uncued_imagination': 33,
    'Mary Had A Little Lamb with lyrics, uncued_imagination': 43,
    'Chim Chim Cheree without lyrics, uncued_imagination': 113,
    'Take Me Out To The Ballgame without lyrics, uncued_imagination': 123,
    'Jingle Bells without lyrics, uncued_imagination': 133,
    'Mary Had A Little Lamb without lyrics, uncued_imagination': 143,
    'Emperor Waltz, uncued_imagination': 213,
    'Harry Potter Theme, uncued_imagination': 223,
    'Star Wars Theme, uncued_imagination': 233,
    'Eine kleine Nachtmusik, uncued_imagination': 243,
    'Chim Chim Cheree with lyrics, imagination_feedback': 14,
    'Take Me Out To The Ballgame with lyrics, imagination_feedback': 24,
    'Jingle Bells with lyrics, imagination_feedback': 34,
    'Mary Had A Little Lamb with lyrics, imagination_feedback': 44,
    'Chim Chim Cheree without lyrics, imagination_feedback': 114,
    'Take Me Out To The Ballgame without lyrics, imagination_feedback': 124,
    'Jingle Bells without lyrics, imagination_feedback': 134,
    'Mary Had A Little Lamb without lyrics, imagination_feedback': 144,
    'Emperor Waltz, imagination_feedback': 214,
    'Harry Potter Theme, imagination_feedback': 224,
    'Star Wars Theme, imagination_feedback': 234,
    'Eine kleine Nachtmusik, imagination_feedback': 244,
    'Audio Onset': 1000,
    'Feedback No': 2000,
    'Feedback Yes': 2001
}

In [None]:
# extract events
events = mne.find_events(raw_eeg, stim_channel='STI 014')

In [None]:
beat_events = generate_beat_events(events,
                                   version=stimuli_version,
                                   beat_event_id_generator=simple_beat_event_id_generator,
                                   verbose=True)

In [None]:
# Channel selection and epoch window used for the beat epochs below.
# Keep EEG, EOG and stim channels; exclude=[] means no channel is dropped here.
picks = mne.pick_types(raw_eeg.info, meg=False, eeg=True, eog=True, stim=True, exclude=[])
# NOTE(review): beat events generated with simple_beat_event_id_generator all
# carry id 10000, which is not among the values of event_dict - confirm that
# this mapping is the one intended for the beat epochs.
event_id = event_dict
tmin = -0.2  # start of each epoch (200ms before the trigger)
tmax = 0.8  # end of each epoch (800ms after the trigger) - longest beat is 0.57s long
detrend = 0 # remove dc; NOTE(review): not passed to mne.Epochs in the next cell
# reject = dict(eog=250e-6) # TODO: optionally reject epochs

In [None]:
print("beat epoching starts")

# Cut the continuous recording into epochs around every generated beat event.
# - The recording loaded in this part of the notebook is `raw_eeg`; no
#   variable named `raw` is defined.
# - event_id=None selects every id present in beat_events.  The beat events
#   built with simple_beat_event_id_generator all carry id 10000, which does
#   not appear in event_dict, so that mapping cannot select them.
beat_epochs = mne.Epochs(raw_eeg, beat_events, event_id=None,
                         tmin=tmin, tmax=tmax, preload=True,
                         proj=False, picks=picks, verbose=True)
# print(beat_epochs)

In [None]:
del raw_eeg