# Convert EDF to expipe files
The visual EEG inspection table 'eeg-sichtung.xlsx' has been manually translated into a form that is easier to parse automatically.
The table is located in the new file 'data_annotation.ods'.
It contains one row per day an animal is recorded.
Relevant metadata such as animal id, recording channel, artifacts or missing data are stored in separate columns.

This notebook loads the original EDF files sequentially and converts the recordings to the expipe format [1].
Here, each animal gets its own action, and every day on which the animal was recorded is stored as a separate recording within that action.

[1] https://github.com/CINPLA/expipe

In [1]:
import numpy as np
import pandas as pd
from tempfile import mkdtemp
import zipfile
import shutil
from tqdm import tqdm
import expipe
import pyedflib
import datetime
import os
import tools
from format_data_annotation import dict_format
import pdb

In [2]:
# Directory of the annotation table. It is concatenated directly with `fname`
# (no separator is inserted), so a non-empty value must end with '/'.
# The empty string resolves the file relative to the working directory.
path = ''
#path = '/home/stoeber/EpimiRNA_ExpipeConversion/'
# File name of the annotation table (an OpenDocument spreadsheet)
fname = 'data_annotation.ods'

In [3]:
# Info about the EEG files
path_edf = "/home/jovyan/work/data/PPS_data_raw/"
#path_edf = "/media/tristan/data/experiments/PPS_data_expipe/"

#path_expipe = "/home/jovyan/work/data_expipe/epimirna"
path_expipe = "/home/jovyan/work/data/epimirna/"

# decide whether existing data should be rebuild or not
bool_keepexisting = False

### Read data annotation

In [4]:
# Load the annotation table; column dtypes come from the `dict_format`
# specification imported from format_data_annotation.
t = pd.read_excel(
    path + fname,
    dtype=dict_format,
    engine="odf",
)
#t = pd.read_excel(path+fname, engine="odf")

In [5]:
# Display the distinct animal IDs found in the annotation table
t.loc[:, 'ID'].unique()

array([ 1218,  1227,  1237,  1243,  1251,  1254,  1257,  1260,  1261,
        1262,  1263,  1264,  1265,  1269,  1270,  1271,  1274,  1275,
        1276,   322,   323,   324,   325,   326,   328,   329,  3210,
        3211,  3212,  3215,  3216,  3218, 32220,  3222, 32223,  3224,
        3226,  3227,  3228,  3231,  3232,  3233,  3235,  3237,  3238,
        3239,  3240,  3241,  3242,  3243,  3244,  3245,  3246,  3247,
        3248,  3251,  3253,  3254,  3256,  3258,  3259,  3260,  3261,
        3262,  3263,  3266,  3267,  3268,  3269,  3270,  3271,  3272,
        3273,  3274,  3275,  3276,  3277,  3278,  3279,  3280,  3281,
        3282,  3283,  3284,  3285])

### Create Expipe project and store metadata for each animal

In [6]:
# Distinct animal IDs, in order of first appearance in the table
animals = pd.unique(t['ID'])
print(animals)

[ 1218  1227  1237  1243  1251  1254  1257  1260  1261  1262  1263  1264
  1265  1269  1270  1271  1274  1275  1276   322   323   324   325   326
   328   329  3210  3211  3212  3215  3216  3218 32220  3222 32223  3224
  3226  3227  3228  3231  3232  3233  3235  3237  3238  3239  3240  3241
  3242  3243  3244  3245  3246  3247  3248  3251  3253  3254  3256  3258
  3259  3260  3261  3262  3263  3266  3267  3268  3269  3270  3271  3272
  3273  3274  3275  3276  3277  3278  3279  3280  3281  3282  3283  3284
  3285]


In [7]:
# Open (or create) the expipe project that receives all converted data
project = expipe.require_project(path_expipe)

# File name under which each animal's annotation sub-table is pickled;
# it is the same for every action, so it is defined once up front.
fname_annot = 'df_data_annotation.pckl'

for animal_i in animals:
    animal_name = str(animal_i)

    # Rows of the annotation table that belong to this animal
    t_i = t[t['ID'] == animal_i]

    # Register the animal as an entity; the first channel/machine value
    # found in its rows is stored as the entity's info
    entity_i = project.require_entity(animal_name)
    entity_info = {
        'channel': t_i['Channel'].unique()[0],
        'machine': t_i['Machine'].unique()[0],
    }
    entity_i.modules['info'] = entity_info

    # One action per animal, named 'record_<ID>'
    name_action_i = 'record_' + animal_name
    action_i = project.require_action(name_action_i)
    action_i.type = "eeg recording"
    action_i.entities = [animal_name]

    # Pickle the animal's sub-table into the action's data folder and
    # register the file in the action's data
    t_i.to_pickle(action_i.data_path() / fname_annot)
    action_i.data['df_data_annotation'] = fname_annot

### Add concise group names for the whole epimirna dataset

In [8]:
# Mapping used below to translate the 'group' label of each animal into a
# concise group name; printed here for reference.
from annotation_short import annotation as group_names_epimirna
print(group_names_epimirna)

{'1': 'FF_EPG_5Hz', '1_noPPS': 'FF_EPG_5Hz_noPPS', '2': 'FF_EPG_130Hz', '3': 'PP_EPG_5Hz', '4': 'PP_EPG_130Hz', '5': 'DG_EPG_5Hz', '6': 'DG_EPG_130Hz', '8': 'FF_ME_5Hz', '9': 'FF_ME_130Hz', '10': 'PP_EPG_5Hz', '11': 'PP_EPG_130Hz', '12': 'DG_EPG_5Hz', '13': 'DG_EPG_130Hz', '14': 'FF_noDBS', '14_noPPS': 'FF_noDBS_noPPS', '15': 'VHC_noDBS', '16': 'noDBS', 'pps_only': 'noDBS', 'vhc_epilep_1 Hz': 'VHC_EPG_1Hz', 'vhc_epilep_HF': 'VHC_EPG_130Hz', 'vhc_epilep_LF (5 Hz)': 'VHC_EPG_5Hz', 'vhc_epileptog_LF': 'VHC_EPG_?Hz', 'vhc_mE_Kontrolle': 'VHC_noDBS', 'vhc_mE_LF (1 Hz)': 'VHC_ME_1Hz', 'vhc_mE_LF (1Hz)': 'VHC_ME_1Hz', 'vhc_mE_LF (5 Hz)': 'VHC_ME_5Hz', 'vhc_mE_LF (5Hz)': 'VHC_ME_5Hz'}


In [9]:
# Add a 'group_epimirna' column with the concise group name to the pickled
# annotation sub-table of every animal.
for animal_i in animals:
    action_i = project.require_action('record_' + str(animal_i))
    annot_file = action_i.data_path().joinpath(fname_annot)

    # load the sub-table stored for this animal
    t_i = pd.read_pickle(annot_file)

    # every animal must belong to exactly one group
    groups_found = t_i['group'].unique()
    assert len(groups_found) == 1
    group_i = groups_found[0]

    # translate to the group name used consistently across the whole
    # epimirna data set
    t_i['group_epimirna'] = group_names_epimirna[group_i]

    # write the extended sub-table back to the same file
    t_i.to_pickle(annot_file)
    
    

### Load EEG files and store them as actions

#### Exclude entries with overlapping recordings


In [10]:
# Drop every row flagged with 'x' in the overlap-conflict column
has_no_overlap = t['Conflict overlap recording'] != 'x'
t = t[has_no_overlap]

#### Test consistency of leftover table

In [11]:
# Keep only rows with no entry in either 'Device off' or 'Missing data'
device_on = t['Device off'].isna()
data_present = t['Missing data'].isna()
t = t[device_on & data_present]

# Test consistency of the table for data extraction
tools.test_consistency_of_annotation_table(t)

In [12]:
# Drop rows whose 'Missing data' column is flagged with 'x'
t = t[t['Missing data'].ne('x')]

In [13]:
# Convert the raw EDF recordings to expipe data.
#
# The annotation table is processed one (date, machine) pair at a time: the
# matching zip archive is unpacked into a temporary folder, the EDF file is
# opened, and every signal whose label equals a channel listed for that day is
# stored in the action of the corresponding animal (metadata as a module, the
# signal itself as a compressed .npz file).  Missing archives / EDF files are
# reported and skipped.  The EDF reader and the temporary folder are released
# in `finally` clauses so that they are cleaned up even if an iteration fails.
#
# NOTE: `fname` is reassigned here and therefore no longer holds the name of
# the annotation file after this cell has run.

# Open EDF files by date and machine
grp = t.groupby(['Date', 'Machine'])
for (date_i, machine_i), grp_i in tqdm(grp):

    # unzip file to temporary folder
    date_str = date_i.date().strftime('%Y-%m-%d')
    fname = "EDF_" + machine_i + '_' + date_str
    fname_zip = fname + '.zip'
    tmpdir = mkdtemp()
    try:
        try:
            with zipfile.ZipFile(path_edf + fname_zip, 'r') as zip_ref:
                zip_ref.extractall(tmpdir)
        except FileNotFoundError as err:
            # archive for this day does not exist: report and skip
            print(err)
            continue

        # load recordings
        fname_edf = fname + '.edf'
        try:
            f = pyedflib.EdfReader(tmpdir + '/' + fname_edf)
        except OSError as err:
            # The reader was never created, so there is nothing to close
            # here (closing `f` would hit an undefined or stale reader).
            print("OS error: {0}".format(err))
            print(fname_edf + ' not found')
            continue

        try:
            # extract information from recording
            n = f.signals_in_file
            start_time = f.getStartdatetime()
            signal_labels = f.getSignalLabels()
            nsamples = f.getNSamples()
            # all signals in the file must have the same number of samples
            assert len(np.unique(nsamples)) == 1
            nsamples = np.unique(nsamples)[0]

            # associate recording to animal entity
            for pos_i, label_i in enumerate(signal_labels):
                # annotation rows of this day recorded on this channel
                t_ii = grp_i[grp_i['Channel'] == int(label_i)]
                if t_ii.empty:
                    # no animal is annotated for this channel
                    continue

                animal_id = t_ii['ID'].unique()
                assert len(animal_id) == 1
                animal_id = str(animal_id[0])

                # get corresponding action
                action_i = project.require_action('record_' + animal_id)

                name_rec = 'eeg_recording_' + date_str
                # check if recording already exists
                if name_rec in list(action_i.data) and bool_keepexisting:
                    print('Skipped file. Already present: ' + name_rec)
                    continue

                # Get all information from table except date
                info = t_ii.to_dict('records')[0]

                # date is removed here, it is stored in a different place
                del info['Date']

                # Convert datetime.time entries to strings.
                # NOTE(review): a datetime.time carries no date, so the
                # "%d/%m/%Y" part is filled with the placeholder 01/01/1900.
                # The format is kept unchanged because existing readers of
                # the stored metadata may rely on it.
                for k, v in info.items():
                    if isinstance(v, datetime.time):
                        info[k] = v.strftime("%d/%m/%Y, %H:%M:%S")

                sampling_freq = f.getSampleFrequency(pos_i)
                info['sampling_freq'] = sampling_freq
                info['nsamples'] = nsamples
                info['duration'] = nsamples*1./sampling_freq
                info['dimension'] = f.getPhysicalDimension(pos_i)
                info['starttime'] = start_time.strftime("%d/%m/%Y, %H:%M:%S")
                #info['annotations'] = list(t_ii.to_dict('index').values())

                mod_info = action_i.require_module(name=name_rec+'_info', contents=info)

                # link file in expipe
                action_i.data[name_rec] = name_rec + '.npz'

                # store recordings in separate file
                sig_ii = f.readSignal(pos_i)
                action_data_path = str(action_i.data_path(name_rec))
                np.savez_compressed(action_data_path, eeg=sig_ii)
        finally:
            # close EDF file
            f.close()
    finally:
        # delete temporary folder
        shutil.rmtree(tmpdir)

  3%|█▌                                                             | 26/1023 [05:01<2:28:14,  8.92s/it]

[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2014-08-16.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2014-08-17.zip'


  4%|██▊                                                            | 46/1023 [08:29<3:09:05, 11.61s/it]

[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2014-09-05.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2014-09-06.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2014-09-07.zip'


  7%|████▍                                                          | 73/1023 [18:19<6:29:23, 24.59s/it]

OS error: /tmp/tmprm5i575m/EDF_yolo_2014-10-01.edf: can not open file, no such file or directory
EDF_yolo_2014-10-01.edf not found


 16%|█████████▎                                                  | 159/1023 [1:10:50<8:21:18, 34.81s/it]

OS error: /tmp/tmpaakconro/EDF_yolo_2015-01-09.edf: can not open file, no such file or directory
EDF_yolo_2015-01-09.edf not found


 19%|███████████▌                                                | 197/1023 [1:37:22<6:21:25, 27.71s/it]

OS error: /tmp/tmpwr4an_ag/EDF_yolo_2015-02-17.edf: can not open file, no such file or directory
EDF_yolo_2015-02-17.edf not found


 23%|█████████████▊                                             | 240/1023 [2:09:51<10:31:01, 48.35s/it]

[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-02.zip'


 25%|██████████████▊                                            | 257/1023 [2:23:47<11:23:38, 53.55s/it]

[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-19.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-20.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-21.zip'


 26%|███████████████▎                                            | 261/1023 [2:24:45<6:08:00, 28.98s/it]

[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-23.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-24.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-25.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-26.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-27.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-28.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-29.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-04-30.zip'


 28%|████████████████▋                                           | 285/1023 [2:37:39<8:53:24, 43.37s/it]

[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-05-17.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-05-18.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-05-19.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-05-20.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-05-21.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-05-22.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-05-23.zip'
[Errno 2] No such file or directory: '/home/jovyan/work/data/PPS_data_raw/EDF_yolo_2015-05-24.zip'


 46%|███████████████████████████▎                               | 473/1023 [5:51:19<12:30:42, 81.90s/it]

OS error: /tmp/tmpci35a2ju/EDF_yolo_2015-11-21.edf: can not open file, no such file or directory
EDF_yolo_2015-11-21.edf not found


 46%|███████████████████████████▊                                | 475/1023 [5:51:30<6:33:05, 43.04s/it]

OS error: /tmp/tmpzul0_1aq/EDF_yolo_2015-11-22.edf: can not open file, no such file or directory
EDF_yolo_2015-11-22.edf not found


 47%|███████████████████████████▉                                | 476/1023 [5:51:35<4:48:57, 31.70s/it]

OS error: /tmp/tmpm85x4ysu/EDF_yolo_2015-11-23.edf: can not open file, no such file or directory
EDF_yolo_2015-11-23.edf not found


 47%|███████████████████████████▉                                | 477/1023 [5:51:41<3:37:16, 23.88s/it]

OS error: /tmp/tmpscwlvlt3/EDF_yolo_2015-11-24.edf: can not open file, no such file or directory
EDF_yolo_2015-11-24.edf not found


 47%|████████████████████████████▍                               | 485/1023 [6:01:04<7:52:57, 52.75s/it]

OS error: /tmp/tmpz3f37fjq/EDF_yolo_2015-12-02.edf: can not open file, no such file or directory
EDF_yolo_2015-12-02.edf not found


 48%|████████████████████████████▌                               | 486/1023 [6:01:09<5:43:37, 38.39s/it]

OS error: /tmp/tmpgdqv7sp8/EDF_yolo_2015-12-03.edf: can not open file, no such file or directory
EDF_yolo_2015-12-03.edf not found


100%|████████████████████████████████████████████████████████████| 1023/1023 [13:51:34<00:00, 48.77s/it]
