# Convert EDF to expipe files
The visual EEG inspection table 'eeg-sichtung.xlsx' has been manually translated into a form that is easier to parse automatically.
The table is located in the new file 'data_annotation.ods'.
It contains one row per day an animal is recorded.
Relevant metadata such as animal id, recording channel, artifacts or missing data are stored in separate columns.

This notebook loads the original EDF files sequentially and converts the recordings to the expipe format [1].
Here, each day an animal is recorded is its own action.

[1] https://github.com/CINPLA/expipe

In [None]:
import numpy as np
import pandas as pd
from tempfile import mkdtemp
import zipfile
import shutil
from tqdm import tqdm
import expipe
import pyedflib
import datetime
import os
import tools

In [2]:
# Location of the annotation table. `path` is used as a plain string prefix
# in front of `fname` when the table is read, so the empty string means the
# file is looked up relative to the current working directory.
path = ''
fname = 'data_annotation.ods'

In [3]:
# Info about the EEG files
# path_edf:    folder with the zipped EDF recordings; it is concatenated
#              directly with the archive name, hence the trailing slash.
# path_expipe: folder of the expipe project the recordings are written to.
path_edf = "/home/jovyan/work/raw_files/"
path_expipe = "/home/jovyan/work/data_expipe/epimirna"

### Read data annotation

In [5]:
# Load the annotation table into a DataFrame; engine="odf" selects the
# OpenDocument reader needed for the .ods file.
t = pd.read_excel(path+fname, engine="odf")

In [6]:
# Distinct animal IDs found in the 'ID' column of the annotation table
animals=t['ID'].unique()
print(animals)

[1218 1270 1227 1237 1243 1251 1254 1257 1260 1261 1262 1263 1264 1265
 1269 1271 1274 1275 1276 3220 3263 3266 3267]


### Exclude entries without data
Entries without data are excluded because channels are sometimes reused on the same machine,
such as channel 3 for animals 1251 and 1218.

In [7]:
# Keep only the rows whose 'Device off' cell is empty
t = t.loc[t['Device off'].isnull()]

### Test consistency of the table

In [8]:
# Run the consistency check from the local `tools` module on the filtered table.
# NOTE(review): which conditions are checked, and how a failure is reported,
# is defined in tools.py and not visible here — confirm before relying on it.
tools.test_consistency_of_annotation_table(t)

### Create Expipe project

In [9]:
# Open the expipe project stored under path_expipe
project = expipe.require_project(path_expipe)

# Open/create one entity per animal and attach its recording channel and
# machine, taken from the first distinct value found in the animal's rows
for animal_i in animals:
    t_i = t.loc[t['ID'] == animal_i]
    entity_i = project.require_entity(str(animal_i))
    entity_i.modules['info'] = dict(
        channel=t_i['Channel'].unique()[0],
        machine=t_i['Machine'].unique()[0],
    )

# Names of the actions that are already part of the project
ls_names_actions = [item[0] for item in project.actions.items()]

# When True, actions that already exist are left untouched
bool_keepexisting = True

In [10]:
# Folder below the project root in which the action folders are looked up
# when checking whether an action already exists on disk.
actions_path = str(project.path)+'/actions'

In [11]:
# Display the resolved actions folder as the cell output
actions_path

'/home/jovyan/work/data_expipe/epimirna/actions'

### Load EEG files and store them as actions

In [None]:
# Convert the zipped EDF recordings into expipe actions.
#
# The annotation table is grouped by recording date and machine. For each
# group the archive "EDF_<machine>_<date>.zip" is unpacked into a temporary
# folder and the EDF file of the same name is opened. Every signal whose label
# matches a 'Channel' entry of the group becomes one action named
# "eeg_recording_<animal id>_<date>": the table row is stored as metadata and
# the raw signal is written to a compressed .npz file.
#
# Missing archives and unreadable EDF files are reported and skipped.
grp = t.groupby(['Date', 'Machine'])
for (date_i, machine_i), grp_i in tqdm(grp):

    date_str = date_i.date().strftime('%Y-%m-%d')
    # Dedicated name: assigning to `fname` here would overwrite the
    # notebook-level file name of the annotation table.
    fname_rec = "EDF_" + machine_i + '_' + date_str
    fname_zip = fname_rec + '.zip'
    fname_edf = fname_rec + '.edf'

    tmpdir = mkdtemp()
    f = None  # stays None until the EDF reader was opened successfully
    try:
        # unzip file to temporary folder
        try:
            with zipfile.ZipFile(path_edf + fname_zip, 'r') as zip_ref:
                zip_ref.extractall(tmpdir)
        except FileNotFoundError as err:
            print(err)
            continue

        # load recordings
        try:
            f = pyedflib.EdfReader(tmpdir + '/' + fname_edf)
        except OSError as err:
            # No reader exists at this point, so there is nothing to close.
            print("OS error: {0}".format(err))
            print(fname_edf + ' not found')
            continue

        # extract information from recording
        start_time = f.getStartdatetime()
        signal_labels = f.getSignalLabels()
        # All signals must have the same length, otherwise the single
        # `nsamples` value stored with each action would be ambiguous.
        nsamples_unique = np.unique(f.getNSamples())
        assert len(nsamples_unique) == 1
        nsamples = nsamples_unique[0]

        # associate recording to animal entity
        # enumerate gives the true position of every signal, also when two
        # signals carry the same label.
        for pos_i, label_i in enumerate(signal_labels):
            # Assumes the signal labels are the numeric channel numbers used
            # in the table; a non-numeric label raises ValueError here.
            t_ii = grp_i[grp_i['Channel'] == int(label_i)]
            if t_ii.empty:
                # channel is not assigned to any animal on this day
                continue

            animal_id = t_ii['ID'].unique()
            assert len(animal_id) == 1
            animal_id = str(animal_id[0])

            name_action = 'eeg_recording_' + animal_id + '_' + date_str
            # check if action already exists (known to the project when it
            # was opened and present on disk)
            if (bool_keepexisting and
                    name_action in ls_names_actions and
                    os.path.isdir(actions_path + '/' + name_action)):
                print('Skipped file. Already present: ' + name_action)
                continue

            # create action
            action = project.create_action(name_action)
            action.type = "Recording"
            action.entities = [str(animal_id)]

            # Get all information from table except date
            info = t_ii.to_dict('records')[0]

            # date is removed here, it is stored in a different place
            del info['Date']

            # if any entry in info is datetime.time, convert to hours, minutes, seconds
            info = {
                k: v.strftime('%H:%M:%S') if isinstance(v, datetime.time) else v
                for k, v in info.items()
            }

            sampling_freq = f.getSampleFrequency(pos_i)
            info['sampling_freq'] = sampling_freq
            info['nsamples'] = nsamples
            info['duration'] = nsamples*1./sampling_freq
            info['dimension'] = f.getPhysicalDimension(pos_i)
            action.modules['info'] = info

            action.datetime = start_time

            # link file in expipe
            action.data['eeg_recording'] = name_action + '.npz'

            # store recordings in separate file
            sig_ii = f.readSignal(pos_i)
            action_data_path = str(action.data_path('eeg_recording'))
            np.savez_compressed(action_data_path, eeg=sig_ii)

            # add metadata
            action.annotations = list(t_ii.to_dict('index').values())
    finally:
        # Runs on normal completion, on `continue` and on errors, so neither
        # the EDF handle nor the temporary folder is leaked.
        if f is not None:
            # close EDF file
            f.close()
        # delete temporary folder
        shutil.rmtree(tmpdir)


In [None]:
# Collect the project's actions into a table using the local `tools` helper.
# NOTE(review): presumably returns a pandas DataFrame with one row per action;
# the exact columns are defined in tools.py — confirm there.
df_actions = tools.actions_to_dataframe(project.actions)