Import libraries

In [1]:
import os

import pandas as pd
import pyedflib

Function to extract the relevant EEG data from an EDF file

In [2]:
def extract_EEG_data_from_EDF(file_path):
    """Read header parameters, signals and annotations from one EDF file.

    Parameters
    ----------
    file_path : str
        Path of the EDF file, passed unchanged to ``pyedflib.EdfReader``.

    Returns
    -------
    dict
        On success, a dictionary with three keys:

        * ``"parameters"`` -- ``dimension`` and ``sample_frequency`` of
          signal 0, the file ``duration``, the ``signal_length`` (sample
          count of signal 0) and ``event_marker_length`` (number of
          annotation onsets).
        * ``"signal_data"`` -- mapping of signal label to the samples
          returned by ``readSignal`` for that signal.
        * ``"event_markers"`` -- ``onset`` and ``description`` sequences
          taken from items 0 and 2 of ``readAnnotations()``.

        On any failure an empty dictionary is returned, so callers can
        rely on a plain truthiness check: the result is either complete
        or empty, never partially filled.

    Notes
    -----
    Failures are reported through ``warnings.warn`` instead of being
    silently discarded, and the reader is always closed once it has been
    opened.
    """
    import warnings  # local import keeps the function self-contained

    try:
        f = pyedflib.EdfReader(file_path)
    except Exception as e:
        # Best effort: a missing/unreadable file must not abort a batch run.
        warnings.warn(f"Could not open EDF file {file_path!r}: {e!r}")
        return {}

    try:
        # Read the annotations once and reuse them for both the header
        # summary and the event-marker table.
        annotations = f.readAnnotations()
        onsets = annotations[0]
        descriptions = annotations[2]

        signal_labels = f.getSignalLabels()

        # Build the result in one go so that an error part-way through
        # never leaks a half-populated dictionary to the caller.
        edf_data = {
            "parameters": {
                "dimension": f.getPhysicalDimension(0),
                "sample_frequency": f.getSampleFrequency(0),
                "duration": f.file_duration,
                "signal_length": f.getNSamples()[0],
                "event_marker_length": len(onsets),
            },
            "signal_data": {
                signal_labels[i]: f.readSignal(i)
                for i in range(f.signals_in_file)
            },
            "event_markers": {
                "onset": onsets,
                "description": descriptions,
            },
        }
    except Exception as e:
        warnings.warn(f"Could not read EDF file {file_path!r}: {e!r}")
        return {}
    finally:
        # Release the file handle on both the success and the error path.
        f.close()

    return edf_data

Extract participant information and per-phase EEG data from the dataset

In [4]:
# Root folder of the acquired dataset, relative to the notebook's working
# directory. The extraction loop lists this folder and treats each entry as
# one participant.
dataset_dir = "../acquired_dataset"

In [5]:
# Dictionary to store participant-level information
participant_data = {"id": [], "age": [], "sex": [], "category": []}

# Dictionary to store participant trial-level information
participant_trial_data = {
    "id_trial_phase": [],
    "id": [],
    "phase": [],
    "signal_length": [],
    "event_marker_length": [],
    "sample_frequency": [],
    "duration": [],
}

# Output folders for the per-phase CSV exports. They are created up front so
# that the to_csv calls below do not fail when the folders are missing.
signal_data_dir = "../data/signal_data"
event_markers_dir = "../data/event_markers"
os.makedirs(signal_data_dir, exist_ok=True)
os.makedirs(event_markers_dir, exist_ok=True)

# Sections a complete extraction result must contain before it is used.
required_sections = ("parameters", "signal_data", "event_markers")

# Loop through participant IDs in the dataset directory. Sorting makes the
# row order of the resulting tables reproducible across runs and machines.
for participant_id in sorted(os.listdir(dataset_dir)):
    participant_dir = os.path.join(dataset_dir, participant_id)

    # Skip stray files (e.g. OS metadata) that are not participant folders.
    if not os.path.isdir(participant_dir):
        continue

    # Read the participant's userfile.gnr and parse the "key=value" lines.
    # assumes lines 4-6 (indices 3-5) hold age, sex and category in that
    # order -- TODO confirm against the .gnr file format.
    with open(os.path.join(participant_dir, "userfile.gnr"), "r") as gnr_file:
        file_data = gnr_file.readlines()
    age = int(file_data[3].strip().split("=")[1])
    sex = file_data[4].strip().split("=")[1]
    category = file_data[5].strip().split("=")[1]

    # Append all participant fields together, only after parsing succeeded,
    # so the four lists always stay the same length.
    participant_data["id"].append(participant_id)
    participant_data["age"].append(age)
    participant_data["sex"].append(sex)
    participant_data["category"].append(category)

    # Get a list of trial directories for the current participant
    trials_dir = [
        directory
        for directory in sorted(os.listdir(participant_dir))
        if os.path.isdir(os.path.join(participant_dir, directory))
    ]

    # Loop through trials for the current participant
    for trial in trials_dir:
        # Loop through the four phases of each trial
        for phase in range(1, 5):
            # Extract EEG data from the EDF file for the current phase
            edf_data = extract_EEG_data_from_EDF(
                os.path.join(participant_dir, trial, f"Phase {phase}.edf")
            )

            record_key = f"{participant_id}_{trial}_{phase}"

            # Append id-trial-phase-specific information
            participant_trial_data["id"].append(participant_id)
            participant_trial_data["phase"].append(phase)
            participant_trial_data["id_trial_phase"].append(record_key)

            # Treat the extraction as successful only when every section is
            # present; a partial result is handled like a failed read.
            if edf_data and all(key in edf_data for key in required_sections):
                parameters = edf_data["parameters"]
                participant_trial_data["sample_frequency"].append(
                    parameters["sample_frequency"]
                )
                participant_trial_data["signal_length"].append(
                    parameters["signal_length"]
                )
                participant_trial_data["event_marker_length"].append(
                    parameters["event_marker_length"]
                )
                participant_trial_data["duration"].append(parameters["duration"])

                # Save EEG signal data
                pd.DataFrame(edf_data["signal_data"]).to_csv(
                    f"{signal_data_dir}/{record_key}.csv",
                    index=False,
                )

                # Save event marker data; only phase 4 is exported.
                # NOTE(review): the earlier comment here mentioned both the
                # arithmetic task and auditory stimuli phases -- confirm
                # whether another phase should be exported as well.
                if phase == 4:
                    pd.DataFrame(edf_data["event_markers"]).to_csv(
                        f"{event_markers_dir}/{record_key}.csv",
                        index=False,
                    )
            else:
                # Keep the row so missing recordings remain visible in the
                # trial table, with empty values for the EDF parameters.
                participant_trial_data["sample_frequency"].append(None)
                participant_trial_data["signal_length"].append(None)
                participant_trial_data["event_marker_length"].append(None)
                participant_trial_data["duration"].append(None)

Store the extracted data as pandas DataFrames and save them to CSV files

In [None]:
# Turn both column-oriented dictionaries into DataFrames: one row per
# participant, and one row per participant/trial/phase combination.
participant_df, participant_trial_df = (
    pd.DataFrame(columns_by_name)
    for columns_by_name in (participant_data, participant_trial_data)
)

In [None]:
# Write the participant-level table, without the positional index column.
participant_df.to_csv(
    path_or_buf="../data/Participant Data.csv",
    index=False,
)

# Write the trial/phase-level table, without the positional index column.
participant_trial_df.to_csv(
    path_or_buf="../data/Participant Trial Data.csv",
    index=False,
)