In [None]:
# Study: EDA - Envisioned Speech Recognition
# Author: Willian Oliveira
# Start: 30/09/2022
# Motivation: Discover how to work with EEG exams.
# Study Status: Ongoing

In [None]:
import os
import random

import matplotlib.pyplot as plt
import mne
import numpy as np
import pandas as pd
import seaborn as sns

mne.set_log_level('WARNING') # Setting outputs to something less verbose.

In [None]:
# This is too verbose, but we will keep it as a record of my thinking process to understand this dataset.
def explore_edf_data(path):
    """Load one EDF recording with MNE and print/plot a walkthrough of its contents.

    The function shows the file metadata and channel names, converts the signal
    to a (measurements x channels) array, displays per-channel summary
    statistics and plots channels [2:16] on a single figure.

    Parameters
    ----------
    path : str
        Path of the EDF file to inspect.

    Returns
    -------
    None
        Everything is printed, displayed or plotted as a side effect.
    """
    raw = mne.io.read_raw_edf(path)
    print(f'{path} metadata:')
    display(raw.info) # Explore metadata included in the file
    channels = raw.ch_names # All channels available in this EEG.
    print(f'Channel Names:\n')
    print(channels,'\n')
    print(5*'-', '\n')
    print('Transforming transposed edf data to np array...\n')
    data = raw.get_data().T # Transposed so that each row is one measurement across all channels
    print(f'Data size: {data.size}')
    print(f'Data shape: {data.shape} -> {data.shape[0]} measurements of {data.shape[1]} channels' )
    # Report the real channel count instead of a hard-coded number, so the message stays true for any file.
    print(f'Each nested array is composed of {data.shape[1]} float numbers, indicating one measure of each channel at the same time.\n')
    print('Channels Values at the first measurement period:')
    print(data[0], '\n')
    print('Channels Values at the last measurement period:')
    print(data[-1], '\n')
    print(5*'-', '\n')
    print('Converting np array to dataframe for further analysis...\n')
    df_data = pd.DataFrame(data, columns=channels)
    display(df_data.describe().style)
    print('\n')
    print('We see that the sensor channels for this task are from AF3 to AF4 [2:16].')
    # Sets the default figure size globally; later plots in the notebook inherit it.
    plt.rcParams["figure.figsize"] = (20,8)
    # Only report the duration when the timestamp channel is actually present in this file.
    if 'TIME_STAMP_s' in df_data.columns:
        elapsed = df_data['TIME_STAMP_s'].max() - df_data['TIME_STAMP_s'].min()
        # NOTE(review): the 10**6 factor presumably undoes a unit scaling applied on load — confirm.
        print(f'Seconds of analysis: {round(elapsed*(10**6))}')
    for c in channels[2:16]:
        plt.plot(df_data[c], label = str(c))
    # Title and legend are set once, after all lines have been drawn.
    plt.title('Sensor channels response for subject thinking on this EEG')
    plt.legend()


In [None]:
# Walk through a single recording to inspect its metadata, channels and signals.
explore_edf_data('Char/aashay_A.edf')

In [None]:
# Creating a np.array with all EEG data and its label for Char Envisioned Recognition task.
folder = 'Char/'
X = np.zeros((230,14,1536))  # (recordings, channels, samples)
Y = np.zeros((230,))         # one class index per recording
channels = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']

# Character found right after the last underscore of the file name -> class index.
char_to_label = {char: idx for idx, char in enumerate('ACFHJMPSTY')}

# os.listdir returns entries in arbitrary order; sorting keeps row indices stable between runs.
# Anything that is not an EDF file is skipped.
edf_files = sorted(name for name in os.listdir(folder) if name.lower().endswith('.edf'))
if len(edf_files) != X.shape[0]:
    # Rows that are never filled stay all-zero with label 0.0 ('A'), so make a mismatch visible.
    print(f'Warning: expected {X.shape[0]} EDF files in {folder}, found {len(edf_files)}.')

for ctr, file in enumerate(edf_files):
    data = mne.io.read_raw_edf(os.path.join(folder,file))
    # data[2:16] selects channels 2..15; element [0] of the result is the signal array
    # (element [1] holds the time axis). The signal is rescaled by 1000.
    raw_data = data[2:16][0]*1000
    # Keep only the first X.shape[2] samples so every recording has the same length.
    raw_data = raw_data[:,0:X.shape[2]]

    # rsplit tolerates underscores inside the subject part of the file name.
    _,cls = file.rsplit('_', 1)
    if cls[0] not in char_to_label:
        # Fail loudly instead of silently leaving the label at 0 ('A').
        raise ValueError(f'Unexpected class character {cls[0]!r} in file name {file!r}')
    Y[ctr] = char_to_label[cls[0]]
    X[ctr,:,:] = raw_data

In [None]:
# Let's keep a lookup from class index (stored as float in the label array) to its character.
label_str = {float(idx): char for idx, char in enumerate('ACFHJMPSTY')}

In [None]:
# Let's explore EEG readings in our dataset.
def explore_dataset(Doc, Measurements = X, Labels = Y, Channels = channels, labels_str = label_str):
    """Show the label, the first rows and a line plot of one recording.

    Parameters
    ----------
    Doc : int
        Index of the recording to inspect.
    Measurements : array-like
        Recordings indexed as ``Measurements[Doc]`` -> (channels, samples).
        Defaults to the module-level ``X`` as it was when this function was defined.
    Labels : array-like
        Class index of every recording, aligned with ``Measurements``.
    Channels : list of str
        Channel names, one per row of ``Measurements[Doc]``.
    labels_str : dict
        Mapping from class index to the character it stands for.

    Returns
    -------
    None
        Output is printed, displayed and plotted as a side effect.
    """
    # Read from the arguments: the caller may pass arrays other than the global X / channels.
    df = pd.DataFrame(Measurements[Doc].T, columns=Channels)
    label = labels_str[Labels[Doc]]
    print(f'Exploring exam number {Doc}')
    print(f'Label: {label}')
    print('Channels first mesurements:')
    display(df.head().style)
    # Sets the default figure size globally; later plots in the notebook inherit it.
    plt.rcParams["figure.figsize"] = (20,8)
    for c in Channels:
        plt.plot(df[c], label = str(c))
    # Title and legend are set once, after all lines have been drawn.
    plt.title(f'Sensor channels response for subject thinking on char {label} for ~12 seconds.')
    plt.legend()

In [None]:
# random.randint includes both endpoints, so randint(0, 230) could pick the out-of-range index 230;
# randrange(len(X)) only yields valid row indices.
explore_dataset(Doc= random.randrange(len(X)), Measurements = X, Labels = Y, Channels = channels)

In [None]:
# Dimensions of the measurement tensor, then of the label vector, one per line.
print(X.shape, Y.shape, sep='\n')

In [None]:
# Save as npz file to use it on the other notebooks.
# np.savez appends the '.npz' extension, so this writes 'char_dataset.npz',
# with the measurements stored under key 'x' and the labels under key 'y'.
np.savez('char_dataset', x=X, y=Y)

In [None]:
# Load npz file to see if it's in the right configuration.
# np.load keeps the archive open until it is closed, so read both arrays inside a
# context manager; the arrays themselves remain usable after the file is closed.
with np.load('char_dataset.npz') as dataset:
    Measurements = dataset['x']
    Labels = dataset['y']
print(Measurements.shape)
print(Labels.shape)

In [None]:
# Test if we can access data the same way.
# randrange(len(...)) never returns an out-of-range index (randint(0, 230) could return 230).
explore_dataset(Doc= random.randrange(len(Measurements)), Measurements = Measurements, Labels = Labels, Channels = channels)

In [None]:
# Extracting a single sensor for tests on building its spectrogram
sample_idx = 47
# Row 0 of a recording corresponds to the first entry of `channels`, i.e. AF3.
eeg90_af3 = Measurements[sample_idx][0]
plt.plot(eeg90_af3, label = 'AF3')
# Take the character from the stored label rather than hard-coding it, so the title always matches the data.
plt.title(f'Channel AF3 response for subject thinking on char {label_str[Labels[sample_idx]]} for ~12 seconds.')

In [None]:
# Turn the 1-D signal into a 2-D spectrogram and draw it as a frequency-vs-time map
from scipy import signal
fs = 128

f, t, Sxx = signal.spectrogram(eeg90_af3, fs)
fig, ax = plt.subplots(figsize=(8,10))
ax.pcolormesh(t, f, Sxx, shading='gouraud')
ax.set_ylabel('Frequency [Hz]')
ax.set_xlabel('Time [sec]')
plt.show()

In [None]:
# Extract all sensors from the same EEG:
# Index -> sensor name lookup, numbered from 0 in the order listed.
sensors = dict(enumerate(
    ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1',
     'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
))

def eeg_tensor_spectrogram(edf_file, fs): ## On going work -> We will loop the subplots and export this images to the new dataset.
    """Compute and plot the spectrogram of every sensor of one recording.

    Reads the recording from the module-level ``Measurements`` array and the
    sensor names from the module-level ``sensors`` dict.

    Parameters
    ----------
    edf_file : int
        Index of the recording inside ``Measurements``.
    fs : float
        Sampling frequency handed to ``scipy.signal.spectrogram``.

    Returns
    -------
    numpy.ndarray
        Object array built from one ``[t, f, Sxx]`` triple per sensor
        (segment times, sample frequencies, spectrogram), in sensor order.
    """
    eeg = []
    for c in range(len(sensors)):
        sensor = Measurements[edf_file][c]
        freqs, times, Sxx = signal.spectrogram(sensor, fs)
        # Stored as (times, frequencies, spectrogram): the argument order pcolormesh expects.
        eeg.append([times, freqs, Sxx])

    fig = plt.figure(figsize= (25, 20))

    # One panel per sensor on a 4x4 grid; titles come from `sensors`, so they cannot drift from the data order.
    for c, (times, freqs, Sxx) in enumerate(eeg):
        ax = fig.add_subplot(4, 4, c + 1)
        ax.set_title(f'Sensor - {sensors[c]}')
        ax.pcolormesh(times, freqs, Sxx, shading='gouraud')
        ax.axis('off')

    return np.array(eeg, dtype=object)

# Build and plot the spectrograms of recording 229 at a sampling frequency of 128.
eeg0 = eeg_tensor_spectrogram(229, 128)
eeg0.shape # -> one entry per sensor (14), each holding the Segment Times, the Sample Frequencies and the Spectrogram, in that order.