In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from glob import glob
import matplotlib.pyplot as plt

from scipy import signal

In [None]:
# Root directory of the competition dataset. Later cells build file paths by
# plain string concatenation (BASE_PATH + 'train_eegs/...'), so the trailing
# slash is required.
BASE_PATH = '/kaggle/input/hms-harmful-brain-activity-classification/'

In [None]:
# Index the parquet files of the dataset.
# glob() is called without recursive=True, so '**' behaves like '*': only files
# sitting exactly one directory below BASE_PATH are matched.
df = (
    pd.DataFrame({'path': glob(BASE_PATH + '**/*.parquet')})
    .assign(
        # Parent folder name, reduced to the token after its last underscore.
        test_type=lambda d: d['path'].str.split('/').str.get(-2).str.split('_').str.get(-1),
        # File name up to its first dot.
        id=lambda d: d['path'].str.split('/').str.get(-1).str.split('.').str.get(0),
    )
)
df

We have more EEG files than spectrogram files (see the bar chart of file counts below).

In [None]:
# Bar chart of how many parquet files fall under each `test_type` value.
df['test_type'].value_counts().plot.bar(rot=0)

# Electroencephalography (EEG) signals

EEG is a non-invasive method for recording the macroscopic electrical activity of the brain (resting potential -70 mV) by placing electrodes on the scalp. The electrodes are positioned according to the 10-20 system, in which the skull is divided into increments of 10% or 20% to place the electrodes. Each electrode's label and number are based on its location:

 - F for frontal region, T for temporal, P for parietal, and O for occipital
 - Odd numbers on the left, even on the right

### Screening EEG studies
The most popular way to screen EEG is the banana montage shown in the figure below.

<img src= "https://eegatlas-online.com/myapplications/images/MON/db.png" alt ="EEG" height="100" align='center'>

In [None]:
# Load a single training EEG recording (selected by its file id) and preview
# the first rows.
df_eeg = pd.read_parquet(BASE_PATH + 'train_eegs/1000913311.parquet')
df_eeg.head()

In [None]:
# Bipolar montage: every trace is the difference between the two electrodes of
# a pair, and the pairs are grouped into chains that are plotted in this order.
right_temporal_chain = [('Fp2', 'F8'), ('F8', 'T4'), ('T4', 'T6'), ('T6', 'O2')]
right_parasagittal_chain = [('Fp2', 'F4'), ('F4', 'C4'), ('C4', 'P4'), ('P4', 'O2')]
left_temporal_chain = [('Fp1', 'F7'), ('F7', 'T3'), ('T3', 'T5'), ('T5', 'O1')]
left_parasagittal_chain = [('Fp1', 'F3'), ('F3', 'C3'), ('C3', 'P3'), ('P3', 'O1')]
z_electrodes = [('Fz', 'Cz'), ('Cz', 'Pz')]
chains = (
    right_temporal_chain
    + right_parasagittal_chain
    + left_temporal_chain
    + left_parasagittal_chain
    + z_electrodes
)

time = 2000      # number of leading samples to draw (a sample count, not seconds)
frequency = 200  # divisor that converts the sample index to seconds

fig, axes = plt.subplots(len(chains), 1, figsize=(12,40))

# The x values are identical for every panel, so compute them once.
# NOTE(review): assumes the index of df_eeg is the sample number - confirm.
t_seconds = df_eeg[:time].index/frequency

for ax, (led1, led2) in zip(axes.flatten(), chains):
    wave = df_eeg[led1] - df_eeg[led2]
    ax.plot(t_seconds, wave[:time])
    ax.set_title(f'{led1} - {led2}')
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Amplitude')
plt.tight_layout()

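By EEG convention, negative potentials are drawn as upgoing deflections and positive potentials as downgoing ones. Note that the matplotlib plots above use the default axis orientation, in which positive values point up.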

In [None]:
# Referential montage: the row-wise mean over all columns of df_eeg (one value
# per sample), drawn as a single line.
# NOTE(review): every column takes part in the mean - confirm none should be
# left out.
df_eeg.mean(axis=1).plot.line()

### Power Spectral Density
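Welch's method estimates the power spectral density by splitting the signal into short, overlapping windows, taking the Fourier transform of each window, and averaging the resulting periodograms.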

In [None]:
# Welch power spectral density of every column of df_eeg, one panel per column.
cols = df_eeg.columns
fig, axes = plt.subplots(len(cols), 1, figsize=(12,40))
for col, ax in zip(cols, axes.flatten()):
    # Each segment is 1/100 of the recording length, so the frequency bins are
    # fs / nperseg apart.
    f, psd = signal.welch(df_eeg[col], fs=200, nperseg=df_eeg.shape[0]//100)
    ax.plot(f, psd)
    ax.set_title(col)
    ax.set_xlabel('Frequency (Hz)')
    ax.set_ylabel('PSD')
# Same layout call as the other multi-panel figures, so titles and tick labels
# of neighbouring panels do not collide.
plt.tight_layout()

It's strange that they all peak at the same point. This is all very new to me, so I'm not sure whether that's correct.

## Spectrogram

In [None]:
# Load a single training spectrogram (selected by its file id) and preview the
# first rows instead of rendering the whole frame, as done for the EEG sample.
df_spec = pd.read_parquet(BASE_PATH + 'train_spectrograms/1662527277.parquet')
df_spec.head()

In [None]:
def plot_spec_num(spectogram_label, df=None):
    """Scatter up to four spectrogram columns against time on a 2x2 grid.

    Parameters
    ----------
    spectogram_label : str
        Substring used to pick the columns (``DataFrame.filter(like=...)``).
        The first four matches are plotted.
    df : pandas.DataFrame, optional
        Frame holding a ``'time'`` column and the columns to plot. Defaults to
        the notebook-level ``df_spec``, which is what the function always used
        before this parameter existed.
    """
    if df is None:
        df = df_spec
    fig, axes = plt.subplots(2,2, figsize=(16,8))
    cols = df.filter(like=spectogram_label).columns
    # zip() stops at the shorter sequence: at most four panels are filled, and
    # fewer than four matching columns no longer raise an IndexError.
    for col, ax in zip(cols, axes.flatten()):
        ax.scatter(x=df['time'], y=df[col])
        ax.set_title(col)
        ax.set_xlabel('time')
    plt.tight_layout()

In [None]:
def plot_spectrograms(df):
    """Draw the four spectrogram views of ``df`` as log-scaled images (2x2 grid).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns starting with ``LL``, ``RL``, ``RP`` and ``LP``
        hold the values of the respective view. Rows of ``df`` run along the
        x axis of each image, the selected columns along the y axis.
    """
    fig, axes = plt.subplots(2,2, figsize=(16,8))
    views = ['LL', 'RL', 'RP', 'LP']

    for view, ax in zip(views, axes.flatten()):
        # Select this view's columns, take the natural log and transpose so
        # that each selected column becomes one image row.
        spec = np.log(df.filter(regex=f'^{view}', axis=1)).T
        ax.imshow(spec,  cmap='seismic', aspect='auto')
        ax.set_title(view)

        # imshow() numbers the y axis by row position. Relabel about ten evenly
        # spaced ticks with the column-name suffix so the axis matches its label.
        # NOTE(review): assumes the suffix after "<view>_" is the bin frequency
        # in Hz - confirm against the dataset description.
        n_rows = len(spec.index)
        tick_rows = np.unique(
            np.linspace(0, n_rows - 1, num=min(n_rows, 10)).round().astype(int)
        )
        tick_text = spec.index[tick_rows].astype(str).str.replace(f'^{view}_?', '', regex=True)
        ax.set_yticks(tick_rows)
        ax.set_yticklabels(tick_text)

        ax.set_ylabel("Frequency (Hz)")
        ax.set_xlabel("Time")
    plt.tight_layout()

In [None]:
# Render the four views (LL, RL, RP, LP) of the sample spectrogram in df_spec.
plot_spectrograms(df_spec)