In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

import pywt
from scipy.signal import butter, sosfilt
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import glob
from tqdm.notebook import tqdm, trange
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Paths of everything found under the competition's train_eegs directory.
train_eegs = glob.glob('/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/*')
# Label table; later cells read its eeg_id, eeg_label_offset_seconds and
# expert_consensus columns.
train = pd.read_csv('/kaggle/input/hms-harmful-brain-activity-classification/train.csv')

In [None]:
def sec2idx(time, sr=200):
    """Convert a time point in seconds to the matching row index of an EEG file.

    Parameters
    ----------
    time : int or float
        Time point in seconds.
    sr : int or float, default 200
        Sampling rate in rows per second.

    Returns
    -------
    int or float
        ``time * sr``. The value is not rounded or cast, so callers that need
        an integer index must convert it themselves.
    """
    # Honour the caller-supplied sampling rate instead of a hard-coded 200.
    return time * sr

# custom dataloader
class EEGDataset:
    """Index-addressable pairing of EEG windows with their labels.

    Item ``idx`` is built from row ``idx`` of ``train``: the parquet file named
    after the row's ``eeg_id`` is read, and the rows from the label offset up
    to ``sec2idx(50)`` rows later are returned with ``expert_consensus``.
    """

    def __init__(self, base_path, train):
        # base_path is concatenated directly with file names, so it has to
        # end with a path separator.
        self.base_path = base_path
        self.eegs = glob.glob(base_path + '*')
        self.train = train

    def __len__(self):
        """Number of rows in the ``train`` table."""
        n_rows = len(self.train)
        return n_rows

    def __getitem__(self, idx):
        """Return ``(X, y)`` for row ``idx`` of ``train``.

        ``X`` is the slice of the EEG table starting at the row's
        ``eeg_label_offset_seconds`` and ``y`` is its ``expert_consensus``.
        """
        record = self.train.iloc[idx]
        parquet_path = self.base_path + str(record['eeg_id']) + '.parquet'

        # Window bounds expressed as row positions in the EEG table.
        first_row = int(sec2idx(record['eeg_label_offset_seconds']))
        last_row = int(first_row + sec2idx(50))

        recording = pd.read_parquet(parquet_path)
        return recording.iloc[first_row:last_row], record['expert_consensus']
    
# Dataset over the competition's train_eegs directory, indexed by the rows of
# `train`. The trailing slash matters: EEGDataset builds file paths by plain
# string concatenation.
dataset = EEGDataset('/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/', train)    

# Visualize Some Examples

In [None]:
START = 10

# Plot five consecutive samples starting at START, one subplot per column of
# the EEG table, with the sample's label as the figure title.
for i in range(5):
    eeg, cat = dataset[START+i]

    eeg.plot(subplots=True, figsize=(10, 7))
    # With subplots=True, plt.title would only title the current (last-created)
    # subplot; suptitle labels the whole figure instead.
    plt.suptitle(cat)
    plt.show()

In [None]:
# Heatmap of the pairwise correlation between the EEG channels of one sample.
IDX = 0

eeg, _ = dataset[IDX]
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(eeg.corr(), annot=True, ax=ax)
plt.show()

# Define Functions for Band Extraction

In [None]:
def eeg_bandpass(eeg, min_freq, max_freq, fs=200, order=10):
    """Keep only the ``[min_freq, max_freq]`` frequency range of a signal.

    The signal is first low-pass filtered at ``max_freq`` and the result is
    then high-pass filtered at ``min_freq``. Both stages are Butterworth
    filters in second-order-sections form, applied in one direction with
    ``sosfilt``.

    Parameters
    ----------
    eeg : array_like
        Signal to filter. ``sosfilt`` is called with its default axis, so
        filtering runs along the last axis.
    min_freq : float
        High-pass cut-off, in the same units as ``fs``.
    max_freq : float
        Low-pass cut-off, in the same units as ``fs``.
    fs : float, default 200
        Sampling frequency of ``eeg``.
    order : int, default 10
        Order of each of the two Butterworth filters.

    Returns
    -------
    numpy.ndarray
        The filtered signal, same shape as ``eeg``.
    """
    sos_lp = butter(order, max_freq, 'lp', fs=fs, output='sos')
    sos_hp = butter(order, min_freq, 'hp', fs=fs, output='sos')

    # Low-pass first, then high-pass: together they leave the requested band.
    eeg_low = sosfilt(sos_lp, eeg)
    return sosfilt(sos_hp, eeg_low)

def eeg2band(eeg, band='alpha'):
    """Filter ``eeg`` down to one named frequency band.

    ``band`` must be one of ``'delta'``, ``'theta'``, ``'alpha'``, ``'beta'``
    or ``'gamma'``; any other name raises ``KeyError``. The band's cut-offs
    are handed to ``eeg_bandpass``, whose result is returned unchanged.
    """
    # (low, high) cut-offs per band, in the units eeg_bandpass expects.
    cutoffs = {
        'delta': (0.5, 4),
        'theta': (4, 8),
        'alpha': (8, 12),
        'beta': (12, 30),
        'gamma': (35, 99),
    }
    low_cut, high_cut = cutoffs[band]
    return eeg_bandpass(eeg, low_cut, high_cut)

# Extract and Plot Bands

In [None]:
START = 10

# For five consecutive samples, overlay each band-filtered trace on the raw
# first channel, one subplot per band.
for i in range(5):
    eeg, cat = dataset[START + i]
    eeg_channel_0 = eeg.values[:, 0]

    fig, axes = plt.subplots(5, 1, figsize=(15, 5))
    for j, band in enumerate(['delta', 'theta', 'alpha', 'beta', 'gamma']):
        filtered = eeg2band(eeg_channel_0, band)

        ax = axes[j]
        ax.plot(eeg_channel_0)  # raw signal
        ax.plot(filtered)       # same signal restricted to `band`
        ax.set_title(band)
    fig.suptitle(cat)
    plt.show()