
<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">

<html>
<head>
</head>
<body>
    <h1>If you find my Kaggle notebook helpful, please consider giving it an upvote! 👍</h1>
    
This notebook is written to be clean and easy to follow. Its purpose is to give beginners and new Kaggle competitors an idea of what an EEG signal classification training notebook looks like. It is only a baseline, but it can be extended into a solid submission.
    
</body>
</html>

# CREDITS 
#### * Thanks to Nischay Dhankhar for providing such an insightful notebook on training with EEG signals
#### * Kudos to [Chris Deotte](https://www.kaggle.com/code/cdeotte/wavenet-starter-lb-0-52?scriptVersionId=160158478) for sharing the raw EEG signals; this notebook uses the 8-channel signals generated by Chris' notebook

<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import os
import pytorch_lightning as pl
from torch.utils.data import Dataset, DataLoader
from sklearn import model_selection
import torchvision.transforms as transforms
import torchvision.io 
import librosa
from PIL import Image
import albumentations as alb
import torch.multiprocessing as mp
import warnings
warnings.filterwarnings('ignore')

from pytorch_lightning.callbacks import ModelCheckpoint, BackboneFinetuning, EarlyStopping

<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# Config Class Constructor
    
<strong>Do play with these parameters to tune your model, and make sure to track your progress.</strong>

In [None]:
class Config:
    """Hyper-parameters, paths and switches shared by every cell of the notebook.

    The class name is rebound to a single instance right after the definition,
    so the rest of the notebook reads settings as ``Config.<attribute>``.
    """

    def __init__(self):
        # --- optimiser / learning-rate schedule ---
        self.optimizers ="adam"
        self.scheduler ="CosineAnnealingWarmRestarts"
        self.min_lr = 1e-6
        # NOTE(review): the scheduler factory in this notebook passes
        # `Config.epochs` as T_0, so this value is currently not read there.
        self.T_0 = 25

        # --- training loop ---
        self.use_aug = True
        self.num_classes = 6
        self.batch_size = 88
        self.epochs = 10
        # Read by the 'CosineAnnealingLR' branch of the scheduler factory; it was
        # missing before, so selecting that scheduler raised AttributeError.
        # One cosine period spanning the whole run is used as the default.
        self.T_max = self.epochs
        self.PRECISION = 16
        self.PATIENCE = 20
        self.seed = 20
        self.pretrained = False
        self.weight_decay = 1e-2
        self.use_mixup = False
        self.mixup_alpha = 0.1
        self.num_channels = 8

        # --- data locations ---
        self.data_root = "/kaggle/input/hms-harmful-brain-activity-classification/"
        self.raw_eeg_path = "/kaggle/input/brain-eegs/eegs.npy"
        self.cols_interest = ['Fp1', 'C3', 'F7', 'T5', 'Fz', 'Cz', 'Pz', 'Fp2', 'F4', 'C4', 'P4', 'F8', 'T4', 'T6', 'O2', 'EKG']
        self.LR = 8e-3
        self.processed_train = None
        self.output_dir = '/kaggle/working/exp30_augv2_8ch_mixup_onlyk3711_aug_32sp'
        # Folds that the final training loop iterates over.
        self.trn_folds = [0, 1, 2, 3, 4]

# Replace the class by its only instance; later cells use `Config.<attribute>`.
Config = Config()
     

In [None]:
# Load the competition metadata table (train.csv under Config.data_root).
df = pd.read_csv(f'{Config.data_root}train.csv')
# Last expression of the cell: displays (n_rows, n_columns).
df.shape

<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# Classes distributions

In [None]:
# Count each expert_consensus label once; the same Series provides both the
# tick labels (its index) and the bar heights (its values).
counts = df['expert_consensus'].value_counts()
categories = counts.index

plt.figure(figsize =(15,5))
plt.bar(categories, counts)
plt.title('Bar Plot of expert_consensus Value Counts')
plt.xlabel('Categories')
plt.ylabel('Count')
plt.show()

In [None]:
# Show every metadata row that belongs to one example recording.
df.loc[df['eeg_id'] == 1000913311]

In [None]:
# Distinct eeg_id values in the metadata; several rows can share one eeg_id.
unique_eeg_samples = df['eeg_id'].unique()
print("Unique EEG samples = ", len(unique_eeg_samples))

# Reading EEG signals

In [None]:
# Read the raw signal file of the example recording inspected above.
# NOTE(review): this absolute path repeats the directory stored in Config.data_root.
pqt = pd.read_parquet('/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/1000913311.parquet')

In [None]:
# Preview only the first rows instead of rendering the whole recording.
pqt.head()

<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# Data Preprocessing

In [None]:
class DataProcessor:
    """Collapse the per-row metadata into one training row per ``eeg_id``.

    Attributes set in ``__init__``:
        df:        the metadata frame passed in.
        EEG_IDS:   unique ``eeg_id`` values of ``df``.
        TARGETS:   the last six column labels of ``df``.
        TARS:      class name -> integer id mapping.
        TARS_INV:  integer id -> class name mapping.
        train:     the aggregated frame produced by ``process_data``.
    """

    def __init__(self, df):
        self.df = df
        self.EEG_IDS = self.df['eeg_id'].unique()
        # The target columns are taken by position: the final six columns.
        self.TARGETS = self.df.columns[-6:]
        self.TARS = {'Seizure': 0, 'LPD': 1, 'GPD': 2, 'LRDA': 3, 'GRDA': 4, 'Other': 5}
        self.TARS_INV = {class_id: name for name, class_id in self.TARS.items()}
        self.train = self.process_data()

    def process_data(self):
        """Return one row per ``eeg_id`` with normalised targets and a ``target`` label.

        For every ``eeg_id`` the first ``patient_id`` and first
        ``expert_consensus`` are kept, the target columns are summed over the
        group and each row is then divided by its own total so it sums to 1.
        The resulting shape is printed before the frame is returned.
        """
        per_eeg = self.df.groupby('eeg_id')

        train = per_eeg[['patient_id']].agg('first')

        # Summed target columns per recording, copied over column by column.
        summed = per_eeg[self.TARGETS].agg('sum')
        for column in self.TARGETS:
            train[column] = summed[column].values

        # Row-wise normalisation of the summed targets.
        totals = train[self.TARGETS].values
        train[self.TARGETS] = totals / totals.sum(axis=1, keepdims=True)

        train['target'] = per_eeg[['expert_consensus']].agg('first')

        train = train.reset_index()
        known_ids = train.eeg_id.isin(self.EEG_IDS)
        train = train.loc[known_ids]
        print('Train Data with unique eeg_id shape:', train.shape)

        return train

# Build the per-eeg_id training table; `train` is the frame used by all later cells.
data_processor = DataProcessor(df)
train= data_processor.train


In [None]:
# Quick look at the aggregated table (one row per eeg_id).
train.head()


In [None]:
%%time

CREATE_EEGS = True

# Read one recording only to list the available signal columns. It is stored
# under its own name so that the metadata frame `df` (train.csv) is not
# overwritten, which keeps the earlier cells re-runnable.
sample_eeg = pd.read_parquet(f'{Config.data_root}train_eegs/1000913311.parquet')
FEATS = sample_eeg.columns
print(f'There are {len(FEATS)} raw eeg features')
print( list(FEATS) )

if Config.raw_eeg_path is not None:
    # The .npy file holds a single pickled Python object that `.item()` unwraps
    # (presumably a dict keyed by eeg_id - verify against the file's producer).
    # allow_pickle=True can execute arbitrary code: only load trusted files.
    # NOTE(review): when raw_eeg_path is None, `raw_eegs` stays undefined and
    # the dataloader cell will fail with NameError.
    raw_eegs = np.load(Config.raw_eeg_path, allow_pickle=True).item()

<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# Signal Augmentations
    
 #### The combination of colored noise, Gaussian noise, and frequency/time masking provides a comprehensive set of transformations to enhance the model's robustness and performance across various scenarios. 

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# albumentations is already imported by the import cell at the top, so this
# line only matters on machines where that import would otherwise fail.
%pip install -q albumentations
%pip install -q torch_audiomentations

In [None]:
from torch_audiomentations import Compose
import torch_audiomentations as t

def get_eeg_transforms(data='train'):
    """Build the augmentation pipeline for one data split.

    Args:
        data: ``'train'`` for the noisy/masking pipeline, ``'valid'`` for an
            empty pipeline.

    Returns:
        A ``t.Compose`` instance (``t`` is the ``torch_audiomentations`` alias).

    Raises:
        ValueError: if ``data`` is neither ``'train'`` nor ``'valid'``.
    """
    if data == 'valid':
        # Validation data is passed through an empty pipeline.
        return t.Compose([])

    if data != 'train':
        raise ValueError(f"Invalid data split: {data}")

    # NOTE(review): confirm that AddGaussianNoise, FrequencyMask and TimeMask
    # exist in the installed torch_audiomentations version; a missing attribute
    # would only surface when the 'train' pipeline is requested.
    colored_noise = t.AddColoredNoise(
        p=0.15,
        mode="per_channel",
        p_mode="per_channel",
        max_snr_in_db=15,
        sample_rate=200
    )
    gaussian_noise = t.AddGaussianNoise(
        p=0.2,
        max_amplitude=0.5
    )
    frequency_mask = t.FrequencyMask(
        p=0.2,
        max_mask_percentage=0.1
    )
    time_mask = t.TimeMask(
        p=0.2,
        max_mask_percentage=0.1
    )
    return t.Compose([colored_noise, gaussian_noise, frequency_mask, time_mask])


<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# Model Builder

In [None]:
import torch
import torch.nn as nn

class EEGNet(nn.Module):
    """1-D convolutional classifier: four conv blocks, global average pooling, two linear layers.

    Each conv block is Conv1d -> BatchNorm1d -> ReLU -> Dropout(0.5). The
    sub-modules keep the names ``conv1..4``, ``bn1..4``, ``relu1..4``,
    ``dropout1..4``, ``pool``, ``fc1``, ``relu_fc1``, ``dropout_fc1`` and
    ``fc2`` so that state-dict keys are stable.

    Args:
        in_channels: number of input channels expected by the first convolution.
        num_classes: size of the output logit vector.
    """

    # (out_channels, kernel_size, stride, padding) for conv blocks 1..4.
    _CONV_SPECS = (
        (32, 64, 2, 16),
        (64, 16, 1, 8),
        (128, 8, 1, 4),
        (256, 4, 1, 2),
    )

    def __init__(self, in_channels=20, num_classes=6):
        super().__init__()

        # Register the conv blocks one after another, in block order.
        width_in = in_channels
        for block_no, (width_out, kernel, stride, pad) in enumerate(self._CONV_SPECS, start=1):
            setattr(self, f"conv{block_no}",
                    nn.Conv1d(width_in, width_out, kernel_size=kernel, stride=stride, padding=pad))
            setattr(self, f"bn{block_no}", nn.BatchNorm1d(width_out))
            setattr(self, f"relu{block_no}", nn.ReLU(inplace=True))
            setattr(self, f"dropout{block_no}", nn.Dropout(0.5))
            width_in = width_out

        # Collapse the length axis to a single value per channel.
        self.pool = nn.AdaptiveAvgPool1d(1)

        # Classification head.
        self.fc1 = nn.Linear(256, 128)
        self.relu_fc1 = nn.ReLU(inplace=True)
        self.dropout_fc1 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw logits of shape (batch, num_classes) for input (batch, in_channels, length)."""
        for block_no in range(1, len(self._CONV_SPECS) + 1):
            x = getattr(self, f"conv{block_no}")(x)
            x = getattr(self, f"bn{block_no}")(x)
            x = getattr(self, f"relu{block_no}")(x)
            x = getattr(self, f"dropout{block_no}")(x)

        # (batch, 256, 1) -> (batch, 256)
        x = self.pool(x).squeeze(-1)

        x = self.dropout_fc1(self.relu_fc1(self.fc1(x)))

        return self.fc2(x)


In [None]:
import gc

# Shape smoke test on CPU: push one random batch of two 10000-step signals
# through a freshly built network and print the logits' shape.
dummy_batch = torch.randn(2, Config.num_channels, 10000)
probe_net = EEGNet(in_channels=Config.num_channels, num_classes=6)
output = probe_net(dummy_batch)
print(output.shape)

# The probe objects are not needed afterwards; release them right away.
del dummy_batch, probe_net
gc.collect()

<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# Signal Processing 
    
**ML models often require significant computational resources, and reducing the precision of the input data through quantization can lead to more efficient model training and inference. By representing numerical values with fewer bits, the memory requirements are reduced, allowing for faster processing**
* **You can play with other filters and quantizers as well** 

In [None]:
from scipy.signal import cheby1, butter, lfilter


def quantize_data_linear(data, classes):
    """Quantize ``data`` into ``classes`` equal-width integer levels ``0 .. classes - 1``.

    The range ``[min(data), max(data)]`` is split into ``classes`` bins of equal
    width and every value is replaced by the index of the bin it falls into.

    Args:
        data: array-like of numbers (must be non-empty, as min/max are taken).
        classes: number of quantization levels.

    Returns:
        Integer ndarray with the same shape as ``data`` and values in
        ``[0, classes - 1]``.
    """
    min_val, max_val = np.min(data), np.max(data)
    bins = np.linspace(min_val, max_val, classes + 1)
    # Only the interior edges are used. With the outer edges included (and a
    # subsequent "- 1"), the maximum value fell past the last edge and was
    # assigned the extra level `classes`, i.e. classes + 1 levels in total.
    quantized = np.digitize(data, bins[1:-1])
    return quantized


def chebyshev_lowpass_filter(data, cutoff_freq=20, sampling_rate=200, order=4, rp=0.5):
    """Low-pass filter ``data`` along axis 0 with a digital Chebyshev type I filter.

    Args:
        data: array-like signal; filtering runs along its first axis.
        cutoff_freq: cutoff frequency, in the same unit as ``sampling_rate``.
        sampling_rate: sampling frequency of ``data``.
        order: filter order passed to ``cheby1``.
        rp: maximum pass-band ripple passed to ``cheby1``.

    Returns:
        The filtered array as returned by ``scipy.signal.lfilter``.
    """
    # cheby1 expects the cutoff as a fraction of the Nyquist frequency.
    wn = cutoff_freq / (0.5 * sampling_rate)
    numerator, denominator = cheby1(order, rp, wn, btype='low', analog=False)
    return lfilter(numerator, denominator, data, axis=0)

class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that turns one metadata row into a model-ready signal tensor.

    Args:
        data: DataFrame with one row per sample; must provide ``eeg_id`` and,
            unless ``test`` is True, the columns named by the module-level
            ``TARGETS``.
        eegs: mapping from ``eeg_id`` to the raw signal array.
        augmentations: stored on the instance. NOTE(review): ``__getitem__``
            does not apply it, so it currently has no effect.
        test: when True, ``__getitem__`` returns only the signal tensor.
    """

    def __init__(self, data, eegs=None, augmentations=None, test=False):
        self.data = data
        self.eegs = eegs
        self.augmentations = augmentations
        self.test = test

    def __len__(self):
        """Number of rows in the metadata frame."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(samples, label)``, or just ``samples`` when ``self.test`` is True."""
        row = self.data.iloc[index]
        data = self.eegs[row.eeg_id]

        # Limit extreme amplitudes, replace NaNs by 0 and rescale.
        data = np.clip(data, -1024, 1024)
        data = np.nan_to_num(data, nan=0) / 32.0

        # Low-pass filter along axis 0, then quantize to 256 levels.
        data = chebyshev_lowpass_filter(data, order=4, rp=0.5)
        data = quantize_data_linear(data, 256)

        samples = torch.from_numpy(data).float()
        samples = samples.squeeze()

        # Swap the two axes (presumably (time, channels) -> (channels, time),
        # the layout Conv1d consumes - confirm against the stored arrays).
        samples = samples.permute(1, 0)
        if self.test:
            return samples

        # `TARGETS` is a module-level name resolved when an item is fetched.
        # The label goes through NumPy explicitly: handing the object-dtype,
        # label-indexed Series straight to torch.tensor() made torch index it
        # positionally (series[0], series[1], ...), a fallback pandas has
        # deprecated for non-integer indexes.
        label = np.asarray(row[TARGETS], dtype=np.float32)
        return samples, torch.from_numpy(label)

<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# CV Split Block
    
#### Stratified k-fold cross-validation is advantageous for datasets with imbalanced class distributions: every fold keeps roughly the same class proportions, which gives more reliable performance estimates and makes overfitting or underfitting easier to detect.
    
    

In [None]:
from sklearn.model_selection import StratifiedKFold

# Five shuffled folds, stratified on the `target` label of each row.
# NOTE(review): no grouping is used, so rows sharing a patient_id can end up in
# both the training and the validation part of a fold - confirm this is intended.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
train['fold'] = 0

# Each row gets the number of the fold in which it is a validation row.
# `val_idx` holds positions; they are used as labels in .loc, which relies on
# `train` having a default 0..n-1 index.
# The loop variable `fold` stays defined at module level afterwards; the
# EEGModel class definition further down reads it as a default argument.
for fold, (tr_idx, val_idx) in enumerate(skf.split(train, train['target'])):
    train.loc[val_idx, 'fold'] = fold


In [None]:
def get_fold_dls(df_train, df_valid):
    """Create datasets and dataloaders for one cross-validation fold.

    Args:
        df_train: rows used for training.
        df_valid: rows used for validation.

    Returns:
        Tuple ``(train_dataloader, val_dataloader, train_data, val_data)``.
        Only the training loader shuffles.
    """

    def _build_dataset(frame):
        # Both splits receive the 'valid' transform pipeline and labelled mode.
        return Dataset(
            frame,
            eegs=raw_eegs,
            augmentations=get_eeg_transforms(data='valid'),
            test=False,
        )

    train_data = _build_dataset(df_train)
    val_data = _build_dataset(df_valid)

    train_dataloader = DataLoader(
        train_data, batch_size=Config.batch_size, shuffle=True, num_workers=2
    )
    val_dataloader = DataLoader(
        val_data, batch_size=Config.batch_size, num_workers=2
    )
    return train_dataloader, val_dataloader, train_data, val_data

<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">

# <strong>Learning Rate Scheduler and Optimizer</strong>

#### **Common learning rate schedulers include:**

**StepLR: Reduces the learning rate by a factor after a fixed number of epochs.**

**MultiStepLR: Similar to StepLR but allows the learning rate to be reduced at specific epochs.**

**ExponentialLR: Multiplies the learning rate by a constant factor at each epoch.**

**ReduceLROnPlateau: Adjusts the learning rate based on a validation metric (e.g., reducing the learning rate if the validation loss plateaus).**

**CyclicLR: Alternates between lower and upper learning rate values in a cycle.**

**CosineAnnealingLR: Gradually reduces the learning rate in a cosine-shaped manner.**

 #### **Optimizers:** 
    
The optimizer is an algorithm that adjusts the model's parameters during training to minimize the loss function. It plays a crucial role in determining how quickly the model learns, converges, and generalizes.

Common optimizers include:

**Stochastic Gradient Descent (SGD): Updates the model's parameters in the opposite direction of the gradient of the loss function with respect to the parameters.**

**Adam: Combines ideas from RMSprop and Momentum. It adapts the learning rates for each parameter individually.**

**Adagrad: Adapts the learning rates of all model parameters based on historical gradients.**

**RMSprop: Similar to Adagrad but uses a moving average of squared gradients to adapt the learning rates.**

**Adadelta: An extension of Adagrad that seeks to reduce its aggressive, monotonically decreasing learning rates.**

**Nadam: Nesterov Adam optimizer, a variant of Adam incorporating Nesterov momentum.**

In [None]:
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

def get_optimizer_and_scheduler(lr, params):
    """Build the optimizer / LR-scheduler configuration selected in ``Config``.

    Args:
        lr: initial learning rate.
        params: iterable of parameters to optimise.

    Returns:
        ``None`` when ``Config.optimizers`` is not one of ``'adam'``,
        ``'nadam'``, ``'adamW'`` or ``'sgd'``. Otherwise a dict with the key
        ``"optimizer"`` and, when ``Config.scheduler`` names a supported
        scheduler, an ``"lr_scheduler"`` sub-dict (stepped once per epoch,
        monitoring ``"val_loss"``).
    """
    if Config.optimizers == 'adam':
        optimizer = optim.Adam(params, lr=lr, weight_decay=Config.weight_decay)
    elif Config.optimizers == 'nadam':
        # The class is spelled NAdam in torch.optim; `optim.Nadam` does not exist.
        optimizer = optim.NAdam(params, lr=lr, weight_decay=Config.weight_decay)
    elif Config.optimizers == 'adamW':
        optimizer = optim.AdamW(params, lr=lr, weight_decay=Config.weight_decay)
    elif Config.optimizers == 'sgd':
        optimizer = optim.SGD(params, lr=lr, weight_decay=Config.weight_decay)
    else:
        return None  # Return None if an unsupported optimizer is specified

    if Config.scheduler == 'CosineAnnealingLR':
        # Fall back to one cosine period over the whole run when the config
        # object carries no T_max attribute.
        t_max = getattr(Config, 'T_max', Config.epochs)
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max, eta_min=Config.min_lr)
    elif Config.scheduler == 'CosineAnnealingWarmRestarts':
        # NOTE(review): the first restart period is taken from Config.epochs,
        # not from Config.T_0 - confirm which one is intended.
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=Config.epochs, eta_min=Config.min_lr)
    elif Config.scheduler == 'ReduceLROnPlateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=7,
                                                   threshold=0.0001, min_lr=Config.min_lr)
    elif Config.scheduler == 'ExponentialLR':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.85)
    else:
        scheduler = None  # unsupported scheduler name

    if scheduler is None:
        # Train with a constant learning rate instead of handing the trainer
        # an "lr_scheduler" entry whose scheduler is None.
        return {"optimizer": optimizer}

    interval = "epoch"
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": scheduler,
            "interval": interval,
            "monitor": "val_loss",
            "frequency": 1
        }
    }

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q torchtoolbox

In [None]:
from torchtoolbox.tools import mixup_data, mixup_criterion
import torch.nn as nn
from torch.nn.functional import cross_entropy
import torchmetrics
import timm
import sklearn.metrics
import sys
sys.path.append('/kaggle/input/kaggle-kl-div')

from kaggle_kl_div import score


<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# Loss 

In [None]:
class KLDivLossWithLogits(nn.KLDivLoss):
    """``nn.KLDivLoss`` (``reduction="batchmean"``) that accepts raw logits as predictions."""

    def __init__(self):
        super().__init__(reduction="batchmean")

    def forward(self, y, t):
        """Return the KL divergence between targets ``t`` and ``softmax(y)``.

        Args:
            y: raw logits; log-softmax is applied over dimension 1.
            t: target tensor, passed unchanged to ``nn.KLDivLoss.forward``.
        """
        log_probabilities = y.log_softmax(dim=1)
        return nn.KLDivLoss.forward(self, log_probabilities, t)

<div style="background-color: #e0f8e6; padding: 20px; border-radius: 50px; border: 5px solid #ffa76e;">
    
# Training Block
    

In [None]:
class EEGModel(pl.LightningModule):
    """LightningModule wrapping an ``EEGNet`` backbone trained with ``KLDivLossWithLogits``.

    Validation logits and targets are collected per step and scored once per
    validation epoch with the ``score`` function imported from ``kaggle_kl_div``.

    Args:
        num_classes: stored on the instance (the backbone itself is sized from
            ``Config.num_classes``).
        pretrained: accepted but not used inside this class.
        fold: fold number, used only in the progress messages.
    """

    # The default values below are evaluated once, when this class statement
    # runs; `fold` therefore takes whatever the module-level name `fold` holds
    # at that moment.
    def __init__(self, num_classes = Config.num_classes, pretrained = Config.pretrained, fold = fold):
        super().__init__()
        self.num_classes = num_classes
        self.fold = fold
        self.backbone = EEGNet(in_channels=Config.num_channels, num_classes=Config.num_classes)
        self.loss_function = KLDivLossWithLogits()
        # Per-step validation results, emptied at the end of every validation epoch.
        self.validation_step_outputs = []
        self.lin = nn.Softmax(dim=1)
        # Lowest validation score seen so far (lower is better).
        self.best_score = 1000.0

    def forward(self, images):
        """Return the backbone's raw logits for a batch of signals."""
        return self.backbone(images)

    def configure_optimizers(self):
        """Delegate optimizer/scheduler construction to ``get_optimizer_and_scheduler``."""
        return get_optimizer_and_scheduler(lr=Config.LR, params=self.parameters())

    def train_with_mixup(self, X, y):
        """Return the mixup loss for one batch, mixing with ``Config.mixup_alpha``."""
        mixed_x, targets_a, targets_b, lam = mixup_data(X, y, alpha=Config.mixup_alpha)
        logits = self(mixed_x)
        return mixup_criterion(KLDivLossWithLogits(), logits, targets_a, targets_b, lam)

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss (with mixup when ``Config.use_mixup`` is set)."""
        signals, soft_labels = batch
        if Config.use_mixup:
            loss = self.train_with_mixup(signals, soft_labels)
        else:
            loss = self.loss_function(self(signals), soft_labels)

        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the validation loss and remember logits/targets for epoch-end scoring."""
        signals, soft_labels = batch
        logits = self(signals)
        val_loss = self.loss_function(logits, soft_labels)
        self.log("val_loss", val_loss, on_step=True, on_epoch=True, logger=True, prog_bar=True)

        step_result = {"val_loss": val_loss, "logits": logits, "targets": soft_labels}
        self.validation_step_outputs.append(step_result)

        # A separate dict with the same content is handed back to the caller.
        return dict(step_result)

    def train_dataloader(self):
        # NOTE(review): `_train_dataloader` is not assigned anywhere in this class.
        return self._train_dataloader

    def validation_dataloader(self):
        # NOTE(review): `_validation_dataloader` is not assigned anywhere in this
        # class; also confirm this method name is one the trainer actually calls.
        return self._validation_dataloader

    def on_validation_epoch_end(self):
        """Score the collected validation outputs and report new best results.

        Returns:
            Dict with the mean step loss (``'val_loss'``) and the epoch score
            (``'val_cmap'``).
        """
        collected = self.validation_step_outputs
        # print(len(outputs))
        step_losses = [item['val_loss'] for item in collected]
        step_logits = [item['logits'] for item in collected]
        step_targets = [item['targets'] for item in collected]

        avg_loss = torch.stack(step_losses).mean()
        probabilities = torch.softmax(torch.cat(step_logits, dim=0), dim=1)
        output_val = probabilities.cpu().detach().numpy()
        target_val = torch.cat(step_targets, dim=0).cpu().detach().numpy()
        self.validation_step_outputs = []

        # The scoring function takes two frames that share a row-id column.
        column_names = list(TARGETS)
        val_df = pd.DataFrame(target_val, columns = column_names)
        pred_df = pd.DataFrame(output_val, columns = list(TARGETS))

        val_df['id'] = [f'id_{i}' for i in range(len(val_df))]
        pred_df['id'] = [f'id_{i}' for i in range(len(pred_df))]

        avg_score = score(val_df, pred_df, row_id_column_name = 'id')

        # Only improvements are printed and remembered.
        if avg_score < self.best_score:
            print(f'Fold {self.fold}: Epoch {self.current_epoch} validation loss {avg_loss}')
            print(f'Fold {self.fold}: Epoch {self.current_epoch} validation KDL score {avg_score}')
            self.best_score = avg_score
        return {'val_loss': avg_loss,'val_cmap':avg_score}

In [None]:
from tqdm import tqdm
tqdm.pandas()


def predict(data_loader, model):
    """Run ``model`` over ``data_loader`` and return softmax probabilities.

    The model is moved to the GPU when one is available and to the CPU
    otherwise, then switched to eval mode. Batches may be ``(inputs, labels)``
    pairs or bare input tensors (as produced by a dataset in test mode).

    Args:
        data_loader: iterable of batches.
        model: module mapping an input batch to logits of shape (batch, classes).

    Returns:
        ``numpy.ndarray`` of shape (n_samples, classes) with one probability
        row per sample, in loader order.
    """
    # Previously 'cuda' was hard-coded, which failed on CPU-only machines.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    model.eval()

    predictions = []
    with torch.no_grad():
        for batch in tqdm(data_loader):
            # Labelled loaders yield a (inputs, labels) pair; unlabelled ones
            # yield the input tensor itself.
            x = batch[0] if isinstance(batch, (list, tuple)) else batch
            x = x.to(device)
            outputs = torch.softmax(model(x), dim=1)
            predictions.append(outputs.cpu().numpy())

    predictions = np.vstack(predictions)
    return predictions



In [None]:
from pytorch_lightning.loggers import WandbLogger
import gc
# Select the 'high' setting for float32 matrix-multiplication precision.
torch.set_float32_matmul_precision('high')

# Target column labels (the last six columns of the metadata frame). Dataset
# and EEGModel read this module-level name when they are used, not when they
# are defined, so it must exist before training starts.
TARGETS = data_processor.TARGETS
def run_training(fold_id, Config):
    """Train on every fold except ``fold_id`` and predict the held-out fold.

    Side effects: writes checkpoints and ``pred_df_f{fold_id}.csv`` into
    ``Config.output_dir``.

    Args:
        fold_id: value of ``train['fold']`` that is used for validation.
        Config: configuration object (seed, epochs, paths, ...).

    Returns:
        ``numpy.ndarray`` of shape (n_validation_rows, n_targets) holding the
        best checkpoint's probabilities, in the row order of the validation frame.
    """
    print(f"Running training for fold {fold_id}...")

    # Config.seed was defined but never applied; seeding here makes weight
    # initialisation and shuffling repeatable, in line with deterministic=True.
    pl.seed_everything(Config.seed, workers=True)

    logger = None
    pred_cols = [f'pred_{t}' for t in TARGETS]

    df_train = train[train['fold']!=fold_id].copy()
    df_valid = train[train['fold']==fold_id].copy()

    print(len(df_train),'train length')
    print(len(df_valid),'valid length')

    dl_train, dl_val, ds_train, ds_val = get_fold_dls(df_train, df_valid)

    eeg_model = EEGModel(num_classes = Config.num_classes, pretrained = Config.pretrained, fold = fold_id)

    early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=0.00, patience=Config.PATIENCE, verbose= True, mode="min")
    checkpoint_callback = ModelCheckpoint(monitor='val_loss',
                                          dirpath= f"{Config.output_dir}/",
                                          save_top_k=1,
                                          save_last= True,
                                          save_weights_only=False,
                                          filename= f'eegnet_best_loss_fold{fold_id}',
                                          verbose= True,
                                          mode='min')

    callbacks_to_use = [checkpoint_callback,early_stop_callback]

    trainer = pl.Trainer(
        devices=[0],
        # Validate twice per training epoch.
        val_check_interval=0.5,
        deterministic=True,
        max_epochs=Config.epochs,
        logger=logger,
        callbacks=callbacks_to_use,
        # NOTE(review): Config.PRECISION is doubled here (16 -> 32 with the
        # notebook's Config) - confirm that full precision is intended.
        precision=Config.PRECISION*2,
        accelerator="gpu"
    )

    print("Running trainer.fit")
    trainer.fit(eeg_model, train_dataloaders = dl_train, val_dataloaders = dl_val)

    # Ask the callback where the best checkpoint was written: when a file with
    # the configured name already exists (e.g. on a re-run), the saved file
    # carries a version suffix and a hand-built path would load stale weights.
    best_ckpt_path = (
        checkpoint_callback.best_model_path
        or f'{Config.output_dir}/eegnet_best_loss_fold{fold_id}.ckpt'
    )
    # Only arguments that EEGModel.__init__ accepts are passed.
    model = EEGModel.load_from_checkpoint(best_ckpt_path, fold=fold_id)
    preds = predict(dl_val, model)
    print(preds.shape)
    df_valid[pred_cols] = preds
    df_valid.to_csv(f'{Config.output_dir}/pred_df_f{fold_id}.csv',index=False)
    gc.collect()
    # torch.cuda.empty_cache()
    return preds

In [None]:

# Out-of-fold predictions: every row receives the probabilities produced by
# the model that did not see it during training.
oof_df = train.copy()
pred_cols = [f'pred_{t}' for t in TARGETS]
oof_df[pred_cols] = 0.0

for f in Config.trn_folds:
    # Index labels of this fold's validation rows, in frame order.
    val_idx = train.index[train['fold'] == f].tolist()
    print(len(val_idx))
    val_preds = run_training(f, Config)
    oof_df.loc[val_idx, pred_cols] = val_preds
    