## 📌 Overview

### 📋 Key Aspects of the Notebook
This notebook runs inference with an ensemble of three pretrained models (ResNet34d, EfficientNetB0, and EfficientNetB1), each trained exclusively on the Kaggle-provided data (EEG spectrograms) using a batch size of 16.
* links:
    * https://www.kaggle.com/code/andreasbis/hms-train-resnet34d
    * https://www.kaggle.com/code/andreasbis/hms-train-efficientnetb0
    * https://www.kaggle.com/code/andreasbis/hms-train-efficientnetb1


### 🙏 Acknowledgement
Inspired by the work of @yunsuxiaozi. Don't forget to upvote their work if you find it helpful!
* links:
    * https://www.kaggle.com/code/yunsuxiaozi/hms-baseline-resnet34d-512-512-training-5-folds
    * https://www.kaggle.com/code/yunsuxiaozi/hms-baseline-resnet34d-512-512-inference-6-models

## 📙 Import libraries and modules

In [None]:
# Importing essential libraries
import gc
import os
import random
import warnings
import numpy as np
import pandas as pd
from IPython.display import display

# PyTorch for deep learning
import timm
import torch
import torch.nn as nn  
import torch.optim as optim
import torch.nn.functional as F

# torchvision for image processing and augmentation
import torchvision.transforms as transforms

# Suppressing minor warnings to keep the output clean
warnings.filterwarnings('ignore', category=Warning)

# Reclaim memory no longer in use.
gc.collect()

## ⚙️ Configuration

In [None]:
class Config:
    """Inference settings shared by the cells of this notebook."""

    # Seed handed to set_seed() for the Python, NumPy and PyTorch RNGs
    seed = 42

    # Number of folds, i.e. how many checkpoints are loaded per architecture
    num_folds = 5

    # Resizes each spectrogram tensor to 512x512 before it is fed to the models
    image_transform = transforms.Resize((512, 512))
    
# Set the seed for reproducibility across multiple libraries
def set_seed(seed):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Integer seed applied to `random`, `numpy.random` and `torch`.
    """
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and choose convolution algorithms at
    # run time, and that choice may differ between runs. It has to be disabled
    # for the results to be reproducible.
    torch.backends.cudnn.benchmark = False
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    
# Seed all RNGs with the configured value before any model or data work runs
set_seed(seed=Config.seed)

## 📂 Data Loading

In [None]:
# Checkpoint sources, listed in the order the ensemble weighting relies on:
# ResNet34d folds first, then EfficientNetB0 folds, then EfficientNetB1 folds.
# Each entry is (timm architecture name, checkpoint path template with {fold}).
MODEL_SOURCES = [
    ('resnet34d', '/kaggle/input/hms-train-resnet34d/resnet34d_fold{fold}.pth'),
    ('efficientnet_b0', '/kaggle/input/hms-train-efficientnetb0/efficientnet_b0_fold{fold}.pth'),
    ('efficientnet_b1', '/kaggle/input/hms-train-efficientnetb1/efficientnet_b1_fold{fold}.pth'),
]


def load_fold_models(architecture, weights_template, num_folds):
    """Create one model per fold and load its trained weights onto the CPU.

    Args:
        architecture: Model name passed to `timm.create_model`.
        weights_template: Checkpoint path containing a `{fold}` placeholder.
        num_folds: Number of folds; folds are numbered 0 .. num_folds - 1.

    Returns:
        A list with `num_folds` models in fold order, each in eval mode.
    """
    fold_models = []
    for fold in range(num_folds):
        # Create the same model architecture as during training
        model = timm.create_model(architecture, pretrained=False, num_classes=6, in_chans=1)

        # NOTE(review): torch.load unpickles the file, so only load checkpoints
        # from a trusted source.
        state_dict = torch.load(weights_template.format(fold=fold), map_location=torch.device('cpu'))
        model.load_state_dict(state_dict)

        # These models are used for inference only, so switch off
        # training-time behaviour (dropout, batch-norm statistic updates).
        model.eval()
        fold_models.append(model)
    return fold_models


# Load and store the trained models for every architecture and fold in one list
models = []
for architecture, weights_template in MODEL_SOURCES:
    models.extend(load_fold_models(architecture, weights_template, Config.num_folds))

# Reclaim memory no longer in use.
gc.collect()

In [None]:
# Read the sample submission and the test metadata
submission = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/sample_submission.csv")
test_df = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/test.csv")

# Left-join the test metadata onto the submission rows via the EEG id
submission = pd.merge(submission, test_df, how='left', on='eeg_id')

# Build the parquet file path of each row's spectrogram from its spectrogram id
submission['path'] = [
    f"/kaggle/input/hms-harmful-brain-activity-classification/test_spectrograms/{spectrogram_id}.parquet"
    for spectrogram_id in submission['spectrogram_id']
]

# Preview the merged dataframe
display(submission.head())

# Run a garbage-collection pass to free memory that is no longer in use
gc.collect()

## 🎰 Predictions

In [None]:
# Define the weights for each model
weight_resnet34d = 0.32
weight_effnetb0 = 0.36
weight_effnetb1 = 0.32

# Small constant that keeps the normalisation from dividing by zero
eps = 1e-6


def load_spectrogram(path):
    """Read one spectrogram parquet file and convert it into a model input.

    NaNs are replaced by -1, the first column is dropped (presumably a time
    column, to be confirmed against the data description) and the array is
    transposed. Values are then clipped to [exp(-6), exp(10)], log-transformed,
    standardised with the mean and standard deviation of the whole array, and
    resized with `Config.image_transform`.

    Args:
        path: Location of the spectrogram parquet file.

    Returns:
        A float tensor with a leading batch and channel dimension of size 1.
    """
    spec = pd.read_parquet(path).fillna(-1).values[:, 1:].T
    # The lower clip bound also lifts the -1 NaN placeholders to a valid
    # positive value, so the logarithm is always defined.
    spec = np.log(np.clip(spec, np.exp(-6), np.exp(10)))

    # Normalize the data
    spec = (spec - spec.mean(axis=(0, 1))) / (spec.std(axis=(0, 1)) + eps)

    # Add the channel dimension, resize, then add the batch dimension
    image = Config.image_transform(torch.unsqueeze(torch.Tensor(spec), dim=0))
    return image.unsqueeze(0)


def predict_ensemble(batch):
    """Return the weighted ensemble class probabilities for one input batch.

    Every model in `models` predicts on `batch`. The fold predictions are
    averaged per architecture and the three averages are combined with the
    architecture weights defined above.

    Args:
        batch: Tensor returned by `load_spectrogram`.

    Returns:
        A 1-D NumPy array with one probability per class.
    """
    with torch.no_grad():
        # dim=1 is the class axis of the (batch, classes) model output. Passing
        # it explicitly avoids the deprecated implicit-dimension behaviour.
        fold_preds = [F.softmax(model(batch), dim=1)[0].cpu().numpy() for model in models]

    n_folds = Config.num_folds
    # Combine predictions from all models using weighted voting
    return (
        weight_resnet34d * np.mean(fold_preds[:n_folds], axis=0)
        + weight_effnetb0 * np.mean(fold_preds[n_folds:2 * n_folds], axis=0)
        + weight_effnetb1 * np.mean(fold_preds[2 * n_folds:], axis=0)
    )


# The slicing in predict_ensemble assumes three architectures with
# Config.num_folds models each, stored in that order.
assert len(models) == 3 * Config.num_folds, "expected num_folds models for each of the 3 architectures"

# Switch every model to inference mode once, instead of once per spectrogram
for model in models:
    model.eval()

# Get file paths for test spectrograms
paths = submission['path'].values

# Generate predictions for each spectrogram using all models and collect them
# in a NumPy array for further processing
test_preds = np.array([predict_ensemble(load_spectrogram(path)) for path in paths])

# Reclaim memory no longer in use
gc.collect()

## 🚀 Submission

In [None]:
# Re-read the sample submission; this discards the helper columns (merged test
# metadata and 'path') that were added to the earlier `submission` dataframe
submission = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/sample_submission.csv")

# Class names in the column order of `test_preds` (assumed to match the class
# order the models were trained with)
labels = ['seizure', 'lpd', 'gpd', 'lrda', 'grda', 'other']

# Copy each prediction column into its "<label>_vote" submission column
for column_idx, label in enumerate(labels):
    submission[f'{label}_vote'] = test_preds[:, column_idx]

# Write the final submission file without the dataframe index
submission.to_csv("submission.csv", index=None)

# Preview the submission file
display(submission.head())

# Run a garbage-collection pass to free memory that is no longer in use
gc.collect()