In [None]:
import os
import sys
import pandas as pd
import numpy as np
import random

from sklearn import model_selection

import tensorflow

import torch
from torch.utils.data import DataLoader,Dataset
import torch.nn as nn
from torch import optim
import torch.functional as F
from pathlib import Path
from tqdm import tqdm

import matplotlib.pyplot as plt

from torchvision import transforms

import albumentations as A
from albumentations.pytorch import ToTensorV2

import timm

# from tqdm import tqdm
from tqdm.auto import tqdm


import warnings
warnings.filterwarnings('ignore')


In [None]:
# Directory layout, anchored at the parent of the current working directory.
ROOT = Path.cwd().parent
INPUT = ROOT.joinpath("input")
OUTPUT = ROOT.joinpath("output")
SRC = ROOT.joinpath("src")

# Dataset directory and its train / test spectrogram sub-directories.
DATA = INPUT.joinpath("hms-harmful-brain-activity-classification")
TRAIN_SPEC = DATA.joinpath("train_spectrograms")
TEST_SPEC = DATA.joinpath("test_spectrograms")

# Working directories written by this notebook.
TMP = ROOT.joinpath("tmp")
TRAIN_SPEC_SPLIT = TMP.joinpath("train_spectrograms_split")
TEST_SPEC_SPLIT = TMP.joinpath("test_spectrograms_split")

# TMP comes first because the two split directories live inside it and
# mkdir() is called without parents=True.
for _work_dir in (TMP, TRAIN_SPEC_SPLIT, TEST_SPEC_SPLIT):
    _work_dir.mkdir(exist_ok=True)


class CFG:
    """Static settings read by the cells of this notebook."""

    # Names of the six vote columns.
    CLASSES = [
        "seizure_vote",
        "lpd_vote",
        "gpd_vote",
        "lrda_vote",
        "grda_vote",
        "other_vote",
    ]
    N_CLASSES = len(CLASSES)

    # CUDA when it is available, CPU otherwise.
    DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    # Resize applied to each spectrogram tensor by HMSDataset.
    image_transform = transforms.Resize((256, 256))

    SEED = 1086
    N_FOLDS = 5
    NUM_EPOCHS = 8
    BATCH_SIZE = 32
    NUM_WORKERS = 4
    PATIENCE = 3

    # Added to the standard deviation when HMSDataset standardises an image.
    EPS = 1e-5


cfg = CFG()

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch,
    and configures cuDNN for reproducible execution.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    torch.backends.cudnn.deterministic = True
    # Benchmark mode times several convolution algorithms and keeps the
    # fastest, so the chosen algorithm can differ between runs. It has to be
    # off for the deterministic flag above to give reproducible results.
    torch.backends.cudnn.benchmark = False
# Seed all generators once with the configured seed.
seed_everything(seed=cfg.SEED)

In [None]:
# Read the test metadata and the submission template from the dataset directory.
test, sample_submission = (
    pd.read_csv(DATA / csv_name)
    for csv_name in ("test.csv", "sample_submission.csv")
)

In [None]:
# Convert every test spectrogram from parquet to a float32 .npy file.
for spec_id in test["spectrogram_id"]:
    spec = pd.read_parquet(TEST_SPEC.joinpath(f"{spec_id}.parquet"))
    # Missing values become 0 before the frame is turned into an array.
    spec_filled = spec.fillna(0).to_numpy()
    # Drop the first column, then transpose so the remaining columns become rows.
    spec_arr = spec_filled[:, 1:].T.astype(np.float32)
    np.save(TEST_SPEC_SPLIT.joinpath(f"{spec_id}.npy"), spec_arr)

In [None]:
class HMSDataset(Dataset):
    """Dataset of pre-converted ``.npy`` spectrograms listed in a DataFrame.

    Each item is the spectrogram standardised to zero mean / unit variance,
    given a leading channel axis and passed through ``transform``, together
    with a placeholder label tensor filled with ``-1``.

    Args:
        df: DataFrame with a ``spectrogram_id`` column, one row per sample.
        spec_dir: Directory containing ``<spectrogram_id>.npy`` files.
            Defaults to ``TEST_SPEC_SPLIT``.
        transform: Callable applied to the image tensor after the channel
            axis is added. Defaults to ``cfg.image_transform``.
        eps: Value added to the standard deviation to avoid dividing by
            zero. Defaults to ``cfg.EPS``.
        n_classes: Width of the placeholder label tensor.
    """

    def __init__(self, df, spec_dir=None, transform=None, eps=None, n_classes=6):
        self.df = df
        self.spec_dir = TEST_SPEC_SPLIT if spec_dir is None else Path(spec_dir)
        self.transform = cfg.image_transform if transform is None else transform
        self.eps = cfg.EPS if eps is None else eps
        self.n_classes = n_classes

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx: int):
        """Load, standardise and transform the spectrogram at position ``idx``.

        Returns:
            Tuple ``(image, labels)``: the transformed float32 image tensor
            and a float32 tensor of ``-1`` placeholders.
        """
        # Positional lookup: samplers hand out positions 0..len-1, which only
        # coincide with index labels when the frame has a default RangeIndex.
        spec_id = self.df["spectrogram_id"].iloc[idx]
        img = np.load(self.spec_dir / f"{spec_id}.npy")

        # Standardise using the statistics of this image alone.
        img = (img - img.mean(axis=(0, 1))) / (img.std(axis=(0, 1)) + self.eps)
        img = np.nan_to_num(img, nan=0.0)

        data_tensor = torch.as_tensor(img, dtype=torch.float32).unsqueeze(0)
        img_tensor = self.transform(data_tensor)

        # NOTE(review): the placeholder keeps the original (len(df), n_classes)
        # shape per item so collated batches look the same to existing callers.
        # A per-sample (n_classes,) placeholder is probably what was intended;
        # confirm before changing it.
        labels = torch.full(
            (len(self.df), self.n_classes), -1.0, dtype=torch.float32
        )
        return img_tensor, labels
    
class HMSCnn(nn.Module):
    """Three-stage convolutional classifier for single-channel 256x256 images.

    Each stage is ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)``, which
    halves height and width, so a 256x256 input reaches the head as
    128 x 32 x 32 features. The head is
    ``Linear(131072, 256) -> ReLU -> Linear(256, 128) -> Linear(128, 6)`` and
    returns raw logits (no softmax).

    The attribute names of the parameterised layers (``conv1``-``conv3``,
    ``fc1``-``fc3``) are the ``state_dict`` keys, so they must not be renamed
    while existing checkpoints are loaded into this class.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(128 * 32 * 32, 256)
        self.relu4 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 6)

    def forward(self, x):
        """Map a ``(batch, 1, 256, 256)`` tensor to ``(batch, 6)`` logits.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not produce the
                128 * 32 * 32 features that ``fc1`` expects.
        """
        x = self.pool1(self.relu1(self.conv1(x)))
        x = self.pool2(self.relu2(self.conv2(x)))
        x = self.pool3(self.relu3(self.conv3(x)))
        # Flatten per sample and keep the batch axis fixed. The previous
        # view(-1, 128 * 32 * 32) silently folded several smaller samples
        # into one row; now a wrong input size makes fc1 raise instead.
        x = torch.flatten(x, start_dim=1)
        x = self.relu4(self.fc1(x))
        # No activation between fc2 and fc3; presumably the saved checkpoints
        # were trained with exactly this layout, so it is left unchanged.
        x = self.fc2(x)
        x = self.fc3(x)
        return x
# Build the network and move it to the configured device.
model = HMSCnn().to(cfg.DEVICE)

In [None]:
# Map every submission column after the first one to its zero-based position.
labels_map = {
    column: position
    for position, column in enumerate(sample_submission.columns[1:])
}

In [None]:
def test_mod(model, dataloader):
    """Run inference over ``dataloader`` and return softmax probabilities.

    Args:
        model: Module mapping a batch of inputs to per-class logits.
        dataloader: Iterable yielding ``(data, labels)`` pairs; the labels
            are ignored.

    Returns:
        DataFrame with one row per sample, in loader order, and one column
        per key of ``labels_map``. An empty loader yields an empty frame
        that still carries those columns.
    """
    model.eval()
    batch_probs = []
    with torch.no_grad():
        for data, _ in tqdm(dataloader):
            logits = model(data.to(cfg.DEVICE))
            batch_probs.append(torch.softmax(logits, dim=1).cpu())

    columns = list(labels_map.keys())
    if not batch_probs:
        return pd.DataFrame(columns=columns)
    # Build the frame once; concatenating onto a growing DataFrame inside the
    # loop re-copies every earlier row on each batch.
    return pd.DataFrame(torch.cat(batch_probs, dim=0).numpy(), columns=columns)

In [None]:
def test_on_fold(fold, model_dir='/kaggle/input/hms-starter-training-cnn'):
    """Load the checkpoint of one fold and predict on the test set.

    The weights are loaded into the module-level ``model`` in place, so after
    the call ``model`` holds the parameters of ``fold``.

    Args:
        fold: Fold number used in the checkpoint file name
            ``hms_model_fold_{fold}.bin``.
        model_dir: Directory containing the fold checkpoints.

    Returns:
        DataFrame of per-class probabilities as produced by ``test_mod``.
    """
    checkpoint_path = Path(model_dir) / f'hms_model_fold_{fold}.bin'
    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    state_dict = torch.load(checkpoint_path, map_location=cfg.DEVICE)
    model.load_state_dict(state_dict)

    test_set = HMSDataset(test)
    test_loader = DataLoader(
        test_set,
        batch_size=cfg.BATCH_SIZE,
        shuffle=False,
        num_workers=0,
    )
    return test_mod(model, test_loader)

In [None]:
# Predict with each of the five fold checkpoints (folds 0 to 4, in order).
pred1, pred2, pred3, pred4, pred5 = (test_on_fold(fold) for fold in range(5))

In [None]:
# Element-wise mean of the five per-fold prediction tables.
final_pred = pred1.add(pred2).add(pred3).add(pred4).add(pred5).div(5)

In [None]:
# Overwrite every column after the first with the averaged predictions.
# The assignment is positional, so rows and columns must already line up.
sample_submission.iloc[:, 1:] = final_pred.to_numpy()

In [None]:
# Display the filled-in submission table as this cell's output.
sample_submission

In [None]:
# Write the submission to the working directory without the index column.
sample_submission.to_csv(path_or_buf='submission.csv', index=False)