This notebook is written for educational purposes. I wanted to learn PyTorch, so I decided to re-implement this [notebook](https://www.kaggle.com/code/cdeotte/efficientnetb0-starter-lb-0-43) by [Chris Deotte](https://www.kaggle.com/cdeotte) in PyTorch. Thanks a lot, Chris!

In [None]:
import numpy as np
import pandas as pd
import torch
import torchvision
from sklearn.model_selection import GroupKFold
from tqdm.auto import tqdm

In [None]:
# Locations of the two pre-computed spectrogram archives (.npy files).
# chris_spec_path: archive later indexed by eeg_id inside HMSData.
# kaggle_spec_path: archive later indexed by spectrogram id (spec_id) inside HMSData.
chris_spec_path = '/kaggle/input/brain-eeg-spectrograms/eeg_specs.npy'
kaggle_spec_path = '/kaggle/input/brain-spectrograms/specs.npy'

In [None]:
%%time
# Each file stores a single pickled Python object inside a 0-d object array;
# .item() unwraps it (the result is indexed by id further down in the notebook).
# NOTE(review): allow_pickle=True runs the pickle machinery on load, which can execute
# arbitrary code — only use this with archives from a trusted source.
chris_spec = np.load(chris_spec_path, allow_pickle = True).item()
kaggle_spec = np.load(kaggle_spec_path, allow_pickle = True).item()

In [None]:
# Raw competition metadata, one row per labelled window.
train_df = pd.read_csv('/kaggle/input/hms-harmful-brain-activity-classification/train.csv')
# The last six columns are used as the prediction targets everywhere below.
# NOTE(review): presumably these are the six expert vote-count columns — the selection is
# positional, so confirm the column order of train.csv.
targets = train_df.columns[-6:]
train_df.head()

In [None]:
# Two-way lookup between a target column's position and its name.
idx2targets = dict(enumerate(targets))
targets2idx = {name: position for position, name in idx2targets.items()}
targets2idx

In [None]:
# Collapse train_df to a single row per eeg_id.
per_eeg = train_df.groupby('eeg_id')

# First spectrogram id, smallest/largest label offset and the patient of every eeg_id.
train = per_eeg.agg(
    spec_id=('spectrogram_id', 'first'),
    min=('spectrogram_label_offset_seconds', 'min'),
    max=('spectrogram_label_offset_seconds', 'max'),
    patient_id=('patient_id', 'first'),
)

# Sum the target columns over all rows of an eeg_id, then rescale every row so the
# six values add up to 1.
vote_totals = per_eeg[targets].sum().values
y_data = vote_totals / vote_totals.sum(axis=1, keepdims=True)
for position, vote_col in enumerate(targets):
    train[vote_col] = y_data[:, position]

# Keep the first expert_consensus value of each eeg_id as a plain class label.
train['target'] = per_eeg['expert_consensus'].first()

train = train.reset_index()
print('Train non-overlapp eeg_id shape:', train.shape )
train.head()

In [None]:
class HMSData(torch.utils.data.Dataset):
    """Dataset yielding one (128, 256, 8) spectrogram stack and a 6-value target per row of ``df``.

    Channels 0-3 are built from ``kaggle_spec[row.spec_id]`` (four 100-column bands,
    log-scaled and standardised); channels 4-7 are copied from ``chris_spec[row.eeg_id]``.

    Args:
        kaggle_spec: mapping from spectrogram id to a 2-D array (rows = time, 400 columns).
        chris_spec: mapping from eeg id to an array that fits into a (128, 256, 4) slot.
        df: frame with ``spec_id`` and ``eeg_id`` columns; unless ``mode == 'test'`` it must
            also provide ``min``, ``max`` and the target columns.
        mode: ``'test'`` reads the spectrogram from row 0 and returns an all-zero target;
            any other value centres the window using ``min``/``max`` and returns the targets.
        target_cols: names of the target columns. When omitted, the notebook-level
            ``targets`` is used, which matches the previous behaviour.
    """

    def __init__(self, kaggle_spec, chris_spec, df, mode, target_cols = None):
        super().__init__()
        self.df = df
        self.kaggle_spec = kaggle_spec
        self.chris_spec = chris_spec
        self.mode = mode
        self.target_cols = target_cols

    def __getitem__(self, index):
        """Return ``(X, y)`` as float32 tensors of shape (128, 256, 8) and (6,)."""
        row = self.df.iloc[index]

        X = np.zeros((128, 256, 8), dtype = 'float32')
        y = np.zeros(6, dtype = 'float32')

        if self.mode == 'test':
            r = 0
        else:
            r = int((row['max'] + row['min']) // 4) # each row in spectogram has a time difference of 2secs. Hence division by 4.

        ep = 1e-6 # keeps the standardisation finite when the std is 0
        for k in range(4): # that is, for LL, LR, RL, RR
            # 300 consecutive time rows of the k-th 100-column band, transposed to (100, 300)
            img = self.kaggle_spec[row.spec_id][r : r + 300, k * 100 : (k + 1) * 100].T

            # log-scale after clipping to [e^-4, e^8]
            img = np.clip(img, np.exp(-4), np.exp(8))
            img = np.log(img)

            # standardise ignoring NaNs, then replace the NaNs with 0
            m = np.nanmean(img.flatten())
            s = np.nanstd(img.flatten())
            img = (img-m)/(s+ep)
            img = np.nan_to_num(img, nan=0.0)

            # centre crop 300 -> 256 columns; the 100 rows are zero-padded to 128 (14 each side)
            X[14:-14, :, k] = img[:, 22:-22] / 2.0

        X[:, :, 4:] = self.chris_spec[row.eeg_id]

        if self.mode != 'test':
            cols = self.target_cols if self.target_cols is not None else targets
            y = row[cols].values
            y = y.astype(np.float32)

        X = torch.from_numpy(X)
        y = torch.from_numpy(y)

        return X, y

    def __len__(self):
        """Number of rows in ``df``."""
        return self.df.shape[0]

In [None]:
class Model(torch.nn.Module):
    """Frozen EfficientNet-B0 feature extractor followed by dropout and a 6-way linear head.

    ``forward`` returns raw logits (no softmax is applied); callers are expected to apply
    ``log_softmax``/``softmax`` themselves.
    """

    def __init__(self):
        super().__init__()
        effnet = torchvision.models.efficientnet_b0(weights = 'DEFAULT')
        # keep every child module except the last one (the classifier head)
        modules = list(effnet.children())[:-1]
        self.backbone = torch.nn.Sequential(*modules)
        # the backbone is frozen: only the linear head below receives gradients
        for param in self.backbone.parameters():
            param.requires_grad = False

        # Element-wise dropout. Dropout1d would treat the flattened (batch, features) tensor
        # as an unbatched (channels, length) input and zero out whole samples instead.
        self.dropout = torch.nn.Dropout(0.2)
        self.linear = torch.nn.Linear(effnet.classifier[1].in_features, 6)
        # not used by forward(); kept so existing code accessing model.softmax keeps working
        self.softmax = torch.nn.Softmax(dim = 1)

    def forward(self, inputs): # shape : (batch_size, 128, 256, 8)
        """Tile the 8 input channels into one 512x512 image and return (batch_size, 6) logits."""
        # channels 0-3 stacked vertically -> (batch_size, 512, 256, 1)
        x1 = [inputs[:, :, :, i : i + 1] for i in range(4)]
        x1 = torch.cat(x1, dim = 1)
        # channels 4-7 stacked vertically -> (batch_size, 512, 256, 1)
        x2 = [inputs[:, :, :, i + 4 : i + 5] for i in range(4)]
        x2 = torch.cat(x2, dim = 1)
        # both stacks side by side -> (batch_size, 512, 512, 1)
        x = torch.cat((x1, x2), dim = 2)
        # repeat the single channel three times for the RGB backbone
        x = torch.cat((x, x, x), dim = 3) #shape : (batch_size, 512, 512, 3)
        x = torch.permute(x, (0, 3, 1, 2)) #shape : (batch_size, 3, 512, 512)
        x = self.backbone(x)
        batch_size = x.shape[0]
        x = x.reshape(batch_size, -1)
        x = self.dropout(x)
        outputs = self.linear(x)
        return outputs

In [None]:
# Run configuration shared by the training / validation cells below.
batch_size = 128
k = 5  # number of GroupKFold folds

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Mixed precision (float16 autocast + gradient scaling) is only enabled on CUDA; on CPU the
# CUDA GradScaler disables itself, which would leave float16 autocast without loss scaling.
use_amp = device == 'cuda'
# KL divergence between the predicted log-probabilities and the target distribution
criterion = torch.nn.KLDivLoss(reduction = 'batchmean').to(device)
splits = GroupKFold(n_splits = k)

# Step-decay learning-rate schedule: optional linear ramp-up, a plateau at LR_MAX,
# then a multiplication by LR_STEP_DECAY every EVERY epochs.
LR_START = 1e-4
LR_MAX = 1e-3
LR_RAMPUP_EPOCHS = 0
LR_SUSTAIN_EPOCHS = 1
LR_STEP_DECAY = 0.1
EVERY = 1
EPOCHS = 5

def lrfn(epoch):
    """Return the learning rate scheduled for the given (0-based) epoch."""
    # linear ramp from LR_START towards LR_MAX
    if epoch < LR_RAMPUP_EPOCHS:
        return (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START

    # plateau at the maximum rate
    if epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
        return LR_MAX

    # one decay step per EVERY epochs once the plateau is over
    decay_steps = (epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS)//EVERY
    return LR_MAX * LR_STEP_DECAY**decay_steps

In [None]:
def train_epoch(model, dataloader, optimizer, scheduler, device = device):
    """Train ``model`` for one pass over ``dataloader`` and return the mean batch loss.

    Uses the notebook-level ``criterion`` and ``use_amp``. The scheduler is stepped once,
    after the last batch of the epoch.

    Args:
        model: network returning logits of shape (batch, classes).
        dataloader: yields ``(X, y)`` batches; ``y`` holds target probabilities.
        optimizer: optimizer updating the model parameters.
        scheduler: learning-rate scheduler stepped once per epoch.
        device: device string or ``torch.device`` to run on.

    Returns:
        Sum of the per-batch losses divided by the number of batches (0.0 for an empty loader).
    """
    model.to(device)
    model.train()
    train_loss = 0.0
    # autocast wants the bare device type ('cuda' / 'cpu'), not e.g. 'cuda:0' or a torch.device
    device_type = torch.device(device).type
    scaler = torch.cuda.amp.GradScaler(enabled = use_amp)
    for X, y in tqdm(dataloader, total = len(dataloader), leave = False):
        X = X.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        with torch.autocast(device_type = device_type, dtype = torch.float16, enabled = use_amp):
            output = model(X)
            # KLDivLoss expects log-probabilities as its first argument
            log_output = torch.nn.functional.log_softmax(output, dim = 1)
            loss = criterion(log_output, y)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        train_loss += loss.item()

    scheduler.step()
    # each loss is already a batch mean, so average over the number of batches
    return train_loss / max(len(dataloader), 1)


def valid_epoch(model, dataloader, optimizer, device = device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Uses the notebook-level ``criterion``.

    Args:
        model: network returning logits of shape (batch, classes).
        dataloader: yields ``(X, y)`` batches; ``y`` holds target probabilities.
        optimizer: unused; accepted so the call signature mirrors ``train_epoch``.
        device: device to run on.

    Returns:
        ``(mean_loss, outputs)`` where ``mean_loss`` is the sum of per-batch losses divided
        by the number of batches and ``outputs`` is a numpy array of softmax probabilities,
        one row per sample in the order the dataloader produced them.
    """
    model.to(device)
    model.eval()
    valid_loss = 0.0
    outputs = []
    with torch.no_grad():
        for X, y in tqdm(dataloader, total = len(dataloader), leave = False):
            X = X.to(device)
            y = y.to(device)

            output = model(X)
            log_output = torch.nn.functional.log_softmax(output, dim = 1)
            # store probabilities (not raw logits) so predictions are comparable to the targets
            softmax_output = torch.nn.functional.softmax(output, dim = 1)
            outputs.append(softmax_output.cpu().numpy())

            loss = criterion(log_output, y)
            valid_loss += loss.item()

    outputs = np.concatenate(outputs, axis = 0)
    # each loss is already a batch mean, so average over the number of batches
    return valid_loss / max(len(dataloader), 1), outputs

In [None]:
# Grouped k-fold cross-validation: train one model per fold and collect the
# out-of-fold predictions together with their matching targets.
fold_losses = []
all_oof = []
all_true = []

# patient_id is the group key, so a patient never appears in both train and validation
for i, (train_indices, val_indices) in enumerate(splits.split(train, train.target, train.patient_id)):
    print('*' * 25 + f' Fold {i+1} ' + '*' * 25)

    train_dataset = HMSData(kaggle_spec, chris_spec, train.iloc[train_indices], 'train')
    val_dataset = HMSData(kaggle_spec, chris_spec, train.iloc[val_indices], 'valid')

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
    # No shuffling here: the predictions must come back in val_indices order so that each
    # row of all_oof lines up with the same row of all_true.
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size = batch_size, shuffle = False)

    model = Model()
    # LambdaLR multiplies the optimizer's initial lr by lrfn(epoch). lrfn returns absolute
    # learning rates, so the base lr is set to 1.0 to make the product equal lrfn(epoch).
    optimizer = torch.optim.Adam(model.parameters(), lr = 1.0)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda = lrfn)
    train_loss = 0.0
    for epoch in range(EPOCHS):
        print(f'Running Fold : {i + 1}, Epoch : {epoch + 1}')
        train_loss += train_epoch(model, train_loader, optimizer, scheduler)

    print(f'Fold : {i + 1}, Train Loss : {(train_loss / EPOCHS)}')
    torch.save(model, f'model_fold_{i + 1}.pt')

    valid_loss, outputs = valid_epoch(model, val_loader, optimizer)
    print(f'Fold : {i + 1}, Validation Loss : {valid_loss}')
    fold_losses.append(valid_loss)

    all_true.append(train.iloc[val_indices][targets].values)
    all_oof.append(outputs)


all_oof = np.concatenate(all_oof)
all_true = np.concatenate(all_true)