In [None]:
import pandas as pd, numpy as np, os
import matplotlib.pyplot as plt, gc
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import butter, lfilter
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import freqz

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm

In [None]:
# Read the competition's training metadata table and show a short preview.
train = pd.read_csv(
    '/kaggle/input/hms-harmful-brain-activity-classification/train.csv'
)
print('Train shape', train.shape)
display(train.head())

In [None]:
# Names of the last six columns of train.csv; they are used as the label columns
# for training and as the prediction columns of the submission.
# NOTE(review): presumably the per-class vote counts — confirm against the CSV header.
TARGETS = train.columns[-6:]

In [None]:
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower edge of the pass band, in the same units as ``fs``.
        highcut: Upper edge of the pass band, in the same units as ``fs``.
        fs: Sampling frequency of the signal the filter will be applied to.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` coefficient pair returned by ``scipy.signal.butter``.
    """
    passband = [lowcut, highcut]
    coefficients = butter(order, passband, fs=fs, btype='band')
    return coefficients

def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter ``data`` with a Butterworth design.

    The coefficients come from ``butter_bandpass`` and are applied with
    ``scipy.signal.lfilter`` (a single forward pass).

    Args:
        data: Signal to filter.
        lowcut: Lower edge of the pass band.
        highcut: Upper edge of the pass band.
        fs: Sampling frequency of ``data``.
        order: Filter order forwarded to ``butter_bandpass``.

    Returns:
        The filtered signal as returned by ``lfilter``.
    """
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    return lfilter(numerator, denominator, data)


def denoise_filter(x, fs=200.0, lowcut=1.0, highcut=25.0, order=6):
    """Band-pass filter, smooth and 4x down-sample a signal.

    The defaults reproduce the values that used to be hard-coded in the body,
    so ``denoise_filter(x)`` returns exactly what it did before. The previous
    version also built an unused time axis (``T``, ``nsamples``, ``t``) on
    every call; that dead work has been removed.

    Args:
        x: Signal to process. In this notebook it is called with 1-D arrays.
        fs: Sampling frequency of ``x`` in Hz.
        lowcut: Lower pass-band edge in Hz.
        highcut: Upper pass-band edge in Hz.
        order: Butterworth order forwarded to ``butter_bandpass_filter``.

    Returns:
        The filtered signal, averaged over 4 consecutive samples and then
        sub-sampled with a step of 4.
    """
    y = butter_bandpass_filter(x, lowcut, highcut, fs, order=order)
    # Forward 4-sample moving average. np.roll wraps around, so the last three
    # averaged samples mix in values from the start of the signal.
    y = (y + np.roll(y,-1)+ np.roll(y,-2)+ np.roll(y,-3))/4
    # Keep every 4th sample; the stop index of -1 excludes the final sample.
    y = y[0:-1:4]

    return y

In [None]:
# Switch between the two modes of this notebook: True loads the preprocessed
# training signals and runs the training loop; False skips both, leaving only
# the inference/submission cells to do real work.
IS_TRAINING=False

In [None]:
# The preprocessed training signals are only loaded when training; with
# IS_TRAINING False the name `eegs_data` is never defined.
# `.item()` unwraps the single object stored in the saved array; CustomDataset
# indexes it with '<eeg_id>_<eeg_sub_id>' keys.
# NOTE(review): allow_pickle=True unpickles the file content on load — only use
# it with files from a trusted source.
if IS_TRAINING:
    eegs_data = np.load('/kaggle/input/hms-eeg-raw-dataset/eeg_specs.npy',allow_pickle=True).item()

## Dataloader

In [None]:
# Short names for the four derived signals, one per row of FEATS.
# NOTE(review): presumably left/right lateral and parasagittal electrode chains —
# confirm; NAMES is not referenced by the cells visible in this notebook.
NAMES = ['LL','LP','RP','RR']

# Each row lists five EEG column names forming one chain. CustomDataset_test
# averages the differences of consecutive columns in a row to build one signal.
FEATS = [['Fp1','F7','T3','T5','O1'],
         ['Fp1','F3','C3','P3','O1'],
         ['Fp2','F8','T4','T6','O2'],
         ['Fp2','F4','C4','P4','O2']]
# Directory of the raw training EEG parquet files (not read by the visible cells,
# which use the preprocessed `eegs_data` instead).
PATH = '/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/'

class CustomDataset(Dataset):
    """Training dataset pairing preprocessed EEG signals with vote distributions.

    Each item is looked up in the module-level ``eegs_data`` mapping (only
    defined when ``IS_TRAINING`` is True) under the key
    ``'<eeg_id>_<eeg_sub_id>'``. The label is the row's ``TARGETS`` columns
    normalised to sum to one.

    Args:
        dataframe: Table with ``eeg_id``, ``eeg_sub_id`` and the ``TARGETS``
            columns; one row per sample.
        transform: Optional callable applied to the float64 signal tensor
            before it is returned. It used to be accepted but silently
            ignored.
    """

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(signals, labels)`` as float64 tensors for row ``idx``.

        Raises:
            ValueError: If the label columns of the row sum to zero or less,
                which would otherwise yield NaN/inf targets.
        """
        row = self.dataframe.iloc[idx]
        eeg_key = f"{row['eeg_id']}_{row['eeg_sub_id']}"
        signals = torch.tensor(eegs_data[eeg_key], dtype=torch.float64)
        if self.transform is not None:
            signals = self.transform(signals)

        labels = row[TARGETS].values.astype(np.float64)
        total = np.sum(labels)
        if not total > 0:
            # Dividing by zero would silently inject NaNs into the loss.
            raise ValueError(f'Label columns of sample {eeg_key} do not sum to a positive value')
        labels = labels/total
        return signals, torch.tensor(labels, dtype=torch.float64)



In [None]:
# Define the 1D CNN model
class CNN1D(nn.Module):
    """Three strided 1-D convolutions followed by a two-layer linear head.

    ``forward`` returns softmax probabilities over six outputs. ``fc1`` takes
    640 flattened features, i.e. 64 channels x 10 positions after ``conv3``;
    an input of 2500 samples per channel produces exactly that size.

    Args:
        in_channels: Number of channels of the input signal.
    """

    def __init__(self,in_channels):
        super().__init__()
        self.hidden_channels = 128  # stored only; no layer below reads it
        # Feature extractor: kernel sizes 20/10/12 with strides 10/5/4.
        self.conv1 = nn.Conv1d(in_channels, 64, kernel_size=20, stride=10)
        self.conv2 = nn.Conv1d(64, 64, kernel_size=10, stride=5)
        self.conv3 = nn.Conv1d(64, 64, kernel_size=12, stride=4)
        self.flatten = nn.Flatten()
        # Head: 640 -> 32 -> 6; 640 must match the flattened conv3 output.
        self.fc1 = nn.Linear(in_features=640, out_features=32)
        self.fc2 = nn.Linear(in_features=32, out_features=6)
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Map a ``(batch, in_channels, length)`` tensor to class probabilities."""
        # Every convolution is followed by ReLU and then dropout.
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.dropout(F.relu(conv(x)))
        features = self.flatten(x)
        # The two linear layers are applied back to back, without an
        # activation in between.
        logits = self.fc2(self.fc1(features))
        return self.softmax(logits)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        count = 1
        for dim in x.shape[1:]:
            count = count * dim
        return count

In [None]:
# Wrap the training table in the custom dataset. Nothing is read here: the
# signals are looked up in `eegs_data` only when an item is requested.
dataset = CustomDataset(dataframe=train)

In [None]:
# Class name -> class index.
TARS = {'Seizure':0, 'LPD':1, 'GPD':2, 'LRDA':3, 'GRDA':4, 'Other':5}
# Column 8 of the training table holds the class name of each row
# (NOTE(review): presumably `expert_consensus` — confirm against the CSV header).
# The column is pulled once instead of doing one `.iloc[i, 8]` lookup per row;
# an unknown class name still raises KeyError.
expert_consensus = [TARS[label] for label in train.iloc[:, 8]]

# Count rows per class *index*, so that weight[t] always belongs to class t even
# if some class never occurs (counting only the observed classes would shift
# the indices in that case).
class_sample_count = np.bincount(
    np.asarray(expert_consensus, dtype=np.int64), minlength=len(TARS))

# Inverse-frequency class weights; classes without samples get weight 0
# instead of triggering a division by zero.
weight = np.divide(1., class_sample_count,
                   out=np.zeros(len(class_sample_count), dtype=np.float64),
                   where=class_sample_count > 0)
# One weight per training row, as a float64 tensor for WeightedRandomSampler.
samples_weight = weight[np.asarray(expert_consensus, dtype=np.int64)]
samples_weight = torch.from_numpy(samples_weight)

In [None]:
# Draw as many indices per epoch as there are training rows, each row being
# picked with probability proportional to its weight (rows of rarer classes
# are therefore drawn more often).
sampler = torch.utils.data.sampler.WeightedRandomSampler(samples_weight.type('torch.DoubleTensor'), len(samples_weight))

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Directory that receives one checkpoint per training epoch. `exist_ok=True`
# replaces the separate existence check, so re-running the cell is a no-op.
os.makedirs('CNN1D_Model', exist_ok=True)

In [None]:
# Create a DataLoader to handle batching; the order of the samples is decided
# by the weighted sampler.
batch_size = 256
train_dataloader = DataLoader(dataset, batch_size=batch_size, sampler=sampler)

# Create an instance of the model in float64, matching the float64 tensors
# produced by the dataset.
model = CNN1D(in_channels=4).double()
# KL divergence loss: expects log-probabilities as input and probabilities as
# target.
criterion = nn.KLDivLoss(reduction="batchmean")
# Define optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)
model.to(device)
if IS_TRAINING:
    model.train()
    epochs = 2
    for epoch in range(epochs):
        pbar = tqdm(train_dataloader)
        for batch in pbar:
            eeg_, label = batch
            pred = model(eeg_.to(device))
            # The model outputs softmax probabilities. Clamp them to the
            # smallest positive normal value before the log so that a
            # probability underflowing to 0 cannot produce -inf and a NaN loss.
            log_pred = torch.log(pred.clamp_min(torch.finfo(pred.dtype).tiny))
            loss = criterion(log_pred, label.to(device))
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Show the loss of the current batch in the progress bar.
            pbar.set_description('Batch loss{:.3f}'.format(loss.item()))

        # Save one checkpoint at the end of every epoch.
        torch.save(model.state_dict(), f'CNN1D_Model/model_{epoch}.pt')

# Submit to Kaggle LB


In [None]:
# Drop the `train` name and run a garbage-collection pass. The frame itself
# stays alive for as long as `dataset.dataframe` still refers to it.
del train
gc.collect()

# Read the test metadata table and preview it.
test = pd.read_csv(
    '/kaggle/input/hms-harmful-brain-activity-classification/test.csv'
)
print('Test shape:', test.shape)
test.head()

In [None]:
test_path = '/kaggle/input/hms-harmful-brain-activity-classification/test_eegs/'
class CustomDataset_test(Dataset):
    """Inference dataset that builds the four model inputs from raw EEG files.

    For every row of ``dataframe`` the file ``<test_path><eeg_id>.parquet`` is
    read, its central ``WINDOW`` rows are kept, and for each of the first four
    rows of ``FEATS`` the differences of consecutive columns are averaged and
    passed through ``denoise_filter``.

    Args:
        dataframe: Table with an ``eeg_id`` column; one row per recording.
        transform: Optional callable applied to the float64 signal tensor
            before it is returned. It used to be accepted but silently
            ignored.
    """

    # Number of raw rows kept from the middle of each recording.
    WINDOW = 10_000

    def __init__(self, dataframe, transform=None):
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    @staticmethod
    def _fill_missing(x):
        """Return ``x`` with NaNs replaced by the mean of the other samples.

        An all-NaN input becomes all zeros; an input without NaNs is returned
        unchanged.
        """
        missing = np.isnan(x)
        if not missing.any():
            return x
        if missing.all():
            return np.zeros_like(x)
        return np.where(missing, np.nanmean(x), x)

    def __getitem__(self, idx):
        """Return the ``(4, n)`` float64 signal tensor for row ``idx``.

        Raises:
            ValueError: If the recording has fewer than ``WINDOW`` rows. The
                centre crop would otherwise use a negative offset and select
                the wrong rows.
        """
        row = self.dataframe.iloc[idx]
        eeg_id = row['eeg_id']
        parq_path = f'{test_path}{eeg_id}.parquet'
        eeg = pd.read_parquet(parq_path)
        rows = len(eeg)
        if rows < self.WINDOW:
            raise ValueError(
                f'{parq_path} has {rows} rows, expected at least {self.WINDOW}')
        offset = (rows - self.WINDOW) // 2
        eeg = eeg.iloc[offset:offset + self.WINDOW]

        signals = []
        for cols in FEATS[:4]:
            # COMPUTE PAIR DIFFERENCES AND AVERAGE
            # (same summation order as before: 0-1, then 1-2, 2-3, 3-4)
            x = eeg[cols[0]].values - eeg[cols[1]].values
            for left, right in zip(cols[1:4], cols[2:5]):
                x += eeg[left].values - eeg[right].values
            x /= 4.0
            # A single NaN would propagate through the recursive filter into
            # every later output sample, so fill gaps before filtering.
            x = self._fill_missing(x)
            signals.append(denoise_filter(x))

        signals = torch.tensor(np.array(signals), dtype=torch.float64)
        if self.transform is not None:
            signals = self.transform(signals)
        return signals



In [None]:
# Wrap the test table; the parquet files are read lazily, one per requested item.
dataset_test = CustomDataset_test(dataframe=test)

In [None]:
# Keep the test rows in their original order so predictions line up with `test`.
test_loader = DataLoader(dataset_test, batch_size=16,shuffle=False)
model = CNN1D(in_channels=4).double()
# map_location='cpu': a checkpoint saved from a model living on the GPU stores
# CUDA tensors, which cannot be loaded on a CPU-only host without remapping.
model.load_state_dict(torch.load('/kaggle/input/cnn-drop/model_1.pt', map_location='cpu'))
model.eval()
model.cpu()
preds = []

# Inference only: no_grad avoids building the autograd graph for every batch.
with torch.no_grad():
    for batch in test_loader:
        pred = model(batch)
        preds.append(pred.numpy())
# Stack the per-batch outputs into one (n_rows, 6) array.
preds = np.vstack(preds)

In [None]:
# BUILD AND WRITE SUBMISSION.CSV: one row per test eeg_id, one column per target
from IPython.display import display

sub = pd.DataFrame({'eeg_id': test['eeg_id'].values})
sub[TARGETS] = preds
sub.to_csv('submission.csv', index=False)
print('Submission shape', sub.shape)
display(sub.head())

# SANITY CHECK: THE SIX PREDICTION COLUMNS OF THE FIRST ROW SHOULD SUM TO ONE
print('Sub row 0 sums to:', sub.iloc[0, -6:].sum())