In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
import os
import torch

In [None]:
# Restrict the visible CUDA devices to the first two before TensorFlow
# enumerates them.
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"

gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 1:
    # Several GPUs: replicate across all of them.
    strategy = tf.distribute.MirroredStrategy()
    print(f'Using {len(gpus)} GPUs')
elif gpus:
    strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    print(f'Using {len(gpus)} GPU')
else:
    # No GPU visible: do not pin the strategy to a device that does not exist.
    strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
    print('No GPU found, falling back to CPU')

# Toggle for TensorFlow's graph-level automatic mixed precision.
# NOTE(review): the model trained later in this notebook is a PyTorch module,
# so this option presumably does not affect it -- confirm it is still needed.
MIX = True
if MIX:
    tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
    print('Mixed precision enabled')
else:
    print('Using full precision')

In [None]:
# sample_submission_df = pd.read_csv('/kaggle/input/hms-harmful-brain-activity-classification/sample_submission.csv')
# sample_submission_df.info()

In [None]:
# sample_submission_df.head(5)

In [None]:
# Read the training metadata table and print its column/dtype summary.
train_csv_path = '/kaggle/input/hms-harmful-brain-activity-classification/train.csv'
train_data = pd.read_csv(train_csv_path)
train_data.info()

In [None]:
# train_data.head(20)

In [None]:
# sns.countplot(x='expert_consensus', data=train_data)
# plt.show()

In [None]:
# Lower-case class names; each one prefixes a `<name>_vote` column of train_data.
class_names = ['seizure', 'lpd', 'gpd', 'lrda', 'grda', 'other']
# Integer index for each `expert_consensus` label; the indices follow the
# order of `class_names`.
class_name_to_index = {'Seizure' : 0 , 'LPD' : 1 ,
                       'LRDA' : 3 , 'GPD' : 2 ,
                       'GRDA' : 4 , 'Other' : 5}

# One count plot per vote column on a 2x3 grid.
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

for ax, class_name in zip(axes.flat, class_names):
    sns.countplot(x=f'{class_name}_vote', data=train_data, ax=ax)
    ax.set_title(f'Distribution of {class_name} votes')

# Layout only needs to be computed once, after every panel has been drawn.
fig.tight_layout()
plt.show()

In [None]:
# Histogram grid (7 rows x 2 columns, 10 bins each) of train_data's columns.
hist_options = dict(bins=10, figsize=(15, 20), layout=(7, 2))
train_data.hist(**hist_options)
plt.suptitle('Feature Distributions')
plt.show()

In [None]:
# Names of the six per-class vote columns, in `class_names` order.
vote_columns = [name + '_vote' for name in class_names]
# Pairwise correlation between the vote columns.
corr_matrix = train_data.loc[:, vote_columns].corr()

plt.figure(figsize=(12, 8))
sns.heatmap(data=corr_matrix, annot=True, fmt='.2f', cmap='viridis')
plt.title('Correlation Matrix for Vote Columns')
plt.show()

In [None]:
# plt.figure(figsize=(15, 10))

# for i, col in enumerate([f'{name}_vote' for name in class_names]):
#     plt.subplot(2, 3, i+1)
#     sns.boxplot(y='expert_consensus', x=col, data=train_data)
#     plt.title(f'Box Plot of {col} vs Expert Consensus')
#     plt.tight_layout() 

# plt.show()

In [None]:
# Locations of the training inputs, all under one competition directory.
_competition_dir = '/kaggle/input/hms-harmful-brain-activity-classification'
eeg_dir = _competition_dir + '/train_eegs'
spectrogram_dir = _competition_dir + '/train_spectrograms'
metadata_path = _competition_dir + '/train.csv'


In [None]:
def load_data(ids, file_dir):
    """Read the parquet file named after an id from a directory.

    Args:
        ids: Identifier of the file; it is passed through ``int`` before
            being used as the file name.
        file_dir: Directory that contains ``<id>.parquet``.

    Returns:
        Whatever ``pd.read_parquet`` returns for ``<file_dir>/<id>.parquet``.
    """
    parquet_path = "{}/{}.parquet".format(file_dir, int(ids))
    return pd.read_parquet(parquet_path)

def load_eeg_data(ids):
    """Load the parquet file for ``ids`` from the module-level ``eeg_dir``."""
    eeg_frame = load_data(ids, eeg_dir)
    return eeg_frame

def load_spectrogram_data(ids):
    """Load the parquet file for ``ids`` from ``spectrogram_dir``, without its ``time`` column."""
    spectrogram_frame = load_data(ids, spectrogram_dir)
    return spectrogram_frame.drop(columns=['time'])

In [None]:
# df_eeg_example = load_eeg_data(1628180742)
# df_eeg_example.info()

In [None]:
# df_spectro_example = load_spectrogram_data(999431)
# df_spectro_example.info()

In [None]:
# df_spectro_example.columns

In [None]:
# load_eeg_data(train_data['eeg_id'][190])

In [None]:
# load_spectrogram_data(train_data['spectrogram_id'][28])

In [None]:
# Columns removed before de-duplication.
columns_to_drop = [
    'eeg_sub_id', 'eeg_label_offset_seconds',
    'spectrogram_sub_id', 'spectrogram_label_offset_seconds',
    'label_id', 'patient_id',
]

# Drop the columns, collapse rows that became identical, and renumber the
# index from 0 without keeping the old one.
df_train = (
    train_data
    .drop(columns=columns_to_drop)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [None]:
# Convert vote counts into per-row proportions by dividing by the row total.
vote_totals = df_train[vote_columns].sum(axis=1)
df_train[vote_columns] = df_train[vote_columns].div(vote_totals, axis=0)

# Replace each consensus label by its integer class index.
df_train['expert_consensus'] = df_train['expert_consensus'].map(class_name_to_index)

In [None]:
# Display the resulting training table as the cell output.
df_train

In [None]:
# df_train[vote_columns]

In [None]:
def preprocess(dataframe, eeg_dir, spectrogram_dir, vote_columns):
    """Build feature and label tensors for every row of ``dataframe``.

    For each row, the EEG parquet named by ``eeg_id`` and the spectrogram
    parquet named by ``spectrogram_id`` are loaded with ``load_data`` and
    summarised with ``extract_features``. The spectrogram's ``time`` column
    is dropped before feature extraction.

    Args:
        dataframe: Table with ``eeg_id`` and ``spectrogram_id`` columns plus
            the columns listed in ``vote_columns``.
        eeg_dir: Directory holding ``<eeg_id>.parquet`` files.
        spectrogram_dir: Directory holding ``<spectrogram_id>.parquet`` files.
        vote_columns: Names of the label columns, in output order.

    Returns:
        Tuple ``(eeg_features, spectrogram_features, labels)`` of float32
        tensors with one entry per row of ``dataframe``.
    """
    eeg_features_list = []
    spectrogram_features_list = []

    # Iterate over the id columns directly instead of materialising a row
    # Series with ``iloc`` on every step.
    for eeg_id, spectrogram_id in zip(dataframe['eeg_id'], dataframe['spectrogram_id']):
        eeg_data = load_data(eeg_id, eeg_dir)
        eeg_features_list.append(extract_features(eeg_data))

        spectrogram_data = load_data(spectrogram_id, spectrogram_dir).drop(columns=['time'])
        spectrogram_features_list.append(extract_features(spectrogram_data))

    # Stack into one ndarray first: torch.tensor() on a Python list of
    # ndarrays converts element by element and is very slow.
    eeg_features_tensor = torch.tensor(np.array(eeg_features_list), dtype=torch.float32)
    spectrogram_features_tensor = torch.tensor(np.array(spectrogram_features_list), dtype=torch.float32)

    # Read the labels column-wise as a numeric array so unrelated
    # (possibly non-numeric) columns cannot turn them into object arrays.
    label_values = dataframe.loc[:, list(vote_columns)].to_numpy(dtype=np.float32)
    labels_tensor = torch.tensor(label_values, dtype=torch.float32)

    return eeg_features_tensor, spectrogram_features_tensor, labels_tensor


def extract_features(df):
    """Summarise every column of ``df`` with eight statistics.

    Args:
        df: DataFrame whose columns are the signals to summarise.

    Returns:
        1-D array of length ``8 * n_columns``. The statistics are laid out
        group by group -- min, max, mean, standard deviation, root mean
        square, variance, skewness, kurtosis -- and each group lists the
        columns in ``df`` order.
    """
    per_column_stats = [
        # Basic statistical features
        df.min(),
        df.max(),
        df.mean(),
        df.std(),
        # Time-domain features
        np.sqrt(np.mean(np.square(df), axis=0)),  # root mean square
        df.var(),
        df.skew(),
        df.kurtosis(),
    ]

    return np.concatenate(per_column_stats)

In [None]:
# eeg_features_tensor, spectrogram_features_tensor, labels_tensor = preprocess(df_train, eeg_dir, spectrogram_dir, vote_columns)

In [None]:
# torch.save(eeg_features_tensor, '/kaggle/working/eeg_features.pt')
# torch.save(spectrogram_features_tensor, '/kaggle/working/spectrogram_features.pt')
# torch.save(labels_tensor, '/kaggle/working/labels.pt')

In [None]:
# Use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Directory of the saved feature/label tensors.
# NOTE(review): presumably produced by the commented-out preprocess/torch.save
# cells of this notebook -- confirm.
features_dir = '/kaggle/input/harmful-brain-activity-contest-simplied-dataset/Harmful Brain Activity'

# Each tensor is mapped straight onto `device` while loading.
eeg_features_tensor = torch.load(f'{features_dir}/eeg_features.pt', map_location=device)
spectrogram_features_tensor = torch.load(f'{features_dir}/spectrogram_features.pt', map_location=device)
labels_tensor = torch.load(f'{features_dir}/labels.pt', map_location=device)


In [None]:
# Display the dimensions of the loaded spectrogram feature tensor.
spectrogram_features_tensor.shape

In [None]:
from torch.utils.data import Dataset

class EEGSpectrogramDataset(Dataset):
    """Dataset that pairs EEG features, spectrogram features and labels by index."""

    def __init__(self, eeg_features, spectrogram_features, labels):
        # The three containers are stored as given (no copy) and are
        # indexed with the same position in __getitem__.
        self.labels = labels
        self.spectrogram_features = spectrogram_features
        self.eeg_features = eeg_features

    def __len__(self):
        # The label container defines the number of samples.
        sample_count = len(self.labels)
        return sample_count

    def __getitem__(self, idx):
        eeg_item = self.eeg_features[idx]
        spectrogram_item = self.spectrogram_features[idx]
        label_item = self.labels[idx]
        return eeg_item, spectrogram_item, label_item

In [None]:
from torch.utils.data import DataLoader

# Pick the compute device: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Wrap the three loaded tensors in a single indexable dataset.
dataset = EEGSpectrogramDataset(
    eeg_features=eeg_features_tensor,
    spectrogram_features=spectrogram_features_tensor,
    labels=labels_tensor,
)

In [None]:
from torch.utils.data import random_split

# 80/20 split; the remainder goes to the held-out part so the sizes always
# add up to the full dataset.
total_size = len(dataset)
train_size = int(0.8 * total_size)
test_size = total_size - train_size

# Seed the split so that Restart & Run All reproduces the same held-out set.
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(
    dataset, [train_size, test_size], generator=split_generator
)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps the
# per-batch averaged validation loss comparable between epochs.
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class EEGSpectrogramNet(nn.Module):
    """Two-branch fully connected network over EEG and spectrogram features.

    Each input passes through its own two-layer branch (-> 512 -> 256); the
    branch outputs are concatenated and fed through a shared stack of linear
    layers ending in a softmax over the classes.

    Args:
        eeg_input_dim: Size of one EEG feature vector (default 160).
        spectrogram_input_dim: Size of one spectrogram feature vector
            (default 3200).
        num_classes: Number of output probabilities (default 6).
    """

    def __init__(self, eeg_input_dim=160, spectrogram_input_dim=3200, num_classes=6):
        super().__init__()
        # Input projections, one per modality.
        self.eeg_layer = nn.Linear(eeg_input_dim, 512)
        self.spectrogram_layer = nn.Linear(spectrogram_input_dim, 512)

        self.eeg_fc1 = nn.Linear(512, 256)
        self.eeg_dropout = nn.Dropout(0.2)

        self.spectrogram_fc1 = nn.Linear(512, 256)
        self.spectrogram_dropout = nn.Dropout(0.2)

        # Shared head applied to the concatenated branch outputs.
        self.fc1 = nn.Linear(256 + 256, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.dropout = nn.Dropout(0.3)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 32)

        self.output = nn.Linear(32, num_classes)

        self._initialize_weights()

    def forward(self, eeg_data, spectrogram_data):
        """Return class probabilities for a batch.

        Args:
            eeg_data: Tensor whose last dimension is ``eeg_input_dim``.
            spectrogram_data: Tensor whose last dimension is
                ``spectrogram_input_dim``; same batch size as ``eeg_data``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` whose rows sum to 1.
        """
        # EEG and spectrogram pathways. A ReLU sits between the two linear
        # layers of each branch: without it the pair is equivalent to a
        # single linear map and the first layer adds no capacity.
        eeg_hidden = F.relu(self.eeg_layer(eeg_data))
        eeg_features = self.eeg_dropout(F.relu(self.eeg_fc1(eeg_hidden)))

        spectrogram_hidden = F.relu(self.spectrogram_layer(spectrogram_data))
        spectrogram_features = self.spectrogram_dropout(
            F.relu(self.spectrogram_fc1(spectrogram_hidden))
        )

        # Concatenate features along the feature dimension
        combined = torch.cat((eeg_features, spectrogram_features), dim=1)

        # Further processing
        x = F.relu(self.fc1(combined))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = F.relu(self.fc4(x))
        x = F.relu(self.fc5(x))

        # Output layer
        x = self.output(x)
        return F.softmax(x, dim=1)

    def _initialize_weights(self):
        """Kaiming-normal weights and zero biases for every linear layer."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

In [None]:
# Report the selected device, create the network, and move its parameters
# there; the final expression shows the module summary as the cell output.
print(f"Using {device}")
model = EEGSpectrogramNet()
model.to(device)

In [None]:
from torch.optim import Adam
from torch.nn.utils import clip_grad_norm_
import torch.nn.functional as F

optimizer = Adam(model.parameters(), lr=0.01)
# Halve the learning rate when the validation loss has not improved for 50
# epochs. The `verbose` flag is deprecated in recent PyTorch releases, so
# learning-rate changes are logged explicitly after each scheduler step.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.50, patience=50)

running_loss = 0.0
num_epochs = 100
print_interval = 10  # not used by the loop below
val_loss = 0.0
val_mse = 0.0

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()

    for eeg_data, spectrogram_data, labels in train_loader:
        optimizer.zero_grad()

        outputs = model(eeg_data, spectrogram_data)
        loss = F.mse_loss(outputs, labels)
        loss.backward()

        # Cap the global gradient norm before the update.
        clip_grad_norm_(model.parameters(), max_norm=1.0)

        optimizer.step()

        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/(len(train_loader)):.4f}")
    running_loss = 0.0

    # ---- validation pass ----
    model.eval()  # Set model to evaluation mode
    val_running_loss = 0.0
    with torch.no_grad():
        for eeg_data, spectrogram_data, labels in test_loader:
            outputs = model(eeg_data, spectrogram_data)
            val_running_loss += F.mse_loss(outputs, labels).item()

    val_loss = val_running_loss / len(test_loader)
    # The validation loss *is* the MSE, so it is computed once and reused.
    val_mse = val_loss
    print(f"Validation Loss: {val_loss:.4f}, Validation MSE: {val_mse:.4f}")

    lr_before = optimizer.param_groups[0]['lr']
    scheduler.step(val_loss)
    lr_after = optimizer.param_groups[0]['lr']
    if lr_after < lr_before:
        print(f"Epoch [{epoch+1}/{num_epochs}], reducing learning rate to {lr_after:.4e}")

In [None]:
def preprocess_test(dataframe, eeg_test_dir, spectrogram_test_dir, vote_columns):
    """Build EEG and spectrogram feature tensors for every row of ``dataframe``.

    For each row, the EEG parquet named by ``eeg_id`` and the spectrogram
    parquet named by ``spectrogram_id`` are loaded with ``load_data`` and
    summarised with ``extract_features``. The spectrogram's ``time`` column
    is dropped before feature extraction.

    Args:
        dataframe: Table with ``eeg_id`` and ``spectrogram_id`` columns.
        eeg_test_dir: Directory holding ``<eeg_id>.parquet`` files.
        spectrogram_test_dir: Directory holding ``<spectrogram_id>.parquet``
            files.
        vote_columns: Unused; kept so existing calls keep working.

    Returns:
        Tuple ``(eeg_features, spectrogram_features)`` of float32 tensors
        with one entry per row of ``dataframe``.
    """
    eeg_features_list = []
    spectrogram_features_list = []

    # Iterate over the id columns directly instead of materialising a row
    # Series with ``iloc`` on every step.
    for eeg_id, spectrogram_id in zip(dataframe['eeg_id'], dataframe['spectrogram_id']):
        eeg_data = load_data(eeg_id, eeg_test_dir)
        eeg_features_list.append(extract_features(eeg_data))

        spectrogram_data = load_data(spectrogram_id, spectrogram_test_dir).drop(columns=['time'])
        spectrogram_features_list.append(extract_features(spectrogram_data))

    # Stack into one ndarray first: torch.tensor() on a Python list of
    # ndarrays converts element by element and is very slow.
    eeg_features_tensor = torch.tensor(np.array(eeg_features_list), dtype=torch.float32)
    spectrogram_features_tensor = torch.tensor(np.array(spectrogram_features_list), dtype=torch.float32)

    return eeg_features_tensor, spectrogram_features_tensor

In [None]:
# Locations of the test inputs, all under one competition directory.
test_input_dir = '/kaggle/input/hms-harmful-brain-activity-classification'
eeg_test_dir = f'{test_input_dir}/test_eegs'
spectrogram_test_dir = f'{test_input_dir}/test_spectrograms'

# Test metadata table (read by the later cells for the ids to predict).
test_data = pd.read_csv(f'{test_input_dir}/test.csv')

In [None]:
# Featurise the test rows and move both tensors to the model's device.
# These two names are rebound here to the test-set tensors.
eeg_features_tensor, spectrogram_features_tensor = (
    features.to(device)
    for features in preprocess_test(test_data, eeg_test_dir, spectrogram_test_dir, vote_columns)
)

In [None]:
# Evaluation mode disables dropout for prediction.
model.eval()

# Inference only: skip autograd bookkeeping so no graph is kept in memory.
with torch.no_grad():
    outputs = model(eeg_features_tensor, spectrogram_features_tensor)
# Bring the predictions back to host memory as a NumPy array.
outputs = outputs.cpu().detach().numpy()

In [None]:
# One row per test EEG: the id followed by the six predicted probabilities,
# in the same class order the model was trained on.
# NOTE(review): the id column is named `eeg_id` to match the competition's
# submission format -- confirm against sample_submission.csv.
submission_df = pd.DataFrame({
    'eeg_id': test_data['eeg_id'],
    'seizure_vote': outputs[:, 0],
    'lpd_vote': outputs[:, 1],
    'gpd_vote': outputs[:, 2],
    'lrda_vote': outputs[:, 3],
    'grda_vote': outputs[:, 4],
    'other_vote': outputs[:, 5]
})

In [None]:
# Write the predictions without the index column, then preview the first rows.
submission_path = 'submission.csv'
submission_df.to_csv(submission_path, index=False)
submission_df.head()