# Prepare

## import

In [None]:
import pandas as pd
import numpy as np

# Load train.csv from the competition input directory into the notebook-wide
# `train_df` frame, then preview its first rows as the cell output.
train_df = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/train.csv")
train_df.head()

In [None]:
# Total votes per class for every patient.
# All columns use the string aggregator 'sum': the original mixed 'sum' with the
# np.sum callable, and passing np.sum to .agg() is deprecated in recent pandas
# releases (it emits a FutureWarning asking for the string form).
train_df.groupby('patient_id').agg(
{
    'seizure_vote': 'sum',
    'lpd_vote' : 'sum',
    'gpd_vote' : 'sum',
    'lrda_vote': 'sum',
    'grda_vote': 'sum',
    'other_vote': 'sum',
}
)

In [None]:
import matplotlib.pyplot as plt
# Histogram of log10(number of distinct eeg_sub_id values) per patient.
_ = plt.hist(np.log10(train_df.groupby('patient_id')['eeg_sub_id'].nunique().values), bins = 100)
# Bar heights are plain patient counts (plt.hist is linear unless log=True is
# passed), so the y label must not claim a log10 scale.
plt.ylabel('Number of patients')
plt.xlabel('Log10 Number of samples')

plt.grid()

In [None]:
from sklearn.model_selection import StratifiedKFold, KFold, train_test_split

# Key every row as "<spectrogram_id>_<eeg_sub_id>".
train_df['sampling_index'] = (
    train_df['spectrogram_id'].astype(str)
    + "_"
    + train_df['eeg_sub_id'].astype(str)
)

# One row per patient holding the list of that patient's sampling keys.
sampling_df = train_df.groupby(['patient_id'])['sampling_index'].apply(list).reset_index()

# Split at patient level so the two parts never share a patient.
train_set, test_set = train_test_split(sampling_df)

In [None]:
sample_size = 55
cv = KFold(n_splits=5, shuffle=True)

# The keyed lookup table does not depend on the fold, so build it once instead
# of calling set_index twice per iteration.
train_df_by_key = train_df.set_index('sampling_index')

for train_idx, val_idx in cv.split(train_set):
    train_set_slice = train_set.iloc[train_idx]
    val_set_slice = train_set.iloc[val_idx]

    # For each training patient draw `sample_size` keys with replacement and
    # de-duplicate them, which caps one patient's contribution at `sample_size`
    # rows. Validation patients keep all of their keys.
    train_eegs_subs = np.concatenate(
        train_set_slice['sampling_index'].apply(
            lambda x: np.unique(np.random.choice(x, size=sample_size, replace=True))
        ).values
    )
    val_eegs_subs = np.concatenate(val_set_slice['sampling_index'].values)

    sampled_train = train_df_by_key.loc[train_eegs_subs].reset_index()
    sampled_val = train_df_by_key.loc[val_eegs_subs].reset_index()

    print("Sampled_train", sampled_train.shape)
    print("Sampled_val",sampled_val.shape)

In [None]:
from sklearn.model_selection import StratifiedKFold, KFold, train_test_split
class ValidationSchema:
    """Patient-grouped K-fold splitter built from the competition train.csv.

    Rows are keyed by ``"<spectrogram_id>_<eeg_sub_id>"``. Patients (not rows)
    are split into a 90/10 train/test partition, and the training patients are
    then divided into ``nfolds`` folds. Inside each fold every training patient
    contributes at most ``sample_size`` distinct rows (keys are drawn with
    replacement and de-duplicated); validation patients keep all their rows.

    Args:
        nfolds: number of K-fold splits.
        stratified: stored on the instance; not consulted by the current code.
        mode: only ``'patient'`` is implemented.
        sample_size: number of draws per training patient in each fold.

    Raises:
        ValueError: if ``mode`` is not ``'patient'``.
    """

    def __init__(self, nfolds=5, stratified=True, mode='patient', sample_size=55):
        if mode != 'patient':
            raise ValueError(f"Unsupported mode {mode!r}; only 'patient' is implemented")

        self.nfolds = nfolds
        self.stratified = stratified
        self.mode = mode
        self.models = []
        self.train_df = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/train.csv")
        self.train_df['sampling_index'] = self.train_df['spectrogram_id'].astype(str) + "_" + self.train_df['eeg_sub_id'].astype(str)

        # Everything below works on self.train_df. The earlier draft read the
        # notebook-global `train_df`, which only worked through leaked kernel state.
        self.sampling_df = self.train_df.groupby(['patient_id']).sampling_index.apply(list).reset_index()
        self.train_set, self.test_set = train_test_split(self.sampling_df, test_size = 0.1)

        # Fold-invariant lookup table, built once.
        rows_by_key = self.train_df.set_index('sampling_index')

        self.folds = []
        cv = KFold(n_splits=self.nfolds, shuffle=True)
        for train_idx, val_idx in cv.split(self.train_set):
            train_set_slice = self.train_set.iloc[train_idx]
            val_set_slice = self.train_set.iloc[val_idx]
            train_eegs_subs = np.concatenate(
                train_set_slice['sampling_index'].apply(
                    lambda x: np.unique(np.random.choice(x, size=sample_size, replace=True))
                ).values
            )
            val_eegs_subs = np.concatenate(val_set_slice['sampling_index'].values)
            sampled_train = rows_by_key.loc[train_eegs_subs].reset_index()
            sampled_val = rows_by_key.loc[val_eegs_subs].reset_index()
            print(f"Train fold size {sampled_train.shape} || Test fold size {sampled_val.shape}")
            # Every fold is kept: the leftover debugging `break` used to stop
            # after the first one.
            self.folds.append((sampled_train, sampled_val))

    def validate_model(self, model):
        """Fit ``model`` on each fold's training frame and predict on its validation frame.

        Args:
            model: object exposing ``fit(frame)`` and ``predict(frame)``. The same
                instance is reused for every fold.

        Returns:
            list with one ``model.predict`` result per fold, in fold order.

        Raises:
            ValueError: if the instance mode is not ``'patient'``.
        """
        if self.mode != 'patient':
            raise ValueError(f"Unsupported mode {self.mode!r}; only 'patient' is implemented")

        predictions = []
        for sampled_train, sampled_val in self.folds:
            model.fit(sampled_train)
            predictions.append(model.predict(sampled_val))
        return predictions

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
import numpy as np
from tqdm.notebook import tqdm

class EEGDataset(Dataset):
    """Map-style dataset returning one EEG window and its vote distribution per row.

    Each row of ``dataframe`` must provide ``eeg_id``, ``eeg_label_offset_seconds``
    and the six ``*_vote`` columns. The window is 10000 parquet rows starting
    ``eeg_label_offset_seconds * 200`` rows into the file.
    """

    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``{'eeg_data': ndarray, 'labels': float32 tensor of 6 vote shares}``."""
        # Samplers hand out positions 0..len-1; .iloc honours that even when the
        # frame's index labels are not a fresh RangeIndex.
        row = self.dataframe.iloc[idx]

        # Construct the path to the parquet file
        path = f"/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/{row['eeg_id']}.parquet"

        # `row_count_offset` only shifts the numbering of polars' optional
        # row-count column and never skips rows, so the previous call always
        # returned the first 10000 rows. Slice the lazy scan instead.
        start_row = int(round(float(row['eeg_label_offset_seconds']) * 200))
        eeg = pl.scan_parquet(path).slice(start_row, 10000).collect()

        eeg_data = eeg.to_numpy(use_pyarrow=False)

        vote_columns = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
        labels = torch.tensor([row[column] for column in vote_columns], dtype=torch.float32)
        # Turn raw vote counts into shares that sum to 1.
        labels = labels / labels.sum()
        return {'eeg_data': eeg_data, 'labels': labels}

class LSTMModel(nn.Module):
    """LSTM encoder followed by a linear head applied to the last time step.

    Args:
        input_dim: feature size of each time step.
        hidden_dim: LSTM hidden size.
        num_layers: number of stacked LSTM layers.
        output_dim: size of the returned vector per sequence.
        dropout: dropout between stacked LSTM layers (ignored for one layer).
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout=0.1):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # nn.LSTM applies dropout only between stacked layers and warns when a
        # non-zero value is passed with a single layer, so pass 0 in that case.
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True,
                            dropout=dropout if num_layers > 1 else 0.0)

        # Fully connected output layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    @autocast()  # Run the forward pass under CUDA autocast (mixed precision)
    def forward(self, x):
        """Map ``x`` of shape (batch, time, input_dim) to (batch, output_dim)."""
        # nn.LSTM starts from zero hidden/cell states when none are given, on
        # the same device as `x`. The explicit `.cuda()` tensors made the model
        # unusable on CPU without changing the result.
        out, _ = self.lstm(x)

        # Decode the hidden state of the last time step
        return self.fc(out[:, -1, :])

def kl_divergence_loss(predicted, target):
    """Batch-mean KL divergence between ``target`` and ``softmax(predicted)``.

    Args:
        predicted: raw model outputs (logits), classes on the last axis.
        target: probabilities on the last axis (rows already sum to 1).

    Returns:
        Scalar tensor.
    """
    log_probs = torch.log_softmax(predicted, dim=-1)

    # `target` is used as-is. The dataset already yields normalised vote shares,
    # and a second softmax would flatten them (one-hot [1, 0, ...] would become
    # a near-uniform vector).
    return torch.nn.functional.kl_div(log_probs, target, reduction='batchmean')

class ValidationSchema:
    """Patient-grouped K-fold splitter that feeds each fold to DataLoaders.

    Rows of train.csv are keyed by ``"<spectrogram_id>_<eeg_sub_id>"``. Patients
    are split 90/10 into train/test, and the training patients are then divided
    into ``nfolds`` folds. Each training patient contributes at most
    ``sample_size`` distinct rows per fold.

    Args:
        nfolds: number of K-fold splits.
        stratified: stored on the instance; not consulted by the current code.
        mode: only ``'patient'`` builds folds; other values leave ``folds`` unset.
        sample_size: number of draws (with replacement) per training patient.
    """

    def __init__(self, nfolds=5, stratified=True, mode='patient', sample_size=55):
        self.nfolds = nfolds
        self.stratified = stratified
        self.mode = mode
        self.models = []
        self.train_df = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/train.csv")
        self.train_df['sampling_index'] = self.train_df['spectrogram_id'].astype(str) + "_" + self.train_df['eeg_sub_id'].astype(str)

        if self.mode == 'patient':
            self.sampling_df = self.train_df.groupby(['patient_id']).sampling_index.apply(list).reset_index()
            self.train_set, self.test_set = train_test_split(self.sampling_df, test_size=0.1)

            # Fold-invariant lookup table, built once instead of twice per fold.
            rows_by_key = self.train_df.set_index('sampling_index')

            self.folds = []
            # Honour the constructor argument; the split count used to be fixed at 5.
            cv = KFold(n_splits=self.nfolds, shuffle=True)
            for train_idx, val_idx in cv.split(self.train_set):
                train_set_slice = self.train_set.iloc[train_idx]
                val_set_slice = self.train_set.iloc[val_idx]
                train_eegs_subs = np.concatenate(
                    train_set_slice['sampling_index'].apply(
                        lambda x: np.unique(np.random.choice(x, size=sample_size, replace=True))
                    ).values
                )
                val_eegs_subs = np.concatenate(val_set_slice['sampling_index'].values)
                sampled_train = rows_by_key.loc[train_eegs_subs].reset_index()
                sampled_val = rows_by_key.loc[val_eegs_subs].reset_index()
                self.folds.append((sampled_train, sampled_val))
                print(f"Train fold size {sampled_train.shape} || Test fold size {sampled_val.shape}")

    def validate_model(self, model):
        """Build the train/validation DataLoaders for every fold.

        The draft ended in a dangling ``val_dataloader = create`` (a NameError
        at call time). The statement is completed here; no training step is
        implemented yet, so ``model`` is currently unused.
        """
        if self.mode == 'patient':
            for sampled_train, sampled_val in self.folds:
                train_dataloader = create_dataloader(sampled_train, batch_size=256, shuffle=True)
                val_dataloader = create_dataloader(sampled_val, batch_size=256, shuffle=False)
                # TODO: train `model` on train_dataloader and score it on val_dataloader.


In [None]:
!du -sh /kaggle/input/hms-harmful-brain-activity-classification/train_eegs/

In [None]:
import polars as pl
test_eeg_id = 1628180742
path = f"/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/{test_eeg_id}.parquet"
# Smoke-test read of the first 10000 rows of one EEG file.
# `row_count_offset` is dropped: it only sets the start value of polars'
# optional row-count column (it never skips rows), it expects an int rather
# than the float 0.0 * 200, and newer polars releases no longer accept it.
test_eeg = pl.read_parquet(path, n_rows = 10000)

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import polars as pl
from tqdm.notebook import tqdm

class EEGDataset(Dataset):
    """Dataset over train.csv-style rows: one EEG window plus normalised votes.

    Required columns: ``eeg_id``, ``eeg_label_offset_seconds`` and the six
    ``*_vote`` columns. A window is 10000 parquet rows beginning at row
    ``eeg_label_offset_seconds * 200`` of the EEG file.
    """

    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return a dict with ``eeg_data`` (ndarray) and ``labels`` (6 vote shares)."""
        # Positional access: DataLoader indices are positions, not index labels.
        row = self.dataframe.iloc[idx]

        # Construct the path to the parquet file
        path = f"/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/{row['eeg_id']}.parquet"

        # Skip to the labelled window with an explicit slice. The former
        # `row_count_offset=` argument does not skip rows (it only renumbers
        # polars' optional row-count column), so every item got rows 0..9999.
        start_row = int(round(float(row['eeg_label_offset_seconds']) * 200))
        eeg = pl.scan_parquet(path).slice(start_row, 10000).collect()

        eeg_data = eeg.to_numpy(use_pyarrow=False)

        vote_columns = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
        labels = torch.tensor([row[column] for column in vote_columns], dtype=torch.float32)
        # TODO: apply Laplace smoothing to the vote counts before normalising.
        labels = labels / labels.sum()
        return {'eeg_data': eeg_data, 'labels': labels}

def create_dataloader(dataframe, batch_size=32, shuffle=True, num_workers=0):
    """Wrap ``dataframe`` in an ``EEGDataset`` and return a ``DataLoader`` over it.

    ``batch_size``, ``shuffle`` and ``num_workers`` are forwarded unchanged.
    """
    loader_options = {
        'batch_size': batch_size,
        'shuffle': shuffle,
        'num_workers': num_workers,
    }
    return DataLoader(EEGDataset(dataframe), **loader_options)


# Example usage:
# Build a loader over the notebook-wide `train_df` frame (defined in an earlier cell).
dataloader = create_dataloader(train_df, batch_size=256, shuffle=True)

# Smoke test: fetch one batch, unpack its two entries, then stop.
for batch in tqdm(dataloader):
    eeg_data = batch['eeg_data']
    labels = batch['labels']
    # A training/validation step would go here.
    break

In [None]:
import torch
import torch.nn.functional as F

def kl_divergence_loss(predicted, target):
    """Batch-mean KL divergence between ``target`` and ``softmax(predicted)``.

    Args:
        predicted: raw model outputs (logits), classes on the last axis.
        target: probabilities on the last axis (rows already sum to 1).

    Returns:
        Scalar tensor.
    """
    # log_softmax is the numerically stable form of softmax(...).log().
    log_probs = F.log_softmax(predicted, dim=-1)

    # No softmax on `target`: it already holds probabilities, and a second
    # softmax would distort them (one-hot would become close to uniform).
    return F.kl_div(log_probs, target, reduction='batchmean')

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import polars as pl
from tqdm import tqdm
from torch.optim import AdamW
from torch.cuda.amp import GradScaler, autocast

class EEGDataset(Dataset):
    """Per-row EEG window loader.

    For every row of ``dataframe`` it reads 10000 rows of
    ``train_eegs/<eeg_id>.parquet`` starting at row
    ``eeg_label_offset_seconds * 200`` and returns them together with the six
    vote counts normalised to sum to 1.
    """

    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``{'eeg_data': ndarray, 'labels': float32 tensor of length 6}``."""
        # `idx` is a position supplied by the sampler, hence .iloc rather than
        # the label-based .loc.
        row = self.dataframe.iloc[idx]

        # Construct the path to the parquet file
        path = f"/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/{row['eeg_id']}.parquet"

        # Row skipping is done with slice(). `row_count_offset` (used before)
        # only changes where polars' optional row-count column starts counting,
        # so the labelled offset was silently ignored.
        start_row = int(round(float(row['eeg_label_offset_seconds']) * 200))
        eeg = pl.scan_parquet(path).slice(start_row, 10000).collect()

        eeg_data = eeg.to_numpy(use_pyarrow=False)

        vote_columns = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
        labels = torch.tensor([row[column] for column in vote_columns], dtype=torch.float32)
        # Normalize the labels
        labels = labels / labels.sum()
        return {'eeg_data': eeg_data, 'labels': labels}

def create_dataloader(dataframe, batch_size=32, shuffle=True, num_workers=0):
    """Return a ``DataLoader`` that batches an ``EEGDataset`` built from ``dataframe``."""
    eeg_dataset = EEGDataset(dataframe)
    return DataLoader(eeg_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

# Define the LSTM model
class LSTMModel(nn.Module):
    """Stacked LSTM whose last-step output is projected by a linear layer.

    Args:
        input_dim: feature size of each time step.
        hidden_dim: LSTM hidden size.
        num_layers: number of stacked LSTM layers.
        output_dim: size of the returned vector per sequence.
        dropout: dropout between stacked LSTM layers (ignored for one layer).
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout=0.1):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Inter-layer dropout has no effect with a single layer and nn.LSTM
        # warns about it, so it is only forwarded when layers are stacked.
        inter_layer_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers,
                            batch_first=True, dropout=inter_layer_dropout)

        # Fully connected output layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    @autocast()  # Run the forward pass under CUDA autocast (mixed precision)
    def forward(self, x):
        """Map ``x`` of shape (batch, time, input_dim) to (batch, output_dim)."""
        # Omitting (h0, c0) lets nn.LSTM use its default zero states on the
        # device of `x`. Creating them with `.cuda()` tied the model to a GPU.
        sequence_out, _ = self.lstm(x)

        # Decode the hidden state of the last time step
        return self.fc(sequence_out[:, -1, :])

def kl_divergence_loss(predicted, target):
    """Batch-mean KL divergence of ``target`` from ``softmax(predicted)``.

    ``predicted`` holds raw scores that are log-softmaxed over the last axis;
    ``target`` is passed to ``kl_div`` unchanged.
    """
    log_probs = torch.log_softmax(predicted, dim=-1)
    return torch.nn.functional.kl_div(log_probs, target, reduction='batchmean')

# # Assuming you have a dataframe named 'train_df' containing the data
# train_dataloader = create_dataloader(train_df, batch_size=256, shuffle=True)

# # Define model parameters
# input_dim = 20  # Number of channels
# output_dim = 6  # Number of output dimensions
# hidden_dim = 64  # Hidden dimension of the LSTM
# num_layers = 1  # Number of LSTM layers
# dropout = 0.1  # Dropout probability

# # Create model instance
# model = LSTMModel(input_dim, hidden_dim, num_layers, output_dim).cuda()

# # Create optimizer
# optimizer = AdamW(model.parameters(), lr=1e-3)

# # Enable mixed-precision training
# scaler = GradScaler()

# # Training loop
# num_epochs = 10

# for epoch in range(num_epochs):
#     model.train()
#     total_loss = 0.0
    
#     for batch in tqdm(train_dataloader):
#         eeg_data = batch['eeg_data'].cuda()
#         labels = batch['labels'].cuda()
        
#         optimizer.zero_grad()
        
#         with autocast():
#             output = model(eeg_data)
#             loss = kl_divergence_loss(output, labels)
        
#         scaler.scale(loss).backward()
#         scaler.step(optimizer)
#         scaler.update()
        
#         total_loss += loss.item()
    
#     avg_loss = total_loss / len(train_dataloader)
#     print(f"Epoch {epoch+1}, Average Loss: {avg_loss:.4f}")


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW, Adam
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
import numpy as np
from tqdm import tqdm

class EEGDataset(Dataset):
    """Per-row EEG window loader with null filling.

    For every row of ``dataframe`` it reads 10000 rows of
    ``train_eegs/<eeg_id>.parquet`` starting at row
    ``eeg_label_offset_seconds * 200``, forward- then backward-fills nulls in
    every column, and returns the array with the six vote counts normalised to
    sum to 1.
    """

    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``{'eeg_data': ndarray, 'labels': float32 tensor of length 6}``."""
        # Samplers pass positions, so use .iloc instead of the label-based .loc.
        row = self.dataframe.iloc[idx]

        # Construct the path to the parquet file
        path = f"/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/{row['eeg_id']}.parquet"

        # slice() performs the actual row skip. The `row_count_offset` argument
        # used previously only renumbers polars' optional row-count column, so
        # the window always started at row 0 regardless of the label offset.
        start_row = int(round(float(row['eeg_label_offset_seconds']) * 200))
        eeg = (
            pl.scan_parquet(path)
            .slice(start_row, 10000)
            .select(pl.all().forward_fill())
            .select(pl.all().backward_fill())
            .collect()
        )

        eeg_data = eeg.to_numpy(use_pyarrow=False)

        vote_columns = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
        labels = torch.tensor([row[column] for column in vote_columns], dtype=torch.float32)
        # Normalize the labels
        labels = labels / labels.sum()
        return {'eeg_data': eeg_data, 'labels': labels}

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW, Adam
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
import numpy as np
from tqdm import tqdm

# Define your Dataset and Model classes here

class ValidationSchema:
    """Patient-grouped K-fold validation harness for EEG models.

    Rows of train.csv are keyed by ``"<spectrogram_id>_<eeg_sub_id>"``. Patients
    are split 90/10 into train/test, and the training patients are then divided
    into ``nfolds`` folds. Per fold, every training patient contributes at most
    ``sample_size`` distinct rows and the validation rows are subsampled to 10%.

    Args:
        nfolds: number of K-fold splits.
        stratified: stored on the instance; not consulted by the current code.
        mode: only ``'patient'`` builds folds.
        sample_size: number of draws (with replacement) per training patient.
        debug: if true, keep only the first 1000 rows of train.csv.
        random_seed: seed for the patient split, the folds, the per-patient
            sampling and the validation subsample.
    """

    def __init__(self, nfolds=5, stratified=True, mode='patient', sample_size=55, debug=False, random_seed=4545):
        self.nfolds = nfolds
        self.stratified = stratified
        self.mode = mode
        self.models = []
        self.train_df = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/train.csv")
        if debug:
            # .copy() so the column assignment below does not write into a slice.
            self.train_df = self.train_df.head(1000).copy()
        self.train_df['sampling_index'] = self.train_df['spectrogram_id'].astype(str) + "_" + self.train_df['eeg_sub_id'].astype(str)

        if self.mode == 'patient':
            self.sampling_df = self.train_df.groupby(['patient_id']).sampling_index.apply(list).reset_index()
            self.train_set, self.test_set = train_test_split(self.sampling_df, test_size=0.1, random_state=random_seed)

            # Seeded generator: the per-patient draws used the unseeded global
            # numpy RNG, so folds differed between runs despite `random_seed`.
            rng = np.random.default_rng(random_seed)
            # Fold-invariant lookup table, built once.
            rows_by_key = self.train_df.set_index('sampling_index')

            self.folds = []
            cv = KFold(n_splits=nfolds, shuffle=True, random_state=random_seed)
            for train_idx, val_idx in cv.split(self.train_set):
                train_set_slice = self.train_set.iloc[train_idx]
                val_set_slice = self.train_set.iloc[val_idx]
                train_eegs_subs = np.concatenate(
                    train_set_slice['sampling_index'].apply(
                        lambda keys: np.unique(rng.choice(keys, size=sample_size, replace=True))
                    ).values
                )
                val_eegs_subs = np.concatenate(val_set_slice['sampling_index'].values)
                sampled_train = rows_by_key.loc[train_eegs_subs].reset_index()
                sampled_val = (
                    rows_by_key.loc[val_eegs_subs]
                    .sample(frac=0.1, random_state=random_seed)
                    .reset_index()
                )
                self.folds.append((sampled_train, sampled_val))
                print(f"Train fold size {sampled_train.shape} || Test fold size {sampled_val.shape}")

    def validate_model(self, model, batch_size=256, num_epochs=5, patience=3, device=None):
        """Train and evaluate a fresh copy of ``model`` on every fold.

        Args:
            model: ``nn.Module`` mapping a batch's ``eeg_data`` to class logits.
                It is deep-copied per fold and is not modified itself; the
                trained copies are appended to ``self.models``.
            batch_size: batch size for both loaders.
            num_epochs: maximum epochs per fold.
            patience: used both as the ReduceLROnPlateau patience and as the
                early-stopping threshold.
            device: torch device; defaults to CUDA when available, else CPU.

        Returns:
            list of the best validation loss per fold, or ``None`` when the
            mode is not ``'patient'``.
        """
        # Local imports: neither name is imported by the surrounding cell.
        import copy
        from torch.optim.lr_scheduler import ReduceLROnPlateau

        if self.mode != 'patient':
            return None
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'

        fold_losses = []
        for sampled_train, sampled_val in self.folds:
            train_dataloader = create_dataloader(sampled_train, batch_size=batch_size, shuffle=True)
            val_dataloader = create_dataloader(sampled_val, batch_size=batch_size, shuffle=False)

            # Each fold starts from the caller's weights. Training the same
            # instance across folds leaked what was learnt on earlier folds
            # (including their validation patients) into later ones.
            fold_model = copy.deepcopy(model).to(device)
            optimizer = Adam(fold_model.parameters(), lr=1e-2)
            # `verbose` is deprecated in recent PyTorch; LR changes are printed below.
            # NOTE(review): the scheduler cuts the LR only after more than
            # `patience` bad epochs, while early stopping triggers at `patience`,
            # so with equal values the cut may never happen. Confirm intent.
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=patience)
            criterion = nn.KLDivLoss(reduction='batchmean')

            best_val_loss = float('inf')
            no_improvement = 0

            for epoch in range(num_epochs):
                fold_model.train()
                total_loss = 0.0
                for batch in tqdm(train_dataloader):
                    eeg_data = batch['eeg_data'].to(device)
                    labels = batch['labels'].to(device)

                    optimizer.zero_grad()
                    output = fold_model(eeg_data)
                    # Labels are already probabilities (vote shares), so only
                    # the model output is (log-)softmaxed.
                    loss = criterion(F.log_softmax(output, dim=1), labels)
                    loss.backward()
                    optimizer.step()

                    total_loss += loss.item()

                avg_loss = total_loss / max(len(train_dataloader), 1)

                # Validation loop
                fold_model.eval()
                total_val_loss = 0.0
                with torch.no_grad():
                    for batch in tqdm(val_dataloader):
                        eeg_data = batch['eeg_data'].to(device)
                        labels = batch['labels'].to(device)
                        output = fold_model(eeg_data)
                        val_loss = criterion(F.log_softmax(output, dim=1), labels)
                        total_val_loss += val_loss.item()

                avg_val_loss = total_val_loss / max(len(val_dataloader), 1)
                print(f"Average Loss: {avg_loss:.4f}\tEpoch {epoch+1}\tValidation Loss: {avg_val_loss:.4f}")

                # Adjust learning rate based on validation loss
                lr_before = optimizer.param_groups[0]['lr']
                scheduler.step(avg_val_loss)
                lr_after = optimizer.param_groups[0]['lr']
                if lr_after != lr_before:
                    print(f"Reducing learning rate from {lr_before:.2e} to {lr_after:.2e}")

                # Check for early stopping
                if avg_val_loss < best_val_loss:
                    best_val_loss = avg_val_loss
                    no_improvement = 0
                else:
                    no_improvement += 1
                    if no_improvement >= patience:
                        print(f"No improvement for {patience} epochs. Early stopping.")
                        break

            self.models.append(fold_model)
            fold_losses.append(best_val_loss)

        return fold_losses


## Вот тут поменял низ датасета, чтобы он выдавал нам только куски

In [None]:
import os
import numpy as np
import polars as pl
from torch.utils.data import Dataset
from tqdm import trange

class EEGDataset(Dataset):
    """EEG window dataset backed by ``.npy`` files that are opened memory-mapped.

    On construction every row's window (10000 parquet rows starting at row
    ``eeg_label_offset_seconds * 200``, nulls forward- then backward-filled) is
    written to ``memmap_dir`` unless the file already exists. Windows that
    still contain NaN are reported and left out of the dataset.

    Args:
        dataframe: rows with ``eeg_id``, ``eeg_label_offset_seconds`` and the
            six ``*_vote`` columns.
        memmap_dir: directory for the cached ``.npy`` files (created if missing).
        stack_files: if true, the array is passed through ``np.stack`` before
            saving.
    """

    def __init__(self, dataframe, memmap_dir='./memmap_dir1', stack_files=True):
        self.dataframe = dataframe
        self.memmap_dir = memmap_dir
        self.stack_files = stack_files
        # Positions in `dataframe` whose window is available on disk.
        self.valid_positions = []

        # Create memmap directory if it does not exist
        os.makedirs(self.memmap_dir, exist_ok=True)

        # Preprocess the files into memmap
        self.preprocess_memmap()

    @staticmethod
    def _start_row(row):
        """First parquet row of the labelled window for ``row``."""
        return int(round(float(row['eeg_label_offset_seconds']) * 200))

    def _memmap_path(self, row):
        """Cache file for ``row``'s window.

        The start row is part of the name: several rows can share an ``eeg_id``
        with different offsets, and a file named after ``eeg_id`` alone made
        them all load whichever window was cached first.
        """
        return os.path.join(self.memmap_dir, f"{row['eeg_id']}_{self._start_row(row)}.npy")

    def preprocess_memmap(self):
        """Write missing window files and record which rows are usable."""
        self.valid_positions = []
        for idx in trange(len(self.dataframe), desc="Preprocessing Memmap"):
            # Positional lookup, independent of the frame's index labels.
            row = self.dataframe.iloc[idx]
            eeg_id = row['eeg_id']
            memmap_path = self._memmap_path(row)

            # Check if memmap file already exists
            if not os.path.exists(memmap_path):
                path = f"/kaggle/input/hms-harmful-brain-activity-classification/train_eegs/{eeg_id}.parquet"
                # slice() does the row skip. `row_count_offset` only renumbers
                # polars' optional row-count column and never skipped anything.
                eeg = (
                    pl.scan_parquet(path)
                    .slice(self._start_row(row), 10000)
                    .select(pl.all().forward_fill())
                    .select(pl.all().backward_fill())
                    .collect()
                )
                eeg_data = eeg.to_numpy(use_pyarrow=False)

                # Check for NaN values
                if np.isnan(eeg_data).any():
                    print(f"Found NaN values in file {eeg_id}. Skipping...")
                    continue

                # Stack files if required
                if self.stack_files:
                    eeg_data = np.stack(eeg_data)

                # Save as memmap
                np.save(memmap_path, eeg_data)

            self.valid_positions.append(idx)

    def __len__(self):
        # Skipped rows have no file on disk, so they must not be indexable.
        return len(self.valid_positions)

    def __getitem__(self, idx):
        """Return five consecutive 2000-row chunks of the window plus the vote shares."""
        row = self.dataframe.iloc[self.valid_positions[idx]]
        eeg_data = np.load(self._memmap_path(row), mmap_mode='r')

        vote_columns = ['seizure_vote', 'lpd_vote', 'gpd_vote', 'lrda_vote', 'grda_vote', 'other_vote']
        labels = torch.tensor([row[column] for column in vote_columns], dtype=torch.float32)
        # Normalize the labels
        labels = labels / labels.sum()
        return {'eeg_data_10': eeg_data[:200*10, :], 'eeg_data_20': eeg_data[200*10: 200*20, :],
                'eeg_data_30': eeg_data[200*20: 200*30, :], 'eeg_data_40': eeg_data[200*30: 200*40, :],
                'eeg_data_50': eeg_data[200*40: ,:], 'labels': labels}


## Добавил изменения батча из датасета и возможность передавать девайс, чтобы не приходилось дебажить на видеокарте. Местами добавил if для использования wandb там, где его не было, потому что у меня ключ почему-то не добавляется и код падает

In [None]:
import wandb
from kaggle_secrets import UserSecretsClient

class ValidationSchema:
    """Patient-level cross-validation harness for models fed five EEG windows.

    Construction reads the competition ``train.csv``, groups rows by
    ``patient_id``, holds out 10% of the patients (``self.test_set``) and splits
    the remaining patients into ``nfolds`` K-fold partitions. For every fold,
    ``sample_size`` rows per training patient are drawn with replacement and
    de-duplicated, and a random 10% of the validation patients' rows is kept.
    The resulting ``(train_frame, val_frame)`` pairs are stored in ``self.folds``.

    Args:
        nfolds: Number of K-fold partitions over the non-held-out patients.
        stratified: Stored on the instance; the patient split itself uses plain
            ``KFold`` and does not read this flag.
        mode: Only ``'patient'`` builds folds; any other value leaves
            ``self.folds`` empty and makes ``validate_model`` a no-op.
        sample_size: Draws per training patient (before de-duplication).
        debug: When true, only the first 1000 CSV rows are used.
        random_seed: Seed for the hold-out split, the K-fold shuffle and both
            row-sampling steps, so that folds are reproducible.
        use_wandb: Log every fold as a Weights & Biases run.
        device: Torch device on which the model and the batches are placed.
    """

    # Batch keys of the five EEG windows, in the positional order the model expects.
    _WINDOW_KEYS = ('eeg_data_10', 'eeg_data_20', 'eeg_data_30', 'eeg_data_40', 'eeg_data_50')

    def __init__(self, nfolds=5, stratified=True, mode='patient', sample_size=55, debug=False, random_seed=4545, use_wandb=True, device='cuda'):
        self.device = device
        self.nfolds = nfolds
        self.stratified = stratified  # was hard-coded to True, silently ignoring the argument
        self.mode = mode
        self.models = []
        self.folds = []
        self.use_wandb = use_wandb
        self.train_df = pd.read_csv("/kaggle/input/hms-harmful-brain-activity-classification/train.csv")
        if debug:
            # .copy() so that the column assignment below does not write into a slice view
            self.train_df = self.train_df.head(1000).copy()
        self.train_df['sampling_index'] = self.train_df['spectrogram_id'].astype(str) + "_" + self.train_df['eeg_sub_id'].astype(str)

        if self.mode == 'patient':
            # One row per patient holding the list of that patient's sample ids.
            self.sampling_df = self.train_df.groupby(['patient_id']).sampling_index.apply(list).reset_index()
            self.train_set, self.test_set = train_test_split(self.sampling_df, test_size=0.1, random_state=random_seed)

            rng = np.random.RandomState(random_seed)
            rows_by_sample = self.train_df.set_index('sampling_index')  # built once, reused by every fold
            cv = KFold(n_splits=nfolds, shuffle=True, random_state=random_seed)
            for train_idx, val_idx in cv.split(self.train_set):
                train_patients = self.train_set.iloc[train_idx]
                val_patients = self.train_set.iloc[val_idx]
                # Cap each patient's contribution: draw with replacement, then
                # de-duplicate, so patients with few samples keep all they have.
                train_ids = np.concatenate(
                    train_patients['sampling_index']
                    .apply(lambda ids: np.unique(rng.choice(ids, size=sample_size, replace=True)))
                    .values
                )
                val_ids = np.concatenate(val_patients['sampling_index'].values)
                sampled_train = rows_by_sample.loc[train_ids].reset_index()
                sampled_val = rows_by_sample.loc[val_ids].sample(frac=0.1, random_state=random_seed).reset_index()
                self.folds.append((sampled_train, sampled_val))
                print(f"Train fold size {sampled_train.shape} || Test fold size {sampled_val.shape}")

    def _batch_loss(self, model, batch, criterion):
        """Move one batch to ``self.device``, run the model and return the loss tensor."""
        windows = [batch[key].to(self.device) for key in self._WINDOW_KEYS]
        labels = batch['labels'].to(self.device)
        output = model(*windows)
        # The dataset already normalises the votes into a probability vector, so
        # the target is passed as-is; an extra softmax would flatten it.
        return criterion(F.log_softmax(output, dim=1), labels)

    def _train_one_epoch(self, model, dataloader, criterion, optimizer):
        """Run one optimisation pass over ``dataloader`` and return the mean batch loss."""
        model.train()
        total_loss = 0.0
        for batch in tqdm(dataloader):
            optimizer.zero_grad()
            loss = self._batch_loss(model, batch, criterion)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(dataloader)

    def _start_wandb_run(self, group, params):
        """Authenticate via the Kaggle secret store and open a W&B run for one fold."""
        user_secrets = UserSecretsClient()
        os.environ['WANDB_API_KEY'] = user_secrets.get_secret("Wandb_key")
        wandb.init(project="HMS-HBAC",
                   entity="asimandia",
                   group=group)
        wandb.config.update(params)

    def validate_model(self, model, params):
        """Train and evaluate ``model`` on every fold, starting each fold from the same weights.

        Args:
            model: Module called as ``model(x10, x20, x30, x40, x50)`` and
                returning class logits of shape (batch, classes).
            params: dict with optional keys ``batch_size``, ``num_epochs``,
                ``patience``, ``optimizer_params``, ``scheduler_params`` and
                ``optimizer_name`` (``'Adam'``; anything else selects SGD).

        Returns:
            None. Losses are printed and, when ``use_wandb`` is set, logged.
        """
        import copy  # local import: only needed to snapshot the initial weights

        batch_size = params.get('batch_size', 256)
        num_epochs = params.get('num_epochs', 5)
        patience = params.get('patience', 3)
        optimizer_params = params.get('optimizer_params', {'lr': 1e-2})
        scheduler_params = params.get('scheduler_params', {'mode': 'min', 'factor': 0.5, 'patience': patience, 'verbose': True})
        optimizer_name = params.get('optimizer_name', 'Adam')

        # All folds of one call share a W&B group so they can be compared together.
        wandb_group = str(np.random.randint(0, 1000))

        if self.mode != 'patient':
            return

        model = model.to(self.device)
        # Snapshot the untrained weights: without a reset, fold k would start from
        # weights already fitted on patients that belong to its validation split.
        initial_state = copy.deepcopy(model.state_dict())
        criterion = nn.KLDivLoss(reduction='batchmean')

        for sampled_train, sampled_val in self.folds:
            if self.use_wandb:
                self._start_wandb_run(wandb_group, params)

            model.load_state_dict(initial_state)
            train_dataloader = create_dataloader(sampled_train, batch_size=batch_size, shuffle=True)
            val_dataloader = create_dataloader(sampled_val, batch_size=batch_size, shuffle=False)

            optimizer = Adam(model.parameters(), **optimizer_params) if optimizer_name == 'Adam' else SGD(model.parameters(), **optimizer_params)
            scheduler = ReduceLROnPlateau(optimizer, **scheduler_params)

            best_val_loss = float('inf')
            no_improvement = 0

            for epoch in range(num_epochs):
                avg_loss = self._train_one_epoch(model, train_dataloader, criterion, optimizer)
                avg_val_loss = self.calculate_test_loss(model, val_dataloader, criterion)
                print(f"Average Loss: {avg_loss:.4f}\tEpoch {epoch+1}\tValidation Loss: {avg_val_loss:.4f}")

                # Log losses to WandB
                if self.use_wandb:
                    wandb.log({"Train Loss": avg_loss, "Validation Loss": avg_val_loss})

                # Adjust learning rate based on validation loss
                scheduler.step(avg_val_loss)

                # Check for early stopping
                if avg_val_loss < best_val_loss:
                    best_val_loss = avg_val_loss
                    no_improvement = 0
                else:
                    no_improvement += 1
                    if no_improvement >= patience:
                        print(f"No improvement for {patience} epochs. Early stopping.")
                        break

            if self.use_wandb:
                wandb.log({"Best Validation Loss": best_val_loss})

            # NOTE(review): this "test" score is computed on the validation loader,
            # not on the held-out patients in self.test_set - confirm the intent.
            test_loss = self.calculate_test_loss(model, val_dataloader, criterion)
            if self.use_wandb:
                wandb.log({"Test Loss": test_loss})
                wandb.finish()  # close this fold's run before the next fold opens one

    def calculate_test_loss(self, model, test_dataloader, criterion):
        """Return the mean per-batch loss of ``model`` over ``test_dataloader`` without gradients."""
        model.eval()
        total_loss = 0.0
        with torch.no_grad():
            for batch in tqdm(test_dataloader):
                total_loss += self._batch_loss(model, batch, criterion).item()

        avg_loss = total_loss / len(test_dataloader)
        return avg_loss


In [None]:
import torch
import torch.nn as nn

class ChannelwiseCNN1DModel(nn.Module):
    """1-D CNN that convolves every input channel separately before mixing them.

    Each of the ``input_dim`` channels goes through its own ``Conv1d`` followed
    by ReLU, max-pooling and dropout. The per-channel feature maps are
    concatenated along the channel axis, passed through ``num_conv_layers``
    shared convolutions (ReLU after each), pooled, flattened and projected to
    ``output_dim`` logits by a single linear layer.

    Args:
        input_dim: Number of input channels (size of the last input axis).
        output_dim: Number of output logits.
        num_filters: Output channels of every convolution.
        kernel_size: Kernel size of every convolution.
        dropout: Dropout probability.
        seq_length: Input length in time steps; fixes the linear layer's width.
        max_pool_kernel_size: Kernel size of the shared max-pool layer.
        num_conv_layers: Number of shared convolutions after the concatenation.
    """

    def __init__(self,
                 input_dim,
                 output_dim,
                 num_filters=64,
                 kernel_size=3,
                 dropout=0.1,
                 seq_length=10000,
                 max_pool_kernel_size=10,
                 num_conv_layers=2  # Number of additional convolutional layers
                ):
        super().__init__()
        self.channelwise_conv_layers = nn.ModuleList([
            nn.Conv1d(in_channels=1, out_channels=num_filters, kernel_size=kernel_size)
            for _ in range(input_dim)
        ])
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=max_pool_kernel_size)
        self.dropout = nn.Dropout(dropout)

        # The first shared convolution mixes all per-channel feature maps; the
        # remaining ones keep the width at num_filters.
        shared_in_channels = [input_dim * num_filters] + [num_filters] * (num_conv_layers - 1)
        self.conv_layers = nn.ModuleList([
            nn.Conv1d(in_channels=in_channels, out_channels=num_filters, kernel_size=kernel_size)
            for in_channels in shared_in_channels
        ])

        # Adjusting the input size for the fully connected layer
        self.fc_input_size = self.calculate_fc_input_size(input_dim, num_filters, seq_length, max_pool_kernel_size, num_conv_layers)

        self.fc1 = nn.Linear(self.fc_input_size, output_dim)

    def _extract_features(self, x):
        """Map a (batch, seq_len, channels) tensor to flattened (batch, features)."""
        per_channel = []
        for i in range(x.size(2)):  # Loop over input channels
            x_channel = x[:, :, i].unsqueeze(1)  # (batch, 1, seq_len) for Conv1d
            x_channel = self.channelwise_conv_layers[i](x_channel)
            x_channel = self.relu(x_channel)
            x_channel = self.maxpool(x_channel)
            x_channel = self.dropout(x_channel)
            per_channel.append(x_channel)

        x = torch.cat(per_channel, dim=1)  # Concatenate along the channel dimension

        for conv_layer in self.conv_layers:
            x = self.relu(conv_layer(x))
        x = self.maxpool(x)
        x = self.dropout(x)
        return torch.flatten(x, start_dim=1)

    def calculate_fc_input_size(self,
                                input_dim,
                                num_filters,
                                seq_length,
                                max_pool_kernel_size,
                                num_conv_layers):
        """Return the flattened feature width for a ``seq_length`` x ``input_dim`` input.

        The width is measured by pushing one dummy sample through the
        convolutional stack. Only ``seq_length`` and ``input_dim`` are read;
        the other arguments are kept for interface compatibility.
        """
        # The dummy must have input_dim channels (it used to be fixed at 20, which
        # broke every other width); no_grad avoids building an autograd graph.
        with torch.no_grad():
            dummy = torch.zeros(1, seq_length, input_dim)
            return self._extract_features(dummy).size(1)

    def forward(self, x, debug = False):
        """Return logits of shape (batch, output_dim) for ``x`` of shape (batch, seq_len, channels).

        ``debug`` is accepted for compatibility and is not used.
        """
        return self.fc1(self._extract_features(x))

## Написал слой внимания

In [None]:
class AttentionLayer(nn.Module):
    """Additive attention that blends several context windows for one query window."""

    def __init__(self, enc_size, dec_size, hid_size, activ=torch.tanh):
        """ A layer that computes additive attention response and weights

        Args:
            enc_size: Size of the last axis of the context tensor ``enc``.
            dec_size: Size of the last axis of the query tensor ``dec``.
            hid_size: Width of the hidden space in which both are compared.
            activ: Non-linearity applied to the summed projections.
        """
        super().__init__()
        self.enc_size = enc_size
        self.dec_size = dec_size
        self.hid_size = hid_size
        self.activ = activ

        self.fc_enc = nn.Linear(enc_size, hid_size, bias=False)
        self.fc_dec = nn.Linear(dec_size, hid_size, bias=False)
        self.fc_att = nn.Linear(hid_size, 1, bias=False)

    def forward(self, enc, dec):
        """Return the attention-weighted sum of the context windows.

        Args:
            enc: Context tensor of shape (batch, windows, channels, enc_size).
            dec: Query tensor of shape (batch, channels, dec_size).

        Returns:
            Tensor of shape (batch, channels, enc_size): for every channel, a
            convex combination of that channel's rows across the windows.
        """
        logits_enc = self.fc_enc(enc)                # (batch, windows, channels, hid)
        logits_dec = self.fc_dec(dec).unsqueeze(1)   # (batch, 1, channels, hid), broadcast over windows
        logits = self.fc_att(self.activ(logits_enc + logits_dec)).squeeze(-1)  # (batch, windows, channels)

        # Normalise over the window axis - the axis the einsum below sums out - so
        # the weights of each channel form a distribution over windows. (Softmax
        # over the last axis left the summed weights unnormalised.)
        probs = nn.functional.softmax(logits, dim=1)

        attn = torch.einsum('ijkh,ijk->ikh', enc, probs)
        return attn

## Встроил слой внимания так, как я это вижу

In [None]:
class ChannelwiseCNN1DModelWithAtt(nn.Module):
    """Channel-wise 1-D CNN over five EEG windows with attention around the middle one.

    Every window is encoded by the same per-channel convolutions (conv, ReLU,
    max-pool, dropout) and concatenated along the channel axis. The encoding of
    the third window acts as the attention query over the encodings of the
    other four. The attended features go through ``num_conv_layers`` shared
    convolutions, pooling, dropout, flattening and a linear layer.

    Args:
        input_dim: Number of input channels (size of the last input axis).
        output_dim: Number of output logits.
        num_filters: Output channels of every convolution.
        kernel_size: Kernel size of every convolution.
        dropout: Dropout probability.
        seq_length: Total length in time steps; each window spans ``seq_length // 5``.
        max_pool_kernel_size: Kernel size of the shared max-pool layer.
        num_conv_layers: Number of shared convolutions after the attention.
    """

    def __init__(self,
                 input_dim,
                 output_dim,
                 num_filters=64,
                 kernel_size=3,
                 dropout=0.1,
                 seq_length=10000,
                 max_pool_kernel_size=10,
                 num_conv_layers=2  # Number of additional convolutional layers
                ):
        super().__init__()

        self.channelwise_conv_layers = nn.ModuleList([
            nn.Conv1d(in_channels=1, out_channels=num_filters, kernel_size=kernel_size)
            for _ in range(input_dim)
        ])

        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=max_pool_kernel_size)
        self.dropout = nn.Dropout(dropout)

        # The first shared convolution mixes all per-channel feature maps; the
        # remaining ones keep the width at num_filters.
        shared_in_channels = [input_dim * num_filters] + [num_filters] * (num_conv_layers - 1)
        self.conv_layers = nn.ModuleList([
            nn.Conv1d(in_channels=in_channels, out_channels=num_filters, kernel_size=kernel_size)
            for in_channels in shared_in_channels
        ])

        # forward() receives the signal as five equally long windows.
        window_length = seq_length // 5
        self.att_input_size, x, y = self.calculate_att_input_size(input_dim, num_filters, window_length, max_pool_kernel_size, num_conv_layers)
        self.att = AttentionLayer(self.att_input_size, self.att_input_size, self.att_input_size*2)

        # Adjusting the input size for the fully connected layer
        self.fc_input_size = self.calculate_fc_input_size(x, y, input_dim, num_filters, window_length, max_pool_kernel_size, num_conv_layers)

        self.fc1 = nn.Linear(self.fc_input_size, output_dim)

    def _encode_window(self, x):
        """Encode one (batch, seq_len, channels) window into (batch, channels * filters, length)."""
        per_channel = []
        for i in range(x.size(2)):  # Loop over input channels
            x_channel = x[:, :, i].unsqueeze(1)  # (batch, 1, seq_len) for Conv1d
            x_channel = self.channelwise_conv_layers[i](x_channel)
            x_channel = self.relu(x_channel)
            x_channel = self.maxpool(x_channel)
            x_channel = self.dropout(x_channel)
            per_channel.append(x_channel)
        return torch.cat(per_channel, dim=1)  # Concatenate along the channel dimension

    def _head_features(self, features):
        """Run the shared conv stack, pooling and dropout, then flatten to (batch, features)."""
        for conv_layer in self.conv_layers:
            features = self.relu(conv_layer(features))
        features = self.maxpool(features)
        features = self.dropout(features)
        return torch.flatten(features, start_dim=1)  # Flatten along sequence_length dimension

    def calculate_att_input_size(self,
                                input_dim,
                                num_filters,
                                seq_length,
                                max_pool_kernel_size,
                                num_conv_layers):
        """Measure the encoded window length with a dummy pass.

        Only ``input_dim`` and ``seq_length`` (the length of ONE window) are
        read; the other arguments are kept for interface compatibility.

        Returns:
            ``(length, context, query)``: the last-axis size of an encoded
            window, a dummy context tensor with four window slots and the
            matching dummy query, for use by ``calculate_fc_input_size``.
        """
        # The dummy must have input_dim channels (it used to be fixed at 20, which
        # broke every other width); no_grad avoids building an autograd graph.
        with torch.no_grad():
            query = self._encode_window(torch.zeros(1, seq_length, input_dim))
        context = query.unsqueeze(1).expand(-1, 4, -1, -1)  # four context windows, as in forward()
        return context.shape[-1], context, query

    def calculate_fc_input_size(self,
                                x, y,
                                input_dim,
                                num_filters,
                                seq_length,
                                max_pool_kernel_size,
                                num_conv_layers):
        """Return the flattened feature width that feeds ``fc1``.

        ``x`` is the dummy context and ``y`` the dummy query produced by
        ``calculate_att_input_size``; the remaining arguments are unused.
        """
        # NOTE(review): this sizing pass used to add a residual (y + attention)
        # that forward() never applied. Both variants have the same shape, so the
        # pass now mirrors forward(); confirm whether forward() should get the
        # residual connection.
        with torch.no_grad():
            attended = self.att(x, y)
            return self._head_features(attended).size(1)

    def forward(self, x1, x2, x3, x4, x5, debug = False):
        """Return logits of shape (batch, output_dim) for five consecutive windows.

        Each ``x*`` has shape (batch, seq_len, channels). ``x3`` is the query
        window; ``x1``, ``x2``, ``x4`` and ``x5`` form the attention context.
        ``debug`` is accepted for compatibility and is not used.
        """
        encoded = [self._encode_window(window) for window in (x1, x2, x3, x4, x5)]
        query = encoded.pop(2)
        context = torch.stack(encoded, dim=1)  # (batch, 4, channels * filters, length)

        attended = self.att(context, query)
        return self.fc1(self._head_features(attended))

## Следующие 5 ячеек — я просто тестил, что все работает

In [None]:
# Smoke-test setup: one attention model with three shared conv layers.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

i = 3            # number of shared conv layers used for this check
input_dim = 20   # Number of input channels
output_dim = 6   # Number of output dimensions

# Example usage:
params = dict(
    batch_size=180,
    num_epochs=100,
    patience=3,
    optimizer_params=dict(lr=10e-3),
    scheduler_params=dict(mode='min', factor=0.5, patience=3, verbose=True),
    optimizer_name='Adam',
    num_conv_layers=i,
)
model = ChannelwiseCNN1DModelWithAtt(input_dim, output_dim, num_conv_layers=i).to(device)

In [None]:
# Show the model's module tree as the cell output.
model

In [None]:
# Build the cross-validation folds for the smoke test (3 folds, W&B logging off).
rand_seed = 4545
validation_schema = ValidationSchema(
    nfolds=3,
    random_seed=rand_seed,
    debug=False,
    use_wandb=False,
    device=device,
)

In [None]:
# Wrap `train_df` in the EEG dataset so single samples can be pulled for a manual check.
dataset = EEGDataset(train_df)

In [None]:
# Smoke test: push one real sample through the model and show the logits.
sample = dataset[0]  # load the sample once instead of once per window
# Feed the five distinct windows (not the first window five times) and place
# them on the same device as the model, otherwise the call fails on CUDA.
x1, x2, x3, x4, x5 = (
    torch.tensor(sample[key]).to(device)
    for key in ('eeg_data_10', 'eeg_data_20', 'eeg_data_30', 'eeg_data_40', 'eeg_data_50')
)
with torch.no_grad():  # shape check only, no gradients needed
    smoke_output = model(x1.unsqueeze(0), x2.unsqueeze(0), x3.unsqueeze(0), x4.unsqueeze(0), x5.unsqueeze(0))
smoke_output

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the validation schema once: it reads the CSV and samples the folds, and
# neither depends on the swept parameter. Reusing it also means every
# configuration is compared on exactly the same folds.
rand_seed = 4545
validation_schema = ValidationSchema(debug=False,
                                     random_seed = rand_seed,
                                     nfolds=3, use_wandb=False, device=device)

input_dim = 20  # Number of input channels
output_dim = 6  # Number of output dimensions

# Sweep over the number of shared convolutional layers.
for i in [2, 4, 6]:
    params = {
        'batch_size' : 180,
        'num_epochs' : 100,
        'patience' : 3,
        'optimizer_params' : {
            'lr' : 10e-3,
        },
        'scheduler_params' : {'mode': 'min',
                                  'factor': 0.5,
                                  'patience': 3,
                                  'verbose': True},
        'optimizer_name' : 'Adam',
        'num_conv_layers' : i

    }
    model = ChannelwiseCNN1DModelWithAtt(input_dim,
                                  output_dim,
                                  num_conv_layers=i).to(device)
    model = nn.DataParallel(model)

    # Validate the model using the defined schema
    validation_schema.validate_model(model,
                                     params
                                    )