# Fix seeds

In [1]:
import os
import torch
import random
import numpy as np

seed = 1996

random.seed(seed)

np.random.seed(seed)

torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

torch.backends.cudnn.determenistic = True
torch.use_deterministic_algorithms(True)

os.environ['PYTHONHASHSEED'] = str(seed)

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

print(device)

cuda:0


# Load train data

In [2]:
import pandas as pd
from tqdm import tqdm
from pathlib import Path

xs = []
ys = []
ys_ = []

traindir = Path('./data/train/')
print(traindir.absolute())


for datapath in tqdm([*sorted(traindir.glob('*_data.csv'))]):
    eventpath = datapath.parent / ( datapath.stem[:-5] + '_events.csv' )
    
    
    x = pd.read_csv(datapath)
    y = pd.read_csv(eventpath)

    x = x.iloc[:,1:].values
    y_ = y.iloc[:,1:].values

    xs.append(x.astype(np.float32))
    ys.append(y_.astype(np.uint8))

xs_train = xs[:-2]
ys_train = ys[:-2]

xs_valid = xs[-2:]
ys_valid = ys[-2:]

print(len(xs_train))


e:\GitHub\HQA_EEG\data\train


100%|██████████| 96/96 [00:34<00:00,  2.80it/s]

94





In [3]:
import ecgmentations as E

from torch.utils.data import DataLoader, Dataset

class EEGDataset(Dataset):
    def __init__(self, x, y, augs=dict, train=False):
        self.x = x
        self.y = y
        self.augs = augs
        
        self.train = train

    def __getitem__(self, idx):
        eeg = self.x[idx]
        mask = self.y[idx]

        if self.train:
            length = mask.shape[0]

            size = 5000
            smask = (np.sum(mask[:-size], axis=1) > 0).astype(np.uint8)
            smask = smask * 5 + 1
            p = smask / smask.sum()

            jdx = np.random.choice(length-size, p=p)

            eeg = eeg[jdx:jdx+size]
            mask = mask[jdx:jdx+size]

        auged = self.augs(ecg=eeg, mask=mask)
        eeg, mask = auged['ecg'], auged['mask']

        return eeg.T, mask.T

    def __len__(self):
        return len(self.x)

augs = E.Sequential([
    E.TimeCrop(length=5000, p=1.0),
])

dataset = EEGDataset(xs_train, ys_train, augs, True) 

train_dataloader = DataLoader(dataset, batch_size=25, num_workers=3, shuffle=True)

dataset = EEGDataset(xs_train, ys_train) 
train_dataloader_ = DataLoader(dataset, batch_size=32, num_workers=3, shuffle=False)

dataset = EEGDataset(xs_valid, ys_valid)
valid_dataloader = DataLoader(dataset, batch_size=32, num_workers=3, shuffle=False)



# Train model

In [4]:
print(dataset.x[0].shape)

(128210, 32)


In [5]:
for i in train_dataloader_:
    print(i)
    break

In [5]:
import copy
import torch.nn.functional as F

from sklearn import metrics
from nnspt.segmentation.unet import Unet

model = Unet(in_channels=32, out_channels=6, encoder='timm-efficientnet-b1')
model.to(device)
print("9")
nepochs = 10

opt = torch.optim.AdamW(model.parameters(), lr=0.00175)
shed = torch.optim.lr_scheduler.CosineAnnealingLR(opt, nepochs*len(train_dataloader))

loss_his, train_loss = [], []
print("16")
best_score = 0.
best_state_dict = copy.deepcopy(model.state_dict())
print("19")

for i, (eeg_batch, mask_batch) in enumerate(train_dataloader):
    print("22")
    eeg_batch, mask_batch = eeg_batch.to(device), mask_batch.to(device)
    print("24")
    logits = model(eeg_batch)
    print("26")
    loss = F.binary_cross_entropy_with_logits(logits, mask_batch.float())
    print("28")
    loss.backward()
    print("30")
    opt.step()
    print("32")
    shed.step()
    print("34")
    opt.zero_grad()
    print("36")
    train_loss.append(loss.item())
    print("38")

  from .autonotebook import tqdm as notebook_tqdm


9
16
19


In [None]:
import copy
import torch.nn.functional as F

from sklearn import metrics
from nnspt.segmentation.unet import Unet

model = Unet(in_channels=32, out_channels=6, encoder='timm-efficientnet-b1')
model.to(device)
print("9")
nepochs = 10

opt = torch.optim.AdamW(model.parameters(), lr=0.00175)
shed = torch.optim.lr_scheduler.CosineAnnealingLR(opt, nepochs*len(train_dataloader))

loss_his, train_loss = [], []
print("16")
best_score = 0.
best_state_dict = copy.deepcopy(model.state_dict())
print("19")
for epoch in range(nepochs):
    model.train()
    print("22")
    for i, (eeg_batch, mask_batch) in enumerate(train_dataloader):
        eeg_batch, mask_batch = eeg_batch.to(device), mask_batch.to(device)

        logits = model(eeg_batch)
        loss = F.binary_cross_entropy_with_logits(logits, mask_batch.float())
        loss.backward()

        opt.step()
        shed.step()
        opt.zero_grad()

        train_loss.append(loss.item())
        print("35")
    if (epoch + 1) % 25 == 0:
        loss_his.append(np.mean(train_loss))
        train_loss.clear()

        print('[Epoch {}/{}] [Loss: {}]'.format(epoch+1, nepochs, loss_his[-1]))
        
        model.eval()

        y_pred = []

        size = 10000
        
        for eeg_batch, _ in tqdm(valid_dataloader):
            for idx in range((eeg_batch.shape[-1] + size - 1) // size):
                with torch.no_grad():
                    eeg_batch_ = eeg_batch[:, :, idx*size: (idx+1)*size].to(device)

                    logits = model(eeg_batch_)
                    probs = torch.sigmoid(logits).cpu().numpy()[0]

                    y_pred.append(probs)

        y_pred = np.concatenate(y_pred, axis=1).T
        y_true = np.concatenate(ys_valid, axis=0)

        score = metrics.roc_auc_score(y_true, y_pred)

        print('[Epoch {}/{}] [Score: {}]'.format(epoch+1, nepochs, score))

        if score > best_score:
            best_score = score
            best_state_dict = copy.deepcopy(model.state_dict())
    print("68")
model.load_state_dict(best_state_dict)

# Score on train part

In [None]:
import matplotlib.pyplot as plt

def plot_roc(y_true, y_pred):
    fig, axs = plt.subplots(3, 2, figsize=(15, 13))

    for i, label in enumerate(labels):
        fpr, tpr, _ = metrics.roc_curve(y_true[i], y_pred[i])
        ax = axs[i//2, i%2]
        ax.plot(fpr, tpr)
        ax.set_title(label + ' ROC')
        ax.plot([0, 1], [0, 1], 'k--')

    plt.show()

In [None]:
model.eval()

y_pred = []

size = 10000

for eeg_batch, _ in tqdm(train_dataloader_):
    for idx in range((eeg_batch.shape[-1] + size - 1) // size):
        with torch.no_grad():
            eeg_batch_ = eeg_batch[:, :, idx*size: (idx+1)*size].to(device)

            logits = model(eeg_batch_)
            probs = torch.sigmoid(logits).cpu().numpy()[0]

            y_pred.append(probs)

y_pred = np.concatenate(y_pred, axis=1).T
y_true = np.concatenate(ys_train, axis=0)

plot_roc(y_true.T, y_pred.T)

print('roc auc: ', metrics.roc_auc_score(y_true, y_pred))

# Score on val part

In [None]:
model.eval()

y_pred = []

size = 10000

for eeg_batch, _ in tqdm(valid_dataloader):
    for idx in range((eeg_batch.shape[-1] + size - 1) // size):
        with torch.no_grad():
            eeg_batch_ = eeg_batch[:, :, idx*size: (idx+1)*size].to(device)

            logits = model(eeg_batch_)
            probs = torch.sigmoid(logits).cpu().numpy()[0]

            y_pred.append(probs)

y_pred = np.concatenate(y_pred, axis=1).T
y_true = np.concatenate(ys_valid, axis=0)

plot_roc(y_true.T, y_pred.T)

print('roc auc: ', metrics.roc_auc_score(y_true, y_pred))

# Create submission

In [None]:
import pandas as pd
from tqdm import tqdm

xs_test = []
lengths = {}

testdir = Path('test')

FNAME = 'subj{}_series{}_{}.csv'

for subj in range(1, 13):
    for series in [9, 10]:
        datapath = testdir / FNAME.format(subj, series, 'data')

        x = pd.read_csv(datapath)
        x = x.iloc[:,1:].values

        xs_test.append(x.astype(np.float32))
        lengths['{}_{}'.format(subj, series)] = xs_test[-1].shape[0]

In [None]:
class EEGDatasetTest(Dataset):
    def __init__(self, x):
        self.x = x

    def __getitem__(self, idx):
        eeg = self.x[idx]

        return eeg.T

    def __len__(self):
        return len(self.x)

dataset = EEGDatasetTest(xs_test)
test_dataloader = DataLoader(dataset, batch_size=1, num_workers=3, shuffle=False)

In [None]:
model.eval()

y_pred = []

size = 10000

for eeg_batch in tqdm(test_dataloader):
    for idx in range((eeg_batch.shape[-1] + size - 1) // size):
        with torch.no_grad():
            eeg_batch_ = eeg_batch[:, :, idx*size: (idx+1)*size].to(device)

            logits = model(eeg_batch_)
            probs = torch.sigmoid(logits).cpu().numpy()[0]

            y_pred.append(probs)

y_pred = np.concatenate(y_pred, axis=1).T

In [None]:
submission = pd.DataFrame(y_pred, index=['subj{}_series{}_{}'.format(sbj, i, j) for sbj in range(1, 13) for i in [9, 10] for j in range(lengths['{}_{}'.format(sbj, i)])], columns=labels)
submission.to_csv('Submission.csv', index_label='id', float_format='%.3f')

submission.tail()

In [None]:
!head Submission.csv

In [None]:
!head sample_submission.csv