# Deep Learning 基礎講座　最終課題: 脳波分類

## 概要
被験者が画像を見ているときの脳波から，その画像がどのカテゴリに属するかを分類するタスク．
- サンプル数: 訓練 118,800 サンプル，検証 59,400 サンプル，テスト 59,400 サンプル
- クラス数: 5
- 入力: 脳波データ（チャンネル数 x 系列長）
- 出力: 対応する画像のクラス
- 評価指標: Top-1 accuracy

### 元データセット ([Gifford2022 EEG dataset](https://osf.io/3jk45/)) との違い

- 本コンペでは難易度調整の目的で元データセットにいくつかの改変を加えています．

1. 訓練セットのみの使用
  - 元データセットでは訓練データに存在しなかったクラスの画像を見ているときの脳波においてテストが行われますが，これは難易度が非常に高くなります．
  - 本コンペでは元データセットの訓練セットを再分割し，訓練時に存在した画像に対応する別の脳波において検証・テストを行います．

2. クラス数の減少
  - 元データセット（の訓練セット）では16,540枚の画像に対し，1,654のクラスが存在します．
    - e.g. `aardvark`, `alligator`, `almond`, ...
  - 本コンペでは1,654のクラスを，`animal`, `food`, `clothing`, `tool`, `vehicle`の5つにまとめています．
    - e.g. `aardvark -> animal`, `alligator -> animal`, `almond -> food`, ...

### 考えられる工夫の例

- 音声モデルの導入
  - 脳波と同じ波である音声を扱うアーキテクチャを用いることが有効であると知られています．
  - 例）Conformer [[Gulati+ 2020](https://arxiv.org/abs/2005.08100)]
- 画像データを用いた事前学習
  - 本コンペのタスクは脳波のクラス分類ですが，配布してある画像データを脳波エンコーダの事前学習に用いることを許可します．
  - 例）CLIP [Radford+ 2021]
  - 画像を用いる場合は[こちら](https://osf.io/download/3v527/)からダウンロードしてください．
- 過学習を防ぐ正則化やドロップアウト


## 修了要件を満たす条件
- ベースラインモデルのbest test accuracyは38.7%となります．**これを超えた提出のみ，修了要件として認めます**．
- ベースラインから改善を加えることで，55%までは性能向上することを運営で確認しています．こちらを 1 つの指標として取り組んでみてください．

## 注意点
- 学習するモデルについて制限はありませんが，必ず訓練データで学習したモデルで予測してください．
    - 事前学習済みモデルを利用して，訓練データで fine-tuning しても構いません．
    - 埋め込み抽出モデルなど，モデルの一部を訓練しないケースは構いません．
    - 学習を一切せずに，ChatGPT などの基盤モデルを利用することは禁止とします．

## 1.準備

In [1]:
# For running on Omnicampus: install ipywidgets (presumably needed by the
# tqdm.notebook progress bars imported below).
!pip install ipywidgets

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting ipywidgets
  Downloading ipywidgets-8.1.7-py3-none-any.whl (139 kB)
     139.8/139.8 kB 4.4 MB/s eta 0:00:00
Collecting widgetsnbextension~=4.0.14 (from ipywidgets)
  Downloading widgetsnbextension-4.0.14-py3-none-any.whl (2.2 MB)
     2.2/2.2 MB 14.7 MB/s eta 0:00:00
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets)
  Downloading jupyterlab_widgets-3.0.15-py3-none-any.whl (216 kB)
     216.6/216.6 kB 25.6 MB/s eta 0:00:00
Installing collected packages: widgetsnbextension, jupyterlab_widgets, ipywidgets
Successfully installed ipywidgets-8.1.7 jupyterlab_widgets-3.0.15 widgetsnbextension-4.0.14

[notice] (remainder of the pip notice is truncated in the captured output)

In [2]:
# pip install "timm>=0.9.2" "torch>=2.0,<2.1" "torchvision>=0.15,<0.16"
# pip install optuna
# pip install tsaug

In [2]:
# ライブラリのインポートとシード固定
import os, sys
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from einops.layers.torch import Rearrange
from einops import repeat
from glob import glob
from termcolor import cprint
from tqdm.notebook import tqdm
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
from einops.layers.torch import Rearrange
from einops import rearrange, repeat
from timm.layers import DropPath
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.model_selection import StratifiedKFold
import optuna
from tsaug import TimeWarp, AddNoise, Reverse, Dropout as TsaugDropout


# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch CPU and all CUDA devices) with the same value.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
# Request deterministic cuDNN kernels and switch off the cuDNN auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
class SubjectNormalizer:
    """Per-subject, per-channel z-score normalisation of EEG trials.

    Inputs are indexed as ``(trial, channel, time)``.  ``fit`` stores one
    mean and one standard deviation per channel for every subject id
    (reduced over that subject's trials and over time); ``transform``
    applies ``(x - mean) / std`` using the statistics of each trial's subject.
    """

    def __init__(self):
        # subject id (int) -> (mean, std); both have shape (1, channels, 1)
        self.stats = {}

    def fit(self, eegs, subject_idxs):
        """Compute and store the statistics of every subject present.

        Args:
            eegs: array or tensor indexed as (trial, channel, time).
            subject_idxs: one subject id per trial.
        """
        eegs = torch.as_tensor(eegs)
        subject_idxs = torch.as_tensor(subject_idxs)
        for subj in torch.unique(subject_idxs):
            data = eegs[subject_idxs == subj]
            mean = data.mean(dim=(0, 2), keepdim=True)
            # Small epsilon keeps the division safe for constant channels.
            std = data.std(dim=(0, 2), keepdim=True) + 1e-6
            self.stats[int(subj)] = (mean, std)

    def transform(self, eegs, subject_idxs):
        """Return a normalised float32 copy of ``eegs``.

        All trials of one subject are normalised in a single masked
        operation instead of one Python iteration per trial.

        Raises:
            KeyError: if a subject id was not seen by ``fit``.
        """
        subject_idxs = torch.as_tensor(subject_idxs)
        eegs = torch.as_tensor(eegs, dtype=torch.float32)
        normed = torch.zeros_like(eegs)
        for subj in torch.unique(subject_idxs):
            mean, std = self.stats[int(subj)]
            mask = subject_idxs == subj
            # Masked assignment needs matching dtypes; the stored statistics
            # may be float64 when ``fit`` saw float64 data.
            normed[mask] = ((eegs[mask] - mean) / std).to(normed.dtype)
        return normed

In [4]:
# --- Global configuration ---------------------------------------------------
DATA_DIR   = "./data/"   # root folder; split files are read from <split>/ below it
BATCH_SIZE = 256         # training batch size (evaluation loaders use twice this)
EPOCHS     = 30          # maximum epochs per fold in the final training
TRIALS     = 15          # number of Optuna trials
TTA_LOOPS  = 3           # augmented forward passes averaged per test batch

# Single definition of the compute device.  (The original assigned DEVICE
# twice - first a plain string, then a torch.device that overwrote it.)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = DEVICE  # lowercase alias kept for any code that refers to `device`

# tsaug pipeline: `+` chains augmenters; `@ p` applies an augmenter to each
# series with probability p.
AUG = (
    TimeWarp(n_speed_change=5, max_speed_ratio=2.0) @ 0.5
  + AddNoise(scale=0.02) @ 0.5
  + Reverse() @ 0.3
  + TsaugDropout(p=0.1, size=10) @ 0.5
)
def tsaugment(x: torch.Tensor) -> torch.Tensor:
    """Apply the module-level ``AUG`` pipeline to one 2-D tensor.

    The two axes are swapped and a leading singleton axis is added before
    the data is handed to ``AUG.augment`` as a NumPy array; the result is
    converted back to a tensor with the original axis order.
    (Assumes ``x`` is (channels, time) - TODO confirm against the caller.)
    """
    single_series = x.permute(1, 0).unsqueeze(0)
    augmented = AUG.augment(single_series.cpu().numpy())
    augmented = augmented.squeeze(0)
    restored = torch.from_numpy(augmented)
    return restored.permute(1, 0)

class EEGDataset(Dataset):
    """Dataset yielding ``(x, subject)`` or ``(x, label, subject)`` items.

    Args:
        X: EEG data, array-like or tensor; stored as a float32 copy.
        y: optional integer labels.  When ``None`` items have no label.
        subj: subject id per sample (array, list or tensor).  The stored
            ids are shifted down by one - presumably the files hold
            1-based ids; TODO confirm (an id of 0 would become -1).
        augment: when true, every item is passed through ``tsaugment``.
    """

    def __init__(self, X, y=None, subj=None, augment=False):
        if isinstance(X, torch.Tensor):
            self.X = X.clone().detach().float()
        else:
            self.X = torch.tensor(X, dtype=torch.float32)

        # as_tensor accepts arrays, lists and tensors alike; clone() keeps
        # the stored labels independent of the caller's buffer.
        self.y = None if y is None else torch.as_tensor(y, dtype=torch.long).clone()
        # Subtracting after the conversion also works for lists and tensors.
        self.subj = None if subj is None else torch.as_tensor(subj, dtype=torch.long) - 1
        self.augment = augment

    def __len__(self): return len(self.X)

    def __getitem__(self, i):
        """Return item ``i``.

        Raises:
            ValueError: if the dataset was built without subject ids, which
                every item needs.
        """
        if self.subj is None:
            raise ValueError(
                "EEGDataset was created with subj=None, but every item "
                "includes a subject index; pass `subj` to the constructor."
            )
        x = self.X[i]
        if self.augment:
            x = tsaugment(x)
        if self.y is None:
            return x, self.subj[i]
        return x, self.y[i], self.subj[i]

def get_loaders():
    """Build the train / val / test loaders and the fitted normaliser.

    Each split is read from ``DATA_DIR/<split>/`` (``eeg.npy``,
    ``subject_idxs.npy`` and, except for ``test``, ``labels.npy``).  A
    ``SubjectNormalizer`` is fitted on the training split only and applied
    to all three.  Only the training loader shuffles and augments; the
    other two use twice the batch size.

    Returns:
        ``(train_loader, val_loader, test_loader, normalizer)``.
    """
    def _read(split):
        eeg = np.load(os.path.join(DATA_DIR, f"{split}/eeg.npy"))
        subjects = np.load(os.path.join(DATA_DIR, f"{split}/subject_idxs.npy"))
        labels = None
        if split != 'test':
            labels = np.load(os.path.join(DATA_DIR, f"{split}/labels.npy"))
        return eeg, labels, subjects

    raw = {name: _read(name) for name in ('train', 'val', 'test')}

    norm = SubjectNormalizer()
    train_eeg, _, train_subjects = raw['train']
    norm.fit(train_eeg, train_subjects)

    datasets = {}
    for name, (eeg, labels, subjects) in raw.items():
        datasets[name] = EEGDataset(
            norm.transform(eeg, subjects),
            labels,
            subjects,
            augment=(name == 'train'),
        )

    eval_batch = BATCH_SIZE * 2
    tr_dl = DataLoader(datasets['train'], batch_size=BATCH_SIZE, shuffle=True)
    va_dl = DataLoader(datasets['val'], batch_size=eval_batch)
    te_dl = DataLoader(datasets['test'], batch_size=eval_batch)
    return tr_dl, va_dl, te_dl, norm

In [5]:
class ChannelSEAttention(nn.Module):
    """Squeeze-and-excitation style gating over the channel axis.

    The input is averaged over its last axis, passed through a two-layer
    bottleneck (ReLU, then sigmoid) and the resulting per-channel gate in
    (0, 1) multiplies the input.

    Args:
        num_channels: size of axis 1 of the input.
        reduction: bottleneck ratio.  The hidden width is
            ``num_channels // reduction`` but never less than 1, so a
            channel count below ``reduction`` no longer produces an empty
            (zero-unit) bottleneck.
    """

    def __init__(self, num_channels, reduction=16):
        super().__init__()
        hidden = max(1, num_channels // reduction)
        self.fc1 = nn.Linear(num_channels, hidden)
        self.fc2 = nn.Linear(hidden, num_channels)

    def forward(self, x):
        # x is indexed (batch, channel, time): squeeze over time.
        weights = x.mean(dim=2)
        weights = F.relu(self.fc1(weights))
        weights = torch.sigmoid(self.fc2(weights))
        # Broadcast the (batch, channel) gate along the last axis.
        return x * weights.unsqueeze(-1)

class SubjectSpatialFilter(nn.Module):
    """One learnable channel-mixing matrix per subject.

    ``weight`` has shape (num_subjects, num_channels, num_channels) and
    starts as a stack of identity matrices, so the layer initially returns
    its input unchanged.
    """

    def __init__(self, num_subjects, num_channels):
        super().__init__()
        identity_bank = torch.stack([torch.eye(num_channels)] * num_subjects)
        self.weight = nn.Parameter(identity_bank)

    def forward(self, x, subj_idx):
        # Pick each sample's matrix, then batch-multiply it with the sample.
        per_sample_filter = self.weight[subj_idx]
        return per_sample_filter.bmm(x)


class MultiScaleConvBlock(nn.Module):
    """Three parallel 1-D convolutions with kernel sizes 5, 15 and 31.

    Every branch pads by half its kernel size, so all three keep the input
    length.  Their outputs are concatenated on the channel axis (giving
    ``3 * out_channels`` channels), batch-normalised and passed through GELU.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Registered as branch1 / branch2 / branch3, in this order.
        for position, width in enumerate((5, 15, 31), start=1):
            branch = nn.Conv1d(in_channels, out_channels,
                               kernel_size=width, padding=width // 2)
            setattr(self, f"branch{position}", branch)
        self.bn = nn.BatchNorm1d(3 * out_channels)
        self.act = nn.GELU()

    def forward(self, x):
        branches = (self.branch1, self.branch2, self.branch3)
        stacked = torch.cat([conv(x) for conv in branches], dim=1)
        normalised = self.bn(stacked)
        return self.act(normalised)

class MultiHeadAttention(nn.Module):
    """Multi-head scaled dot-product self-attention over (batch, time, dim).

    A single linear layer produces queries, keys and values; attention
    weights and the output projection each have their own dropout.
    """

    def __init__(self, emb_dim, num_heads, dropout=0.1):
        super().__init__()
        self.num_heads = num_heads
        self.head_dim = emb_dim // num_heads
        self.qkv = nn.Linear(emb_dim, 3 * emb_dim)
        self.att_drop = nn.Dropout(dropout)
        self.proj = nn.Linear(emb_dim, emb_dim)
        self.proj_drop = nn.Dropout(dropout)

    def forward(self, x):  # [B, T, C]
        batch, tokens, width = x.shape
        # [B, T, 3*C] -> [3, B, heads, T, head_dim]
        packed = self.qkv(x).reshape(batch, tokens, 3, self.num_heads, self.head_dim)
        query, key, value = packed.permute(2, 0, 3, 1, 4).unbind(dim=0)

        scale = 1.0 / (self.head_dim ** 0.5)
        scores = torch.matmul(query, key.transpose(-2, -1)) * scale
        probs = self.att_drop(F.softmax(scores, dim=-1))

        # Merge the heads back: [B, heads, T, head_dim] -> [B, T, C]
        merged = torch.matmul(probs, value).transpose(1, 2).reshape(batch, tokens, width)
        return self.proj_drop(self.proj(merged))

class ConformerBlock(nn.Module):
    """Conformer-style block: feed-forward, attention, convolution, feed-forward.

    Each of the four sub-layers starts with its own LayerNorm and is added
    to its input as a plain residual.  Input and output are indexed
    (batch, time, emb_dim).
    """

    def __init__(self, emb_dim, num_heads, kernel_size=31, dropout=0.1):
        super().__init__()
        self.ffn1 = self._feed_forward(emb_dim, dropout)
        self.attn = nn.Sequential(
            nn.LayerNorm(emb_dim),
            MultiHeadAttention(emb_dim, num_heads, dropout),
        )
        # Convolution module: works channel-first, hence the two Rearranges.
        self.conv = nn.Sequential(
            nn.LayerNorm(emb_dim),
            Rearrange('b t c -> b c t'),
            # Grouped conv doubles the channels so that GLU can halve them.
            nn.Conv1d(emb_dim, 2 * emb_dim, kernel_size,
                      padding=kernel_size // 2, groups=emb_dim),
            nn.GLU(dim=1),
            nn.BatchNorm1d(emb_dim),
            nn.SiLU(),
            nn.Conv1d(emb_dim, emb_dim, 1),
            Rearrange('b c t -> b t c'),
            nn.Dropout(dropout),
        )
        self.ffn2 = self._feed_forward(emb_dim, dropout)

    @staticmethod
    def _feed_forward(width, drop_p):
        """Build one LayerNorm -> 4x expansion -> SiLU -> projection module."""
        hidden = width * 4
        return nn.Sequential(
            nn.LayerNorm(width),
            nn.Linear(width, hidden),
            nn.SiLU(),
            nn.Dropout(drop_p),
            nn.Linear(hidden, width),
            nn.Dropout(drop_p),
        )

    def forward(self, x):
        for sublayer in (self.ffn1, self.attn, self.conv, self.ffn2):
            x = x + sublayer(x)
        return x

class EEGEnhancedModel(nn.Module):
    """EEG classifier: subject filter -> channel gate -> multi-scale conv -> Conformer.

    Args:
        num_classes: size of the output logits.
        num_subjects: number of per-subject spatial filters.
        num_channels: number of EEG channels in the input.
        seq_len: number of learned positional embeddings (maximum number of
            time steps the model can receive).
        emb_dim: token width inside the Conformer blocks.  It does not have
            to be a multiple of 3 (see ``project`` below).
        depth: number of Conformer blocks.

    ``forward(x, subj_idx)`` takes ``x`` indexed (batch, channel, time) and
    one subject index per sample, and returns (batch, num_classes) logits
    read from a prepended class token.
    """

    def __init__(self, num_classes, num_subjects, num_channels, seq_len, emb_dim=256, depth=6):
        super().__init__()
        # Each of the three conv branches emits emb_dim // 3 channels.
        branch_dim = emb_dim // 3
        self.subject_filter = SubjectSpatialFilter(num_subjects, num_channels)
        self.channel_att = ChannelSEAttention(num_channels)
        self.temporal = MultiScaleConvBlock(num_channels, branch_dim)
        # The concatenated branches have 3 * branch_dim channels, which is
        # smaller than emb_dim whenever emb_dim is not a multiple of 3
        # (e.g. the default 256 -> 255).  Projecting from the real channel
        # count avoids the shape mismatch; for multiples of 3 the layer is
        # unchanged.
        self.project = nn.Conv1d(3 * branch_dim, emb_dim, 1)
        self.pos_emb = nn.Parameter(torch.randn(1, seq_len, emb_dim))
        self.blocks = nn.Sequential(*[ConformerBlock(emb_dim, num_heads=8) for _ in range(depth)])
        self.cls_token = nn.Parameter(torch.randn(1, 1, emb_dim))
        self.norm = nn.LayerNorm(emb_dim)
        self.head = nn.Linear(emb_dim, num_classes)

    def forward(self, x, subj_idx):
        x = self.subject_filter(x, subj_idx)
        x = self.channel_att(x)
        x = self.temporal(x)
        x = self.project(x)
        x = rearrange(x, 'b c t -> b t c')
        b, t, e = x.shape
        cls = self.cls_token.expand(b, -1, -1)
        # Positions are added to the signal tokens only, not the class token.
        x = torch.cat([cls, x + self.pos_emb[:, :t]], dim=1)
        x = self.blocks(x)
        # Classify from the class token (index 0).
        x = self.norm(x[:, 0])
        return self.head(x)

In [6]:
from torch.cuda.amp import GradScaler, autocast
# Let Optuna print its INFO-level messages (one line per finished/pruned trial).
optuna.logging.set_verbosity(optuna.logging.INFO)

def optuna_search(n_epochs: int = 3):
    """Search model and optimiser hyper-parameters with Optuna.

    Every trial trains a fresh ``EEGEnhancedModel`` for ``n_epochs`` epochs
    with mixed precision and reports the validation accuracy after each
    epoch so that the median pruner can stop weak trials early.

    Args:
        n_epochs: epochs per trial (default 3, the original fixed value).

    Returns:
        The parameter dict of the best trial, with the keys ``emb_dim``,
        ``depth``, ``lr`` and ``smooth``.
    """
    tr_dl, va_dl, _, _ = get_loaders()

    def objective(trial):
        emb   = trial.suggest_categorical("emb_dim", [312, 384, 480])
        depth = trial.suggest_int("depth", 5, 10)
        lr    = trial.suggest_float("lr", 3e-4, 1e-3, log=True)
        sm    = trial.suggest_float("smooth", 0.0, 0.15)

        print(f"Trying: emb={emb}, depth={depth}, lr={lr:.5f}, smooth={sm:.3f}")

        model = EEGEnhancedModel(5, 10, 17, 100, emb, depth).to(DEVICE)
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
        scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer, max_lr=lr, epochs=n_epochs, steps_per_epoch=len(tr_dl)
        )
        criterion = nn.CrossEntropyLoss(label_smoothing=sm)
        # A scaler per trial: its loss-scale state belongs to one model and
        # must not carry over between independent trials.
        scaler = GradScaler()

        for epoch in range(n_epochs):
            model.train()
            print(f"  Epoch {epoch+1}/{n_epochs}")
            for i, (x, y, s) in enumerate(tr_dl):
                x, y, s = x.to(DEVICE), y.to(DEVICE), s.to(DEVICE)
                optimizer.zero_grad()
                with autocast():
                    loss = criterion(model(x, s), y)
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()
                scheduler.step()
                if i == 0:
                    print(f"    Batch 1: loss={loss.item():.4f}")

            # validation
            model.eval()
            correct, total = 0, 0
            with torch.no_grad():
                for x, y, s in va_dl:
                    x, y, s = x.to(DEVICE), y.to(DEVICE), s.to(DEVICE)
                    with autocast():
                        pred = model(x, s).argmax(1)
                    correct += (pred == y).sum().item()
                    total += y.size(0)
            val_acc = correct / total
            print(f"    Val Acc: {val_acc:.4f}")

            trial.report(val_acc, epoch)
            if trial.should_prune():
                print("    Trial pruned.")
                raise optuna.TrialPruned()

        return val_acc

    study = optuna.create_study(
        direction="maximize",
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=1)
    )
    study.optimize(objective, n_trials=TRIALS)
    print("Best params:", study.best_params)
    return study.best_params

In [7]:
class EarlyStopping:
    """Track validation accuracy and flag when it has stopped improving.

    Call the instance once per epoch with the new accuracy.  ``early_stop``
    becomes ``True`` after ``patience`` consecutive calls that did not beat
    ``best_score`` (a tie counts as no improvement).
    """

    def __init__(self, patience=10, verbose=True):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0          # consecutive calls without improvement
        self.best_score = None    # best accuracy seen so far
        self.early_stop = False

    def __call__(self, val_acc):
        # The first observation only initialises the reference score.
        if self.best_score is None:
            self.best_score = val_acc
            return

        no_improvement = val_acc <= self.best_score
        if not no_improvement:
            if self.verbose and self.counter > 0:
                print("Accuracy improved, resetting patience.")
            self.best_score = val_acc
            self.counter = 0
            return

        self.counter += 1
        if self.verbose:
            print(f"  ⏸ No improvement. {self.counter}/{self.patience} patience used.")
        if self.counter >= self.patience:
            self.early_stop = True


In [10]:
from itertools import islice
import pickle

def train_full_and_save(params, n_splits: int = 5, max_folds: int = 3):
    """Train one model per cross-validation fold on train+val and save it.

    The train and val splits are concatenated and re-split with a
    stratified K-fold; only the first ``max_folds`` folds are trained.
    For every fold the following files are written to the working directory:

    * ``model_fold{k}.pth``      - weights of the best validation epoch
    * ``checkpoint_fold{k}.pt``  - full state after each epoch, for resuming
    * ``norm_fold{k}.pkl``       - the fitted ``SubjectNormalizer`` (read by
      ``predict_ensemble_tta``)

    A fold is skipped only when it is finished: the best-model file exists
    and either there is no checkpoint, or the checkpoint shows that early
    stopping triggered or the last epoch was reached.  A fold interrupted
    part-way is resumed from its checkpoint.

    Args:
        params: dict with the keys ``emb_dim``, ``depth``, ``lr``, ``smooth``.
        n_splits: number of stratified folds.
        max_folds: how many of those folds to train (default 3).

    Returns:
        None.  All results are written to disk.
    """
    def _load_train_val(filename):
        # Stack the train and val arrays of one kind along the sample axis.
        return np.concatenate([
            np.load(DATA_DIR + f"train/{filename}"),
            np.load(DATA_DIR + f"val/{filename}"),
        ])

    X_all = _load_train_val("eeg.npy")
    y_all = _load_train_val("labels.npy")
    s_all = _load_train_val("subject_idxs.npy")

    kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    last_fold = min(max_folds, n_splits) - 1

    for fold, (tr_idx, va_idx) in islice(enumerate(kfold.split(X_all, y_all)), max_folds):
        print(f"\n=== Fold {fold} / {last_fold} ===")

        model_path = f"model_fold{fold}.pth"
        ckpt_path  = f"checkpoint_fold{fold}.pt"
        norm_path  = f"norm_fold{fold}.pkl"

        ckpt = None
        if os.path.exists(ckpt_path):
            # NOTE: this unpickles Python objects (EarlyStopping, the
            # normaliser); only load checkpoints written by this notebook.
            # Loading to CPU keeps the normaliser statistics on the CPU;
            # load_state_dict below copies the weights to the model's device.
            ckpt = torch.load(ckpt_path, map_location="cpu", weights_only=False)

        if os.path.exists(model_path):
            # The best-model file already appears after the first epoch, so
            # its existence alone does not mean the fold is complete.
            fold_done = (
                ckpt is None
                or ckpt['early_stopping'].early_stop
                or ckpt['epoch'] + 1 >= EPOCHS
            )
            if fold_done:
                print(f"Fold {fold} already trained. Skipping.")
                continue

        X_tr, y_tr, s_tr = X_all[tr_idx], y_all[tr_idx], s_all[tr_idx]
        X_va, y_va, s_va = X_all[va_idx], y_all[va_idx], s_all[va_idx]

        if ckpt is not None:
            norm = ckpt['norm']
        else:
            norm = SubjectNormalizer()
            norm.fit(X_tr, s_tr)
        # Persist the normaliser next to the weights for inference.
        with open(norm_path, "wb") as f:
            pickle.dump(norm, f)

        X_tr = norm.transform(X_tr, s_tr)
        X_va = norm.transform(X_va, s_va)

        tr_dl = DataLoader(EEGDataset(X_tr, y_tr, s_tr, augment=True), BATCH_SIZE, shuffle=True)
        va_dl = DataLoader(EEGDataset(X_va, y_va, s_va), BATCH_SIZE*2)

        model = EEGEnhancedModel(5, 10, 17, 100, params['emb_dim'], params['depth']).to(DEVICE)
        opt = torch.optim.AdamW(model.parameters(), lr=params['lr'])
        # len(tr_dl) counts the final partial batch as well; the scheduler is
        # stepped once per batch, so a floor division here would leave it
        # short of steps in the last epoch.
        sch = torch.optim.lr_scheduler.OneCycleLR(
            opt, max_lr=params['lr'], epochs=EPOCHS, steps_per_epoch=len(tr_dl)
        )
        crit = nn.CrossEntropyLoss(label_smoothing=params['smooth'])
        scaler = GradScaler()  # one scaler per fold / model
        es = EarlyStopping(patience=3)
        start_epoch = 0

        if ckpt is not None:
            print(f"Resuming from checkpoint: {ckpt_path}")
            model.load_state_dict(ckpt['model_state_dict'])
            opt.load_state_dict(ckpt['optimizer_state_dict'])
            sch.load_state_dict(ckpt['scheduler_state_dict'])
            es = ckpt['early_stopping']
            # Older checkpoints have no scaler state; an empty dict is what a
            # disabled (CPU) scaler saves.
            if ckpt.get('scaler_state_dict'):
                scaler.load_state_dict(ckpt['scaler_state_dict'])
            start_epoch = ckpt['epoch'] + 1

        for epoch in range(start_epoch, EPOCHS):
            model.train()
            for x, y, sb in tr_dl:
                x, y, sb = x.to(DEVICE), y.to(DEVICE), sb.to(DEVICE)
                opt.zero_grad()
                with autocast():
                    loss = crit(model(x, sb), y)
                scaler.scale(loss).backward()
                scaler.step(opt)
                scaler.update()
                sch.step()

            model.eval()
            correct = total = 0
            with torch.no_grad():
                for x, y, sb in va_dl:
                    x, y, sb = x.to(DEVICE), y.to(DEVICE), sb.to(DEVICE)
                    with autocast():
                        pred = model(x, sb).argmax(1)
                    correct += (pred == y).sum().item()
                    total += y.size(0)
            val_acc = correct / total
            print(f"Fold {fold}  Epoch {epoch+1}  ValAcc {val_acc:.4f}")

            if es.best_score is None or val_acc > es.best_score:
                torch.save(model.state_dict(), model_path)
                print(" New best model saved.")

            # Update early stopping BEFORE checkpointing so that a resumed
            # run sees this epoch's best score, counter and stop flag.
            es(val_acc)

            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': opt.state_dict(),
                'scheduler_state_dict': sch.state_dict(),
                'scaler_state_dict': scaler.state_dict(),
                'early_stopping': es,
                'norm': norm,
            }, ckpt_path)

            if es.early_stop:
                print(" EarlyStopping.")
                break

In [8]:
if __name__ == '__main__':
    # Run the hyper-parameter search, then the K-fold training with the
    # best parameters found.
    best = optuna_search()
    # train_full_and_save writes its results to disk and returns nothing,
    # so there is no (model, norm) pair to unpack here.
    train_full_and_save(best)

[I 2025-07-18 06:16:48,673] A new study created in memory with name: no-name-ed64548d-8d6e-4cfd-bec5-a761457a7bce


Trying: emb=384, depth=10, lr=0.00070, smooth=0.031
  Epoch 1/3
    Batch 1: loss=1.6960
    Val Acc: 0.4008
  Epoch 2/3
    Batch 1: loss=1.5079
    Val Acc: 0.4742
  Epoch 3/3
    Batch 1: loss=1.3836


[I 2025-07-18 06:44:08,429] Trial 0 finished with value: 0.4857575757575758 and parameters: {'emb_dim': 384, 'depth': 10, 'lr': 0.0006958322562772423, 'smooth': 0.03104644135977158}. Best is trial 0 with value: 0.4857575757575758.


    Val Acc: 0.4858
Trying: emb=312, depth=5, lr=0.00043, smooth=0.094
  Epoch 1/3
    Batch 1: loss=1.5869
    Val Acc: 0.3880
  Epoch 2/3
    Batch 1: loss=1.4819
    Val Acc: 0.4614
  Epoch 3/3
    Batch 1: loss=1.4174


[I 2025-07-18 07:02:04,118] Trial 1 finished with value: 0.47734006734006734 and parameters: {'emb_dim': 312, 'depth': 5, 'lr': 0.00042937377820675114, 'smooth': 0.09448069968796559}. Best is trial 0 with value: 0.4857575757575758.


    Val Acc: 0.4773
Trying: emb=384, depth=5, lr=0.00050, smooth=0.139
  Epoch 1/3
    Batch 1: loss=1.6207
    Val Acc: 0.3958
  Epoch 2/3
    Batch 1: loss=1.4976
    Val Acc: 0.4577
  Epoch 3/3
    Batch 1: loss=1.3909


[I 2025-07-18 07:21:25,521] Trial 2 finished with value: 0.48175084175084176 and parameters: {'emb_dim': 384, 'depth': 5, 'lr': 0.0005011371630416184, 'smooth': 0.13934316911686168}. Best is trial 0 with value: 0.4857575757575758.


    Val Acc: 0.4818
Trying: emb=384, depth=6, lr=0.00050, smooth=0.122
  Epoch 1/3
    Batch 1: loss=1.7988
    Val Acc: 0.4033
  Epoch 2/3
    Batch 1: loss=1.5455
    Val Acc: 0.4539
  Epoch 3/3
    Batch 1: loss=1.4357


[I 2025-07-18 07:42:23,380] Trial 3 finished with value: 0.480976430976431 and parameters: {'emb_dim': 384, 'depth': 6, 'lr': 0.0004961663935629247, 'smooth': 0.12232456139678108}. Best is trial 0 with value: 0.4857575757575758.


    Val Acc: 0.4810
Trying: emb=384, depth=8, lr=0.00035, smooth=0.095
  Epoch 1/3
    Batch 1: loss=1.9041
    Val Acc: 0.3872
  Epoch 2/3
    Batch 1: loss=1.5044
    Val Acc: 0.4513
  Epoch 3/3
    Batch 1: loss=1.4275


[I 2025-07-18 08:06:35,996] Trial 4 finished with value: 0.47496632996633 and parameters: {'emb_dim': 384, 'depth': 8, 'lr': 0.0003545453772150599, 'smooth': 0.09529970552183809}. Best is trial 0 with value: 0.4857575757575758.


    Val Acc: 0.4750
Trying: emb=384, depth=9, lr=0.00072, smooth=0.089
  Epoch 1/3
    Batch 1: loss=1.7601
    Val Acc: 0.4072
  Epoch 2/3
    Batch 1: loss=1.5056
    Val Acc: 0.4591
  Epoch 3/3
    Batch 1: loss=1.4547


[I 2025-07-18 08:32:23,057] Trial 5 pruned. 


    Val Acc: 0.4773
    Trial pruned.
Trying: emb=480, depth=6, lr=0.00039, smooth=0.008
  Epoch 1/3
    Batch 1: loss=1.8071
    Val Acc: 0.2925
  Epoch 2/3
    Batch 1: loss=1.6032


[I 2025-07-18 08:47:54,900] Trial 6 pruned. 


    Val Acc: 0.4493
    Trial pruned.
Trying: emb=480, depth=8, lr=0.00072, smooth=0.021
  Epoch 1/3
    Batch 1: loss=1.5438
    Val Acc: 0.3873
  Epoch 2/3
    Batch 1: loss=1.4591


[I 2025-07-18 09:06:07,174] Trial 7 pruned. 


    Val Acc: 0.4354
    Trial pruned.
Trying: emb=312, depth=7, lr=0.00063, smooth=0.060
  Epoch 1/3
    Batch 1: loss=1.7402
    Val Acc: 0.4181
  Epoch 2/3
    Batch 1: loss=1.4377
    Val Acc: 0.4793
  Epoch 3/3
    Batch 1: loss=1.4170


[I 2025-07-18 09:26:39,738] Trial 8 finished with value: 0.4881144781144781 and parameters: {'emb_dim': 312, 'depth': 7, 'lr': 0.0006324137498211563, 'smooth': 0.05974557224801802}. Best is trial 8 with value: 0.4881144781144781.


    Val Acc: 0.4881
Trying: emb=480, depth=10, lr=0.00055, smooth=0.077
  Epoch 1/3
    Batch 1: loss=1.6213
    Val Acc: 0.3493
  Epoch 2/3
    Batch 1: loss=1.5823


[I 2025-07-18 09:47:13,191] Trial 9 pruned. 


    Val Acc: 0.4568
    Trial pruned.
Trying: emb=312, depth=7, lr=0.00096, smooth=0.051
  Epoch 1/3
    Batch 1: loss=1.5742
    Val Acc: 0.3945
  Epoch 2/3
    Batch 1: loss=1.4640


[I 2025-07-18 10:00:39,843] Trial 10 pruned. 


    Val Acc: 0.4489
    Trial pruned.
Trying: emb=312, depth=10, lr=0.00069, smooth=0.044
  Epoch 1/3
    Batch 1: loss=1.7942
    Val Acc: 0.3888
  Epoch 2/3
    Batch 1: loss=1.4405
    Val Acc: 0.4633
  Epoch 3/3
    Batch 1: loss=1.3762


[I 2025-07-18 10:24:45,095] Trial 11 pruned. 


    Val Acc: 0.4811
    Trial pruned.
Trying: emb=312, depth=7, lr=0.00093, smooth=0.037
  Epoch 1/3
    Batch 1: loss=1.6889
    Val Acc: 0.4149
  Epoch 2/3
    Batch 1: loss=1.4738
    Val Acc: 0.4619
  Epoch 3/3
    Batch 1: loss=1.3862


[I 2025-07-18 10:44:52,342] Trial 12 finished with value: 0.4874242424242424 and parameters: {'emb_dim': 312, 'depth': 7, 'lr': 0.0009250288919173885, 'smooth': 0.0372371512336716}. Best is trial 8 with value: 0.4881144781144781.


    Val Acc: 0.4874
Trying: emb=312, depth=7, lr=0.00095, smooth=0.056
  Epoch 1/3
    Batch 1: loss=1.8189
    Val Acc: 0.3968
  Epoch 2/3
    Batch 1: loss=1.4981


[I 2025-07-18 10:58:19,070] Trial 13 pruned. 


    Val Acc: 0.4508
    Trial pruned.
Trying: emb=312, depth=6, lr=0.00030, smooth=0.000
  Epoch 1/3
    Batch 1: loss=1.9150
    Val Acc: 0.4037
  Epoch 2/3
    Batch 1: loss=1.4214


[I 2025-07-18 11:10:55,141] Trial 14 pruned. 


    Val Acc: 0.4502
    Trial pruned.
Best params: {'emb_dim': 312, 'depth': 7, 'lr': 0.0006324137498211563, 'smooth': 0.05974557224801802}

=== Fold 0 / 5 ===
Fold 0  Epoch 1  ValAcc 0.3872


AttributeError: 'EarlyStopping' object has no attribute 'best_acc'

In [11]:
if __name__ == '__main__':
    # Best parameters reported by the Optuna search, entered by hand so the
    # training can be re-run without repeating the search.
    best = dict(
        emb_dim=312,
        depth=7,
        lr=0.0006324137498211563,
        smooth=0.05974557224801802,
    )

    train_full_and_save(best)


=== Fold 0 / 2 ===
Fold 0  Epoch 1  ValAcc 0.3872
 New best model saved.
Fold 0  Epoch 2  ValAcc 0.4012
 New best model saved.
Fold 0  Epoch 3  ValAcc 0.4554
 New best model saved.
Fold 0  Epoch 4  ValAcc 0.4745
 New best model saved.
Fold 0  Epoch 5  ValAcc 0.4824
 New best model saved.
Fold 0  Epoch 6  ValAcc 0.4929
 New best model saved.
Fold 0  Epoch 7  ValAcc 0.4798
  ⏸ No improvement. 1/3 patience used.
Fold 0  Epoch 8  ValAcc 0.5078
 New best model saved.
Accuracy improved, resetting patience.
Fold 0  Epoch 9  ValAcc 0.5019
  ⏸ No improvement. 1/3 patience used.
Fold 0  Epoch 10  ValAcc 0.5149
 New best model saved.
Accuracy improved, resetting patience.
Fold 0  Epoch 11  ValAcc 0.5138
  ⏸ No improvement. 1/3 patience used.
Fold 0  Epoch 12  ValAcc 0.5165
 New best model saved.
Accuracy improved, resetting patience.
Fold 0  Epoch 13  ValAcc 0.5215
 New best model saved.
Fold 0  Epoch 14  ValAcc 0.5258
 New best model saved.
Fold 0  Epoch 15  ValAcc 0.5264
 New best model saved.

In [28]:
def tsaugment2(x: torch.Tensor) -> torch.Tensor:
    """Apply the module-level ``AUG`` pipeline to a whole 3-D batch.

    The last two axes are swapped before the batch is handed to
    ``AUG.augment`` as a NumPy array and swapped back afterwards; the
    result is moved to the device of ``x``.
    (Assumes ``x`` is (batch, channels, time) - TODO confirm.)
    """
    swapped = x.permute(0, 2, 1)
    augmented = torch.from_numpy(AUG.augment(swapped.cpu().numpy()))
    return augmented.permute(0, 2, 1).to(x.device)

def predict_ensemble_tta(folds=(0, 1, 2), params=None):
    """Predict the test set with a fold ensemble and test-time augmentation.

    For every requested fold whose ``model_fold{k}.pth`` and
    ``norm_fold{k}.pkl`` both exist, the test data is normalised with that
    fold's normaliser and the logits of ``TTA_LOOPS`` randomly augmented
    forward passes are averaged.  The fold averages are averaged again and
    saved to ``submission.npy``.

    Args:
        folds: fold numbers to ensemble.
        params: dict with ``emb_dim`` and ``depth`` used to rebuild the
            model.  Defaults to the module-level ``best`` dict.

    Raises:
        RuntimeError: if none of the requested folds has both files.
    """
    if params is None:
        params = best  # module-level dict defined by the training cells

    X_test = np.load(DATA_DIR + "test/eeg.npy")
    s_test = np.load(DATA_DIR + "test/subject_idxs.npy")

    all_logits = []

    for fold in folds:
        model_path = f"model_fold{fold}.pth"
        norm_path  = f"norm_fold{fold}.pkl"
        if not (os.path.exists(model_path) and os.path.exists(norm_path)):
            print(f"Skip Fold {fold}: model or norm not found.")
            continue

        model = EEGEnhancedModel(5, 10, 17, 100, emb_dim=params['emb_dim'], depth=params['depth']).to(DEVICE)
        # map_location lets weights saved on a GPU load on a CPU-only machine.
        model.load_state_dict(torch.load(model_path, map_location=DEVICE))
        model.eval()

        # NOTE: pickle can execute code while loading; only open normaliser
        # files produced by this notebook.
        with open(norm_path, "rb") as f:
            norm = pickle.load(f)

        X_norm = norm.transform(X_test, s_test)
        test_ds = EEGDataset(X_norm, None, s_test)
        test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE*2)

        fold_logits = []
        with torch.no_grad():
            for X, subj in tqdm(test_loader, desc=f"TTA Fold {fold}"):
                X, subj = X.to(DEVICE), subj.to(DEVICE)
                logit_accum = torch.zeros((X.size(0), 5), device=DEVICE)
                for _ in range(TTA_LOOPS):
                    Xa = tsaugment2(X)
                    logit_accum += model(Xa, subj)
                logits = logit_accum / TTA_LOOPS
                fold_logits.append(logits.cpu())

        all_logits.append(torch.cat(fold_logits, dim=0))

    if not all_logits:
        raise RuntimeError(
            f"No usable fold among {list(folds)}: model_fold*.pth and "
            "norm_fold*.pkl must both exist."
        )

    avg_logits = torch.stack(all_logits).mean(dim=0).numpy()
    np.save("submission", avg_logits)
    print(f"Saved: submission.npy (shape={avg_logits.shape})")

In [20]:
fold = 0 

ckpt = torch.load(f"checkpoint_fold{fold}.pt")

# Take the fitted normaliser out of the checkpoint and pickle it on its own
norm = ckpt['norm']
with open(f"norm_fold{fold}.pkl", "wb") as f:
    pickle.dump(norm, f)

print(f"norm_fold{fold}.pkl saved successfully.")


norm_fold0.pkl saved successfully.


In [24]:
fold = 1

ckpt = torch.load(f"checkpoint_fold{fold}.pt")

# Take the fitted normaliser out of the checkpoint and pickle it on its own
norm = ckpt['norm']
with open(f"norm_fold{fold}.pkl", "wb") as f:
    pickle.dump(norm, f)

print(f"norm_fold{fold}.pkl saved successfully.")


norm_fold1.pkl saved successfully.


In [29]:
if __name__ == '__main__':
    # Ensemble folds 0 and 1 - the two folds whose norm_fold*.pkl files
    # were exported from their checkpoints in the cells above.
    predict_ensemble_tta(folds=[0, 1])

TTA Fold 0:   0%|          | 0/117 [00:00<?, ?it/s]

TTA Fold 1:   0%|          | 0/117 [00:00<?, ?it/s]

Saved: submission.npy (shape=(59400, 5))


## 提出方法

以下の3点をzip化し，Omnicampusの「最終課題 (EEG)」から提出してください．

- `submission.npy`
- `model_last.pt`や`model_best.pt`など，テストに使用した重み（拡張子は`.pt`のみ）
- 本Colab Notebook

In [30]:
from zipfile import ZipFile

# NOTE: despite their names, these two are the pickled normalisers, not
# model weights.  They are still shipped because inference needs them.
model_path1 = "norm_fold0.pkl"
model_path2 = "norm_fold1.pkl"
# Weights of the folds used for the prediction.  The submission
# instructions ask for the weights used at test time, with a `.pt`
# extension; the original archive did not contain them at all.
weight_paths = ["model_fold0.pth", "model_fold1.pth"]
notebook_path = "optuna.ipynb"

with ZipFile("submission.zip", "w") as zf:
    zf.write("submission.npy")
    for weight_path in weight_paths:
        # Stored under a `.pt` name; the file on disk keeps its `.pth` name.
        zf.write(weight_path, arcname=weight_path[:-len(".pth")] + ".pt")
    zf.write(model_path1)
    zf.write(model_path2)
    zf.write(notebook_path)