In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so the project folder under MyDrive is reachable.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!cd /content/drive/MyDrive/11777-Project-xs

In [2]:
import numpy as np
import scipy as sp
from tqdm import tqdm
import torch

import sys
sys.path.append('/content/drive/MyDrive/11777-Project-xs/LGG/')
from prepare_data_DEAP import PrepareData
from cross_validation import CrossValidation
from train_model import *

In [3]:
# Default parameters, later wrapped into an attribute-style object.
default_args = dict(
    # --- dataset and labels ---
    dataset='DEAP',
    data_path='./data_preprocessed_python/',
    subjects=22,
    num_class=2,
    label_type='V',
    # --- segmentation of the recordings ---
    segment=4,
    overlap=0,
    sampling_rate=128,
    scale_coefficient=1,
    input_shape=(1, 32, 512),
    data_format='eeg',
    # --- optimisation ---
    random_seed=2021,
    max_epoch=200,
    patient=20,
    patient_cmb=8,
    max_epoch_cmb=20,
    batch_size=64,
    learning_rate=1e-3,
    step_size=5,
    dropout=0.5,
    LS=True,
    LS_rate=0.1,
    # --- checkpoints ---
    save_path='/content/drive/MyDrive/11777-Project-xs/LGG/save/',
    load_path='/content/drive/MyDrive/11777-Project-xs/LGG/save/max-acc.pth',
    load_path_final='/content/drive/MyDrive/11777-Project-xs/LGG/save/final_model.pth',
    gpu='0',
    save_model=True,
    # --- network ---
    model='LGGNet',
    pool=16,
    pool_step_rate=0.25,
    T=64,
    graph_type='hem',
    hidden=32,
)

In [4]:
######## Reproduce the result using the saved model ######
# parser.add_argument('--reproduce', action='store_true')
# args = parser.parse_args()
class Args:
    """Attribute-style view of a parameter dictionary.

    Each key of ``param_dict`` becomes an instance attribute holding the
    corresponding value, standing in for the namespace an argument parser
    would otherwise provide.
    """

    def __init__(self, param_dict):
        for name in param_dict:
            setattr(self, name, param_dict[name])

# Replace the relative default with the absolute location of the data on Drive.
default_args['data_path'] = '/content/drive/MyDrive/11777-Project-xs/data_preprocessed_python/'
# An argparse flag declared with action='store_true' yields a boolean, so store
# True here; the string 'store_true' only worked because a non-empty string is truthy.
default_args['reproduce'] = True
# Create an Args object with default parameters
args = Args(default_args)


In [5]:
# only need to run once
# Run the project's data preparation for subject indices 0 .. args.subjects - 1.
# `split` and `expand` are forwarded unchanged to PrepareData.run; their exact
# effect is defined in prepare_data_DEAP (the log reports data split into
# (40, 15, 1, 32, 512) per subject).
sub_to_run = np.arange(args.subjects)
prepdt = PrepareData(args)
prepdt.run(sub_to_run, split=True, expand=True)

data:(40, 32, 7680) label:(40, 4)
Binary label generated!
The data and label are split: Data shape:(40, 15, 1, 32, 512) Label:(40, 15)
Data and label prepared!
data:(40, 15, 1, 32, 512) label:(40, 15)
----------------------
data:(40, 32, 7680) label:(40, 4)
Binary label generated!
The data and label are split: Data shape:(40, 15, 1, 32, 512) Label:(40, 15)
Data and label prepared!
data:(40, 15, 1, 32, 512) label:(40, 15)
----------------------
data:(40, 32, 7680) label:(40, 4)
Binary label generated!
The data and label are split: Data shape:(40, 15, 1, 32, 512) Label:(40, 15)
Data and label prepared!
data:(40, 15, 1, 32, 512) label:(40, 15)
----------------------
data:(40, 32, 7680) label:(40, 4)
Binary label generated!
The data and label are split: Data shape:(40, 15, 1, 32, 512) Label:(40, 15)
Data and label prepared!
data:(40, 15, 1, 32, 512) label:(40, 15)
----------------------
data:(40, 32, 7680) label:(40, 4)
Binary label generated!
The data and label are split: Data shape:(40, 

In [6]:
from sklearn.model_selection import train_test_split

# One id per (participant, trial) pair: 40 trials for each of 22 participants.
participant_trial_id = np.arange(40*22)

# First hold out 20% of the ids for testing ...
participant_trial_id_train, participant_trial_id_test = train_test_split(
    participant_trial_id,
    test_size=0.2,
    random_state=42,
)
# ... then carve 20% of the remainder off as the validation set.
participant_trial_id_tra, participant_trial_id_val = train_test_split(
    participant_trial_id_train,
    test_size=0.2,
    random_state=42,
)
print(len(participant_trial_id_tra), len(participant_trial_id_val), len(participant_trial_id_test))


563 141 176


In [7]:
# Participant-level split: whole participants go to train, validation or test.
participant_id = np.arange(22)

# First hold out 20% of the participants for testing ...
participant_id_train, participant_id_test = train_test_split(
    participant_id,
    test_size=0.2,
    random_state=42,
)
# ... then carve 20% of the remainder off as the validation set.
participant_id_tra, participant_id_val = train_test_split(
    participant_id_train,
    test_size=0.2,
    random_state=42,
)
print(len(participant_id_tra), len(participant_id_val), len(participant_id_test))

13 4 5


In [9]:
## Participant 1-22 (22): w/ facial recording
## Participant 23-32 (10): w/o facial recording
cv = CrossValidation(args)
# Layout: (subject, trial, segment, cnn_channel, EEG_channel, data_points)
Data = np.zeros((22, 40, 15, 1, 32, 512))
Label = np.zeros((22, 40, 15))

for sub in range(22):
    data, label = cv.load_per_subject(sub)
    # One block assignment per subject replaces the former element-wise
    # trial/segment loops (600 Python-level copies per subject); the slices
    # keep the same "first 40 trials, first 15 segments" selection.
    Data[sub] = data[:40, :15]
    Label[sub] = label[:40, :15]

>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)


### Debug: LGGNet instantiation and data-flattening sanity checks

In [7]:
from networks import LGGNet

# Channel names in the order they appear along the EEG-channel axis of the input.
original_order = [
    'Fp1', 'AF3', 'F3', 'F7', 'FC5', 'FC1', 'C3', 'T7', 'CP5', 'CP1', 'P3', 'P7', 'PO3', 'O1', 'Oz',
    'Pz', 'Fp2', 'AF4', 'Fz', 'F4', 'F8', 'FC6', 'FC2', 'Cz', 'C4', 'T8', 'CP6', 'CP2', 'P4', 'P8',
    'PO4', 'O2',
]

# Define proper channel orders for the local-global graphs in LGGNet.
# Please refer to the three graph definitions (general, frontal, hemisphere).
graph_general_DEAP = [
    ['Fp1', 'Fp2'],
    ['AF3', 'AF4'],
    ['F3', 'F7', 'Fz', 'F4', 'F8'],
    ['FC5', 'FC1', 'FC6', 'FC2'],
    ['C3', 'Cz', 'C4'],
    ['CP5', 'CP1', 'CP2', 'CP6'],
    ['P7', 'P3', 'Pz', 'P4', 'P8'],
    ['PO3', 'PO4'],
    ['O1', 'Oz', 'O2'],
    ['T7'],
    ['T8'],
]

graph_idx = graph_general_DEAP   # The general graph definition for DEAP is used as an example.

# Number of channels in every local graph, in graph order.
num_chan_local_graph = [len(local_graph) for local_graph in graph_idx]
# Positions in `original_order` of the channels, listed graph by graph.
idx = [original_order.index(name) for local_graph in graph_idx for name in local_graph]

data = torch.randn(1, 1, 32, 512)  # (batch_size=1, cnn_channel=1, EEG_channel=32, data_points=512)
# Reorder the EEG-channel axis so channels of the same local graph are adjacent; shape is unchanged.
data = data[:, :, idx, :]

# Instantiate LGGNet with settings matching the (1, 1, 32, 512) random input
# created earlier in this cell.
LGG = LGGNet(
    num_classes=2,
    input_size=(1, 32, 512),
    sampling_rate=128,
    num_T=64,  # num_T controls the number of temporal filters
    out_graph=32,
    pool=16,
    pool_step_rate=0.25,
    # Number of channels in each local graph, in the order used to build `idx`.
    idx_graph=num_chan_local_graph,
    dropout_rate=0.5
)

In [None]:
# `Data` is stored as (subject, trial, segment, ...), while the participant_trial
# ids enumerate (subject, trial) pairs as subject * 40 + trial. Merge the first
# two axes before indexing; indexing `Data` directly with ids up to 879 would be
# out of bounds on the 22-long subject axis. The reshape is a no-op if `Data`
# already has the flat (880, 15, ...) layout.
data_by_trial = Data.reshape(-1, *Data.shape[-4:])
selected_trials = data_by_trial[participant_trial_id_tra]
print(selected_trials.shape)
# Stack the segments of all selected trials along axis 0:
# (n_trials, 15, 1, 32, 512) -> (n_trials * 15, 1, 32, 512)
tmp = np.concatenate(selected_trials, axis=0)
print(tmp.shape)
tmp[2*563+7, 0, 0, :5]

In [19]:
# Row 14 of the flattened array must be segment 14 of the first selected trial.
# `Data` is viewed with the subject and trial axes merged so that the trial id
# (subject * 40 + trial) is a valid index.
tmp[14, 0, 0, :5], Data.reshape(-1, *Data.shape[-4:])[participant_trial_id_tra[0], 14, 0, 0, :5]

(array([-19.83967679,  -5.94046715, -12.88923806, -24.8860172 ,
        -13.84577025]),
 array([-19.83967679,  -5.94046715, -12.88923806, -24.8860172 ,
        -13.84577025]))

In [20]:
# With 15 segments per trial, row 15 of the flattened array must be segment 0
# of the second selected trial. `Data` is viewed with the subject and trial
# axes merged so that the trial id (subject * 40 + trial) is a valid index.
tmp[15, 0, 0, :5], Data.reshape(-1, *Data.shape[-4:])[participant_trial_id_tra[1], 0, 0, 0, :5]

(array([-16.4860614 ,  -8.09385624,  -4.91468391, -10.2296136 ,
        -16.06645091]),
 array([-16.4860614 ,  -8.09385624,  -4.91468391, -10.2296136 ,
        -16.06645091]))

### Train: one model on the participant-level split

In [10]:
def flatten_segments(arr):
    """Merge the leading (subject, trial, segment) axes of ``arr`` into one axis.

    Gives the same array as applying ``np.concatenate(..., axis=0)`` twice,
    but as a single reshape instead of two full copies.
    """
    return arr.reshape(-1, *arr.shape[3:])


# Select whole participants for each split, then flatten to one row per segment.
data_train = flatten_segments(Data[participant_id_tra])
data_val = flatten_segments(Data[participant_id_val])
label_train = flatten_segments(Label[participant_id_tra])
label_val = flatten_segments(Label[participant_id_val])
print(data_train.shape, data_val.shape, label_train.shape, label_val.shape)

data_test = flatten_segments(Data[participant_id_test])
label_test = flatten_segments(Label[participant_id_test])
print(data_test.shape, label_test.shape)

(7800, 1, 32, 512) (2400, 1, 32, 512) (7800,) (2400,)
(3000, 1, 32, 512) (3000,)


In [11]:
# data_train, data_val, label_train, label_val = \
#     Data[participant_trial_id_tra], Data[participant_trial_id_val], Label[participant_trial_id_tra], Label[participant_trial_id_val]
# data_train, data_val, label_train, label_val = \
#     np.concatenate(data_train, axis=0), np.concatenate(data_val, axis=0), np.concatenate(label_train, axis=0), np.concatenate(label_val, axis=0)
# print(data_train.shape, data_val.shape, label_train.shape, label_val.shape)

# Convert every split to tensors: float32 inputs, int64 class labels.
# torch.as_tensor accepts both NumPy arrays and tensors, so re-running this
# cell is harmless (torch.from_numpy raises once the names already hold tensors).
data_train = torch.as_tensor(data_train, dtype=torch.float32)
label_train = torch.as_tensor(label_train, dtype=torch.long)
data_val = torch.as_tensor(data_val, dtype=torch.float32)
label_val = torch.as_tensor(label_val, dtype=torch.long)
data_test = torch.as_tensor(data_test, dtype=torch.float32)
label_test = torch.as_tensor(label_test, dtype=torch.long)

In [13]:
# Train a single model on the pooled training participants and validate it on
# the validation participants. `subject` and `fold` are passed as plain strings
# (they show up as SUB:/FOLD: in the training log).
acc_val, F1_val = train(args=args,
                        data_train=data_train,
                        label_train=label_train,
                        data_val=data_val,
                        label_val=label_val,
                        subject='all_train',
                        fold='noFold')


using gpu: 0
epoch 1, loss=0.7343 acc=0.5555 f1=0.6090
epoch 1, val, loss=0.7120 acc=0.4983 f1=0.5348
ETA:32s/27m SUB:all_train FOLD:noFold
epoch 2, loss=0.6761 acc=0.5997 f1=0.6422
epoch 2, val, loss=0.7015 acc=0.5413 f1=0.6422
ETA:57s/24m SUB:all_train FOLD:noFold
epoch 3, loss=0.6533 acc=0.6285 f1=0.6653
epoch 3, val, loss=0.6906 acc=0.5683 f1=0.7020
ETA:1m/23m SUB:all_train FOLD:noFold
epoch 4, loss=0.6422 acc=0.6290 f1=0.6653
epoch 4, val, loss=0.7085 acc=0.5921 f1=0.7366
ETA:2m/23m SUB:all_train FOLD:noFold
epoch 5, loss=0.6154 acc=0.6683 f1=0.6945
epoch 5, val, loss=0.8386 acc=0.6088 f1=0.7564
ETA:2m/22m SUB:all_train FOLD:noFold
epoch 6, loss=0.6045 acc=0.6853 f1=0.7113
epoch 6, val, loss=0.6918 acc=0.5975 f1=0.7049
ETA:3m/22m SUB:all_train FOLD:noFold
epoch 7, loss=0.5703 acc=0.7181 f1=0.7406
epoch 7, val, loss=0.7852 acc=0.6104 f1=0.7395
ETA:3m/22m SUB:all_train FOLD:noFold
epoch 8, loss=0.5334 acc=0.7555 f1=0.7734
epoch 8, val, loss=0.7890 acc=0.5913 f1=0.7295
ETA:4m/22m SUB

In [46]:
from utils import *

def test(args, data, label, reproduce, subject, fold, path=None):
    """Evaluate a saved checkpoint on ``data``/``label`` and print the metrics.

    The checkpoint is chosen as follows:

    * ``reproduce`` truthy and ``path`` given: ``path``.
    * ``reproduce`` truthy and ``path`` is None:
      ``<args.save_path>/T_<T>_pool_<pool>/model_<data_format>_<label_type>/sub<subject>.pth``.
    * ``reproduce`` falsy: ``args.load_path_final`` (``path`` is ignored).

    Args:
        args: object providing ``random_seed``, ``batch_size``, ``save_path``,
            ``load_path_final``, ``data_format``, ``label_type``, ``T`` and
            ``pool``; it is also handed to ``set_up`` and ``get_model``.
        data: test inputs, passed to ``get_dataloader``.
        label: test labels, passed to ``get_dataloader``.
        reproduce: selects the checkpoint as described above.
        subject: subject identifier used to build the default checkpoint name.
        fold: accepted for interface compatibility; not used here.
        path: optional explicit checkpoint file.

    Returns:
        Tuple ``(acc, pred, act)``: the accuracy from ``get_metrics`` and the
        predictions / actual labels returned by ``predict``.
    """
    set_up(args)
    seed_all(args.random_seed)
    test_loader = get_dataloader(data, label, args.batch_size)

    model = get_model(args)
    if CUDA:
        model = model.cuda()
    loss_fn = nn.CrossEntropyLoss()
    print('myTest')

    # Resolve the checkpoint once instead of repeating the load in every branch.
    if not reproduce:
        checkpoint_path = args.load_path_final
    elif path is not None:
        checkpoint_path = path
    else:
        model_name_reproduce = 'sub' + str(subject) + '.pth'
        data_type = 'model_{}_{}'.format(args.data_format, args.label_type)
        experiment_setting = 'T_{}_pool_{}'.format(args.T, args.pool)
        checkpoint_path = os.path.join(args.save_path, experiment_setting, data_type, model_name_reproduce)

    # map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
    # runtime; load_state_dict copies the weights onto the model's own device.
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))

    loss, pred, act = predict(
        data_loader=test_loader, net=model, loss_fn=loss_fn
    )
    acc, f1, cm = get_metrics(y_pred=pred, y_true=act)
    print('>>> Test:  loss={:.4f} acc={:.4f} f1={:.4f}'.format(loss, acc, f1))
    return acc, pred, act


In [23]:
# Mean of the test labels; for 0/1 labels this is the share of class 1,
# i.e. the accuracy of always predicting that class.
label_test.float().mean()

tensor(0.5500)

In [15]:
# Evaluate the checkpoint stored as candidate.pth on the held-out test participants.
# An explicit `path` is given, so test() loads exactly this file when
# `reproduce` is truthy.
path = '/content/drive/MyDrive/11777-Project-xs/LGG/save/candidate.pth'
acc_test, pred, act = test(args=args, data=data_test, label=label_test,
                                               reproduce=args.reproduce,
                                               subject='all_train', fold='noFold', path=path)

using gpu: 0
myTest
>>> Test:  loss=0.6939 acc=0.4800 f1=0.6486


# Train user-specific models (one per subject)

In [32]:
import os
data_save_folder = '/content/drive/MyDrive/11777-Project-xs/data_face_seg_mean'
# Keep only the files whose name contains 'embed'.
face_embed_file_list = [e for e in os.listdir(data_save_folder) if 'embed' in e]

# Map subject number -> list of trial numbers that have an embedding file.
# Each file name must consist of exactly five '_'-separated fields: the first is
# one prefix character followed by the subject number, the second a
# five-character prefix followed by the trial number.
sub_trial_list = {}
for file in face_embed_file_list:
    sub_field, trial_field, _, _, _ = file.split('_')
    sub_no, trial_no = int(sub_field[1:]), int(trial_field[5:])
    sub_trial_list.setdefault(sub_no, []).append(trial_no)

for sub in range(1,23):
    n_trials = len(sub_trial_list[sub])
    print('sub {}, {} trials with facial video'.format(sub, n_trials))

sub 1, 40 trials with facial video
sub 2, 40 trials with facial video
sub 3, 39 trials with facial video
sub 4, 40 trials with facial video
sub 5, 39 trials with facial video
sub 6, 40 trials with facial video
sub 7, 40 trials with facial video
sub 8, 40 trials with facial video
sub 9, 40 trials with facial video
sub 10, 40 trials with facial video
sub 11, 37 trials with facial video
sub 12, 40 trials with facial video
sub 13, 40 trials with facial video
sub 14, 39 trials with facial video
sub 15, 40 trials with facial video
sub 16, 40 trials with facial video
sub 17, 40 trials with facial video
sub 18, 40 trials with facial video
sub 19, 40 trials with facial video
sub 20, 40 trials with facial video
sub 21, 40 trials with facial video
sub 22, 40 trials with facial video


In [57]:
CUDA = torch.cuda.is_available()
fold = 'noFold'
# Test accuracy of every subject-specific model, keyed by 1-based subject number
# (previously each iteration overwrote the result of the one before).
acc_test_per_sub = {}

for sub in range(22):
    print('='*48)
    # NOTE(review): constructed once per subject as before; hoist out of the loop
    # if the CrossValidation constructor turns out to be side-effect free.
    cv = CrossValidation(args)
    data, label = cv.load_per_subject(sub)

    # 0-based indices of the trials that have no facial-video embedding.
    trials_with_video = set(sub_trial_list[sub+1])
    trial_to_remove = [i-1 for i in range(1,41) if i not in trials_with_video]

    if len(trial_to_remove) > 0:
        mask = np.ones(data.shape[0], dtype=bool)
        mask[trial_to_remove] = False
        data = data[mask]
        label = label[mask]
    print(sub+1, trial_to_remove, data.shape[0])

    # Split by trial so that all segments of a trial stay in the same subset.
    trial_id = np.arange(data.shape[0])
    trial_id_train, trial_id_test = train_test_split(trial_id, test_size=0.2, random_state=42)
    trial_id_tra, trial_id_val = train_test_split(trial_id_train, test_size=0.2, random_state=42)
    print(trial_id_tra.shape[0], trial_id_val.shape[0], trial_id_test.shape[0])

    # (trial, segment, ...) -> (trial * segment, ...) for every subset.
    data_train = np.concatenate(data[trial_id_tra], axis=0)
    data_val = np.concatenate(data[trial_id_val], axis=0)
    label_train = np.concatenate(label[trial_id_tra], axis=0)
    label_val = np.concatenate(label[trial_id_val], axis=0)
    print(data_train.shape, data_val.shape, label_train.shape, label_val.shape)

    data_test = np.concatenate(data[trial_id_test], axis=0)
    label_test = np.concatenate(label[trial_id_test], axis=0)
    print(data_test.shape, label_test.shape)

    data_train = torch.from_numpy(data_train).float()
    label_train = torch.from_numpy(label_train).long()
    data_val = torch.from_numpy(data_val).float()
    label_val = torch.from_numpy(label_val).long()
    data_test = torch.from_numpy(data_test).float()
    label_test = torch.from_numpy(label_test).long()

    acc_val, F1_val = train(args=args,
                            data_train=data_train,
                            label_train=label_train,
                            data_val=data_val,
                            label_val=label_val,
                            subject=str(sub+1),
                            fold=fold)

    # Copy the weights stored as candidate.pth into the per-subject checkpoint
    # that test() looks up when no explicit path is given.
    model = get_model(args)
    if CUDA:
        model = model.cuda()
    # map_location='cpu' keeps the load working on CPU-only runtimes;
    # load_state_dict copies the weights onto the model's own device.
    model.load_state_dict(
        torch.load(os.path.join(args.save_path, 'candidate.pth'), map_location='cpu')
    )

    model_name_reproduce = 'sub' + str(sub+1) + '.pth'
    data_type = 'model_{}_{}'.format(args.data_format, args.label_type)
    experiment_setting = 'T_{}_pool_{}'.format(args.T, args.pool)
    save_path = os.path.join(args.save_path, experiment_setting, data_type)
    ensure_path(save_path)
    model_name_reproduce = os.path.join(save_path, model_name_reproduce)
    print(model_name_reproduce)
    torch.save(model.state_dict(), model_name_reproduce)

    # test() builds its own model and loads the checkpoint saved just above,
    # so reloading it into `model` here is unnecessary.
    acc_test, pred, act = test(args=args, data=data_test, label=label_test,
                               reproduce=args.reproduce,
                               subject=str(sub+1), fold=fold)
    acc_test_per_sub[sub+1] = acc_test

print('mean test acc over {} subjects: {:.4f}'.format(
    len(acc_test_per_sub), sum(acc_test_per_sub.values()) / len(acc_test_per_sub)))


>>> Data:(40, 15, 1, 32, 512) Label:(40, 15)
1 [] 40
25 7 8
(375, 1, 32, 512) (105, 1, 32, 512) (375,) (105,)
(120, 1, 32, 512) (120,)
using gpu: 0
epoch 1, loss=1.0065 acc=0.5200 f1=0.5082
epoch 1, val, loss=0.7366 acc=0.4286 f1=0.6000
ETA:1s/1m SUB:1 FOLD:noFold
epoch 2, loss=0.7018 acc=0.5947 f1=0.5682
epoch 2, val, loss=0.7751 acc=0.4286 f1=0.6000
ETA:2s/1m SUB:1 FOLD:noFold
epoch 3, loss=0.6111 acc=0.7040 f1=0.7056
epoch 3, val, loss=0.7745 acc=0.4286 f1=0.6000
ETA:3s/1m SUB:1 FOLD:noFold
epoch 4, loss=0.5380 acc=0.7413 f1=0.7087
epoch 4, val, loss=0.7858 acc=0.4286 f1=0.6000
ETA:5s/1m SUB:1 FOLD:noFold
epoch 5, loss=0.4757 acc=0.8320 f1=0.8364
epoch 5, val, loss=0.8239 acc=0.4286 f1=0.6000
ETA:6s/1m SUB:1 FOLD:noFold
epoch 6, loss=0.3758 acc=0.8987 f1=0.8920
epoch 6, val, loss=0.8817 acc=0.4286 f1=0.6000
ETA:7s/1m SUB:1 FOLD:noFold
epoch 7, loss=0.2719 acc=0.9760 f1=0.9749
epoch 7, val, loss=0.8855 acc=0.4286 f1=0.6000
ETA:8s/1m SUB:1 FOLD:noFold
epoch 8, loss=0.2349 acc=0.9973 f

In [53]:

# Debug check: show whether data_test currently holds a NumPy array or a tensor.
type(data_test)

numpy.ndarray

In [None]:
# Location of the candidate.pth checkpoint inside the project's save directory.
path = '/content/drive/MyDrive/11777-Project-xs/LGG/save/candidate.pth'