<br/><font size=6>4-1-2 GRU</font><br/>

In [1]:
import time
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.utils.data as Data
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score,accuracy_score,precision_score,recall_score,f1_score,classification_report

import myimporter
from BCI_functions import *  # BCI_functions.ipynb contains some functions we might use multiple times in this tutorial
import warnings
warnings.filterwarnings('ignore')

importing Jupyter notebook from BCI_functions.ipynb


In [2]:
dataset_1 = np.load('1.npy')
print('dataset_1 shape:', dataset_1.shape)

# check if a GPU is available
with_gpu = torch.cuda.is_available()
if with_gpu:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('We are using %s now.' %device)

# remove instance with label==10 (rest)
removed_label = [2,3,4,5,6,7,8,9,10]  #2,3,4,5,
for ll in removed_label:
    id = dataset_1[:, -1]!=ll
    dataset_1 = dataset_1[id]

dataset_1 shape: (259520, 65)
We are using cpu now.


In [3]:
# data segmentation
n_class = int(11-len(removed_label))  # 0~9 classes ('10:rest' is not considered)
no_feature = 64  # the number of the features
segment_length = 16  # selected time window; 16=160*0.1
LR = 0.001  # learning rate
EPOCH = 101
n_hidden = 64  # number of neurons in hidden layer
l2 = 0.005  # the coefficient of l2-norm regularization

data_seg = extract(dataset_1, n_classes=n_class, n_fea=no_feature, time_window=segment_length, moving=(segment_length/2))  # 50% overlapping
print('After segmentation, the shape of the data:', data_seg.shape)

# split training and test data
no_longfeature = no_feature*segment_length
data_seg_feature = data_seg[:, :no_longfeature]
data_seg_label = data_seg[:, no_longfeature:no_longfeature+1]
train_feature, test_feature, train_label, test_label = train_test_split(data_seg_feature, data_seg_label, shuffle=True)

# normalization
# before normalize reshape data back to raw data shape
train_feature_2d = train_feature.reshape([-1, no_feature])
test_feature_2d = test_feature.reshape([-1, no_feature])

scaler1 = StandardScaler().fit(train_feature_2d)
train_fea_norm1 = scaler1.transform(train_feature_2d) # normalize the training data
test_fea_norm1 = scaler1.transform(test_feature_2d) # normalize the test data
print('After normalization, the shape of training feature:', train_fea_norm1.shape,
      '\nAfter normalization, the shape of test feature:', test_fea_norm1.shape)

# after normalization, reshape data to 3d
train_fea_norm1 = train_fea_norm1.reshape([-1, segment_length, no_feature])
test_fea_norm1 = test_fea_norm1.reshape([-1, segment_length, no_feature])
print('After reshape, the shape of training feature:', train_fea_norm1.shape,
      '\nAfter reshape, the shape of test feature:', test_fea_norm1.shape)

BATCH_size = test_fea_norm1.shape[0] # use test_data as batch size

After segmentation, the shape of the data: (2440, 1025)
After normalization, the shape of training feature: (29280, 64) 
After normalization, the shape of test feature: (9760, 64)
After reshape, the shape of training feature: (1830, 16, 64) 
After reshape, the shape of test feature: (610, 16, 64)


In [4]:
# feed data into dataloader
train_fea_norm1 = torch.tensor(train_fea_norm1).to(device)
train_label = torch.tensor(train_label.flatten()).to(device)
train_data = Data.TensorDataset(train_fea_norm1, train_label)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_size, shuffle=False)

test_fea_norm1 = torch.tensor(test_fea_norm1).to(device)
test_label = torch.tensor(test_label.flatten()).to(device)

In [5]:
# classifier
class GRU(nn.Module):
    def __init__(self):
        super(GRU, self).__init__()

        self.gru_layer = nn.GRU(
            input_size=no_feature,
            hidden_size=n_hidden,
            num_layers=2,
            bias=True,
            batch_first=True,       # input & output will has batch size as 1s dimension. e.g. (batch, segment_length, no_feature)
        )

        self.out = nn.Linear(n_hidden, n_class)

    def forward(self, x):
        r_out, (h_n, h_c) = self.gru_layer(x.float(), None)
        r_out = F.dropout(r_out, 0.3)
        test_output = self.out(r_out[:, -1, :]) # choose r_out at the last time step
        return test_output

gru = GRU()
gru.to(device)
print(gru)

optimizer = torch.optim.Adam(gru.parameters(), lr=LR, weight_decay=l2)   # optimize all parameters
loss_func = nn.CrossEntropyLoss()

GRU(
  (gru_layer): GRU(64, 64, num_layers=2, batch_first=True)
  (out): Linear(in_features=64, out_features=2, bias=True)
)


In [6]:
best_acc = []
best_auc = []

# training and testing
start_time = time.perf_counter()
for epoch in range(EPOCH):
    for step, (train_x, train_y) in enumerate(train_loader):

        output = gru(train_x)  # GRU output of training data
        loss = loss_func(output, train_y.long())  # cross entropy loss
        optimizer.zero_grad()  # clear gradients for this training step
        loss.backward()  # backpropagation, compute gradients
        optimizer.step()  # apply gradients

    if epoch % 10 == 0:
        test_output = gru(test_fea_norm1)  # GRU output of test data
        test_loss = loss_func(test_output, test_label.long())

        test_y_score = one_hot(test_label.data.cpu().numpy())
        pred_score = F.softmax(test_output, dim=1).data.cpu().numpy()  # normalize the output
        auc_score = roc_auc_score(test_y_score, pred_score)

        pred_y = torch.max(test_output, 1)[1].data.cpu().numpy()
        pred_train = torch.max(output, 1)[1].data.cpu().numpy()

        test_acc = accuracy_score(test_label.data.cpu().numpy(), pred_y)
        train_acc = accuracy_score(train_y.data.cpu().numpy(), pred_train)

        print('Epoch: ', epoch, '|train loss: %.4f' % loss.data.item(),
              ' train ACC: %.4f' % train_acc, '| test loss: %.4f' % test_loss.item(),
              'test ACC: %.4f' % test_acc, '| AUC: %.4f' % auc_score)
        best_acc.append(test_acc)
        best_auc.append(auc_score)
current_time = time.perf_counter()
running_time = current_time - start_time

print(classification_report(test_label.data.numpy(), pred_y))
print('BEST TEST ACC: {}, AUC: {}'.format(max(best_acc), max(best_auc)))
print("Total Running Time: {} seconds".format(round(running_time, 2)))

Epoch:  0 |train loss: 0.7043  train ACC: 0.4508 | test loss: 0.6971 test ACC: 0.4770 | AUC: 0.5269
Epoch:  10 |train loss: 0.6290  train ACC: 0.6770 | test loss: 0.6410 test ACC: 0.6279 | AUC: 0.7039
Epoch:  20 |train loss: 0.3093  train ACC: 0.8852 | test loss: 0.3304 test ACC: 0.8508 | AUC: 0.9324
Epoch:  30 |train loss: 0.1876  train ACC: 0.9426 | test loss: 0.2213 test ACC: 0.9180 | AUC: 0.9657
Epoch:  40 |train loss: 0.1070  train ACC: 0.9705 | test loss: 0.1747 test ACC: 0.9344 | AUC: 0.9757
Epoch:  50 |train loss: 0.0779  train ACC: 0.9836 | test loss: 0.1590 test ACC: 0.9410 | AUC: 0.9794
Epoch:  60 |train loss: 0.0576  train ACC: 0.9885 | test loss: 0.1461 test ACC: 0.9541 | AUC: 0.9815
Epoch:  70 |train loss: 0.0672  train ACC: 0.9869 | test loss: 0.2736 test ACC: 0.8852 | AUC: 0.9594
Epoch:  80 |train loss: 0.0477  train ACC: 0.9918 | test loss: 0.1457 test ACC: 0.9492 | AUC: 0.9834
Epoch:  90 |train loss: 0.0384  train ACC: 0.9934 | test loss: 0.1388 test ACC: 0.9475 | AUC