<br/><font size=6>4-3 GIN</font><br/>

In [1]:
import time
import numpy as np
from scipy.sparse import coo_matrix
import torch
from torch.nn import Sequential, Linear, ReLU
from torch_geometric.nn import GINConv, global_add_pool
import torch.utils.data as Data
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score,accuracy_score,precision_score,recall_score,f1_score,classification_report
from sklearn.preprocessing import StandardScaler

import myimporter
from BCI_functions import *  # BCI_functions.ipynb contains some functions we might use multiple times in this tutorial
import warnings
warnings.filterwarnings('ignore')

importing Jupyter notebook from BCI_functions.ipynb


In [2]:
# np.random.seed(0)

dataset_1 = np.load('1.npy')
print('dataset_1 shape:', dataset_1.shape)

# check if a GPU is available
with_gpu = torch.cuda.is_available()
if with_gpu:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# remove instance with label==10 (rest)
removed_label = [2,3,4,5,6,7,8,9,10]  #2,3,4,5,
for ll in removed_label:
    id = dataset_1[:, -1]!=ll
    dataset_1 = dataset_1[id]

dataset_1 shape: (259520, 65)


In [3]:
# data segmentation
n_class = int(11-len(removed_label))  # 0~9 classes ('10:rest' is not considered)
no_feature = 64  # the number of the features
segment_length = 16  # selected time window; 16=160*0.1
LR = 0.001  # learning rate
EPOCH = 101

data_seg = extract(dataset_1, n_classes=n_class, n_fea=no_feature, time_window=segment_length, moving=(segment_length/2))  # 50% overlapping
print('After segmentation, the shape of the data:', data_seg.shape)

# split training and test data
data_seg_feature = data_seg[:, :1024]
data_seg_label = data_seg[:, 1024:1025]
train_feature, test_feature, train_label, test_label = train_test_split(data_seg_feature, data_seg_label, shuffle=True)

# normalization
# before normalize reshape data back to raw data shape
train_feature_2d = train_feature.reshape([-1, no_feature])
test_feature_2d = test_feature.reshape([-1, no_feature])

scaler1 = StandardScaler().fit(train_feature_2d)
train_fea_norm1 = scaler1.transform(train_feature_2d) # normalize the training data
test_fea_norm1 = scaler1.transform(test_feature_2d) # normalize the test data
print('After normalization, the shape of training feature:', train_fea_norm1.shape,
      '\nAfter normalization, the shape of test feature:', test_fea_norm1.shape)

# after normalization, reshape data to 3d in order to feed in to LSTM
train_fea_norm1 = train_fea_norm1.reshape([-1, segment_length, no_feature])
test_fea_norm1 = test_fea_norm1.reshape([-1, segment_length, no_feature])
print('After reshape, the shape of training feature:', train_fea_norm1.shape,
      '\nAfter reshape, the shape of test feature:', test_fea_norm1.shape)

BATCH_size = test_fea_norm1.shape[0] # use test_data as batch size

After segmentation, the shape of the data: (2440, 1025)
After normalization, the shape of training feature: (29280, 64) 
After normalization, the shape of test feature: (9760, 64)
After reshape, the shape of training feature: (1830, 16, 64) 
After reshape, the shape of test feature: (610, 16, 64)


In [4]:
# feed data into dataloader
train_fea_norm1 = torch.tensor(train_fea_norm1).to(device)
train_label = torch.tensor(train_label.flatten()).to(device)
train_data = Data.TensorDataset(train_fea_norm1, train_label)
train_loader = Data.DataLoader(dataset=train_data, batch_size=BATCH_size, shuffle=False)

test_fea_norm1 = torch.tensor(test_fea_norm1).to(device)
test_label = torch.tensor(test_label.flatten()).to(device)

# Create adjacency matrix (edge index). Here we take complete graph as an example (all nodes are connected to each other)
# You may design your own edge index in your research topic.
edge_data = torch.ones([64, 64])  # initialize edge index
edge_index = coo_matrix(edge_data)
edge_index = np.vstack((edge_index.row, edge_index.col))
edge_index = torch.from_numpy(edge_index).to(torch.int64).to(device)

In [5]:
class GIN(torch.nn.Module):
    def __init__(self):
        super(GIN, self).__init__()

        num_features = 16  # dimension of features for each node. In our case, it's the time-steps
        dim = 32  # dimension of hidden representations
        self.dim = dim

        nn1 = Sequential(Linear(num_features, dim), ReLU(), Linear(dim, dim))
        self.conv1 = GINConv(nn1)
        self.bn1 = torch.nn.BatchNorm1d(dim)

        nn2 = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim))
        self.conv2 = GINConv(nn2)
        self.bn2 = torch.nn.BatchNorm1d(dim)

        nn3 = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim))
        self.conv3 = GINConv(nn3)
        self.bn3 = torch.nn.BatchNorm1d(dim)

        self.fc1 = Linear(dim, dim)
        self.fc2 = Linear(dim, 2)

    def forward(self, x, batch, edge_index=None):

        x = x.reshape([-1, 16])

        x = F.relu(self.conv1(x.float(), edge_index))
        x = self.bn1(x)
        x = F.relu(self.conv2(x, edge_index))
        x = self.bn2(x)
        x = F.relu(self.conv3(x, edge_index))
        x = self.bn3(x)

        x = x.view(batch, 64, self.dim)
        x = x.sum(dim=1)

        x = F.dropout(x, p=0.3, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=-1)

model = GIN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_func = nn.CrossEntropyLoss()

In [6]:
best_acc = []
best_auc = []

# training and testing
start_time = time.perf_counter()
for epoch in range(EPOCH):
    for step, (train_x, train_y) in enumerate(train_loader):
        output = model(train_x,  BATCH_size, edge_index)
        loss = loss_func(output, train_y.long())  # cross entropy loss
        optimizer.zero_grad()  # clear gradients for this training step
        loss.backward()  # backpropagation, compute gradients
        optimizer.step()  # apply gradients

    if epoch % 10 == 0:
        test_output = model(test_fea_norm1,  BATCH_size, edge_index)
        test_loss = loss_func(test_output, test_label.long())

        test_y_score = one_hot(test_label.data.cpu().numpy())  # .cup() can be removed if your device is cpu.
        pred_score = F.softmax(test_output, dim=1).data.cpu().numpy()  # normalize the output
        auc_score = roc_auc_score(test_y_score, pred_score)

        pred_y = torch.max(test_output, 1)[1].data.cpu().numpy()
        pred_train = torch.max(output, 1)[1].data.cpu().numpy()

        test_acc = accuracy_score(test_label.data.cpu().numpy(), pred_y)
        train_acc = accuracy_score(train_y.data.cpu().numpy(), pred_train)


        print('Epoch: ', epoch, '|train loss: %.4f' % loss.item(),
              ' train ACC: %.4f' % train_acc, '| test loss: %.4f' % test_loss.item(),
              'test ACC: %.4f' % test_acc, '| AUC: %.4f' % auc_score)
        best_acc.append(test_acc)
        best_auc.append(auc_score)

current_time = time.perf_counter()
running_time = current_time - start_time
print(classification_report(test_label.data.cpu().numpy(), pred_y))
print('BEST TEST ACC: {}, AUC: {}'.format(max(best_acc), max(best_auc)))
print("Total Running Time: {} seconds".format(round(running_time, 2)))

Epoch:  0 |train loss: 5.6759  train ACC: 0.5492 | test loss: 8.4109 test ACC: 0.4656 | AUC: 0.4544
Epoch:  10 |train loss: 0.6644  train ACC: 0.5656 | test loss: 0.6863 test ACC: 0.6049 | AUC: 0.6387
Epoch:  20 |train loss: 0.5958  train ACC: 0.6705 | test loss: 0.6269 test ACC: 0.6656 | AUC: 0.7123
Epoch:  30 |train loss: 0.5770  train ACC: 0.6656 | test loss: 0.5820 test ACC: 0.6721 | AUC: 0.7575
Epoch:  40 |train loss: 0.5079  train ACC: 0.7279 | test loss: 0.5016 test ACC: 0.7246 | AUC: 0.8272
Epoch:  50 |train loss: 0.4735  train ACC: 0.7410 | test loss: 0.4919 test ACC: 0.7557 | AUC: 0.8414
Epoch:  60 |train loss: 0.3942  train ACC: 0.8197 | test loss: 0.4063 test ACC: 0.8230 | AUC: 0.8886
Epoch:  70 |train loss: 0.3929  train ACC: 0.8246 | test loss: 0.3904 test ACC: 0.8230 | AUC: 0.9096
Epoch:  80 |train loss: 0.2726  train ACC: 0.8754 | test loss: 0.3282 test ACC: 0.8574 | AUC: 0.9369
Epoch:  90 |train loss: 0.2753  train ACC: 0.8721 | test loss: 0.2696 test ACC: 0.8869 | AUC