In [1]:
from scipy.io import loadmat
from glob import glob
from collections import Counter
import numpy as np
import torch
from sklearn.preprocessing import normalize
from torch import nn
import torch.nn.functional as F
from torch import optim
from time import time
from torch.utils.tensorboard import SummaryWriter
import os

In [2]:
def split_signal(data):
    l = len(data)
    splited_data = []
    l -= 128*5 # skip first 5 seconds as well as the first clip in the Ext_annotation, Data sampled in 128Hz
    while l-20*128>0:
        splited_data.append(data[-l:-l+128*20])
        l -= 128*20
    splited_data.append(data[-128*20:]) # split the last clip according to the docs
    return splited_data
        

In [3]:
def extract_ext_annotation(data):
    ext_annotation = [] # extract labels_ext_annotation without the first clip according to the 
    for video_ext_annotation in data:
        ext_annotation += list(video_ext_annotation[1:, 1:])
    return ext_annotation

In [10]:
files = glob("./data_preprocessed/*/*.mat")
files.sort()

if os.path.isfile('GSR_DATA.npy') and os.path.isfile('ground_truth.npy'):
    print("File already exist, loading...")
    GSR_DATA = np.load('GSR_DATA.npy')
    ground_truth = np.load('ground_truth.npy')
else:
    print("Extracting data...")
    GSR_DATA = []
    ground_truth = []
    
    for f in files:
        if '08' not in f and '24' not in f and '28' not in f:
            part_data = loadmat(f) # load participant's all preprocessed physiological data.
            _, video_iter =  part_data['joined_data'].shape
            group_flag = '07' in f or '01' in f or '02' in f or '16' in f or\
                '15' in f or '11' in f or '12' in f or '10' in f or\
                '06' in f or '32' in f or '04' in f or '03' in f or\
                '29' in f or '5' in f or '27' in f or '21' in f or\
                '18' in f or '14' in f or '17' in f or '22' in f          
            if group_flag:
                video_iter -=4
                part_ext_annotation = part_data['labels_ext_annotation'][0,:-4]# load participant's labels ext_annotation
            else:
                part_ext_annotation = part_data['labels_ext_annotation'][0,:]# load participant's labels ext_annotation
            ground_truth += extract_ext_annotation(part_ext_annotation)
                
            for idx in range(video_iter):
                phys_data = part_data['joined_data'][0, idx] # the preprocessed physiological data of #idx video (not videoID)
                part_gsr_data = phys_data[:,16]
                GSR_DATA += split_signal(part_gsr_data)

    GSR_DATA = np.array(GSR_DATA)
    ground_truth = np.array(ground_truth)
    np.save("GSR_DATA" ,GSR_DATA)
    np.save("ground_truth", ground_truth)

File already exist, loading...


In [11]:
GSR_DATA.shape

(6516, 2560)

In [6]:
class GSRDataset(torch.utils.data.Dataset):
    # Create customized dataset
    def __init__(self, GSR, ground_truth=None, transforms=None):
        self.X = GSR
        self.y = ground_truth
        self.transforms = transforms
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, i): 
        X = torch.Tensor(self.X[i,:]).unsqueeze(0)
        y = torch.Tensor(self.y[i,:])
        return X, y

In [7]:

# gsr_norm = normalize(GSR_DATA, norm='l2')
# gsr_norm = GSR_DATA/1000
dataset = GSRDataset(GSR_DATA, ground_truth)
test_split = 0.3
num_data = len(dataset)
num_train = int(num_data*test_split)
num_test = num_data - num_train
train_set, test_set = torch.utils.data.random_split(dataset, [num_train, num_test])

train_data = torch.utils.data.DataLoader(train_set, batch_size=128, shuffle=True, num_workers=2)
test_data = torch.utils.data.DataLoader(test_set, batch_size=128, shuffle=False, num_workers=2)

In [8]:
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv1d(1, 64, kernel_size=11, stride=5)
        self.conv2 = nn.Conv1d(64, 128, kernel_size=7,stride=3)
        self.conv3 = nn.Conv1d(128, 256, kernel_size=5)
        
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 2)
        
        
        self.pool = nn.MaxPool1d(4)
        self.dropout = nn.Dropout(0.5)
        
        
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.dropout(x)
        x = self.pool(F.relu(self.conv2(x)))
        x = self.dropout(x)
        x = self.pool(F.relu(self.conv3(x)))
        x = self.dropout(x)
#         print(x.shape)
        x = x.view(-1, self.flatten_feature(x))
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.fc4(x)

        return x


    def flatten_feature(self, x):
        num_feature = 1
        for d in x.size()[1:]:
            num_feature *= d
        return num_feature

In [9]:
def test(model, inputs, device, criterion=None):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for data in inputs:
            X, y = data[0].to(device), data[1].to(device)
            outputs = model(X)
            total_loss += criterion(outputs, y)
    return float(total_loss/len(inputs))

In [12]:
writer = SummaryWriter('./runs/GSRmodel_CNN_3layers')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = Model()
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.1)
model.to(device)
total_epoch = 1000
inputs = next(iter(train_data))[0].to(device)
writer.add_graph(model, inputs)
old_test_loss = 0
for epoch in range(1, total_epoch+1):
    start = time()
    for i, data in enumerate(train_data):
        
        model.train()
        # get the inputs; data is a list of [inputs, labels]
        inputs = data[0].to(device)
        labels = data[1].to(device)
        # zero the parameter gradients

        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    train_loss = test(model, train_data, device, criterion)
    test_loss = test(model, test_data, device, criterion)
    writer.add_scalar('Test/Loss', test_loss, epoch)
    writer.add_scalar('Train/Loss', train_loss, epoch)
    if abs(old_test_loss-test_loss) < 10e-3:
        print("Test loss converging\nEarly Stop")
        print("### Epoch [{}/{}]\t\ttrain loss: {}\t\ttest loss:{}"
              .format(epoch,total_epoch, round(train_loss, 4), round(test_loss, 4)))
        break
    if epoch % 20 == 0:
        print("### Epoch [{}/{}]\t\ttrain loss: {}\t\ttest loss:{}"
              .format(epoch,total_epoch, round(train_loss, 4), round(test_loss, 4)))

### Epoch [20/1000]		train loss: 3.2007		test loss:2.4524
### Epoch [40/1000]		train loss: 0.0558		test loss:100.4619
### Epoch [60/1000]		train loss: 18.9487		test loss:0.1324
### Epoch [80/1000]		train loss: 5.9309		test loss:0.638
### Epoch [100/1000]		train loss: 3.3108		test loss:0.1953
### Epoch [120/1000]		train loss: 55.117		test loss:3.4188
### Epoch [140/1000]		train loss: 114.9206		test loss:8.1224
### Epoch [160/1000]		train loss: 58.4407		test loss:2.6598
### Epoch [180/1000]		train loss: 0.0621		test loss:0.0631
### Epoch [200/1000]		train loss: 0.2006		test loss:0.0741
### Epoch [220/1000]		train loss: 48.1982		test loss:0.1404
### Epoch [240/1000]		train loss: 1.079		test loss:0.1733
### Epoch [260/1000]		train loss: 24.9356		test loss:1.274
### Epoch [280/1000]		train loss: 0.1711		test loss:0.1733
### Epoch [300/1000]		train loss: 0.2286		test loss:0.2265
### Epoch [320/1000]		train loss: 0.267		test loss:0.2683
### Epoch [340/1000]		train loss: 0.3046		test loss:0.30