## Install Dependencies

In [1]:
# Mount Google Drive into the Colab runtime at /content/gdrive.
# Per the original author's note, the mount is meant for storing model-weight HDF5 files.
# NOTE(review): no visible cell reads from or writes to this mount — confirm it is still needed.
from google.colab import drive

drive.mount('/content/gdrive');

Mounted at /content/gdrive


In [2]:
import os

# Single seed shared by every random-number source used in this notebook.
seed_value = 42

# Export the seed as PYTHONHASHSEED for hash randomisation.
# NOTE(review): Python reads this variable at interpreter start-up, so setting it
# here presumably only affects child processes — confirm this is sufficient.
os.environ['PYTHONHASHSEED'] = str(seed_value)

In [3]:
pip install wfdb wget tqdm biosppy imbalanced-learn seaborn

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wfdb
  Downloading wfdb-4.1.0-py3-none-any.whl (159 kB)
[K     |████████████████████████████████| 159 kB 5.1 MB/s 
[?25hCollecting wget
  Downloading wget-3.2.zip (10 kB)
Collecting biosppy
  Downloading biosppy-1.0.0-py2.py3-none-any.whl (106 kB)
[K     |████████████████████████████████| 106 kB 56.5 MB/s 
Collecting shortuuid
  Downloading shortuuid-1.0.11-py3-none-any.whl (10 kB)
Collecting bidict
  Downloading bidict-0.22.0-py3-none-any.whl (36 kB)
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9674 sha256=43be420be5c74bd1fd560aa28064ac72f8f7b7974773ac5d882ae6435b9cfef4
  Stored in directory: /root/.cache/pip/wheels/bd/a8/c3/3cf2c14a1837a4e04bd98631724e81f33f462d86a1d895fae0
Successfully built wget
Installing collected packages: shortuuid, bidict, wget

In [4]:
pip install tensorflow-determinism

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow-determinism
  Downloading tensorflow-determinism-0.3.0.tar.gz (12 kB)
Building wheels for collected packages: tensorflow-determinism
  Building wheel for tensorflow-determinism (setup.py) ... [?25l[?25hdone
  Created wheel for tensorflow-determinism: filename=tensorflow_determinism-0.3.0-py3-none-any.whl size=9155 sha256=0c3b3e7f00e6f69ad2426c4d4d9b796b5fa2fd203b5dd180208367319e83f99f
  Stored in directory: /root/.cache/pip/wheels/bf/5e/dd/9670c2d20867bcff6eb19199822f6b32f16bbe01bea1cd35a8
Successfully built tensorflow-determinism
Installing collected packages: tensorflow-determinism
Successfully installed tensorflow-determinism-0.3.0


## Importing Libraries

In [5]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from glob import glob
import wget
import math
import zipfile
import wfdb as wf
import pickle
import sys
import datetime
import cv2
import random
import random as python_random
from scipy import signal
from scipy.signal import resample
from scipy.signal import find_peaks
from pathlib import Path
from sklearn.model_selection import train_test_split
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from imblearn.over_sampling import SMOTE

In [6]:
def reset_random_seeds():
    """Re-seed every random-number source used in the notebook from ``seed_value``.

    Sets the PYTHONHASHSEED and TF_DETERMINISTIC_OPS environment variables and
    seeds TensorFlow, NumPy's global generator and Python's ``random`` module.
    Takes no arguments and returns nothing.
    """
    os.environ.update({
        'PYTHONHASHSEED': str(seed_value),
        'TF_DETERMINISTIC_OPS': '1',
    })
    # Each library keeps its own generator, so every one is seeded explicitly.
    for seed_fn in (tf.random.set_seed, np.random.seed, random.seed):
        seed_fn(seed_value)

In [7]:
# Sanity check: seed each generator from the shared seed_value and print one draw from each,
# so the recorded output can be compared between runs.
np.random.seed(seed_value)
vec = np.random.randint(1, 10)
print(vec)
random.seed(seed_value)
print(random.random())
tf.random.set_seed(seed_value)
print(tf.random.uniform([1]))

7
0.6394267984578837
tf.Tensor([0.6645621], shape=(1,), dtype=float32)


## Extracting Data

In [8]:
# Make the local ./WESAD/ directory importable. The path is resolved against the
# current working directory and appended to sys.path only once, so re-running the
# cell does not add duplicate entries.
module = os.path.abspath('./WESAD/')
if module not in sys.path:
  sys.path.append(module)
# DataManager is a project-local module (not defined in this notebook); generateData() uses it
# to load each subject's recordings.
from DataManager import DataManager

## Data Preprocessing

In [9]:
# Configuration: set to True to make generateData() print per-subject diagnostics.
debug = False

In [10]:
# subject_ids = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]

In [11]:
# Leave-one-subject-out split: the subject(s) in test_subject are held out for testing
# and every other available subject id is used for training/validation.
test_subject = [3]
rest_subjects = [
    sid
    for sid in (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17)
    if sid not in test_subject
]

In [12]:
def _windows_for_state(ecg, class_id, source_rate=700, target_rate=256,
                       window_size=256, window_shift=256):
    """Resample one ECG recording and cut it into normalised, labelled windows.

    Parameters
    ----------
    ecg : array-like
        Recording for one condition; the signal is taken from the first row of
        ``ecg.T`` (assumes a 2-D array with the ECG in the first column — TODO confirm
        against DataManager).
    class_id : float
        Label value appended as the last element of every returned row.
    source_rate, target_rate : int
        Samples per second before and after resampling.
    window_size, window_shift : int
        Window length and hop, in resampled samples.

    Returns
    -------
    list of np.ndarray
        One row per window, each of length ``window_size + 1`` (min-max scaled
        samples followed by ``class_id``). Empty if the recording is too short.
    """
    ecg_signal = ecg.T[0]
    n_seconds = len(ecg_signal) // source_rate   # whole seconds available
    n_samples = int(n_seconds * target_rate)     # length after resampling
    if n_samples == 0:
        return []
    resampled = signal.resample(ecg_signal, n_samples)

    # The range stops before ``len - window_size`` and the last collected window is
    # then discarded too; with the default settings the final two full windows of a
    # recording are therefore not used. This mirrors the original implementation.
    windows = [resampled[start:start + window_size]
               for start in range(0, len(resampled) - window_size, window_shift)]
    windows = windows[:-1]  # slicing (unlike pop) is safe when no window was collected

    rows = []
    for window in windows:
        # np.ptp is used instead of ndarray.ptp(), which was removed in NumPy 2.0.
        span = np.ptp(window)
        if span == 0:
            # A constant window would give 0/0 = NaN, which SMOTE would reject downstream.
            scaled = np.zeros(window.shape, dtype=float)
        else:
            scaled = (window - window.min()) / span  # scale the readings to the 0-1 range
        rows.append(np.append(scaled, float(class_id)))
    return rows


def generateData(subject_ids, balance=True, random_state=None):
    """Build a windowed ECG data set for the given subjects.

    For every subject the baseline, stress and amusement recordings returned by
    ``DataManager().load`` are resampled from 700 to 256 samples per second, cut
    into 256-sample windows, min-max scaled per window and labelled
    0.0 (baseline), 1.0 (stress) or 2.0 (amusement).

    Parameters
    ----------
    subject_ids : iterable
        Subject identifiers passed one at a time to ``DataManager.load``.
    balance : bool, default True
        When True (the original behaviour) the classes are balanced with
        ``SMOTE(sampling_strategy='not majority')``. Pass False to get the real
        windows only, e.g. for a held-out evaluation set.
    random_state : int or None, default None
        Seed for SMOTE. ``None`` falls back to the notebook-level ``seed_value`` so
        that the oversampling is reproducible.

    Returns
    -------
    (X, y)
        ``X`` holds one window per row (256 feature columns) and ``y`` the matching
        labels in a single column; when ``balance`` is True these are the outputs
        of ``SMOTE.fit_resample``.

    Raises
    ------
    ValueError
        If no window could be extracted for any of the subjects.
    """
    data = DataManager()
    all_subject_data = np.array([])

    for subject_id in subject_ids:
        if debug:
            print("Processing subject id: ", subject_id)
        subject = data.load(subject_id)

        if debug:
            print('baseline: ', subject[0], '\nstress: ', subject[1], '\namusement: ', subject[2], '\nbase_label: ', subject[3], '\nstress_label: ', subject[4], '\namuse_label: ', subject[5])

        ecg_base, ecg_stress, ecg_amusement = subject[0], subject[1], subject[2]

        if debug:
            print('ecg_base_data:', ecg_base.T[0])
            print('ecg_stress_data:', ecg_stress.T[0])
            print('ecg_amusement_data:', ecg_amusement.T[0])

            # The label arrays are only shown for inspection; the class id of each
            # window comes from which recording it was cut from.
            print('ecg_base_label:', subject[3].T)
            print('ecg_stress_label:', subject[4].T)
            print('ecg_amusement_label:', subject[5].T)

        rows = []
        for ecg, class_id in ((ecg_base, 0.0), (ecg_stress, 1.0), (ecg_amusement, 2.0)):
            rows.extend(_windows_for_state(ecg, class_id))
        subject_data = np.array(rows, dtype=float)

        if all_subject_data.size == 0:
            all_subject_data = subject_data
        elif subject_data.size != 0:
            all_subject_data = np.concatenate((all_subject_data, subject_data), axis=0)

        print("New data shape", subject_data.shape, "Total Shape: ", all_subject_data.shape)

    if all_subject_data.size == 0:
        raise ValueError("No ECG windows could be extracted for the requested subjects")

    # The last column is the label, everything before it is the window.
    df_final_data_X = pd.DataFrame(data=all_subject_data[:, :-1])
    df_final_data_Y = pd.DataFrame(data=all_subject_data[:, -1])

    if not balance:
        return df_final_data_X, df_final_data_Y

    # SMOTE to balance the data; seeded so repeated runs give the same synthetic samples.
    smote_seed = seed_value if random_state is None else random_state
    smote = SMOTE(sampling_strategy='not majority', random_state=smote_seed)
    X_sm, y_sm = smote.fit_resample(df_final_data_X, df_final_data_Y)

    return X_sm, y_sm

## Data Generation - Train

In [13]:
# Build the (SMOTE-balanced) training/validation pool from every subject except the held-out one.
train_data, train_labels = generateData(rest_subjects)

Loading data for S2
New data shape (2115, 257) Total Shape:  (2115, 257)
Loading data for S4
New data shape (2159, 257) Total Shape:  (4274, 257)
Loading data for S5
New data shape (2211, 257) Total Shape:  (6485, 257)
Loading data for S6
New data shape (2196, 257) Total Shape:  (8681, 257)
Loading data for S7
New data shape (2192, 257) Total Shape:  (10873, 257)
Loading data for S8
New data shape (2202, 257) Total Shape:  (13075, 257)
Loading data for S9
New data shape (2191, 257) Total Shape:  (15266, 257)
Loading data for S10
New data shape (2271, 257) Total Shape:  (17537, 257)
Loading data for S11
New data shape (2222, 257) Total Shape:  (19759, 257)
Loading data for S13
New data shape (2220, 257) Total Shape:  (21979, 257)
Loading data for S14
New data shape (2221, 257) Total Shape:  (24200, 257)
Loading data for S15
New data shape (2227, 257) Total Shape:  (26427, 257)
Loading data for S16
New data shape (2215, 257) Total Shape:  (28642, 257)
Loading data for S17
New data shape 

In [14]:
# Class distribution after balancing (0.0 = baseline, 1.0 = stress, 2.0 = amusement).
train_labels.value_counts()

0.0    16443
1.0    16443
2.0    16443
dtype: int64

In [15]:
# Inspect the shapes returned by generateData; the labels come back as a 2-D, single-column table.
print("Train data shape:", train_data.shape)
print("Train labels shape:", train_labels.shape)
print("Dimension:", train_labels.ndim)

Train data shape: (49329, 256)
Train labels shape: (49329, 1)
Dimension: 2


In [16]:
# Flatten the single-column label table into a 1-D NumPy vector.
train_labels = pd.DataFrame(train_labels).to_numpy().reshape(-1)

In [17]:
# Confirm the labels are now a flat 1-D vector.
print("Train labels shape:", train_labels.shape)
print("Dimension:", train_labels.ndim)

Train labels shape: (49329,)
Dimension: 1


## Train and Validation data split

In [18]:
# Hold out 30% of the training pool for validation, with a fixed seed for a repeatable split.
# NOTE(review): generateData() has already applied SMOTE to this pool, so synthetic samples
# interpolated from training windows can land in the validation set (and vice versa);
# validation metrics may therefore be optimistic — confirm whether the split should precede SMOTE.
X_train, X_val, y_train, y_val = train_test_split(train_data, train_labels, test_size=0.3, random_state=42)

In [19]:
# Add a trailing channel axis so every sample has shape (256, 1),
# i.e. the arrays become (n_samples, 256, 1).
X_train = np.array(X_train).reshape(*X_train.shape[:2], 1)
X_val = np.array(X_val).reshape(*X_val.shape[:2], 1)

In [20]:
# Report the shapes of the train/validation arrays and label vectors.
print('size of X_train:', X_train.shape)
print('size of X_val:', X_val.shape)

print('size of y_train:', y_train.shape)
print('size of y_val:', y_val.shape)

# Per-class counts; note that the single 'Train:' prefix is followed by the training
# counts first and the validation counts second.
print('Train:', Counter(y_train), Counter(y_val))

size of X_train: (34530, 256, 1)
size of X_val: (14799, 256, 1)
size of y_train: (34530,)
size of y_val: (14799,)
Train: Counter({2.0: 11546, 0.0: 11500, 1.0: 11484}) Counter({1.0: 4959, 0.0: 4943, 2.0: 4897})


## Data Generation - Test

In [None]:
# Build the evaluation set from the held-out subject.
# NOTE(review): generateData() applies SMOTE by default, so this test set also contains
# synthetic oversampled windows — confirm this is intended for evaluation.
test_data, test_labels = generateData(test_subject)

Loading data for S3


In [None]:
# Class distribution of the test set (0.0 = baseline, 1.0 = stress, 2.0 = amusement).
test_labels.value_counts()

In [None]:
# Inspect the shapes returned by generateData for the test subject.
print("Test data shape:", test_data.shape)
print("Test labels shape:", test_labels.shape)
print("Dimension:", test_labels.ndim)

In [None]:
# Convert the label table to a NumPy array, then flatten it into the 1-D vector y_test.
test_labels = pd.DataFrame(test_labels).to_numpy()
y_test = test_labels.ravel()

In [None]:
# Confirm y_test is a flat 1-D vector.
print("Test labels shape:", y_test.shape)
print("Dimension:", y_test.ndim)

In [None]:
# Add a trailing channel axis: (n_samples, 256) -> (n_samples, 256, 1), matching the training arrays.
X_test = np.array(test_data).reshape(*test_data.shape, 1)

In [None]:
# Report the test array shapes and the per-class sample counts.
print('size of X_test:', X_test.shape)
print('size of y_test:', y_test.shape)

print('Test:', Counter(y_test))

## LSTM Model Architecture



In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv1D, BatchNormalization, MaxPool1D, Bidirectional, LSTM, Dropout
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.callbacks import LearningRateScheduler

In [None]:
# Re-seed all generators right before the layers are created so weight initialisation is repeatable.
reset_random_seeds()

# Two stacked bidirectional LSTMs followed by a dense classification head with three output classes.
lstm_model = Sequential([
    # First recurrent layer returns the full sequence so the second LSTM can consume it.
    Bidirectional(LSTM(64, input_shape=(X_train.shape[1], 1), return_sequences=True)),
    Bidirectional(LSTM(32)),
    # NOTE(review): the preceding layer does not return sequences, so this Flatten looks like a no-op — confirm.
    Flatten(),
    Dense(units=128, activation='relu'),
    Dense(units=3, activation='softmax'),
])

# Integer class labels -> sparse categorical cross-entropy.
lstm_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Training hyper-parameters passed to lstm_model.fit().
epochs, batch_size = 50, 32

In [None]:
def step_decay(epoch):
    """Step-wise learning-rate schedule.

    Starts at 0.005 and multiplies the rate by 0.6 once for every 10 epochs,
    where the number of drops is ``floor((epoch + 1) / 10)``.

    Parameters
    ----------
    epoch : int
        Epoch index the rate is requested for.

    Returns
    -------
    float
        Learning rate for that epoch.
    """
    base_rate, decay_factor, epochs_per_drop = 0.005, 0.6, 10.0
    completed_drops = math.floor((epoch + 1) / epochs_per_drop)
    return base_rate * math.pow(decay_factor, completed_drops)

# Wrap step_decay in a Keras callback so the learning rate is set from it during training.
lrate = LearningRateScheduler(step_decay)
callbacks_list = [lrate]

In [None]:
# Train the model, validating on the held-out 30% split after each epoch;
# the returned History object is kept for the learning-curve plots.
history = lstm_model.fit(X_train, y_train, epochs = epochs, batch_size = batch_size, validation_data = (X_val, y_val), callbacks=callbacks_list, verbose=2)

In [None]:
# Learning curves: accuracy (left) and loss (right) for the training and validation sets.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
# The model trained above is a bidirectional LSTM, so the figure is titled accordingly.
t = f.suptitle('Bidirectional LSTM Performance', fontsize=12)
f.subplots_adjust(top=0.85, wspace=0.3)

# Epochs are numbered from 1 on the x axis.
max_epoch = len(history.history['accuracy']) + 1
epoch_list = list(range(1, max_epoch))

# Both panels share the same layout; only the metric key and its display name differ.
for ax, metric, metric_name in ((ax1, 'accuracy', 'Accuracy'), (ax2, 'loss', 'Loss')):
    ax.plot(epoch_list, history.history[metric], label=f'Train {metric_name}')
    ax.plot(epoch_list, history.history[f'val_{metric}'], label=f'Validation {metric_name}')
    ax.set_xticks(np.arange(1, max_epoch, 5))
    ax.set_ylabel(f'{metric_name} Value')
    ax.set_xlabel('Epoch')
    ax.set_title(metric_name)
    ax.legend(loc="best")

In [None]:
# Score the trained model on the held-out subject's data (loss plus the compiled accuracy metric).
lstm_model.evaluate(X_test, y_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix
# Keep the raw softmax probabilities. Rounding them first would turn any row whose largest
# probability is <= 0.5 into all zeros, and argmax on such a row always yields class 0.
y_test_preds = lstm_model.predict(X_test)
print(y_test_preds)

In [None]:
# For every test window, take the index of the largest per-class score as the predicted class.
y_preds = np.argmax(y_test_preds, axis=1)

In [None]:
import sklearn.metrics as metrics

In [None]:
# Per-class precision, recall and F1 on the held-out subject.
print(metrics.classification_report(y_test, y_preds))