<a href="https://colab.research.google.com/github/vipulSharma18/Automatic-Emotion-Recognition-on-DEAP-Dataset/blob/main/DataAugmentation/Windowing_of_Time_Series.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Add Google Drive

In [1]:
# Mount Google Drive at /content/drive so later cells can read and write files there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Importing Relevant Libraries

In [2]:
import pandas as pd
import tensorflow as tf
import numpy as np
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Softmax
from sklearn.metrics import classification_report
from keras.utils.vis_utils import plot_model
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pickle
from scipy.stats import zscore
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import gc

# GPU Check

In [3]:
# Report the TensorFlow build, then the GPUs it can see (queried once and reused).
print(tf.version.VERSION)
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print(gpu_devices)
print("Num GPUs Available: ", len(gpu_devices))

2.4.1
[]
Num GPUs Available:  0


# Data Augmentation

## Load Data from .dat files into a np array of 1280 x 32 x 8064 size

In [5]:
# Collect the EEG channels of every subject file into one array.
# Each trial contributes one (32, n_samples) entry to the list.
all_sub_data = []
subjects_list = [f"{subject_id:02d}" for subject_id in range(1, 33)]  # '01' ... '32'
for sub in subjects_list:
    path = "/content/drive/MyDrive/major project/data_preprocessed_python/s"+sub+".dat"
    # SECURITY: unpickling can execute arbitrary code -- only load trusted dataset files.
    # The context manager guarantees the file handle is closed after each subject.
    with open(path, 'rb') as dat_file:
        subject_record = pickle.load(dat_file, encoding = 'latin1')
    sub_data = subject_record['data']
    sub_eeg = sub_data[:, :32, :]  # keep only the first 32 channels (the EEG signals)
    all_sub_data.extend(sub_eeg)
    gc.collect()
gc.collect()
# Stack the per-trial entries into a single ndarray.
all_sub_data = np.array(all_sub_data)

In [6]:
# Sanity check: display the dimensions of the stacked EEG array.
all_sub_data.shape

(1280, 32, 8064)

## Z-score normalization of each EEG signal; the result is written back into all_sub_data

In [7]:
# Standardise each trial in place: zscore along axis 1 of a trial's 2-D slice
# normalises every channel row independently.
for trial_idx, trial in enumerate(all_sub_data):
  all_sub_data[trial_idx] = zscore(trial, axis = 1)
gc.collect()

107

## Loading the labels into a 1280-element pandas Series named sub_labels

In [8]:
# Read the label spreadsheet and keep the quadrant column as integers.
labels = pd.read_excel("/content/drive/MyDrive/major project/metadata/Labels.xls")
quadrant_column = labels["Valence-Arousal Model Quadrant"]
sub_labels = quadrant_column.astype('int')
# gc.collect() is the last expression, so its return value is what the cell displays.
gc.collect()

3058

## One-Hot encoding of labels  
> sub_labels: (1280,4)

In [9]:
# Learn the set of quadrant ids, then replace the labels with their one-hot rows.
lb = preprocessing.LabelBinarizer()
lb.fit(sub_labels)
sub_labels = lb.transform(sub_labels)
# Show the learned classes, the encoded shape, and the encoded matrix itself.
for summary in (lb.classes_, sub_labels.shape, sub_labels):
    print(summary)

[0 1 2 3]
(1280, 4)
[[0 0 0 1]
 [0 0 0 1]
 [0 0 0 1]
 ...
 [0 1 0 0]
 [0 1 0 0]
 [0 0 1 0]]


In [10]:
# Confirm the dimensions of the one-hot label matrix.
sub_labels.shape

(1280, 4)

In [11]:
# Force a garbage-collection pass; the displayed integer is gc.collect()'s return value.
gc.collect()

85

## Generating train/test splits  
> X_train, y_train: (1152,32,8064), (1152,4)  
> X_test, y_test: (128,32,8064), (128,4)  

In [12]:
# Hold out 10% of the trials as the test set. Rows are shuffled before splitting,
# and random_state pins the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(all_sub_data, sub_labels, test_size = 0.1, random_state = 42,shuffle = True)

In [13]:
# Display the dimensions of all four split arrays in one tuple.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((1152, 32, 8064), (1152, 4), (128, 32, 8064), (128, 4))

In [14]:
# Force a garbage-collection pass; the displayed integer is gc.collect()'s return value.
gc.collect()

85

## Repetition of Labels for Windowing of training data  

> y_train_12, y_train_6 and y_train_4 have shape (N, 4), with N = 13824, 6912 and 4608 respectively.


In [15]:
# Each training signal is later cut into 12, 6 or 4 sub-signals. Every sub-signal
# keeps its parent's label, so each label row is repeated that many times in a row.
y_train_12, y_train_6, y_train_4 = (
    np.repeat(y_train, copies, axis = 0) for copies in (12, 6, 4)
)
gc.collect()
print(y_train_12.shape, y_train_6.shape, y_train_4.shape)

(13824, 4) (6912, 4) (4608, 4)


## Windowing of EEG signals channel-wise

In [16]:
# Swap the first two axes of X_train so the channel axis comes first;
# Channel_wise[c] then holds channel c of every training trial.
Channel_wise = np.transpose(X_train, (1,0,2))

In [17]:
# Free unreferenced objects, then show the per-channel slice shape as a sanity check.
gc.collect()
print(Channel_wise[0].shape)

(1152, 8064)


In [29]:
def process_input(instances, sub_signals):
  """Cut every signal into ``sub_signals`` equal, non-overlapping windows.

  Args:
    instances: channel-wise array whose last axis is the signal, e.g. shape
      (32, -1, 8064). The signal length is read from the array itself rather
      than being fixed at 8064.
    sub_signals: number of windows per signal; must be a positive divisor of
      the signal length.

  Returns:
    A new array of shape (channels, signals_per_channel * sub_signals,
    signal_length // sub_signals, 1). The windows of one signal are adjacent
    along axis 1, which matches labels built with
    ``np.repeat(labels, sub_signals, axis=0)``.

  Raises:
    ValueError: if ``sub_signals`` is not positive or does not divide the
      signal length exactly. Truncating would let windows straddle two
      different signals.
  """
  #instances must be channel wise of shape (channels, -1, signal_length)
  instances = np.asarray(instances)
  signal_length = instances.shape[-1]
  if sub_signals <= 0:
    raise ValueError("sub_signals must be positive, got %r" % (sub_signals,))
  samples, remainder = divmod(signal_length, sub_signals)
  if remainder:
    raise ValueError(
        "signal length %d is not divisible into %r sub signals"
        % (signal_length, sub_signals))
  samples = int(samples)
  n_channels = instances.shape[0]
  windows_per_channel = (
      int(np.prod(instances.shape[1:-1], dtype=np.int64)) * int(sub_signals))
  # One fresh C-ordered copy (the input is typically a transposed view); the
  # reshape below is then a free view of that copy instead of a second
  # per-channel copy plus a final stacking copy.
  contiguous = np.array(instances, order='C')
  transformed = contiguous.reshape(n_channels, windows_per_channel, samples, 1)
  print(transformed.shape, 'is the shape obtained.')
  gc.collect()
  return transformed

### 12 sub signals of length 672 each, total 13824 instances

In [30]:
# Window the training signals into 12 sub-signals, persist them to Drive,
# then drop the in-memory copy before the next variant is built.
save_path_12 = "/content/drive/MyDrive/major project/data_augmentation/channel_wise_12.npy"
channel_wise_12 = process_input(Channel_wise, 12)
np.save(save_path_12, channel_wise_12)
del channel_wise_12
gc.collect()

(32, 13824, 672, 1) is the shape obtained.


0

### 6 sub signals of length 1344 each, total 6912 instances

In [31]:
# Window the training signals into 6 sub-signals, persist them to Drive,
# then drop the in-memory copy before the next variant is built.
save_path_6 = "/content/drive/MyDrive/major project/data_augmentation/channel_wise_6.npy"
channel_wise_6 = process_input(Channel_wise, 6)
np.save(save_path_6, channel_wise_6)
del channel_wise_6
gc.collect()

(32, 6912, 1344, 1) is the shape obtained.


0

### 4 sub signals of length 2016 each, total 4608 instances

In [32]:
# Window the training signals into 4 sub-signals, persist them to Drive,
# then drop the in-memory copy.
save_path_4 = "/content/drive/MyDrive/major project/data_augmentation/channel_wise_4.npy"
channel_wise_4 = process_input(Channel_wise, 4)
np.save(save_path_4, channel_wise_4)
del channel_wise_4
gc.collect()

(32, 4608, 2016, 1) is the shape obtained.


0

In [34]:
# Final garbage-collection pass; the displayed integer is gc.collect()'s return value.
gc.collect()

0