In [1]:
from scipy.io import loadmat
from sklearn.preprocessing import MinMaxScaler
from scipy import signal
import numpy as np
import math
import os

In [2]:
from scipy.io import loadmat
from sklearn.preprocessing import MinMaxScaler
from scipy import signal
import numpy as np
import math
import os
from scipy.stats import kurtosis


def calculate_time_domain_features(segment):
    """Summarise one signal segment with four time-domain statistics.

    Parameters
    ----------
    segment : array_like
        Samples of the segment to describe.

    Returns
    -------
    tuple
        ``(mean, std, var, kurtosis)`` in that order, where

        * ``mean`` is ``np.mean(segment)``;
        * ``std`` is ``np.std(segment)`` with NumPy's default ``ddof=0``;
        * ``var`` is ``np.var(segment, ddof=1)``;
        * ``kurtosis`` is ``scipy.stats.kurtosis(segment)`` with its
          default arguments.

    Notes
    -----
    ``std`` and ``var`` use different ``ddof`` values, so ``var`` is not
    equal to ``std ** 2``.
    NOTE(review): confirm whether that mismatch is intentional.
    """
    return (
        np.mean(segment),
        np.std(segment),          # population form (ddof=0)
        np.var(segment, ddof=1),  # sample form (ddof=1)
        kurtosis(segment),
    )

def _design_band_filters(band_edges_hz, fs, order=4):
    """Design one Butterworth band-pass ``(b, a)`` pair per ``(low, high)`` band given in Hz."""
    # Without an explicit ``fs`` argument, scipy.signal.butter expects the
    # critical frequencies as a fraction of the Nyquist frequency (fs / 2),
    # not of the sampling rate. Dividing by fs would halve every band.
    nyquist = fs / 2.0
    return [
        signal.butter(order, [low / nyquist, high / nyquist], btype='bandpass')
        for low, high in band_edges_hz
    ]


def _windowed_time_features(filtered, window):
    """Return per-channel ``[mean, std, var, kurtosis]`` of consecutive windows, flattened to 2-D."""
    n_channels, n_samples = filtered.shape
    # The strict upper bound reproduces the original windowing: a window that
    # would end exactly on the last sample is not emitted. This keeps the
    # output width (42 windows * 4 features = 168 for the run recorded in
    # this notebook) unchanged for downstream cells.
    n_windows = len(range(0, n_samples - window, window))
    windows = filtered[:, :n_windows * window].reshape(n_channels, n_windows, window)
    stats = np.stack(
        [
            windows.mean(axis=-1),
            windows.std(axis=-1),          # ddof=0, as in calculate_time_domain_features
            windows.var(axis=-1, ddof=1),  # ddof=1, as in calculate_time_domain_features
            kurtosis(windows, axis=-1),
        ],
        axis=-1,
    )
    # Layout per channel: [mean_0, std_0, var_0, kurt_0, mean_1, std_1, ...]
    return stats.reshape(n_channels, n_windows * 4)


def load_data(data_dir):
    """Build band-wise time-domain features from every ``.mat`` file in a folder.

    Each file must contain a ``'data'`` array indexed as
    ``[trial, channel, sample]`` and a ``'labels'`` array indexed by trial
    (the original comments give shapes (40, 40, 8064) and (40, 4)).

    For every trial the first 32 channels are kept, the first and last
    10 seconds are dropped, and the signal is band-pass filtered
    (zero-phase, 4th-order Butterworth) into four bands: 4-7, 8-13, 14-30
    and 31-45 Hz. Each filtered channel is cut into consecutive 1-second
    windows and described by mean, standard deviation (ddof=0), variance
    (ddof=1) and kurtosis -- the same statistics as
    ``calculate_time_domain_features``, computed here for all windows at once.

    Parameters
    ----------
    data_dir : str
        Directory that is scanned (non-recursively) for ``*.mat`` files.

    Returns
    -------
    datasets_X : numpy.ndarray
        Features with shape ``(n_trials_total, 4, 32, n_windows * 4)``.
    datasets_y : numpy.ndarray
        The matching label rows, one per trial.

    Notes
    -----
    Files are processed in sorted name order so that the trial order is
    reproducible; ``os.listdir`` alone returns entries in arbitrary order.
    """
    fs = 128  # sampling rate in Hz used for windowing and filter design
    bands_hz = [(4, 7), (8, 13), (14, 30), (31, 45)]
    selected_channels = np.arange(32)  # keep only the first 32 channels
    seconds_to_exclude = 10

    # The filter coefficients do not depend on the trial, so design them once.
    band_filters = _design_band_filters(bands_hz, fs)

    # Exclude the first and last 10 seconds of every trial.
    start_index = seconds_to_exclude * fs
    end_index = -seconds_to_exclude * fs

    datasets_X, datasets_y = [], []

    for filename_data in sorted(os.listdir(data_dir)):
        if not filename_data.endswith(".mat"):
            continue

        mat_data = loadmat(os.path.join(data_dir, filename_data))
        data = mat_data['data']
        labels = mat_data['labels']

        for trial_index in range(data.shape[0]):
            trial_data = data[trial_index][selected_channels, start_index:end_index]

            # One (channels, n_windows * 4) feature matrix per frequency band.
            band_features = [
                _windowed_time_features(signal.filtfilt(b, a, trial_data), fs)
                for b, a in band_filters
            ]

            datasets_X.append(np.array(band_features))
            datasets_y.append(labels[trial_index])

    return np.array(datasets_X), np.array(datasets_y)

# Folder holding the DEAP "data_preprocessed_matlab" .mat files.
# Set the DEAP_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset the original local path is used.
data_dir = os.environ.get(
    "DEAP_DATA_DIR",
    "E:/STUDY/Publications/Thesis/Brain Emotion Detection/Dataset/DEAP/data_preprocessed_matlab/",
)

# Extract the features for every trial and report the resulting array shapes.
datasets_X, datasets_y = load_data(data_dir)
print(datasets_X.shape)
print(datasets_y.shape)

(1280, 4, 32, 168)
(1280, 4)


In [3]:
#import numpy as np

# Reshape datasets_X to (n_trials, 4, 32, 42, 4): split the last axis into 42 windows x 4 features
#reshaped_datasets_X = datasets_X.reshape((-1, 4, 32, 42, 4))

# Verify the new shape
#print(reshaped_datasets_X.shape)


In [4]:
# Work on a copy so datasets_X keeps its original axis order, then move
# axis 1 to the end: (d0, d1, d2, d3) -> (d0, d2, d3, d1).
new_datasets_X = np.moveaxis(np.copy(datasets_X), 1, -1)
print(new_datasets_X.shape)

(1280, 32, 168, 4)


In [5]:
# Write the rearranged feature array and the label array to .npy files
# in the current working directory.
np.save(file='DEAP_Time_Domain_X.npy', arr=new_datasets_X)
np.save(file='DEAP_Time_Domain_y.npy', arr=datasets_y)