In [2]:
import os
import mne
import numpy as np
import pandas as pd
from scipy import interpolate
import matplotlib.pyplot as plt
import scipy
from scipy.signal import resample
import warnings
warnings.filterwarnings("ignore")

In [None]:
SAMPLE_RATE = 128  # fs: target sampling rate (Hz) that recordings are resampled to
SAMPLE_LEN = 128   # T: number of time points per saved segment (1 s at SAMPLE_RATE)

## Labels

In [3]:
# Dataset root; it holds participants.tsv plus one 'sub-XXX' folder per participant
root = 'REEG-PD/'
# Tab-separated participant metadata table located directly under the root
participants_path = os.path.join(root, 'participants.tsv')
participants = pd.read_csv(participants_path, delimiter='\t')
participants

Unnamed: 0,participant_id,GROUP,ID,EEG,AGE,GENDER,MOCA,UPDRS,TYPE
0,sub-001,PD,1001,PD1001,80,M,19,28.0,1
1,sub-002,PD,1011,PD1011,81,M,17,25.0,1
2,sub-003,PD,1021,PD1021,68,F,26,10.0,1
3,sub-004,PD,1031,PD1031,80,M,22,10.0,1
4,sub-005,PD,1041,PD1041,56,M,21,13.0,1
...,...,...,...,...,...,...,...,...,...
144,sub-145,Control,1451,Control1451,64,F,27,,0
145,sub-146,Control,1461,Control1461,71,M,30,,0
146,sub-147,Control,1471,Control1471,78,M,27,,0
147,sub-148,Control,1481,Control1481,68,F,27,,0


In [4]:
# One row per participant; the fill loop writes the label into column 0 and the subject id into column 1
n_participants = len(participants)
labels = np.empty((n_participants, 2), dtype=np.int32)
labels.shape

(149, 2)

In [5]:
# Fill every row of `labels` with (class label, numeric subject id).
for row, record in enumerate(participants.values):
    # First column is the participant id, e.g. 'sub-001'; keep only the number after the dash.
    subject_number = int(record[0].split('-')[1])
    # Last column is TYPE (1 on the PD rows, 0 on the Control rows of the table above).
    labels[row] = (int(record[-1]), subject_number)

In [6]:
# Output directory for the label array.
label_path = 'Processed/REEG-PD/Label'
# exist_ok=True replaces the separate exists() check: the directory is created when
# missing and the call is a no-op when it is already there, so the cell can be re-run.
os.makedirs(label_path, exist_ok=True)
# Each saved row is [class label, subject id].
np.save(label_path + '/label.npy', labels)

In [7]:
# Read the saved label file back to check its contents; each row is [class label, subject id].
np.load('Processed/REEG-PD/Label/label.npy')

array([[  1,   1],
       [  1,   2],
       [  1,   3],
       [  1,   4],
       [  1,   5],
       [  1,   6],
       [  1,   7],
       [  1,   8],
       [  1,   9],
       [  1,  10],
       [  1,  11],
       [  1,  12],
       [  1,  13],
       [  1,  14],
       [  1,  15],
       [  1,  16],
       [  1,  17],
       [  1,  18],
       [  1,  19],
       [  1,  20],
       [  1,  21],
       [  1,  22],
       [  1,  23],
       [  1,  24],
       [  1,  25],
       [  1,  26],
       [  1,  27],
       [  1,  28],
       [  1,  29],
       [  1,  30],
       [  1,  31],
       [  1,  32],
       [  1,  33],
       [  1,  34],
       [  1,  35],
       [  1,  36],
       [  1,  37],
       [  1,  38],
       [  1,  39],
       [  1,  40],
       [  1,  41],
       [  1,  42],
       [  1,  43],
       [  1,  44],
       [  1,  45],
       [  1,  46],
       [  1,  47],
       [  1,  48],
       [  1,  49],
       [  1,  50],
       [  1,  51],
       [  1,  52],
       [  1,

## Features

In [10]:
# Sanity pass over every recording: gather the bad-channel list, the sampling
# frequency and the data array shape of each .set file, one list entry per file.
bad_channel_list, sampling_freq_list, data_shape_list = [], [], []
for sub in os.listdir(root):
    if 'sub-' not in sub:
        continue
    sub_path = os.path.join(root, sub, 'eeg/')
    for file in os.listdir(sub_path):
        if '.set' not in file:
            continue
        file_path = os.path.join(sub_path, file)
        raw = mne.io.read_raw_eeglab(file_path, preload=True)
        # channels marked as bad in the recording's info
        bad_channel_list.append(raw.info['bads'])
        # sampling frequency stored with the recording
        sampling_freq_list.append(raw.info['sfreq'])
        # shape of the full signal array
        data_shape_list.append(raw.get_data().shape)

Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-001\eeg\sub-001_task-Rest_eeg.fdt
Reading 0 ... 140829  =      0.000 ...   281.658 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-002\eeg\sub-002_task-Rest_eeg.fdt
Reading 0 ... 163019  =      0.000 ...   326.038 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-003\eeg\sub-003_task-Rest_eeg.fdt
Reading 0 ... 126179  =      0.000 ...   252.358 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-004\eeg\sub-004_task-Rest_eeg.fdt
Reading 0 ... 131979  =      0.000 ...   263.958 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-005\eeg\sub-005_task-Rest_eeg.fdt
Reading 0 ... 124769  =      0.000 ...   249.538 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-006\eeg\sub-006_task-Rest_eeg.fdt
Reading 0 ... 131029  =  

In [11]:
# Printed in order:
#   1. bad channels  -> an empty list for every recording
#   2. sampling rate -> 500 Hz for every recording
#   3. data shapes   -> recordings differ in length, and the channel count is not consistent either
for summary in (bad_channel_list, sampling_freq_list, data_shape_list):
    print(summary)

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500

In [30]:
# The channel count is not consistent between recordings, so keep only the
# channels that appear in every file.
# The running intersection starts as None instead of an empty container: with an
# emptiness test, an intersection that legitimately became empty would be silently
# re-seeded from the next file and the final result would be wrong.
shared_channels = None
for sub in os.listdir(root):
    if 'sub-' in sub:
        sub_path = os.path.join(root, sub, 'eeg/')
        for file in os.listdir(sub_path):
            if '.set' in file:
                file_path = os.path.join(sub_path, file)
                raw = mne.io.read_raw_eeglab(file_path, preload=True)
                current_channels = set(raw.info['ch_names'])
                if shared_channels is None:
                    shared_channels = current_channels
                else:
                    shared_channels &= current_channels
# Sort the names so the resulting list is the same on every run; iterating a set of
# strings does not give a stable order across interpreter sessions.
common_channels = sorted(shared_channels) if shared_channels else []
print(common_channels)
print("Common channels number: ", len(common_channels))

Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-001\eeg\sub-001_task-Rest_eeg.fdt
Reading 0 ... 140829  =      0.000 ...   281.658 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-002\eeg\sub-002_task-Rest_eeg.fdt
Reading 0 ... 163019  =      0.000 ...   326.038 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-003\eeg\sub-003_task-Rest_eeg.fdt
Reading 0 ... 126179  =      0.000 ...   252.358 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-004\eeg\sub-004_task-Rest_eeg.fdt
Reading 0 ... 131979  =      0.000 ...   263.958 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-005\eeg\sub-005_task-Rest_eeg.fdt
Reading 0 ... 124769  =      0.000 ...   249.538 secs...
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-006\eeg\sub-006_task-Rest_eeg.fdt
Reading 0 ... 131029  =  

In [31]:
def resample_time_series(data, original_fs, target_fs):
    """Resample a multichannel recording from ``original_fs`` to ``target_fs``.

    Parameters
    ----------
    data : 2-D array of shape (T, C)
        Time along the first axis, channels along the second.
    original_fs : number
        Sampling frequency of ``data``.
    target_fs : number
        Desired sampling frequency.

    Returns
    -------
    numpy.ndarray of shape (int(T * target_fs / original_fs), C)
        Each channel resampled independently with ``scipy.signal.resample``.
    """
    n_samples, n_channels = data.shape
    n_out = int(n_samples * target_fs / original_fs)

    out = np.zeros((n_out, n_channels))
    # data.T yields one channel (a column of `data`) per iteration
    for ch, column in enumerate(data.T):
        out[:, ch] = resample(column, n_out)

    return out

def split_eeg_segments(data, segment_length=128):
    """Cut a (T, C) recording into consecutive, non-overlapping segments.

    Samples left over after the last complete segment are discarded.

    Parameters
    ----------
    data : 2-D array of shape (T, C)
        Time along the first axis, channels along the second.
    segment_length : int, default 128
        Number of time points per segment.

    Returns
    -------
    numpy.ndarray of shape (T // segment_length, segment_length, C)
    """
    total_samples, n_channels = data.shape
    n_full, _leftover = divmod(total_samples, segment_length)
    # keep only the samples that fill whole segments
    usable = data[:n_full * segment_length]
    return usable.reshape(n_full, segment_length, n_channels)

In [32]:
# Convert every recording into fixed-length segments and save one .npy file per subject.
feature_path = 'Processed/REEG-PD/Feature'
os.makedirs(feature_path, exist_ok=True)

# Sorted traversal keeps the processing order and the printed log reproducible.
for sub in sorted(os.listdir(root)):
    if 'sub-' not in sub:
        continue
    # Take the id from the folder name ('sub-007' -> 7), the same way the label
    # array derives it from participant_id. A running counter would pair features
    # with the wrong label whenever the directory listing is out of order or a
    # subject folder is missing.
    sub_id = int(sub.split('-')[1])
    sub_path = os.path.join(root, sub, 'eeg/')
    print(sub_path)
    for file in os.listdir(sub_path):
        if '.set' not in file:
            continue
        file_path = os.path.join(sub_path, file)
        raw = mne.io.read_raw_eeglab(file_path, preload=True)
        # keep only the channels shared by all recordings
        raw.pick(common_channels)
        # transpose so that time is the first axis and channels the second
        data = raw.get_data().T
        print("Raw data shape ", data.shape)
        # read the input rate from the file instead of assuming 500 Hz
        data = resample_time_series(data, raw.info['sfreq'], SAMPLE_RATE)
        feature_array = split_eeg_segments(data, SAMPLE_LEN)
        print("Downsampling and segmented data shape ", feature_array.shape)
        np.save(feature_path + '/feature_{:03d}.npy'.format(sub_id), feature_array)
    # one separator per subject; non-subject entries in the root no longer print one
    print("-------------------------------------\n")

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

REEG-PD/sub-001\eeg/
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-001\eeg\sub-001_task-Rest_eeg.fdt
Reading 0 ... 140829  =      0.000 ...   281.658 secs...
Raw data shape  (140830, 60)
Downsampling and segmented data shape  (281, 128, 60)
-------------------------------------

REEG-PD/sub-002\eeg/
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-002\eeg\sub-002_task-Rest_eeg.fdt
Reading 0 ... 163019  =      0.000 ...   326.038 secs...
Raw data shape  (163020, 60)
Downsampling and segmented data shape  (326, 128, 60)
-------------------------------------

REEG-PD/sub-003\eeg/
Reading C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-PD\REEG-PD\sub-003\eeg\sub-003_task-Rest_eeg.fdt


In [33]:
# Check the saved feature files: load each .npy in the feature folder and print
# its shape (segments, time points per segment, channels).

path = 'Processed/REEG-PD/Feature/'

for file in os.listdir(path):
    saved_array = np.load(os.path.join(path, file))
    print(saved_array.shape)

(281, 128, 60)
(326, 128, 60)
(252, 128, 60)
(263, 128, 60)
(249, 128, 60)
(262, 128, 60)
(239, 128, 60)
(234, 128, 60)
(248, 128, 60)
(342, 128, 60)
(156, 128, 60)
(120, 128, 60)
(123, 128, 60)
(125, 128, 60)
(120, 128, 60)
(121, 128, 60)
(185, 128, 60)
(151, 128, 60)
(186, 128, 60)
(181, 128, 60)
(159, 128, 60)
(153, 128, 60)
(169, 128, 60)
(151, 128, 60)
(163, 128, 60)
(142, 128, 60)
(121, 128, 60)
(154, 128, 60)
(203, 128, 60)
(176, 128, 60)
(181, 128, 60)
(151, 128, 60)
(131, 128, 60)
(174, 128, 60)
(202, 128, 60)
(121, 128, 60)
(129, 128, 60)
(136, 128, 60)
(165, 128, 60)
(137, 128, 60)
(150, 128, 60)
(146, 128, 60)
(131, 128, 60)
(139, 128, 60)
(145, 128, 60)
(131, 128, 60)
(130, 128, 60)
(238, 128, 60)
(121, 128, 60)
(124, 128, 60)
(124, 128, 60)
(130, 128, 60)
(126, 128, 60)
(125, 128, 60)
(124, 128, 60)
(139, 128, 60)
(131, 128, 60)
(125, 128, 60)
(135, 128, 60)
(146, 128, 60)
(125, 128, 60)
(125, 128, 60)
(156, 128, 60)
(161, 128, 60)
(123, 128, 60)
(138, 128, 60)
(128, 128,