In [1]:
import os
import mne
import numpy as np
import pandas as pd
from scipy import interpolate
import matplotlib.pyplot as plt
import scipy
from scipy.signal import resample
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Target sampling rate in Hz; every recording is resampled to this rate before segmentation.
SAMPLE_RATE = 128  # fs
# Number of time samples per segment (128 samples at 128 Hz = one second per segment).
SAMPLE_LEN = 128   # T

In [2]:
# Dataset root directory, relative to the notebook's working directory.
root = 'REEG-BACA/'
# Tab-separated participants table stored directly under the dataset root.
participants_path = os.path.join(root, 'participants.tsv')
participants = pd.read_csv(participants_path, sep='\t')
# Bare expression so the notebook renders the table as the cell output.
participants

Unnamed: 0,participant_id,sex,age,handedness,session1,late_ses1,session2,late_ses2
0,sub-001,F,60,right,yes,0,yes,0.0
1,sub-002,M,67,right,yes,0,no,
2,sub-003,F,44,right,yes,0,no,
3,sub-004,F,24,right,yes,0,no,
4,sub-005,F,48,right,yes,0,yes,5.0
...,...,...,...,...,...,...,...,...
603,sub-604,M,29,right,yes,0,no,
604,sub-605,M,51,right,yes,0,no,
605,sub-606,F,61,right,yes,0,yes,0.0
606,sub-607,M,66,right,yes,0,no,


In [3]:
# Sanity scan over every EDF recording: collect the channels marked as bad,
# the sampling frequency and the (channels, samples) shape of each file.
bad_channel_list, sampling_freq_list, data_shape_list = [], [], []
for entry in os.listdir(root):
    # Only subject directories are of interest at the dataset root.
    if 'sub-' not in entry:
        continue
    subject_dir = os.path.join(root, entry)
    for session_name in os.listdir(subject_dir):
        # skip json and tsv files
        if 'sessions' in session_name:
            continue
        eeg_dir = os.path.join(subject_dir, session_name, 'eeg/')
        print(eeg_dir)
        edf_names = [name for name in os.listdir(eeg_dir) if '.edf' in name]
        for edf_name in edf_names:
            recording = mne.io.read_raw_edf(os.path.join(eeg_dir, edf_name), preload=True)
            # channels flagged as bad in the recording info
            bad_channel_list.append(recording.info['bads'])
            # sampling frequency of the recording
            sampling_freq_list.append(recording.info['sfreq'])
            # shape of the raw signal array
            data_shape_list.append(recording.get_data().shape)
# Subjects 29 and 104 cannot be loaded

REEG-BACA/sub-001\ses-1\eeg/
Extracting EDF parameters from C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-BACA\REEG-BACA\sub-001\ses-1\eeg\sub-001_ses-1_task-EyesClosed_acq-post_eeg.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 192999  =      0.000 ...   192.999 secs...
Extracting EDF parameters from C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-BACA\REEG-BACA\sub-001\ses-1\eeg\sub-001_ses-1_task-EyesClosed_acq-pre_eeg.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 183999  =      0.000 ...   183.999 secs...
Extracting EDF parameters from C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-BACA\REEG-BACA\sub-001\ses-1\eeg\sub-001_ses-1_task-EyesOpen_acq-post_eeg.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 184999  =      0.000 ...   184.999 secs...
Extracting EDF parameters from C:\User

In [4]:
# No channel is marked as bad in any recording (the unique set is empty)
print(np.unique(bad_channel_list))
# Sampling frequency is 1000 Hz for all runs (see the printed output)
print(np.unique(sampling_freq_list))
# Same number of channels in every recording; only the number of time samples differs
print(np.unique([i[0] for i in data_shape_list]))   # channel number is consistently 65
# Total number of EDF recordings that were scanned
print(len(data_shape_list) )

[]
[1000.]
[65]
3264


## Labels

In [5]:
# One row per participant with two int32 columns; the values are filled in by the next cell.
labels = np.empty((len(participants), 2), dtype=np.int32)
labels.shape

(608, 2)

In [6]:
# Column 0 is the class label: according to the paper, all the subjects are healthy controls.
labels[:, 0] = 0
# Column 1 is the subject id, counted from 1 in row order.
labels[:, 1] = np.arange(1, len(labels) + 1)

In [7]:
# Output directory for the label array.
label_path = 'Processed/REEG-BACA/Label'
# exist_ok=True creates the directory when missing and is a no-op otherwise,
# which avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(label_path, exist_ok=True)
# Persist the (n_subjects, 2) label array: column 0 = class label, column 1 = subject id.
np.save(os.path.join(label_path, 'label.npy'), labels)

In [8]:
# Reload the saved label file and display it to confirm it was written as expected.
np.load('Processed/REEG-BACA/Label/label.npy')

array([[  0,   1],
       [  0,   2],
       [  0,   3],
       ...,
       [  0, 606],
       [  0, 607],
       [  0, 608]])

## Features

In [9]:
# resample the time series data from original_fs to target_fs
def resample_time_series(data, original_fs, target_fs):
    """Resample a multi-channel recording from ``original_fs`` to ``target_fs``.

    All channels are resampled in one call to ``scipy.signal.resample`` along
    the time axis instead of looping over channels in Python.

    Parameters
    ----------
    data : array-like of shape (T, C)
        Time series with time along axis 0 and channels along axis 1.
    original_fs : float
        Sampling rate of ``data``; must be positive.
    target_fs : float
        Desired sampling rate; must be positive.

    Returns
    -------
    numpy.ndarray of shape (int(T * target_fs / original_fs), C), dtype float64
        The resampled recording. When the input is too short to yield a single
        output sample, an empty ``(0, C)`` array is returned.

    Raises
    ------
    ValueError
        If ``data`` is not two-dimensional or a sampling rate is not positive.
    """
    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError(
            "data must be 2-D with shape (T, C), got shape {}".format(data.shape)
        )
    if original_fs <= 0 or target_fs <= 0:
        raise ValueError(
            "sampling rates must be positive, got original_fs={} and target_fs={}".format(
                original_fs, target_fs
            )
        )

    T, C = data.shape
    # Truncate (not round) so the output never covers more time than the input.
    new_length = int(T * target_fs / original_fs)
    if new_length == 0:
        # Nothing to resample; scipy rejects a zero-length target.
        return np.zeros((0, C))

    resampled_data = resample(data, new_length, axis=0)
    # Keep the float64 output contract regardless of the input dtype.
    return resampled_data.astype(np.float64, copy=False)

# cut a (T, C) recording into consecutive windows of segment_length samples; a trailing partial window is discarded
def split_eeg_segments(data, segment_length=128):
    """Split a (T, C) array into (num_segments, segment_length, C) windows.

    Only complete, non-overlapping windows are kept: the last
    ``T % segment_length`` samples are dropped.
    """
    n_samples, n_channels = data.shape
    n_full = n_samples // segment_length
    usable = n_full * segment_length
    trimmed = data[:usable]
    return trimmed.reshape(n_full, segment_length, n_channels)

In [11]:
# Output directory for the per-subject feature arrays.
feature_path = 'Processed/REEG-BACA/Feature'
os.makedirs(feature_path, exist_ok=True)

# Sorted listings make the subject, session and file order (and therefore the
# segment order inside each saved array) deterministic across platforms.
for sub in sorted(os.listdir(root)):
    sub_path = os.path.join(root, sub)
    # Only subject directories are processed; other entries at the root are skipped.
    if 'sub-' not in sub or not os.path.isdir(sub_path):
        continue

    # Take the subject number from the directory name (e.g. 'sub-001' -> 1)
    # rather than from a running counter, so feature_XXX.npy keeps matching the
    # subject ids stored in label.npy even when a subject directory is missing
    # or the listing order changes.
    sub_id = int(sub.split('sub-', 1)[1])

    feature_list = []
    for session in sorted(os.listdir(sub_path)):
        if 'sessions' in session:  # skip json and tsv files
            continue
        session_path = os.path.join(sub_path, session, 'eeg/')
        print(session_path)
        for file in sorted(os.listdir(session_path)):
            if '.edf' not in file:
                continue
            file_path = os.path.join(session_path, file)
            raw = mne.io.read_raw_edf(file_path, preload=True)
            freq = raw.info['sfreq']
            # get eeg data as (time, channels)
            data = raw.get_data().T
            print("Raw data shape ", data.shape)
            data = resample_time_series(data, freq, SAMPLE_RATE)
            # Use the helper instead of a bare reshape: it drops a trailing
            # partial segment (a bare reshape raises when the resampled length
            # is not a multiple of SAMPLE_LEN) and takes the channel count
            # from the data rather than a hard-coded 65.
            session_feature_array = split_eeg_segments(data, SAMPLE_LEN)
            print("Downsampling and segmented session data shape ", session_feature_array.shape)
            feature_list.append(session_feature_array)

    if not feature_list:
        # np.concatenate raises on an empty list; report and move on instead.
        print("No EDF recordings found for {}; skipping".format(sub))
        continue

    # Stack the segments of all recordings of this subject along the segment axis.
    feature_array = np.concatenate(feature_list, axis=0)
    print("Subject data shape ", feature_array.shape)
    np.save(feature_path + '/feature_{:03d}.npy'.format(sub_id), feature_array)
    print("-------------------------------------\n")

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

REEG-BACA/sub-001\ses-1\eeg/
Extracting EDF parameters from C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-BACA\REEG-BACA\sub-001\ses-1\eeg\sub-001_ses-1_task-EyesClosed_acq-post_eeg.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 192999  =      0.000 ...   192.999 secs...
Raw data shape  (193000, 65)
Downsampling and segmented session data shape  (193, 128, 65)
Extracting EDF parameters from C:\Users\24700\PycharmProjects\DataPreprocessingYihe\REEG-BACA\REEG-BACA\sub-001\ses-1\eeg\sub-001_ses-1_task-EyesClosed_acq-pre_eeg.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Reading 0 ... 183999  =      0.000 ...   183.999 secs...
Raw data shape  (184000, 65)
Downsampling and segmented sessio

In [12]:
# Test the saved npy files: print the shape of every per-subject feature array.

path = 'Processed/REEG-BACA/Feature/'

# Sorted so the printed shapes follow the file numbering.
for file in sorted(os.listdir(path)):
    if not file.endswith('.npy'):
        continue
    sub_path = os.path.join(path, file)
    # mmap_mode='r' maps the file instead of reading the whole array into
    # memory, which is all that is needed to inspect its shape.
    print(np.load(sub_path, mmap_mode='r').shape)

(1487, 128, 65)
(737, 128, 65)
(777, 128, 65)
(786, 128, 65)
(1494, 128, 65)
(737, 128, 65)
(747, 128, 65)
(856, 128, 65)
(811, 128, 65)
(740, 128, 65)
(735, 128, 65)
(751, 128, 65)
(737, 128, 65)
(737, 128, 65)
(744, 128, 65)
(777, 128, 65)
(732, 128, 65)
(742, 128, 65)
(755, 128, 65)
(744, 128, 65)
(765, 128, 65)
(734, 128, 65)
(743, 128, 65)
(730, 128, 65)
(773, 128, 65)
(744, 128, 65)
(730, 128, 65)
(1505, 128, 65)
(734, 128, 65)
(749, 128, 65)
(728, 128, 65)
(736, 128, 65)
(1471, 128, 65)
(737, 128, 65)
(739, 128, 65)
(729, 128, 65)
(825, 128, 65)
(1472, 128, 65)
(774, 128, 65)
(749, 128, 65)
(1475, 128, 65)
(744, 128, 65)
(729, 128, 65)
(1477, 128, 65)
(755, 128, 65)
(1544, 128, 65)
(1502, 128, 65)
(1462, 128, 65)
(1510, 128, 65)
(1494, 128, 65)
(1479, 128, 65)
(1503, 128, 65)
(1492, 128, 65)
(1499, 128, 65)
(730, 128, 65)
(743, 128, 65)
(739, 128, 65)
(740, 128, 65)
(1471, 128, 65)
(1477, 128, 65)
(736, 128, 65)
(729, 128, 65)
(731, 128, 65)
(736, 128, 65)
(751, 128, 65)
(728, 1