In [2]:
import os
import mne
import numpy as np
import pandas as pd
from scipy import interpolate
import matplotlib.pyplot as plt
import scipy
from scipy.signal import resample
import warnings
warnings.filterwarnings("ignore")


In [None]:
SAMPLE_RATE = 128  # fs: target sampling rate the EEG is resampled to before segmentation
SAMPLE_LEN = 128   # T: number of samples per segment; equal to SAMPLE_RATE, so one segment covers one second

In [None]:
# Root directory of the PEARL-Neuro dataset (relative to the notebook's working directory)
root = 'PEARL-Neuro/'
# Tab-separated participant table located directly under the dataset root
participants_path = os.path.join(root, 'participants.tsv')
participants = pd.read_csv(participants_path, sep='\t')
# Bare expression so the notebook renders the table as the cell output
participants

## Labels

In [4]:
# Label table with one row per subject that has EEG data (79 subjects, all healthy).
# Column 0 receives the class label and column 1 the subject ID in the next cell;
# np.empty leaves the contents uninitialised until then.
labels = np.empty((79, 2), dtype=np.int32)
labels.shape

(79, 2)

In [5]:
# Fill the label table in one shot instead of looping row by row:
# column 0 is 0 for every subject, column 1 is the 1-based subject ID in row order.
labels[:, 0] = 0
labels[:, 1] = np.arange(1, len(labels) + 1)
# The loop version left the counter one past the last assigned ID; keep that end state.
sub_id = len(labels) + 1

In [6]:
# Directory that receives the label array.
label_path = 'Processed/PEARL-Neuro-19/Label'
# exist_ok=True creates the directory on first run and is a no-op on re-runs,
# so no separate existence check is needed.
os.makedirs(label_path, exist_ok=True)
np.save(os.path.join(label_path, 'label.npy'), labels)

In [7]:
# Reload the saved label array; the bare expression displays it as the cell output for a visual check
np.load('Processed/PEARL-Neuro-19/Label/label.npy')

array([[ 0,  1],
       [ 0,  2],
       [ 0,  3],
       [ 0,  4],
       [ 0,  5],
       [ 0,  6],
       [ 0,  7],
       [ 0,  8],
       [ 0,  9],
       [ 0, 10],
       [ 0, 11],
       [ 0, 12],
       [ 0, 13],
       [ 0, 14],
       [ 0, 15],
       [ 0, 16],
       [ 0, 17],
       [ 0, 18],
       [ 0, 19],
       [ 0, 20],
       [ 0, 21],
       [ 0, 22],
       [ 0, 23],
       [ 0, 24],
       [ 0, 25],
       [ 0, 26],
       [ 0, 27],
       [ 0, 28],
       [ 0, 29],
       [ 0, 30],
       [ 0, 31],
       [ 0, 32],
       [ 0, 33],
       [ 0, 34],
       [ 0, 35],
       [ 0, 36],
       [ 0, 37],
       [ 0, 38],
       [ 0, 39],
       [ 0, 40],
       [ 0, 41],
       [ 0, 42],
       [ 0, 43],
       [ 0, 44],
       [ 0, 45],
       [ 0, 46],
       [ 0, 47],
       [ 0, 48],
       [ 0, 49],
       [ 0, 50],
       [ 0, 51],
       [ 0, 52],
       [ 0, 53],
       [ 0, 54],
       [ 0, 55],
       [ 0, 56],
       [ 0, 57],
       [ 0, 58],
       [ 0, 59

## Features

In [17]:
# Sanity check of every resting-state recording before preprocessing.
# For each recording, collect the channels marked as bad, the sampling frequency
# and the (n_channels, n_samples) shape.
bad_channel_list, sampling_freq_list, data_shape_list = [], [], []
# os.listdir returns entries in arbitrary order; sorting keeps the three lists
# in a reproducible subject order.
for sub in sorted(os.listdir(root)):
    sub_path = os.path.join(root, sub, 'eeg/')
    # Skip anything that is not a subject folder containing an eeg/ directory.
    if not sub.startswith('sub-') or not os.path.isdir(sub_path):
        continue
    for file in sorted(os.listdir(sub_path)):
        # Only the BrainVision header of the resting-state run is inspected.
        if '.vhdr' in file and 'rest' in file:
            file_path = os.path.join(sub_path, file)
            # Only metadata is needed here, so the signal is not loaded into memory.
            raw = mne.io.read_raw_brainvision(file_path, preload=False)
            # Channels flagged as bad in the recording's info structure.
            bad_channel_list.append(raw.info['bads'])
            # Sampling frequency stored in the recording's info structure.
            sampling_freq_list.append(raw.info['sfreq'])
            # Same tuple that raw.get_data().shape would give, without reading the data.
            data_shape_list.append((len(raw.ch_names), raw.n_times))

Extracting parameters from PEARL-Neuro/sub-01\eeg/sub-01_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 661519  =      0.000 ...   661.519 secs...
Extracting parameters from PEARL-Neuro/sub-02\eeg/sub-02_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 637719  =      0.000 ...   637.719 secs...
Extracting parameters from PEARL-Neuro/sub-03\eeg/sub-03_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 670759  =      0.000 ...   670.759 secs...
Extracting parameters from PEARL-Neuro/sub-04\eeg/sub-04_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 704099  =      0.000 ...   704.099 secs...
Extracting parameters from PEARL-Neuro/sub-05\eeg/sub-05_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 644859  =      0.000 ...   644.859 secs...
Extracting parameters from PEARL-Neuro/sub-06\eeg/sub-06_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 632819  =      0.000 ..

In [18]:
# 0 bad channels
print(bad_channel_list)
# 1000 Hz for all runs (every entry in the recorded output is 1000.0)
print(sampling_freq_list)
# each entry is (n_channels, n_samples); the number of samples differs between recordings
print(data_shape_list)    # NOTE(review): the visible output shows 127 channels per recording -- confirm whether the channel count really varies

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0]
[(127, 661520), (127, 637720), (127, 670760), (127, 704100), (

In [19]:
def resample_time_series(data, original_fs, target_fs):
    """Resample a multi-channel time series from ``original_fs`` to ``target_fs``.

    Parameters
    ----------
    data : numpy.ndarray of shape (T, C)
        Samples along axis 0, channels along axis 1.
    original_fs : float
        Sampling rate of ``data``.
    target_fs : float
        Desired sampling rate.

    Returns
    -------
    numpy.ndarray of shape (int(T * target_fs / original_fs), C), dtype float64
        The resampled series; the output length is truncated to an integer.

    Raises
    ------
    ValueError
        If either sampling rate is not strictly positive.
    """
    if original_fs <= 0 or target_fs <= 0:
        raise ValueError("original_fs and target_fs must be positive")

    T, C = data.shape
    new_length = int(T * target_fs / original_fs)

    # Too few input samples to produce a single output sample.
    if new_length == 0:
        return np.zeros((0, C))

    # scipy.signal.resample works along a chosen axis, so all channels are
    # resampled in one call instead of one Python-level call per channel.
    resampled_data = resample(data, new_length, axis=0)

    # Always hand back float64, as the preallocated-buffer version did.
    return np.asarray(resampled_data, dtype=np.float64)

def split_eeg_segments(data, segment_length=128):
    """Cut a (T, C) recording into consecutive, non-overlapping windows.

    Parameters
    ----------
    data : numpy.ndarray of shape (T, C)
        Samples along axis 0, channels along axis 1.
    segment_length : int, default 128
        Number of samples per window.

    Returns
    -------
    numpy.ndarray of shape (T // segment_length, segment_length, C)
        Trailing samples that do not fill a complete window are dropped.
    """
    n_samples, n_channels = data.shape
    # Only whole windows are kept; the remainder is discarded.
    n_windows, _ = divmod(n_samples, segment_length)
    usable = data[: n_windows * segment_length]
    return usable.reshape((n_windows, segment_length, n_channels))

In [22]:
# Directory that receives one feature file per subject.
feature_path = 'Processed/PEARL-Neuro-19/Feature'
os.makedirs(feature_path, exist_ok=True)

# The 19 channels kept for every recording.
# For here, T7, T8 is close to T3, T4; P7, P8 is the same to T5, T6;
# So we use T7, T8, P7, P8 to replace T3, T4, T5, T6
# NOTE(review): confirm that raw.pick() returns the channels in the order listed
# here for the installed MNE version; the saved channel axis follows get_data().
standard_channels = ['Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T7', 'C3',
                     'Cz', 'C4', 'T8', 'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'O2']

# Subject IDs are handed out sequentially, so the iteration order decides which
# subject gets which ID. os.listdir order is arbitrary; sorting makes the
# mapping reproducible.
sub_id = 1
for sub in sorted(os.listdir(root)):
    sub_path = os.path.join(root, sub, 'eeg/')
    # Skip anything that is not a subject folder containing an eeg/ directory.
    if not sub.startswith('sub-') or not os.path.isdir(sub_path):
        continue
    print(sub_path)
    for file in sorted(os.listdir(sub_path)):
        # Only the resting-state run is processed. If a folder held several
        # matching headers, each would overwrite the same feature file.
        if '.vhdr' in file and 'rest' in file:
            file_path = os.path.join(sub_path, file)
            raw = mne.io.read_raw_brainvision(file_path, preload=True)
            freq = raw.info['sfreq']
            raw.pick(standard_channels)
            # Transpose to (samples, channels), the layout both helpers expect.
            data = raw.get_data().T
            print("Raw data shape ", data.shape)
            data = resample_time_series(data, freq, SAMPLE_RATE)
            feature_array = split_eeg_segments(data, SAMPLE_LEN)
            print("Downsampling and segmented data shape ", feature_array.shape)
            np.save(os.path.join(feature_path, 'feature_{:02d}.npy'.format(sub_id)), feature_array)
    sub_id += 1
    # Separator is printed once per subject only, not for unrelated directory entries.
    print("-------------------------------------\n")

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

PEARL-Neuro/sub-01\eeg/
Extracting parameters from PEARL-Neuro/sub-01\eeg/sub-01_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 661519  =      0.000 ...   661.519 secs...
Raw data shape  (661520, 19)
Downsampling and segmented data shape  (661, 128, 19)
-------------------------------------

PEARL-Neuro/sub-02\eeg/
Extracting parameters from PEARL-Neuro/sub-02\eeg/sub-02_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 637719  =      0.000 ...   637.719 secs...
Raw data shape  (637720, 19)
Downsampling and segmented data shape  (637, 128, 19)
-------------------------------------

PEARL-Neuro/sub-03\eeg/
Extracting parameters from PEARL-Neuro/sub-03\eeg/sub-03_task-rest_eeg.vhdr...
Setting channel info structure.

In [23]:
# Check the saved feature files by printing the shape of each stored array.

path = 'Processed/PEARL-Neuro-19/Feature/'

# Sorted so the shapes are listed in file-name order; os.listdir order is arbitrary.
for file in sorted(os.listdir(path)):
    # Ignore anything in the directory that is not a saved array.
    if not file.endswith('.npy'):
        continue
    sub_path = os.path.join(path, file)
    # Memory-mapping avoids reading the whole array just to inspect its shape.
    print(np.load(sub_path, mmap_mode='r').shape)

(661, 128, 19)
(637, 128, 19)
(670, 128, 19)
(704, 128, 19)
(644, 128, 19)
(632, 128, 19)
(734, 128, 19)
(648, 128, 19)
(693, 128, 19)
(640, 128, 19)
(647, 128, 19)
(656, 128, 19)
(636, 128, 19)
(674, 128, 19)
(646, 128, 19)
(629, 128, 19)
(669, 128, 19)
(639, 128, 19)
(617, 128, 19)
(626, 128, 19)
(756, 128, 19)
(640, 128, 19)
(635, 128, 19)
(660, 128, 19)
(625, 128, 19)
(698, 128, 19)
(644, 128, 19)
(689, 128, 19)
(664, 128, 19)
(628, 128, 19)
(646, 128, 19)
(625, 128, 19)
(658, 128, 19)
(726, 128, 19)
(652, 128, 19)
(655, 128, 19)
(730, 128, 19)
(664, 128, 19)
(650, 128, 19)
(641, 128, 19)
(643, 128, 19)
(637, 128, 19)
(637, 128, 19)
(727, 128, 19)
(659, 128, 19)
(757, 128, 19)
(657, 128, 19)
(636, 128, 19)
(645, 128, 19)
(630, 128, 19)
(835, 128, 19)
(636, 128, 19)
(675, 128, 19)
(650, 128, 19)
(628, 128, 19)
(661, 128, 19)
(642, 128, 19)
(648, 128, 19)
(807, 128, 19)
(636, 128, 19)
(644, 128, 19)
(631, 128, 19)
(666, 128, 19)
(640, 128, 19)
(627, 128, 19)
(663, 128, 19)
(664, 128,