In [1]:
import os
import mne
import numpy as np
import pandas as pd
from scipy import interpolate
import matplotlib.pyplot as plt
import scipy
from scipy.signal import resample
import warnings
warnings.filterwarnings("ignore")


In [None]:
# Target sampling rate (fs): passed as `target_fs` when the recordings are resampled below.
SAMPLE_RATE = 128  # fs
# Segment length in samples (T): passed as `segment_length` when the resampled signal is split.
SAMPLE_LEN = 128   # T

In [2]:
# Dataset root directory (relative to the notebook's working directory)
root = 'PEARL-Neuro/'
# Tab-separated participant table located directly under the dataset root
participants_path = os.path.join(root, 'participants.tsv')
# read_table parses tab-delimited text by default
participants = pd.read_table(participants_path)
participants

Unnamed: 0,participant_id,second_phase,session_order,APOE_rs429358,APOE_rs7412,APOE_haplotype,PICALM_rs3851179,age,sex,education,...,lymphocytes_%,monocytes_%,eosinophils_%,basophils_%,total_cholesterol,cholesterol_HDL,non-HDL_cholesterol,LDL_cholesterol,triglycerides,HSV_r
0,sub-01,1,1.0,T/T,C/C,e3/e3,A/A,59,0,3.0,...,26.6,10.4,0.8,0.3,174.3,37.9,136.4,100.48,179.6,1.0
1,sub-02,1,1.0,T/T,C/C,e3/e3,G/A,56,0,3.0,...,30.9,12.2,2.9,1.6,163.4,46.1,117.3,84.56,163.7,1.0
2,sub-03,1,0.0,T/T,C/C,e3/e3,G/A,52,0,3.0,...,36.1,9.0,2.1,0.6,152.9,43.3,109.6,100.88,43.6,1.0
3,sub-04,1,0.0,T/T,C/C,e3/e3,A/A,52,1,3.0,...,35.5,12.1,1.8,0.8,253.8,83.2,170.6,154.30,81.5,0.0
4,sub-05,1,0.0,T/T,C/C,e3/e3,G/A,52,1,3.0,...,32.9,8.1,1.2,0.9,283.1,66.1,217.0,188.28,143.6,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,sub-95,0,,T/T,C/C,e3/e3,G/G,51,0,1.0,...,,,,,,,,,,
188,sub-96,0,,T/T,C/C,e3/e3,G/A,55,1,3.0,...,,,,,,,,,,
189,sub-97,0,,T/T,C/C,e3/e3,G/A,58,1,3.0,...,,,,,,,,,,
190,sub-98,0,,T/T,C/T,e3/e2,G/A,50,1,3.0,...,,,,,,,,,,


## Labels

In [3]:
# EEG recordings exist for only 80 subjects, and every one of them is healthy.
# One row per subject, two int32 columns; the values are filled in by the next cell.
label_shape = (80, 2)
labels = np.empty(label_shape, dtype=np.dtype('int32'))
labels.shape

(80, 2)

In [4]:
# Column 0 is the class label: 0 for every row (all EEG subjects are healthy).
labels[:, 0] = 0
# Column 1 is the 1-based subject id: 1, 2, ..., len(labels).
# Vectorized assignment replaces the manual counter loop and its unused loop variable.
labels[:, 1] = np.arange(1, len(labels) + 1)

In [5]:
# Output directory for the label array
label_path = 'Processed/PEARL-Neuro/Label'
# exist_ok=True creates the directory tree only when needed, without a
# separate existence check that could race with another process.
os.makedirs(label_path, exist_ok=True)
# Persist the (n_subjects, 2) label array as label.npy
np.save(label_path + '/label.npy', labels)

In [6]:
# Reload the saved label file and display it to confirm it was written as expected
np.load('Processed/PEARL-Neuro/Label/label.npy')

array([[ 0,  1],
       [ 0,  2],
       [ 0,  3],
       [ 0,  4],
       [ 0,  5],
       [ 0,  6],
       [ 0,  7],
       [ 0,  8],
       [ 0,  9],
       [ 0, 10],
       [ 0, 11],
       [ 0, 12],
       [ 0, 13],
       [ 0, 14],
       [ 0, 15],
       [ 0, 16],
       [ 0, 17],
       [ 0, 18],
       [ 0, 19],
       [ 0, 20],
       [ 0, 21],
       [ 0, 22],
       [ 0, 23],
       [ 0, 24],
       [ 0, 25],
       [ 0, 26],
       [ 0, 27],
       [ 0, 28],
       [ 0, 29],
       [ 0, 30],
       [ 0, 31],
       [ 0, 32],
       [ 0, 33],
       [ 0, 34],
       [ 0, 35],
       [ 0, 36],
       [ 0, 37],
       [ 0, 38],
       [ 0, 39],
       [ 0, 40],
       [ 0, 41],
       [ 0, 42],
       [ 0, 43],
       [ 0, 44],
       [ 0, 45],
       [ 0, 46],
       [ 0, 47],
       [ 0, 48],
       [ 0, 49],
       [ 0, 50],
       [ 0, 51],
       [ 0, 52],
       [ 0, 53],
       [ 0, 54],
       [ 0, 55],
       [ 0, 56],
       [ 0, 57],
       [ 0, 58],
       [ 0, 59

## Features

In [7]:
# Sanity-check the recordings: bad channels, sampling frequency and data shape.
# Only the resting-state data is checked.
bad_channel_list, sampling_freq_list, data_shape_list = [], [], []
# os.listdir returns entries in arbitrary order; sorting keeps the three lists
# aligned with subject order on every platform.
for sub in sorted(os.listdir(root)):
    sub_path = os.path.join(root, sub, 'eeg/')
    # Skip dataset-level files and any entry that has no eeg/ folder.
    if not sub.startswith('sub-') or not os.path.isdir(sub_path):
        continue
    for file in sorted(os.listdir(sub_path)):
        if '.vhdr' in file and 'rest' in file:
            file_path = os.path.join(sub_path, file)
            # Only metadata is inspected here, so the signal is not loaded into
            # memory (preload=False); the header provides everything needed.
            raw = mne.io.read_raw_brainvision(file_path, preload=False)
            # Channels marked as bad in the recording's info
            bad_channel_list.append(raw.info['bads'])
            # Sampling frequency of the recording
            sampling_freq_list.append(raw.info['sfreq'])
            # (n_channels, n_samples) -- the same shape raw.get_data() would return
            data_shape_list.append((len(raw.ch_names), raw.n_times))

Extracting parameters from PEARL-Neuro/sub-01\eeg/sub-01_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 661519  =      0.000 ...   661.519 secs...
Extracting parameters from PEARL-Neuro/sub-02\eeg/sub-02_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 637719  =      0.000 ...   637.719 secs...
Extracting parameters from PEARL-Neuro/sub-03\eeg/sub-03_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 670759  =      0.000 ...   670.759 secs...
Extracting parameters from PEARL-Neuro/sub-04\eeg/sub-04_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 704099  =      0.000 ...   704.099 secs...
Extracting parameters from PEARL-Neuro/sub-05\eeg/sub-05_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 644859  =      0.000 ...   644.859 secs...
Extracting parameters from PEARL-Neuro/sub-06\eeg/sub-06_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 632819  =      0.000 ..

In [8]:
# No recording has channels marked as bad (every printed entry is an empty list)
print(bad_channel_list)
# Sampling frequency per recording; the printed values are 1000.0 Hz (not 500 Hz)
print(sampling_freq_list)
# Shapes are (n_channels, n_samples); the number of samples differs between recordings
print(data_shape_list)    # NOTE(review): the visible output shows 127 channels everywhere -- confirm whether any recording really deviates

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0]
[(127, 661520), (127, 637720), (127, 670760), (127, 704100), (

In [9]:
def resample_time_series(data, original_fs, target_fs):
    """Resample a multi-channel time series from ``original_fs`` to ``target_fs``.

    Parameters
    ----------
    data : array of shape (T, C)
        Time along axis 0, channels along axis 1.
    original_fs : number
        Sampling rate of ``data``.
    target_fs : number
        Desired sampling rate.

    Returns
    -------
    numpy.ndarray of shape (int(T * target_fs / original_fs), C)
        Each channel resampled independently with ``scipy.signal.resample``.
    """
    n_samples, n_channels = data.shape
    # Output length is truncated towards zero, not rounded.
    target_len = int(n_samples * target_fs / original_fs)

    out = np.zeros((target_len, n_channels))
    # Iterating over the transpose yields one channel (column of `data`) at a time.
    for ch, channel in enumerate(data.T):
        out[:, ch] = resample(channel, target_len)

    return out

def split_eeg_segments(data, segment_length=128):
    """Cut a (T, C) signal into consecutive, non-overlapping segments.

    Parameters
    ----------
    data : array of shape (T, C)
        Time along axis 0, channels along axis 1.
    segment_length : int, default 128
        Number of samples per segment.

    Returns
    -------
    array of shape (T // segment_length, segment_length, C)
        Trailing samples that do not fill a whole segment are dropped.
    """
    n_samples, n_channels = data.shape
    n_full = n_samples // segment_length
    # Keep only the samples that fit into complete segments.
    usable = n_full * segment_length
    trimmed = data[:usable]
    return trimmed.reshape(n_full, segment_length, n_channels)

In [10]:
# Output directory for the per-subject feature arrays
feature_path = 'Processed/PEARL-Neuro/Feature'
os.makedirs(feature_path, exist_ok=True)

# Subjects are numbered 1, 2, ... in the order they are visited. os.listdir
# returns entries in arbitrary order, so the listing is sorted to make the
# subject -> feature file mapping deterministic across platforms.
sub_id = 1
for sub in sorted(os.listdir(root)):
    sub_path = os.path.join(root, sub, 'eeg/')
    # Skip dataset-level files and entries without an eeg/ folder; they do not
    # consume a subject id and no longer print a stray separator line.
    if not sub.startswith('sub-') or not os.path.isdir(sub_path):
        continue
    print(sub_path)
    for file in sorted(os.listdir(sub_path)):
        # Only the resting-state BrainVision header is processed.
        if '.vhdr' in file and 'rest' in file:
            file_path = os.path.join(sub_path, file)
            raw = mne.io.read_raw_brainvision(file_path, preload=True)
            freq = raw.info['sfreq']
            # get_data() is (channels, samples); transpose to (samples, channels)
            data = raw.get_data().T
            print("Raw data shape ", data.shape)
            # Resample to SAMPLE_RATE, then cut into segments of SAMPLE_LEN samples
            data = resample_time_series(data, freq, SAMPLE_RATE)
            feature_array = split_eeg_segments(data, SAMPLE_LEN)
            print("Downsampling and segmented data shape ", feature_array.shape)
            np.save(feature_path + '/feature_{:02d}.npy'.format(sub_id), feature_array)
    # The id advances once per subject folder, whether or not a resting-state
    # file was found (same numbering rule as before).
    sub_id += 1
    print("-------------------------------------\n")

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

-------------------------------------

PEARL-Neuro/sub-01\eeg/
Extracting parameters from PEARL-Neuro/sub-01\eeg/sub-01_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 661519  =      0.000 ...   661.519 secs...
Raw data shape  (661520, 127)
Downsampling and segmented data shape  (661, 128, 127)
-------------------------------------

PEARL-Neuro/sub-02\eeg/
Extracting parameters from PEARL-Neuro/sub-02\eeg/sub-02_task-rest_eeg.vhdr...
Setting channel info structure...
Reading 0 ... 637719  =      0.000 ...   637.719 secs...
Raw data shape  (637720, 127)
Downsampling and segmented data shape  (637, 128, 127)
-------------------------------------

PEARL-Neuro/sub-03\eeg/
Extracting parameters from PEARL-Neuro/sub-03\eeg/sub-03_task-rest_eeg.vhdr...
Setting channel info struct

In [11]:
# Sanity check: load every saved feature file and print its array shape

path = 'Processed/PEARL-Neuro/Feature/'

for file in os.listdir(path):
    sub_path = os.path.join(path, file)
    saved_features = np.load(sub_path)
    print(saved_features.shape)

(661, 128, 127)
(637, 128, 127)
(670, 128, 127)
(704, 128, 127)
(644, 128, 127)
(632, 128, 127)
(734, 128, 127)
(648, 128, 127)
(693, 128, 127)
(640, 128, 127)
(647, 128, 127)
(656, 128, 127)
(636, 128, 127)
(674, 128, 127)
(646, 128, 127)
(629, 128, 127)
(669, 128, 127)
(639, 128, 127)
(617, 128, 127)
(626, 128, 127)
(756, 128, 127)
(640, 128, 127)
(635, 128, 127)
(660, 128, 127)
(625, 128, 127)
(698, 128, 127)
(644, 128, 127)
(689, 128, 127)
(664, 128, 127)
(628, 128, 127)
(646, 128, 127)
(625, 128, 127)
(658, 128, 127)
(726, 128, 127)
(652, 128, 127)
(655, 128, 127)
(730, 128, 127)
(664, 128, 127)
(650, 128, 127)
(641, 128, 127)
(643, 128, 127)
(637, 128, 127)
(637, 128, 127)
(727, 128, 127)
(659, 128, 127)
(757, 128, 127)
(657, 128, 127)
(636, 128, 127)
(645, 128, 127)
(630, 128, 127)
(835, 128, 127)
(636, 128, 127)
(675, 128, 127)
(650, 128, 127)
(628, 128, 127)
(661, 128, 127)
(642, 128, 127)
(648, 128, 127)
(807, 128, 127)
(636, 128, 127)
(644, 128, 127)
(631, 128, 127)
(666, 12