In [1]:
import os
import mne
import numpy as np
import pandas as pd
from scipy.signal import resample
import h5py
import mat73
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Target sampling rate in Hz; recordings are resampled to this rate before segmentation.
SAMPLE_RATE = 128  # fs
# Number of samples per saved segment (128 samples at 128 Hz is one second of signal).
SAMPLE_LEN = 128   # T

In [2]:
# Root directory of the raw AD-Auditory dataset (relative to the notebook's working directory).
root = 'AD-Auditory/'
# participants.tsv is a tab-separated table with one row per subject.
participants_path = os.path.join(root, 'participants.tsv')
participants = pd.read_csv(participants_path, sep='\t')
# Bare last expression so the notebook renders the table.
participants

Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-01,Male,65,Normal,28
1,sub-02,Female,70,Mild AD,23
2,sub-03,Male,75,Normal,27
3,sub-04,Male,82,Mild AD,21
4,sub-05,Male,75,Mild AD,21
5,sub-06,Female,69,-,-
6,sub-07,Male,89,Mild AD,19
7,sub-08,Male,70,Normal,24
8,sub-09,Female,57,Normal,26
9,sub-10,Male,81,Normal,28


## Labels

In [3]:
# One row per participant: column 0 will hold the class label, column 1 the subject id.
# The array is uninitialised here; every entry is written by the labelling loop.
n_participants = len(participants)
labels = np.empty((n_participants, 2), dtype=np.int32)
labels.shape

(35, 2)

In [4]:
# Fill `labels` with one (class label, subject id) row per participant.
# Class encoding: 0 = Normal, 1 = MCI, 2 = AD (also matches 'Mild AD'), -1 = unlabeled.
# The keywords are tested in this order and the first substring match wins.
group_codes = (('Normal', 0), ('MCI', 1), ('AD', 2))

# Read the diagnosis from the named 'Group' column rather than a positional index,
# and cast to str so a missing value cannot raise on the `in` test.
for i, group in enumerate(participants['Group'].astype(str)):
    # Anything that matches no keyword (e.g. '-' for sub-06 and sub-13) is marked -1.
    labels[i, 0] = next((code for keyword, code in group_codes if keyword in group), -1)
    # Subject ids are 1-based and follow the row order of participants.tsv.
    subject_id = i + 1
    labels[i, 1] = subject_id

In [5]:
# Persist the (label, subject id) table next to the processed features.
label_path = 'Processed/AD-Auditory/Label'
# exist_ok=True creates the directory tree if needed without a separate existence check.
os.makedirs(label_path, exist_ok=True)
np.save(os.path.join(label_path, 'label.npy'), labels)

In [6]:
# Read the saved label file back and display it as a check on what was written.
np.load('Processed/AD-Auditory/Label/label.npy')

array([[ 0,  1],
       [ 2,  2],
       [ 0,  3],
       [ 2,  4],
       [ 2,  5],
       [-1,  6],
       [ 2,  7],
       [ 0,  8],
       [ 0,  9],
       [ 0, 10],
       [ 2, 11],
       [ 0, 12],
       [-1, 13],
       [ 2, 14],
       [ 2, 15],
       [ 2, 16],
       [ 1, 17],
       [ 1, 18],
       [ 1, 19],
       [ 2, 20],
       [ 2, 21],
       [ 0, 22],
       [ 2, 23],
       [ 2, 24],
       [ 0, 25],
       [ 0, 26],
       [ 1, 27],
       [ 1, 28],
       [ 2, 29],
       [ 2, 30],
       [ 0, 31],
       [ 1, 32],
       [ 2, 33],
       [ 2, 34],
       [ 2, 35]])

## Features

In [7]:
def resample_time_series(data, original_fs, target_fs):
    """Resample a multichannel time series from ``original_fs`` to ``target_fs``.

    Parameters
    ----------
    data : np.ndarray
        2-D array of shape (T, C): T time samples by C channels.
    original_fs : int or float
        Sampling rate of ``data``.
    target_fs : int or float
        Desired sampling rate.

    Returns
    -------
    np.ndarray
        float64 array of shape (new_length, C), where
        ``new_length = int(T * target_fs / original_fs)`` (truncated).
    """
    T, C = data.shape
    new_length = int(T * target_fs / original_fs)

    # Nothing to resample when there are no channels; keep the documented shape.
    if C == 0:
        return np.zeros((new_length, 0))

    # scipy.signal.resample works along an axis, so all channels are handled
    # in one call instead of a Python loop over columns.
    resampled_data = resample(data, new_length, axis=0)

    # Always hand back float64, whatever dtype the input had.
    return np.asarray(resampled_data, dtype=np.float64)

def split_eeg_segments(data, segment_length=128, half_overlap=False):
    """Cut a (T, C) recording into fixed-length windows along the time axis.

    Parameters
    ----------
    data : np.ndarray
        2-D array of shape (T, C): T time samples by C channels.
    segment_length : int, default 128
        Number of samples in each segment.
    half_overlap : bool, default False
        If True, consecutive segments start ``segment_length // 2`` samples
        apart (50% overlap); otherwise they are back to back.

    Returns
    -------
    np.ndarray
        float64 array of shape (num_segments, segment_length, C). A trailing
        remainder shorter than ``segment_length`` is dropped, and an input
        shorter than one segment yields zero segments.
    """
    T, C = data.shape

    if half_overlap:
        # Keep the step at least 1 so segment_length == 1 does not divide by zero.
        step = max(1, segment_length // 2)
    else:
        step = segment_length

    # Clamp at zero: for short inputs the floor division can go below -1,
    # which would otherwise ask numpy for a negative-sized array.
    num_segments = max(0, (T - segment_length) // step + 1)
    segments = np.zeros((num_segments, segment_length, C))

    for i in range(num_segments):
        start_idx = i * step
        segments[i] = data[start_idx:start_idx + segment_length]

    return segments

In [8]:
feature_path = 'Processed/AD-Auditory/Feature'
# exist_ok=True creates the directory tree if needed without a separate existence check.
os.makedirs(feature_path, exist_ok=True)

# Properties of the raw .fdt recordings, as reported by the .set headers.
RAW_SAMPLE_RATE = 250  # Hz
N_CHANNELS = 19        # monopolar channels

# For every subject folder: print the .set header info, then read the raw .fdt
# samples, resample them to SAMPLE_RATE, cut them into half-overlapping windows
# of SAMPLE_LEN samples, and save the result as feature_XX.npy.
#
# os.listdir returns entries in arbitrary order, so iterate in sorted order and
# take the subject number from the folder name ('sub-07' -> 7). A running
# counter would silently misalign features and labels if the listing order
# changed or a subject folder were missing.
for sub in sorted(os.listdir(root)):
    if 'sub-' not in sub:
        continue
    sub_id = int(sub.split('sub-')[1])
    sub_path = os.path.join(root, sub, 'eeg/')
    print(sub_path)
    for file in sorted(os.listdir(sub_path)):
        if file.endswith('.set'):
            # The .set header is opened with h5py (MATLAB 7.3 / HDF5 format).
            set_file_path = os.path.join(sub_path, file)
            print("Read .set file to see sub info", set_file_path)
            with h5py.File(set_file_path, 'r') as f:
                n_channels = f['nbchan'][()]
                n_points = f['pnts'][()]
                n_trials = f['trials'][()]
                srate = f['srate'][()]
                print("Number of channels:", n_channels)
                print("Number of points per trial:", n_points)
                print("Number of trials:", n_trials)
                print("Sampling rate:", srate)
        if file.endswith('.fdt'):
            print("Read .fdt file to load raw EEG data")
            fdt_file_path = os.path.join(sub_path, file)
            # Raw float32 samples, reshaped to (time, channel).
            data = np.fromfile(fdt_file_path, dtype=np.float32).reshape(-1, N_CHANNELS)
            # 250Hz, 19 monopolar channels, no 'T3' and 'T4', only 'T7' and 'T8', which are same as 'T3' and 'T4'
            print("Raw EEG data shape:", data.shape)
            data = resample_time_series(data, RAW_SAMPLE_RATE, SAMPLE_RATE)
            feature_array = split_eeg_segments(data, SAMPLE_LEN, half_overlap=True)
            print("Downsampling and segmented data shape ", feature_array.shape)
            np.save(os.path.join(feature_path, 'feature_{:02d}.npy'.format(sub_id)), feature_array)
            print("\n")
    print("--------------------------------")

AD-Auditory/sub-01\eeg/
Read .fdt file to load raw EEG data
Raw EEG data shape: (87500, 19)
Downsampling and segmented data shape  (699, 128, 19)


Read .set file to see sub info AD-Auditory/sub-01\eeg/sub-01_task-40HzAuditoryEntrainment_eeg.set
Number of channels: [[19.]]
Number of points per trial: [[87500.]]
Number of trials: [[1.]]
Sampling rate: [[250.]]
--------------------------------
AD-Auditory/sub-02\eeg/
Read .fdt file to load raw EEG data
Raw EEG data shape: (87500, 19)
Downsampling and segmented data shape  (699, 128, 19)


Read .set file to see sub info AD-Auditory/sub-02\eeg/sub-02_task-40HzAuditoryEntrainment_eeg.set
Number of channels: [[19.]]
Number of points per trial: [[87500.]]
Number of trials: [[1.]]
Sampling rate: [[250.]]
--------------------------------
AD-Auditory/sub-03\eeg/
Read .fdt file to load raw EEG data
Raw EEG data shape: (87500, 19)
Downsampling and segmented data shape  (699, 128, 19)


Read .set file to see sub info AD-Auditory/sub-03\eeg/sub-03_t

In [9]:
# Check the saved feature files by printing the shape of each array.

path = 'Processed/AD-Auditory/Feature/'

# Sorted so the printed order is deterministic; os.listdir's own order is arbitrary.
for file in sorted(os.listdir(path)):
    if not file.endswith('.npy'):
        # np.load would fail on anything that is not a saved array.
        continue
    sub_path = os.path.join(path, file)
    print(np.load(sub_path).shape)

(699, 128, 19)
(699, 128, 19)
(699, 128, 19)
(699, 128, 19)
(699, 128, 19)
(699, 128, 19)
(699, 128, 19)
(699, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
(1179, 128, 19)
