In [17]:
import os
import mne
import numpy as np
import pandas as pd
from scipy.signal import resample
import warnings
warnings.filterwarnings("ignore")

In [18]:
SAMPLE_RATE = 128  # fs: target sampling rate, passed as target_fs to resample_time_series
SAMPLE_LEN = 128   # T: samples per segment, passed as segment_length to split_eeg_segments

In [19]:
# Dataset root, given relative to the notebook's working directory.
root = 'ADFTD/'
# participants.tsv is tab-separated; read_table uses a tab delimiter by default.
participants_path = os.path.join(root, 'participants.tsv')
participants = pd.read_table(participants_path)
participants

Unnamed: 0,participant_id,Gender,Age,Group,MMSE
0,sub-001,F,57,A,16
1,sub-002,F,78,A,22
2,sub-003,M,70,A,14
3,sub-004,F,67,A,20
4,sub-005,M,70,A,22
...,...,...,...,...,...
83,sub-084,F,71,F,24
84,sub-085,M,64,F,26
85,sub-086,M,49,F,26
86,sub-087,M,73,F,24


## Labels

In [20]:
# One row per participant and two int32 columns; the values are filled in by a later cell
# (column 0 receives the class label, column 1 the numeric subject id).
n_participants = len(participants)
labels = np.empty((n_participants, 2), dtype=np.int32)
labels.shape

(88, 2)

In [21]:
# Map the Group code found in participants.tsv to an integer class label.
# NOTE(review): presumably A = Alzheimer's, F = frontotemporal dementia, C = control —
# confirm against the dataset description.
label_map = {'A': 2, 'F': 1, 'C': 0}

# Fail loudly on a Group code without a mapping rather than writing an invalid value
# into the integer array.
unmapped = participants.loc[~participants['Group'].isin(list(label_map)), 'Group']
if not unmapped.empty:
    raise KeyError(f"Unmapped Group value(s) in participants.tsv: {unmapped.unique().tolist()}")

# Columns are addressed by name, so the result no longer depends on the column order
# of the TSV file.
# Column 0: class label.
labels[:, 0] = participants['Group'].map(label_map).to_numpy()
# Column 1: numeric subject id taken from the last three characters of participant_id
# (e.g. 'sub-001' -> 1).
labels[:, 1] = participants['participant_id'].str[-3:].astype(int).to_numpy()

In [22]:
# Directory that receives the saved label array.
label_path = 'Processed/ADFTD/Label'
# exist_ok=True creates the directory tree when missing and does nothing when it is
# already present, so no separate exists() check is needed.
os.makedirs(label_path, exist_ok=True)
np.save(label_path + '/label.npy', labels)

In [23]:
# Read the label file back from disk and display it to confirm the save worked.
saved_labels = np.load('Processed/ADFTD/Label/label.npy')
saved_labels

array([[ 2,  1],
       [ 2,  2],
       [ 2,  3],
       [ 2,  4],
       [ 2,  5],
       [ 2,  6],
       [ 2,  7],
       [ 2,  8],
       [ 2,  9],
       [ 2, 10],
       [ 2, 11],
       [ 2, 12],
       [ 2, 13],
       [ 2, 14],
       [ 2, 15],
       [ 2, 16],
       [ 2, 17],
       [ 2, 18],
       [ 2, 19],
       [ 2, 20],
       [ 2, 21],
       [ 2, 22],
       [ 2, 23],
       [ 2, 24],
       [ 2, 25],
       [ 2, 26],
       [ 2, 27],
       [ 2, 28],
       [ 2, 29],
       [ 2, 30],
       [ 2, 31],
       [ 2, 32],
       [ 2, 33],
       [ 2, 34],
       [ 2, 35],
       [ 2, 36],
       [ 0, 37],
       [ 0, 38],
       [ 0, 39],
       [ 0, 40],
       [ 0, 41],
       [ 0, 42],
       [ 0, 43],
       [ 0, 44],
       [ 0, 45],
       [ 0, 46],
       [ 0, 47],
       [ 0, 48],
       [ 0, 49],
       [ 0, 50],
       [ 0, 51],
       [ 0, 52],
       [ 0, 53],
       [ 0, 54],
       [ 0, 55],
       [ 0, 56],
       [ 0, 57],
       [ 0, 58],
       [ 0, 59

## Features

In [24]:
# Folder scanned below for per-subject 'sub-*' directories, each holding an 'eeg/' sub-folder.
derivatives_root = os.path.join(root, 'derivatives/')
derivatives_root

'ADFTD/derivatives/'

In [25]:
# Sanity check over every recording: collect the bad-channel list, the sampling
# frequency and the data shape of each .set file.
bad_channel_list, sampling_freq_list, data_shape_list = [], [], []
# os.listdir() returns entries in arbitrary order; sorting makes the three lists line up
# with ascending subject folder names regardless of platform.
for sub in sorted(os.listdir(derivatives_root)):
    sub_path = os.path.join(derivatives_root, sub, 'eeg/')
    # Only inspect subject folders that actually contain an eeg/ directory.
    if not (sub.startswith('sub-') and os.path.isdir(sub_path)):
        continue
    for file in sorted(os.listdir(sub_path)):
        # Match on the file extension, not on a substring anywhere in the name.
        if not file.endswith('.set'):
            continue
        file_path = os.path.join(sub_path, file)
        raw = mne.io.read_raw_eeglab(file_path, preload=True)
        # Channels flagged as bad in the recording's metadata.
        bad_channel_list.append(raw.info['bads'])
        # Sampling frequency stored in the recording's metadata.
        sampling_freq_list.append(raw.info['sfreq'])
        # Only the shape of the signal array is recorded here.
        data_shape_list.append(raw.get_data().shape)

In [26]:
# 0 bad channels
print(bad_channel_list)
# 500 Hz for all runs
print(sampling_freq_list)
# same number of channels & different timestamps
print(data_shape_list)

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
[500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0]
[(19, 299900), (19, 396550), (

In [27]:
def resample_time_series(data, original_fs, target_fs):
    """Resample a multi-channel time series from ``original_fs`` to ``target_fs``.

    Parameters
    ----------
    data : array-like of shape (T, C)
        ``T`` samples (rows) by ``C`` channels (columns).
    original_fs : float
        Sampling frequency of ``data``; must be positive.
    target_fs : float
        Desired sampling frequency; must be positive.

    Returns
    -------
    numpy.ndarray of shape (int(T * target_fs / original_fs), C), dtype float64
        The resampled series. The output length is truncated toward zero.

    Raises
    ------
    ValueError
        If either frequency is not positive or ``data`` is not 2-D.
    """
    if original_fs <= 0 or target_fs <= 0:
        raise ValueError("original_fs and target_fs must be positive")

    data = np.asarray(data)
    if data.ndim != 2:
        raise ValueError("data must be 2-D with shape (T, C)")

    T, C = data.shape
    new_length = int(T * target_fs / original_fs)
    if new_length == 0:
        # Nothing to resample to: return an empty (0, C) array instead of calling resample.
        return np.zeros((0, C))

    # scipy.signal.resample works along an axis, so all channels are handled in one call
    # instead of a Python loop over columns. The cast keeps the float64 output dtype.
    return np.asarray(resample(data, new_length, axis=0), dtype=float)

def split_eeg_segments(data, segment_length=128):
    """Cut a (T, C) array into consecutive, non-overlapping windows.

    Returns an array of shape (T // segment_length, segment_length, C). Trailing
    samples that do not fill a complete window are discarded.
    """
    n_samples, n_channels = data.shape
    n_windows, _leftover = divmod(n_samples, segment_length)
    # Keep only the leading samples that fill whole windows.
    usable = data[:n_windows * segment_length]
    return np.reshape(usable, (n_windows, segment_length, n_channels))

In [28]:
# Output directory for the per-subject feature arrays.
feature_path = 'Processed/ADFTD/Feature'
os.makedirs(feature_path, exist_ok=True)

# os.listdir() order is filesystem-dependent, so numbering subjects with a running
# counter can pair a recording with the wrong id. Iterate in sorted order and take the
# id from the folder name itself ('sub-001' -> 1).
for sub in sorted(os.listdir(derivatives_root)):
    sub_path = os.path.join(derivatives_root, sub, 'eeg/')
    # Skip anything that is not a 'sub-<digits>' folder with an eeg/ directory inside.
    if not (sub.startswith('sub-') and sub[4:].isdigit() and os.path.isdir(sub_path)):
        continue
    sub_id = int(sub[4:])
    print(sub_path)
    for file in sorted(os.listdir(sub_path)):
        # Match on the file extension, not on a substring anywhere in the name.
        if not file.endswith('.set'):
            continue
        file_path = os.path.join(sub_path, file)
        raw = mne.io.read_raw_eeglab(file_path, preload=True)
        # Transpose so rows are samples and columns are channels, the (T, C) layout
        # that resample_time_series and split_eeg_segments unpack.
        data = raw.get_data().T
        print("Raw data shape ", data.shape)
        # Use the recording's own sampling frequency rather than a hard-coded 500.
        data = resample_time_series(data, raw.info['sfreq'], SAMPLE_RATE)
        feature_array = split_eeg_segments(data, SAMPLE_LEN)
        print("Downsampling and segmented data shape ", feature_array.shape)
        # NOTE: the file name depends only on sub_id, so a second .set file in the same
        # folder would overwrite the first one.
        np.save(feature_path + '/feature_{:02d}.npy'.format(sub_id), feature_array)
    print("-------------------------------------\n")

ADFTD/derivatives/sub-001\eeg/
Raw data shape  (299900, 19)
Downsampling and segmented data shape  (149, 1024, 19)
-------------------------------------

ADFTD/derivatives/sub-002\eeg/
Raw data shape  (396550, 19)
Downsampling and segmented data shape  (198, 1024, 19)
-------------------------------------

ADFTD/derivatives/sub-003\eeg/
Raw data shape  (153050, 19)
Downsampling and segmented data shape  (76, 1024, 19)
-------------------------------------

ADFTD/derivatives/sub-004\eeg/
Raw data shape  (353050, 19)
Downsampling and segmented data shape  (176, 1024, 19)
-------------------------------------

ADFTD/derivatives/sub-005\eeg/
Raw data shape  (402050, 19)
Downsampling and segmented data shape  (201, 1024, 19)
-------------------------------------

ADFTD/derivatives/sub-006\eeg/
Raw data shape  (316200, 19)
Downsampling and segmented data shape  (158, 1024, 19)
-------------------------------------

ADFTD/derivatives/sub-007\eeg/
Raw data shape  (383210, 19)
Downsampling and 

In [29]:
# Spot-check the saved feature files by printing the shape of each stored array.
path = 'Processed/ADFTD/Feature/'

# Sorted so the shapes are printed in file-name order on every platform. Entries that
# are not .npy files are skipped instead of being handed to np.load.
for file in sorted(os.listdir(path)):
    if not file.endswith('.npy'):
        continue
    sub_path = os.path.join(path, file)
    print(np.load(sub_path).shape)

(149, 1024, 19)
(198, 1024, 19)
(76, 1024, 19)
(176, 1024, 19)
(201, 1024, 19)
(158, 1024, 19)
(191, 1024, 19)
(198, 1024, 19)
(153, 1024, 19)
(320, 1024, 19)
(192, 1024, 19)
(220, 1024, 19)
(209, 1024, 19)
(233, 1024, 19)
(225, 1024, 19)
(243, 1024, 19)
(210, 1024, 19)
(211, 1024, 19)
(229, 1024, 19)
(217, 1024, 19)
(230, 1024, 19)
(205, 1024, 19)
(208, 1024, 19)
(189, 1024, 19)
(171, 1024, 19)
(224, 1024, 19)
(206, 1024, 19)
(204, 1024, 19)
(184, 1024, 19)
(138, 1024, 19)
(287, 1024, 19)
(199, 1024, 19)
(176, 1024, 19)
(242, 1024, 19)
(185, 1024, 19)
(210, 1024, 19)
(194, 1024, 19)
(222, 1024, 19)
(212, 1024, 19)
(241, 1024, 19)
(221, 1024, 19)
(240, 1024, 19)
(207, 1024, 19)
(220, 1024, 19)
(212, 1024, 19)
(188, 1024, 19)
(201, 1024, 19)
(247, 1024, 19)
(195, 1024, 19)
(204, 1024, 19)
(188, 1024, 19)
(190, 1024, 19)
(195, 1024, 19)
(209, 1024, 19)
(203, 1024, 19)
(197, 1024, 19)
(199, 1024, 19)
(189, 1024, 19)
(196, 1024, 19)
(187, 1024, 19)
(200, 1024, 19)
(224, 1024, 19)
(201, 102