### Import

In [1]:
import mne
import os
import pandas as pd
from scipy.io import loadmat

In [3]:
# Tab-separated metadata table; get_data2 looks up each recording's
# 'sampling rate' in it, keyed by the 'AD or MCI or CONTROL' column.
text = pd.read_table('readme.txt', sep='\t')

Function to extract data from .mat files, resample recordings that were sampled at 256 Hz down to 128 Hz, and return the result in MNE format.

In [4]:
def get_data2(filepath, n_channels=19, chan_names = ["Fp1", "Fp2", "F7", "Fz", "F3", "F4", "F8", "T3", "C3", "Cz", "C4", "T4", "T5", "P3", "Pz", "P4", "T6", "O1", "O2"]):
    """
    Load one EEG recording from a .mat file and wrap it in an MNE RawArray.

    The sampling rate is read from the module-level `text` table (loaded from
    readme.txt): the row whose 'AD or MCI or CONTROL' value equals the name of
    the folder that contains `filepath` supplies the 'sampling rate'.

    Two .mat layouts are handled:
    - variable 'export': the array is transposed before use (assumed to be
      stored as samples x channels - TODO confirm); if the sampling rate is
      256 Hz the recording is resampled to 128 Hz.
    - variable 'segmenty': the first `n_channels` rows are used as-is and no
      resampling is applied.

    Input:
    filepath: path to the .mat file
    n_channels: number of channels in the data
    chan_names: names of the channels

    Output:
    info: mne.Info object created before any resampling
          (NOTE(review): prefer raw.info for the effective sampling rate)
    raw: mne.io.RawArray object

    Raises:
    ValueError: readme.txt does not hold exactly one sampling rate for the folder
    KeyError: the file contains neither 'export' nor 'segmenty'
    """
    dat = loadmat(filepath)

    # Look up the sampling frequency from readme.txt using the *argument*,
    # not a loop variable that happens to exist in the notebook namespace.
    folder = os.path.basename(os.path.dirname(filepath))
    rates = text.loc[text['AD or MCI or CONTROL'] == folder, 'sampling rate']
    if len(rates) != 1:
        raise ValueError(
            f"expected exactly one 'sampling rate' entry for {folder!r} in readme.txt, found {len(rates)}"
        )
    sfreq = int(rates.iloc[0])

    # The channel description is identical for both file layouts.
    info = mne.create_info(ch_names=chan_names[:n_channels], sfreq=sfreq, ch_types=['eeg'] * n_channels)
    info.set_montage('standard_1020')

    # Choose the layout by key instead of a bare except, so genuine errors
    # (montage, shape mismatch, resampling) surface instead of being masked.
    if 'export' in dat:
        raw = mne.io.RawArray(dat['export'].T, info, first_samp=0, copy='auto', verbose=None)
        if info['sfreq'] == 256.0:
            raw.resample(128, npad='auto')  # resample to 128Hz
    elif 'segmenty' in dat:
        raw = mne.io.RawArray(dat['segmenty'][:n_channels], info, first_samp=0, copy='auto', verbose=None)
    else:
        raise KeyError(f"{filepath}: neither 'export' nor 'segmenty' found in .mat file")

    return info, raw

Multiple recording sessions for each participant are combined into one. The data for each participant is then split into equal-length segments of 49 seconds each. The resulting .fif files are saved in the folder 'mne'.

*Note*. AD and control subjects have multiple recording sessions, whereas control subjects with files ending with "fir.mat" have single recording sessions. Thus data extraction from the two differs. 


In [9]:
# Folder that receives the segmented .fif files. It lives inside the scanned
# data directory, so it must be excluded from the subject folders below.
OUT_DIR = 'dataset2/data/mne'


def _export_segments(raw, file_stem, subject_label=None):
    """
    Cut `raw` into consecutive 49 s windows, save each window, and tabulate it.

    Windows start at second 1 (the first second is skipped) and a trailing
    remainder shorter than 49 s is dropped, so recordings shorter than 51 s
    produce no output. Each window is written to
    `<OUT_DIR>/<file_stem>_<k>_raw.fif` (overwriting an existing file).

    raw: mne Raw object to segment (not modified; each window is a copy)
    file_stem: prefix for the output file names
    subject_label: value for the 'subject' column; defaults to
                   `<file_stem>_<k>` when None

    Returns a list with one DataFrame per window: one column per channel plus
    'subject' and 'time'.
    """
    # Use the recording's own sampling rate rather than assuming 128 Hz.
    n_seconds = int(len(raw) / raw.info['sfreq'])
    edges = list(range(1, n_seconds, 49))
    frames = []
    for seg_no, (start, stop) in enumerate(zip(edges[:-1], edges[1:]), start=1):
        # NOTE(review): crop() keeps the end sample by default in current MNE,
        # so adjacent windows presumably share their boundary sample - confirm
        # whether include_tmax=False is wanted.
        segment = raw.copy().crop(start, stop)
        segment.save(f"{OUT_DIR}/{file_stem}_{seg_no}_raw.fif", overwrite=True)
        frame = pd.DataFrame(segment.get_data().T, columns=segment.ch_names)
        frame['subject'] = subject_label if subject_label is not None else f'{file_stem}_{seg_no}'
        frame['time'] = segment.times
        frames.append(frame)
    return frames


# Sorted so that session order (and therefore the concatenated signal) does not
# depend on the arbitrary order in which the filesystem lists entries.
subfolders = sorted(
    f.path for f in os.scandir('dataset2/data')
    if f.is_dir() and f.name != os.path.basename(OUT_DIR)
)

os.makedirs(OUT_DIR, exist_ok=True)

# Collect per-segment frames and concatenate once at the end; growing a
# DataFrame inside the loop is quadratic and DataFrame.append no longer exists
# in pandas 2.x.
frames = []

for name in subfolders:
    children = sorted(f.path for f in os.scandir(name) if f.is_file())
    if not children:
        continue  # nothing recorded for this folder

    if os.path.basename(name) != 'fir':
        # One folder per subject: combine its recording sessions into one signal.
        session_raws = []
        for file in children:
            _, raw = get_data2(file)
            session_raws.append(raw)
        # concatenate_raws extends the first raw in place; the list is not
        # reused afterwards, so no defensive copies are needed.
        raw = mne.concatenate_raws(session_raws) if len(session_raws) > 1 else session_raws[0]
        frames.extend(_export_segments(raw, os.path.basename(name)))
    else:
        # 'fir' folder: every file is a separate single-session recording.
        for file in children:
            _, raw = get_data2(file)
            stem = os.path.basename(file[:-4])  # strip the 4-character extension
            # NOTE(review): unlike the multi-session branch, the label carries no
            # segment suffix, so all segments of one file share a 'subject' value
            # - confirm whether that is intended.
            frames.extend(_export_segments(raw, stem, subject_label=stem))

data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    


Creating RawArray with float64 data, n_channels=19, n_times=10465
    Range : 0 ... 10464 =      0.000 ...    81.750 secs
Ready.
Creating RawArray with float64 data, n_channels=19, n_times=3576
    Range : 0 ... 3575 =      0.000 ...    27.930 secs
Ready.
Creating RawArray with float64 data, n_channels=19, n_times=10235
    Range : 0 ... 10234 =      0.000 ...    79.953 secs
Ready.
Creating RawArray with float64 data, n_channels=19, n_times=8534
    Range : 0 ... 8533 =      0.000 ...    66.664 secs
Ready.
Writing /home/zarishabbas/Documents/final/dataset2/data/mne/AD58_1_raw.fif
Closing /home/zarishabbas/Documents/final/dataset2/data/mne/AD58_1_raw.fif
[done]
Writing /home/zarishabbas/Documents/final/dataset2/data/mne/AD58_2_raw.fif
Closing /home/zarishabbas/Documents/final/dataset2/data/mne/AD58_2_raw.fif
[done]
Writing /home/zarishabbas/Documents/final/dataset2/data/mne/AD58_3_raw.fif
Closing /home/zarishabbas/Documents/final/dataset2/data/mne/AD58_3_raw.fif
[done]
Writing /home/zar

Save the data in csv format.

In [12]:
data.to_csv('dataset2/data/raw_mne_data.csv', index=False)