The following processes are applied to both filtered and unfiltered data:
1. Epochs of 2 seconds, with an overlap of 0.75 seconds, are extracted from the data.
2. Epochs are standardized.
3. Epochs that are bad (e.g. due to eye blinks) are removed.

The resulting data for both datasets is saved as "semiclean".

Further processing is done to make the data (potentially) more suitable for classification.
4. ICA is run on the data to remove artifacts.
5. The data is recomposed from the resulting components.

Four sets of data are saved as fif files, and as pandas dataframes.


In [75]:
#import 
import mne
import os
import glob
import pandas as pd
from mne.utils import ProgressBar
from mne.preprocessing import ICA
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
matplotlib.use('Qt5Agg')
import pickle
from autoreject import get_rejection_threshold

In [6]:
# Recordings found in the filtered-ref directory. glob.glob returns matches in
# arbitrary order, so sort them to keep the processing order reproducible.
files = sorted(glob.glob('dataset2/data/filtered-ref/*.fif'))

In [113]:
# Recordings found in the unfiltered-ref directory. glob.glob returns matches
# in arbitrary order, so sort them to keep the processing order reproducible.
files_unfiltered = sorted(glob.glob('dataset2/data/unfiltered-ref/*.fif'))

In [112]:

def clean(files, filtertype):
    """Epoch, standardize and clean each recording, then save the results.

    For every FIF file in ``files`` the recording is cut into fixed-length
    epochs, the epochs are standardized and passed through AutoReject to
    reject bad epochs; the result is saved as the "semiclean" data. ICA is
    then fitted on and applied to a copy of those epochs, which is saved as
    the "clean" data.

    Outputs (all below ``dataset2/data``):
      * ``{filtertype}-semiclean/<file name>``: epochs after AutoReject
      * ``{filtertype}-clean/<file name>``: epochs after ICA
      * ``{filtertype}_semifiltered_epoch.pkl``: pickled dataframe of all
        semiclean epochs, with a ``subject`` column
      * ``{filtertype}_filtered_epoch.pkl``: pickled dataframe of all
        ICA-processed epochs, with a ``subject`` column

    Parameters
    ----------
    files : list of str
        Paths of the ``.fif`` recordings to process.
    filtertype : str
        Prefix used to name the output directories and pickle files.

    Returns
    -------
    None
    """
    # AutoReject is used below, but the file only imports
    # get_rejection_threshold from autoreject, so bring it into scope here.
    from autoreject import AutoReject

    n_files = len(files)
    semiclean_dir = f'dataset2/data/{filtertype}-semiclean'
    clean_dir = f'dataset2/data/{filtertype}-clean'
    # exist_ok avoids the check-then-create race; doing this once up front
    # also guarantees the parent directory exists for the pickle files.
    os.makedirs(semiclean_dir, exist_ok=True)
    os.makedirs(clean_dir, exist_ok=True)

    # Collect per-file frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copies on every call.
    semiclean_frames = []
    clean_frames = []

    print("Filtering {n} files".format(n=n_files))
    # Initialize a progress bar.
    progress = ProgressBar(n_files, mesg='Filtering')
    for path in files:
        # basename handles both '/' and the platform's own separator.
        fname = os.path.basename(path)
        # The first 11 characters of the file name are used as subject label.
        subject = fname[0:11]

        raw = mne.io.read_raw_fif(path, preload=True, verbose='error')

        # Create fixed-length epochs.
        events = mne.make_fixed_length_events(raw, id=1, duration=2, overlap=.75)
        raw.info['projs'] = []
        epochs = mne.Epochs(raw, events, tmin=0, tmax=2, baseline=(0, 0),
                            detrend=1, reject_by_annotation=True,
                            preload=True, verbose='error')

        # Thresholds are estimated on the epochs *before* standardization.
        reject = get_rejection_threshold(epochs, decim=1)
        # NOTE(review): mag/grad limits are set although only EEG channels
        # are picked for ICA below - confirm these entries are needed.
        reject['mag'] = 5e-12
        reject['grad'] = 4000e-13

        # Standardize epochs.
        X = epochs.get_data()  # EEG signals (n_epochs, n_channels, n_times)
        y = epochs.events[:, 2]  # class labels (n_epochs)
        scaled = mne.decoding.Scaler(epochs.info, scalings='mean').fit_transform(X, y)
        epochs_std = mne.EpochsArray(scaled, epochs.info, events=epochs.events)

        # Reject bad epochs.
        ar = AutoReject(verbose=True)
        epochs_clean = ar.fit_transform(epochs_std)

        # Save the semiclean epochs and keep their dataframe.
        epochs_clean.save(f'{semiclean_dir}/{fname}')
        semiclean_df = epochs_clean.to_data_frame()
        semiclean_df['subject'] = subject
        semiclean_frames.append(semiclean_df)

        # Apply ICA on a copy so the semiclean epochs stay untouched.
        epochs_ica = epochs_clean.copy()
        picks = mne.pick_types(epochs_ica.info, eeg=True, eog=False,
                               stim=False, exclude='bads')
        ica = mne.preprocessing.ICA(method="infomax",
                                    fit_params=dict(extended=True),
                                    random_state=1, n_components=19)
        # NOTE(review): MNE documents ICA.fit's `reject` as applying to Raw
        # input only - confirm whether it has any effect on Epochs here.
        ica.fit(epochs_ica, picks=picks, reject=reject)
        # NOTE(review): no components are marked for exclusion in this
        # function before apply() - confirm that artifacts are actually
        # being removed by this step.
        ica.apply(epochs_ica)

        # Save the ICA-processed epochs and keep their dataframe.
        epochs_ica.save(f'{clean_dir}/{fname}')
        clean_df = epochs_ica.to_data_frame()
        clean_df['subject'] = subject
        clean_frames.append(clean_df)

        progress.update_with_increment_value(1)

    # pd.concat raises on an empty list, so fall back to an empty frame.
    data_filtered = (pd.concat(clean_frames, ignore_index=True)
                     if clean_frames else pd.DataFrame())
    data_semifiltered = (pd.concat(semiclean_frames, ignore_index=True)
                         if semiclean_frames else pd.DataFrame())

    # Pickle the ICA-processed data.
    with open(f'dataset2/data/{filtertype}_filtered_epoch.pkl', 'wb') as f:
        pickle.dump(data_filtered, f)
    # Pickle the semiclean data (previously data_filtered was written here
    # by mistake, so the semiclean dataframe was never saved).
    with open(f'dataset2/data/{filtertype}_semifiltered_epoch.pkl', 'wb') as f:
        pickle.dump(data_semifiltered, f)

In [None]:
# Run the cleaning pipeline on the unfiltered recordings; outputs are written
# under dataset2/data with the 'unfiltered' prefix.
clean(files_unfiltered, 'unfiltered')

In [None]:
# Run the cleaning pipeline on the filtered recordings; outputs are written
# under dataset2/data with the 'filtered' prefix.
clean(files, 'filtered')