In [None]:
from braininvaders2015a.dataset import BrainInvaders2015a
import mne
from mne import create_info
from mne.io import RawArray
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mne import Epochs, find_events
from IPython.display import clear_output
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

In [None]:
# Instantiate the Brain Invaders 2015a dataset wrapper. `dataset` is a
# notebook-level global that load_data() reads.
dataset = BrainInvaders2015a()
# Sanity check on subject 3: fetch the subject's data, pick session 1 / run 1,
# and print the whole per-subject structure to see which sessions/runs exist.
# NOTE(review): `_get_single_subject_data` is underscore-prefixed, i.e. not a
# public method of the dataset class — confirm it is safe to rely on.
data_subject = dataset._get_single_subject_data(3)
data_ses1_run1 = data_subject['session_1']['run_1']
print(data_subject)

In [None]:
def load_data(i):
    """Return subject ``i``'s session 1 / run 1 recording as a DataFrame.

    The recording is looked up through the notebook-level ``dataset`` object,
    converted with ``to_data_frame()``, and the ``time`` column is removed so
    that only the remaining (channel) columns are left.

    Parameters
    ----------
    i : subject identifier forwarded to ``dataset._get_single_subject_data``.

    Returns
    -------
    The recording's DataFrame without its ``time`` column.
    """
    recording = dataset._get_single_subject_data(i)['session_1']['run_1']
    samples = recording.to_data_frame()
    return samples.drop(columns=['time'])

def df_to_raw(df, sfreq=512):
    """Build an MNE ``RawArray`` from a samples-by-channels DataFrame.

    Every column except the last is declared as an EEG channel and multiplied
    by 1e-6; the last column is declared as the stim channel and left
    unscaled. The ``standard_1020`` montage is attached to the result.
    NOTE(review): the 1e-6 factor presumably converts microvolts to volts —
    confirm against the units produced by the loader.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per sample, one column per channel, stim channel last.
        The frame is not modified.
    sfreq : float, optional
        Sampling frequency passed to ``create_info`` (default 512).

    Returns
    -------
    The ``mne.io.RawArray`` built from the scaled data.

    Raises
    ------
    ValueError
        If ``df`` has no columns.
    """
    ch_names = list(df.columns)
    if not ch_names:
        raise ValueError("df must contain at least one (stim) column")
    ch_types = ['eeg'] * (len(ch_names) - 1) + ['stim']

    # Work on a private float copy laid out as (channels, samples). Scaling
    # ``df.T`` in place can write through to the caller's DataFrame when the
    # transpose is a view of the same buffer.
    data = df.to_numpy(dtype=float, copy=True).T
    data[:-1] *= 1e-6  # scale every row except the last (stim) one

    info = create_info(ch_names=ch_names, ch_types=ch_types, sfreq=sfreq)
    raw = mne.io.RawArray(data, info)
    raw.set_montage(mne.channels.make_standard_montage('standard_1020'))
    return raw

def filter(raw):
    """Apply the notebook's notch and band-pass filtering to ``raw``.

    Calls ``raw.notch_filter`` with the frequencies 50, 100, 150, 200 and 250,
    then ``raw.filter(1, 24, method='iir')``, and hands back the same object
    that was passed in.

    Note: this function's name hides Python's built-in ``filter`` for the
    rest of the notebook.
    """
    notch_freqs = np.arange(50, 251, 50)  # 50 .. 250 in steps of 50
    raw.notch_filter(notch_freqs)

    low_cut, high_cut = 1, 24
    raw.filter(low_cut, high_cut, method='iir')
    return raw

def do_pca(X):
    """Standardize ``X`` and return its PCA-transformed representation.

    A fresh ``StandardScaler`` and a fresh ``PCA()`` (default arguments) are
    each fitted on ``X`` itself via ``fit_transform``; neither fitted object
    is returned, so the transform cannot be re-applied to other data.

    Parameters
    ----------
    X : feature matrix accepted by ``StandardScaler.fit_transform``.

    Returns
    -------
    The output of ``PCA().fit_transform`` on the standardized features.
    """
    standardized = StandardScaler().fit_transform(X)
    return PCA().fit_transform(standardized)

In [None]:
from mne.preprocessing import Xdawn
from mne import (io, compute_raw_covariance, read_events, pick_types, Epochs)
def preprocess(i):
    """Load, filter, epoch and xDAWN-transform one subject's recording.

    Steps: clear the cell output, load the subject with ``load_data``, wrap it
    with ``df_to_raw``, filter it with the notebook's ``filter``, find events,
    cut epochs from 0.0 to 0.7 (no baseline correction) on the EEG channels,
    fit an ``Xdawn`` model with 2 components and transform the epochs.

    Parameters
    ----------
    i : subject identifier forwarded to ``load_data``.

    Returns
    -------
    features : 2-D array with one row per epoch; the xDAWN output with all
        remaining axes flattened.
    y : last column of ``epochs.events``, i.e. the event code of each epoch
        (1 = 'NonTarget', 2 = 'Target' per ``event_id``).

    Notes
    -----
    NOTE(review): xDAWN is fitted here on every epoch of the subject, so any
    later train/test split is drawn from data the spatial filter has already
    seen together with its labels. The author observed higher accuracy with
    the xDAWN output than with the raw epoch data; this may partly explain it.
    """
    clear_output(wait=True)
    raw = filter(df_to_raw(load_data(i)))

    events = find_events(raw)
    event_id = {'NonTarget': 1, 'Target': 2}
    eeg_channels = mne.pick_types(raw.info, eeg=True)
    epochs = Epochs(raw, events=events, event_id=event_id,
                    tmin=0.0, tmax=0.7, baseline=None, preload=True,
                    verbose=False, picks=eeg_channels)
    y = epochs.events[:, -1]

    # xDAWN, using the covariance of the whole filtered recording as signal_cov.
    signal_cov = compute_raw_covariance(raw, picks=eeg_channels)
    xd = Xdawn(n_components=2, signal_cov=signal_cov)
    features = xd.fit_transform(epochs)

    # Flatten everything but the epoch axis into one feature vector per epoch.
    return features.reshape(features.shape[0], -1), y

In [26]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
# Trial run on a single subject (41): build the flattened xDAWN features and
# labels, then check their shapes.
epochs,y = preprocess(41)
print(epochs.shape)
print(y.shape)
# NOTE(review): do_pca fits its scaler and PCA on all rows before the split
# below, so the held-out rows influence the transform applied to the training
# rows — consider fitting on the training part only.
X_train_pca = do_pca(epochs)
# 70/30 train/test split with a fixed seed (random_state=1); not stratified.
X_train, X_test, y_train, y_test = train_test_split(X_train_pca,
                    y, test_size=0.3, random_state=1)

Creating RawArray with float64 data, n_channels=33, n_times=157376
    Range : 0 ... 157375 =      0.000 ...   307.373 secs
Ready.
Setting up band-stop filter

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower transition bandwidth: 0.50 Hz
- Upper transition bandwidth: 0.50 Hz
- Filter length: 3381 samples (6.604 sec)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 24 Hz

IIR filter parameters
---------------------
Butterworth bandpass zero-phase (two-pass forward and reverse) non-causal filter:
- Filter order 16 (effective, after forward-backward)
- Cutoffs at 1.00, 24.00 Hz: -6.02, -6.02 dB

504 events found
Event IDs: [1 2]
Using up to 1536 segments
Number of samples used : 156672
[done]
Computing rank from data with rank='full'
    EEG: rank 32 from info
Reducing dat

In [27]:
# Grid search over SVC kernel and C on the training split, using
# GridSearchCV's default cross-validation and scoring.
kernel = ['poly', 'rbf', 'linear']
C_range = [1, 2, 3, 4]
param_grid = dict(C=C_range, kernel=kernel)
grid = GridSearchCV(SVC(), param_grid=param_grid)
grid.fit(X_train, y_train)
print("=========Best parameters=======")
# The space after "with" separates the two concatenated f-strings.
print(f"The best parameters are {grid.best_params_} with "
      f"a score of {grid.best_score_:.2f}")

The best parameters are {'C': 3, 'kernel': 'rbf'} witha score of 0.88


In [28]:
# Refit an SVC with fixed settings (rbf kernel, C=3) on the training split
# and report the fraction of correctly predicted test labels.
clf = SVC(C=3, kernel='rbf')
clf.fit(X_train, y_train)
yhat = clf.predict(X_test)
acc = np.mean(yhat == y_test)
print(acc)

0.8486842105263158


Models: per-subject evaluation (subjects 1–43), followed by a pooled model (subjects 1–10)

In [29]:
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.svm import SVC
# Per-subject evaluation: for each of subjects 1..43, build features, run the
# scaler + PCA, split 70/30 with a fixed seed, fit an rbf SVC (C=3) and store
# the test accuracy. `accuracy[k]` belongs to subject k + 1.
accuracy = []
for i in range(1, 44):
    X, y = preprocess(i)
    X_train_pca = do_pca(X)

    # split
    X_train, X_test, y_train, y_test = train_test_split(
        X_train_pca, y, test_size=0.3, random_state=1
    )

    # model
    clf = SVC(C=3, kernel='rbf')
    clf.fit(X_train, y_train)
    yhat = clf.predict(X_test)
    acc = np.mean(yhat == y_test)
    accuracy.append(acc)

Creating RawArray with float64 data, n_channels=33, n_times=129472
    Range : 0 ... 129471 =      0.000 ...   252.873 secs
Ready.
Setting up band-stop filter

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower transition bandwidth: 0.50 Hz
- Upper transition bandwidth: 0.50 Hz
- Filter length: 3381 samples (6.604 sec)

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 24 Hz

IIR filter parameters
---------------------
Butterworth bandpass zero-phase (two-pass forward and reverse) non-causal filter:
- Filter order 16 (effective, after forward-backward)
- Cutoffs at 1.00, 24.00 Hz: -6.02, -6.02 dB

360 events found
Event IDs: [1 2]
Using up to 1264 segments
Number of samples used : 128928
[done]
Computing rank from data with rank='full'
    EEG: rank 32 from info
Reducing dat

In [30]:
# rbf kernel, C=3, xDAWN features: print each subject's test accuracy and
# then the mean over all subjects.
# The running total is kept in `total` so the built-in `sum` is not rebound
# for the rest of the kernel session.
total = 0.0
for subject_number, subject_accuracy in enumerate(accuracy, start=1):
    print(f"subject{subject_number}", subject_accuracy)
    total += subject_accuracy
# Computed once after the loop; NaN instead of a NameError when there are no results.
avg = total / len(accuracy) if accuracy else float("nan")
print(avg)

subject1 0.8395860284605433
subject2 0.8794326241134752
subject3 0.9351851851851852
subject4 0.9769230769230769
subject5 0.9615384615384616
subject6 0.9320987654320988
subject7 0.9351851851851852
subject8 0.8692307692307693
subject9 0.8950617283950617
subject10 0.9387755102040817
subject11 0.9327731092436975
subject12 0.8362989323843416
subject13 0.9012345679012346
subject14 0.8901734104046243
subject15 0.8981481481481481
subject16 0.8907563025210085
subject17 0.9021739130434783
subject18 0.8769230769230769
subject19 0.8888888888888888
subject20 0.8846153846153846
subject21 0.9351851851851852
subject22 0.9210526315789473
subject23 0.9013157894736842
subject24 0.957983193277311
subject25 0.930635838150289
subject26 0.9074074074074074
subject27 0.8795180722891566
subject28 0.9174757281553398
subject29 0.930635838150289
subject30 0.9591836734693877
subject31 0.8666666666666667
subject32 0.9259259259259259
subject33 0.9074074074074074
subject34 0.9148936170212766
subject35 0.86643835616438

from IPython.display import clear_output
num_event = []

# Pool the features and labels of subjects 1..10. The per-subject arrays are
# collected in lists and stacked once, instead of re-copying the growing
# array on every iteration.
X_parts, y_parts = [], []
for i in range(1, 11):
    X, y = preprocess(i)
    X_parts.append(X)
    y_parts.append(y)
X_start = np.vstack(X_parts)
y_start = np.concatenate(y_parts, axis=0)

X = X_start
y = y_start
# Report the shape of the pooled data that this cell actually trains on.
print(X.shape)
print(y.shape)

# NOTE(review): the scaler and PCA inside do_pca are fitted on all pooled rows
# before the split, so the held-out rows influence the transform.
X_train_pca = do_pca(X)

X_train, X_test, y_train, y_test = train_test_split(X_train_pca,
                    y, test_size=0.3, random_state=1)

# model: linear SVC with C=1 on the pooled data
clf = SVC(kernel='linear', C=1)
clf.fit(X_train, y_train)
yhat = clf.predict(X_test)
acc = np.sum(yhat == y_test) / len(y_test)
print(acc)