In [2]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.metrics import sensitivity_specificity_support
from sklearn.model_selection import train_test_split
from sklearn.utils.multiclass import unique_labels
from tqdm import tqdm_notebook as tqdm
from scipy.integrate import simps
import matplotlib.pyplot as plt
from scipy import signal
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
import glob
import mne
import os



warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [7]:
def removeMeanFromChannels(channel_data):
    channel_data = channel_data.transpose()
    scaled_data = StandardScaler(with_std = False).fit_transform(channel_data)
    return scaled_data.transpose()
    
def getMneRaw(data_to_process):
    sfreq = 128
    channel_names = ['F3', 'Fz', 'F4', 'C3', 'Cz', 'C4', 'P3',
                     'P4', 'FC5', 'FC1', 'FC2', 'FC4', 'CP5',
                     'CP1', 'CP2', 'CP4']
    channel_type = {k:'eeg' for k in channel_names}
    info = mne.create_info(channel_names, sfreq,verbose=False)
    raw = mne.io.RawArray(removeMeanFromChannels(data_to_process), info, verbose=False)
    raw.set_channel_types(channel_type)
    montage = mne.channels.read_montage('standard_1020')
    raw.set_montage(montage,verbose=False)
    raw.filter(0,50,fir_design='firwin',verbose=False)
    return raw

def getFileInfo(file_path):
    try:
        raw_data = pd.read_csv(file_path,header=None)
        data_to_process = raw_data[:-1]
        label = int( list(set(raw_data.iloc[len(raw_data) -1 ].values))[0] )
        raw = getMneRaw(data_to_process)
        return [ raw, label ]
    except:
        return [0, 0]
    
def getPSDFeatures(data):
    sf = 128
    window = 2*sf
    overlap = window//2
    freqs, psd = signal.welch(data, sf, nperseg=window, noverlap = overlap)
    alpha_indexes = np.where((freqs >= 8)&(freqs < 13))
    alpha_values = psd[alpha_indexes]
    alpha_values.sort()

    beta_indexes = np.where((freqs >= 13)&(freqs < 30))
    beta_values = psd[beta_indexes]
    beta_values.sort()
    features = [alpha_values[-1],beta_values[-1],beta_values[-2]]
    return features


def bandpower(eeg,sf=128,window_sec=None, relative=False):
    band_power = {}
    for channel_name,data in list(zip(eeg.info['ch_names'], eeg.get_data())):
        band_dic = {}
        for band_name in bands:
            band = np.asarray(bands[band_name])
            low, high = band
            if window_sec is not None:
                nperseg = window_sec * sf
            else:
                nperseg = (2 / low) * sf
            freqs, psd = signal.welch(data, sf, nperseg=nperseg)
            freq_res = freqs[1] - freqs[0]
            idx_band = np.logical_and(freqs >= low, freqs <= high)
            bp = simps(psd[idx_band], dx=freq_res)
            if relative:
                bp /= simps(psd, dx=freq_res)
            band_dic[band_name]= bp
        band_power[channel_name] = band_dic
    return band_power

In [4]:
files = glob.glob('../data/open_bci/2colourswifi_transition/*.csv')
bands = {
    'alpha':[8,13],'beta':[13,30],'gamma':[30,200],
'delta':[1,4],'theta':[4,8]
}

In [21]:
training_data_list = []
training_label_list = []

for i in tqdm(files):
    raw_data_label = getFileInfo(i)
    file_features = []
    if raw_data_label[0] != 0:
        band_feature = bandpower(raw_data_label[0])
        for channel in band_feature:
            file_features.append(list(band_feature[channel].values()))
        training_data_list.append(sum(file_features, []))
        training_label_list.append(raw_data_label[1])

HBox(children=(IntProgress(value=0, max=288), HTML(value='')))




In [27]:
X = np.array(training_data_list)
Y = training_label_list

In [28]:
X_scaled = StandardScaler().fit_transform(X)

In [30]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3)

In [31]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [38]:
classifier = KNeighborsClassifier()
classifier.fit(x_train,y_train)
y_pred = classifier.predict(x_test)
classifier.score(x_test,y_test)

0.5116279069767442

In [39]:
basic_metrics = pd.DataFrame(classification_report(y_test, y_pred, output_dict = True))
basic_metrics = basic_metrics[basic_metrics.columns[:2]]
basic_metrics.columns = [1,2]
other_metrics = pd.DataFrame(sensitivity_specificity_support(y_test, y_pred), columns=[1,2])
other_metrics.index = ['sensitivity','specificity','support']
metric = pd.concat([basic_metrics,other_metrics]).drop_duplicates()

In [40]:
metric

Unnamed: 0,1,2
precision,0.5,0.52
recall,0.428571,0.590909
f1-score,0.461538,0.553191
support,42.0,44.0
specificity,0.590909,0.428571
