In [1]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.metrics import sensitivity_specificity_support
from sklearn.model_selection import train_test_split
from sklearn.utils.multiclass import unique_labels
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
from scipy import signal
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
import glob
import mne
import os



warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [2]:
def removeMeanFromChannels(channel_data):
    channel_data = channel_data.transpose()
    scaled_data = StandardScaler(with_std = False).fit_transform(channel_data)
    return scaled_data.transpose()
    
def getMneRaw(data_to_process):
    sfreq = 128
    channel_names = ['F3', 'Fz', 'F4', 'C3', 'Cz', 'C4', 'P3',
                     'P4', 'FC5', 'FC1', 'FC2', 'FC4', 'CP5',
                     'CP1', 'CP2', 'CP4']
    channel_type = {k:'eeg' for k in channel_names}
    info = mne.create_info(channel_names, sfreq,verbose=False)
    raw = mne.io.RawArray(removeMeanFromChannels(data_to_process), info, verbose=False)
    raw.set_channel_types(channel_type)
    montage = mne.channels.read_montage('standard_1020')
    raw.set_montage(montage,verbose=False)
    raw.filter(0,50,fir_design='firwin',verbose=False)
    return raw

def getFileInfo(file_path):
    try:
        raw_data = pd.read_csv(file_path,header=None)
        data_to_process = raw_data[:-1]
        label = int( list(set(raw_data.iloc[len(raw_data) -1 ].values))[0] )
        raw = getMneRaw(data_to_process)
        return [ raw, label ]
    except:
        return [0, 0]
    
def getPSDFeatures(data):
    sf = 128
    window = 2*sf
    overlap = window//2
    freqs, psd = signal.welch(data, sf, nperseg=window, noverlap = overlap)
    alpha_indexes = np.where((freqs >= 8)&(freqs < 13))
    alpha_values = psd[alpha_indexes]
    alpha_values.sort()

    beta_indexes = np.where((freqs >= 13)&(freqs < 30))
    beta_values = psd[beta_indexes]
    beta_values.sort()
    features = [alpha_values[-1],beta_values[-1],beta_values[-2]]
    return features

In [3]:
files = glob.glob('../data/open_bci/2colourswifi_transition/*.csv')

In [14]:
training_data_list = []
training_label_list = []

for i in tqdm(files):
    raw_data_label = getFileInfo(i)
    if raw_data_label[0] != 0:
        raw_data = raw_data_label[0].get_data()
        label = raw_data_label[1]
        for channel in raw_data:
            training_data_list.append(getPSDFeatures(channel))
        training_label_list.append(list(map(int,('{},'.format(label)*raw_data.shape[0]).split(',')[:-1])))

HBox(children=(IntProgress(value=0, max=288), HTML(value='')))




In [45]:
X = np.array(training_data_list)
Y = sum(training_label_list,[])

In [46]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.3)

In [82]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [90]:
classifier = LogisticRegression()
classifier.fit(x_train,y_train)
y_pred = classifier.predict(x_test)
classifier.score(x_test,y_test)

0.5241935483870968

In [93]:
basic_metrics = pd.DataFrame(classification_report(y_test, y_pred, output_dict = True))
basic_metrics = basic_metrics[basic_metrics.columns[:2]]
basic_metrics.columns = [1,2]
other_metrics = pd.DataFrame(sensitivity_specificity_support(y_test, y_pred), columns=[1,2])
other_metrics.index = ['sensitivity','specificity','support']
metric = pd.concat([basic_metrics,other_metrics]).drop_duplicates()

In [95]:
metric

Unnamed: 0,1,2
precision,0.0,0.524194
recall,0.0,1.0
f1-score,0.0,0.687831
support,649.0,715.0
specificity,1.0,0.0
