In [None]:
## Testing multiple neural net architectures on time series data 

In [1]:
#Import necessary libraries

import pandas as pd
import numpy as np
from scipy import signal
import os
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import pickle as pkl
import itertools 
import glob
from sklearn import svm 
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, plot_confusion_matrix, f1_score
from sklearn.decomposition import PCA
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

%matplotlib inline 
# %matplotlib qt


In [4]:
#Importing raw data files 

#.csv path
csvpath = "C:/Users/Wu Di/Documents/EEG-analysis/200108-Readings-csv/reading_switching.csv"

#Read .csv files
cols_to_use = list(range(4, 36))

#Raw dataframes - each channel is a column
raw_df = pd.read_csv(csvpath, header=None, usecols=cols_to_use)

#Split into thinking and counting data frames
think_index_list = []
read_index_list = []

for i in range(6):
    if i%2==0:
        think_index_list+=(list(range(1280*i,1280*(i+1))))
    else:
        read_index_list+=(list(range(1280*i,1280*(i+1))))


# for i in range(len(all_bands_list)):
#     df = all_bands_list[i].iloc[0:1280*6]
df = raw_df.iloc[0:1280*6]
df_list = np.vsplit(df,6)
think_df = pd.DataFrame(np.vstack((df_list[0],df_list[2],df_list[4])))
read_df = pd.DataFrame(np.vstack((df_list[1],df_list[3],df_list[5])))

print(think_df.shape)
print(read_df.shape)

think_df.to_pickle("F:\EEG-data\\think-read\\raw/think.pkl")
read_df.to_pickle("F:\EEG-data\\think-read\\raw/read.pkl")

(3840, 32)
(3840, 32)


In [5]:
# Import raw datasets, 
think_df = pd.read_pickle("F:\EEG-data\\think-read\\raw/think.pkl")
read_df = pd.read_pickle("F:\EEG-data\\think-read\\raw/read.pkl")

print(think_df.shape)
print(read_df.shape)
print(think_df.iloc[0,0]) # Each element in dataframe is a single timestep
print(read_df.iloc[0,0])

(3840, 32)
(3840, 32)
-829.138184
-206.888657


In [6]:
#Splits a single dataframe into list of equally sized arrays
#Each element in list is nx32 array, where n= sample length 
def split_df(df,fs,sample_t,check=False):
    rows,_ = df.shape #get no. of rows
    sample_len = int(sample_t*fs) #find no. of recorded samples required for each sample time length
    Ns = int(rows/sample_len) #find total no. of samples
    df_cut = df.iloc[:Ns*sample_len] #truncate dataframe to exact multiple of sample length
    # print(Ns*sample_len)
    # print(Ns)
    df_split_list = np.vsplit(df_cut,Ns) #split dataframe row-wise, returns a list
    
    if check:
        print("Total no. of recorded samples: "+str(rows))
        print("Sample length: "+str(sample_len))
        print("Total no. of samples: "+str(Ns))
        print("Length of df_split_list: "+str(len(df_split_list)))
        
        if all(isinstance(x.shape,tuple) for x in df_split_list):
            print("Shape of each element in df_split_list: "+str(df_split_list[0].shape))
        else:
            print("Shapes are wrong.")
            for x in df_split_list:
                print(x.shape)
    return df_split_list,Ns

#Apply split_df() function to list of dataframes, reshape dataframe such that each element is an array 
#for the appropriate sample time length 
def split_bands_list(bands_list,fs,sample_t,check=False,checkSD=False):
    df_list_rFE = [0]*len(bands_list) #dataframes list ready for feature extraction 
    for df_no in range(len(bands_list)):
        df_split_list,Ns = split_df(bands_list[df_no],fs,sample_t,check=checkSD)
        list_of_series = [0]*Ns
        for i in range(len(df_split_list)):
            #New dataframe will have shape Nsx32, each element is a 1xsample_len array 
            new_row = [0]*32 
            #Each df_split_list[i] is a dataframe
            for j in range(len(df_split_list[i].columns)):
                new_row[j] = df_split_list[i].iloc[:,j].values 
            list_of_series[i] = new_row
        df_list_rFE[df_no] = pd.DataFrame(list_of_series)
    if check:
        print("Length of bands_list: "+str(len(bands_list)))
        print("Length of df_list_rFE: "+str(len(df_list_rFE)))
        if (all(isinstance(x.shape,tuple) for x in df_list_rFE)) and (Ns==len(df_list_rFE[0].index)):
            print("Shape of each dataframe in df_list_rFE: "+str(df_list_rFE[0].shape))
    return df_list_rFE
            

In [7]:
#0.05s 
split005_t_df=split_bands_list([think_df],128,0.05)[0]
split005_r_df=split_bands_list([read_df],128,0.05)[0]

#0.1s 
split01_t_df=split_bands_list([think_df],128,0.1)[0]
split01_r_df=split_bands_list([read_df],128,0.1)[0]


In [8]:
print(split005_t_df.shape)
print(split005_r_df.shape)

print(split01_t_df.shape)
print(split01_r_df.shape)


(640, 32)
(640, 32)
(320, 32)
(320, 32)


In [9]:
#Save datasets
split005_t_df.to_pickle("F:\EEG-data\\think-read\\raw\\0.05s/raw_think.pkl")
split005_r_df.to_pickle("F:\EEG-data\\think-read\\raw\\0.05s/raw_read.pkl")

split01_t_df.to_pickle("F:\EEG-data\\think-read\\raw\\0.1s/raw_think.pkl")
split01_r_df.to_pickle("F:\EEG-data\\think-read\\raw\\0.1s/raw_read.pkl")



In [2]:
# Import datasets
#0.05s 
t_df005 = pd.read_pickle("F:\EEG-data\\think-read\\raw\\0.05s/raw_think.pkl")
r_df005 = pd.read_pickle("F:\EEG-data\\think-read\\raw\\0.05s/raw_read.pkl")

#0.1s
t_df01 = pd.read_pickle("F:\EEG-data\\think-read\\raw\\0.1s/raw_think.pkl")
r_df01 = pd.read_pickle("F:\EEG-data\\think-read\\raw\\0.1s/raw_read.pkl")


In [3]:
print(t_df005.shape)
print(r_df005.shape)

print(t_df01.shape)
print(r_df01.shape)


(640, 32)
(640, 32)
(320, 32)
(320, 32)


In [8]:
### FILTERED 
# Import filtered datasets
#0.05s 
ft_df005 = pd.read_pickle("F:\EEG-data\\think-read\\filtered\\0.05s/lp40.5_think.pkl")
fr_df005 = pd.read_pickle("F:\EEG-data\\think-read\\filtered\\0.05s/lp40.5_read.pkl")

#0.1s
ft_df01 = pd.read_pickle("F:\EEG-data\\think-read\\filtered\\0.1s/lp40.5_think.pkl")
fr_df01 = pd.read_pickle("F:\EEG-data\\think-read\\filtered\\0.1s/lp40.5_read.pkl")


In [10]:
print(ft_df005.shape)
print(fr_df005.shape)

print(ft_df01.shape)
print(fr_df01.shape)


(640, 32)
(640, 32)
(320, 32)
(320, 32)


In [4]:
# Shape each dataframe into 320,12,32
def reshape_df(df):
    sampLen = len(df.iloc[0,0])
    N = df.shape[0]
    new_df = np.zeros((N,sampLen,32))
    for i in range(32):
        channel = df.iloc[:,i].values 
        channel_df = np.zeros((N,sampLen))
        for j in range(len(channel)):
            channel_df[j,:] = channel[j]
        new_df[:,:,i] = keras.utils.normalize(channel_df)
    return new_df


In [5]:
#0.05s
t_df005r = reshape_df(t_df005)
r_df005r = reshape_df(r_df005)

#0.1s
t_df01r = reshape_df(t_df01)
r_df01r = reshape_df(r_df01)


In [6]:
#0.05s
print(t_df005r.shape)
print(r_df005r.shape)

#0.1s
print(t_df01r.shape)
print(r_df01r.shape)

(640, 6, 32)
(640, 6, 32)
(320, 12, 32)
(320, 12, 32)


In [11]:
# FILTERED
#0.05s
ft_df005r = reshape_df(ft_df005)
fr_df005r = reshape_df(fr_df005)

#0.1s
ft_df01r = reshape_df(ft_df01)
fr_df01r = reshape_df(fr_df01)


In [12]:
## FILTERED
#0.05s
print(ft_df005r.shape)
print(fr_df005r.shape)

#0.1s
print(ft_df01r.shape)
print(fr_df01r.shape)


(640, 6, 32)
(640, 6, 32)
(320, 12, 32)
(320, 12, 32)


In [7]:
#0.05
X005 = np.vstack((t_df005r,r_df005r))
y005 = np.hstack((np.zeros(t_df005r.shape[0]),np.ones(r_df005r.shape[0])))
print(X005.shape)
print(y005.shape)
#0.1
X01 = np.vstack((t_df01r,r_df01r))
y01 = np.hstack((np.zeros(t_df01r.shape[0]),np.ones(r_df01r.shape[0])))
print(X01.shape)
print(y01.shape)

(1280, 6, 32)
(1280,)
(640, 12, 32)
(640,)


In [14]:
#FILTERED 
#0.05
fX005 = np.vstack((ft_df005r,fr_df005r))
fy005 = np.hstack((np.zeros(ft_df005r.shape[0]),np.ones(fr_df005r.shape[0])))
print(fX005.shape)
print(fy005.shape)
#0.1
fX01 = np.vstack((ft_df01r,fr_df01r))
fy01 = np.hstack((np.zeros(ft_df01r.shape[0]),np.ones(fr_df01r.shape[0])))
print(fX01.shape)
print(fy01.shape)

(1280, 6, 32)
(1280,)
(640, 12, 32)
(640,)


In [15]:
#Split into train and test sets
def split_train_test(X,y):
    sss = StratifiedShuffleSplit(n_splits=5,test_size=0.2,random_state=0)
    for train_index, test_index in sss.split(X,y):
                x_train, x_test = X[train_index],X[test_index]
                y_train, y_test = y[train_index],y[test_index]
    return x_train,x_test,y_train,y_test

In [16]:
#DNN model
def DNN_model(sampLen):
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(sampLen*32, input_shape = (sampLen,32), activation="relu"),
        tf.keras.layers.Dense(10, activation="relu"), 
        tf.keras.layers.Dense(1,activation="sigmoid")
    ])
    model.compile(loss="binary_crossentropy",
                optimizer='adam',
                metrics=["accuracy"])
    # model.summary()
    return model

#Conv model
def CONV_model(sampLen):
    model2 = tf.keras.models.Sequential([
        tf.keras.layers.Conv1D(filters=128, kernel_size=5,strides=1, padding="causal",
                                activation="relu",input_shape=(sampLen,32)), #input shape = (n_timesteps,n_features)
        tf.keras.layers.Conv1D(filters=64, kernel_size=5,strides=1, padding="causal",activation="relu"),
        tf.keras.layers.Conv1D(filters=32, kernel_size=5,strides=1, padding="causal",activation="relu"),
        # tf.keras.layers.Dropout(0.5),
        tf.keras.layers.MaxPool1D(pool_size=2),
        tf.keras.layers.Conv1D(filters=16, kernel_size=5,strides=1, padding="causal",activation="relu"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(10, activation="relu"), 
        # tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(1,activation="sigmoid")
    ])
    # optimizer = tf.keras.optimizers.Adam(lr=9e-4)
    model2.compile(loss="binary_crossentropy",
                optimizer="adam",
                metrics=["accuracy"])
    return model2

#Conv + LSTM
def CONV_LSTM_model(sampLen):
    model3 = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=128, kernel_size=5,strides=1, padding="causal",
                              activation="relu",input_shape=(sampLen,32)), #input shape = (n_timesteps,n_features)
    tf.keras.layers.Conv1D(filters=64, kernel_size=5,strides=1, padding="causal",activation="relu"),
    tf.keras.layers.Conv1D(filters=32, kernel_size=5,strides=1, padding="causal",activation="relu"),
    # tf.keras.layers.Dropout(0.5),
    tf.keras.layers.MaxPool1D(pool_size=2),
    tf.keras.layers.Conv1D(filters=16, kernel_size=5,strides=1, padding="causal",activation="relu"),
    # tf.keras.layers.Flatten(),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.Flatten(),
    # tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.Dense(10, activation="relu"), 
    # tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(1,activation="sigmoid")
])
    model3.compile(loss="binary_crossentropy",
              optimizer='adam',
              metrics=["accuracy"])
    return model3

#Simple RNN 
def RNN_model(sampLen):
    model4 = tf.keras.models.Sequential([
    tf.keras.layers.SimpleRNN(40, input_shape = (sampLen,32),return_sequences=True),
    tf.keras.layers.SimpleRNN(40),
    # tf.keras.layers.Dense(10,activation="relu",input_shape=(12,32)),
    tf.keras.layers.Dense(1,activation="sigmoid")
    ])
    model4.compile(loss="binary_crossentropy",
                optimizer='adam',
                metrics=["accuracy"])
    return model4

#LSTM only

def LSTM_model(sampLen):
    model5 = tf.keras.models.Sequential([
    # tf.keras.layers.Flatten(input_shape=(sampLen,32)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True,input_shape=(sampLen,32))),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10,activation="relu"),
    tf.keras.layers.Dense(1,activation="sigmoid")
    ])
    model5.compile(loss="binary_crossentropy",
                optimizer='adam',
                metrics=["accuracy"])
    return model5

In [17]:
def test(model,dataset,devset,batchsize,epochs):
    tf.random.set_seed(10)
    datasetb = dataset.batch(batchsize)
    devsetb = devset.batch(batchsize)
    history = model.fit(datasetb,epochs=epochs,verbose=0,validation_data=devsetb)
    return history 

In [11]:
#0.05s
x_train005,x_test005,y_train005,y_test005 = split_train_test(X005,y005)
#0.1s
x_train01,x_test01,y_train01,y_test01 = split_train_test(X01,y01)

In [12]:
# 0.05s 
dataset005 = tf.data.Dataset.from_tensor_slices((x_train005,y_train005))
devset005  = tf.data.Dataset.from_tensor_slices((x_test005,y_test005))
dataset005b = dataset005.batch(40)
devset005b = devset005.batch(40)

# 0.1s 
dataset01 = tf.data.Dataset.from_tensor_slices((x_train01,y_train01))
devset01  = tf.data.Dataset.from_tensor_slices((x_test01,y_test01))
dataset01b = dataset01.batch(40)
devset01b = devset01.batch(40)

In [13]:
# 0.05s 
# Testing different models, epochs=10, batchsize = 1 
sampLen = 6 
DNN_model005 = DNN_model(sampLen)
CONV_model005 = CONV_model(sampLen)
CONV_LSTM_model005 = CONV_LSTM_model(sampLen)
LSTM_model005 = LSTM_model(sampLen)
RNN_model005 = RNN_model(sampLen)
#Histories 
DNN_hist005 = test(DNN_model005,dataset005,devset005,1,10)
CONV_hist005 = test(CONV_model005,dataset005,devset005,1,10)
CONV_LSTM_hist005 = test(CONV_LSTM_model005,dataset005,devset005,1,10)
LSTM_hist005 = test(LSTM_model005,dataset005,devset005,1,10)
RNN_hist005 = test(RNN_model005,dataset005,devset005,1,10)
#Max validation accuracy 
valacc_DNN005 = max(DNN_hist005.history['val_accuracy'])
valacc_CONV005 = max(CONV_hist005.history['val_accuracy'])
valacc_CONVLSTM005 = max(CONV_LSTM_hist005.history['val_accuracy'])
valacc_LSTM005 = max(LSTM_hist005.history['val_accuracy'])
valacc_RNN005 = max(RNN_hist005.history['val_accuracy'])

print(valacc_DNN005)
print(valacc_CONV005)
print(valacc_CONVLSTM005)
print(valacc_LSTM005)
print(valacc_RNN005)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

0.9140625
0.90234375
0.90234375
0.89453125
0.83984375


In [25]:
# 0.1s 
# Testing different models, epochs=10, batchsize = 1 
sampLen = 12
DNN_model01 = DNN_model(sampLen)
CONV_model01 = CONV_model(sampLen)
CONV_LSTM_model01 = CONV_LSTM_model(sampLen)
LSTM_model01 = LSTM_model(sampLen)
RNN_model01 = RNN_model(sampLen)
#Histories 
DNN_hist01 = test(DNN_model01,dataset01,devset01,1,10)
CONV_hist01 = test(CONV_model01,dataset01,devset01,1,10)
CONV_LSTM_hist01 = test(CONV_LSTM_model01,dataset01,devset01,1,10)
LSTM_hist01 = test(LSTM_model01,dataset01,devset01,1,10)
RNN_hist01 = test(RNN_model01,dataset01,devset01,1,10)
#Max validation accuracy 
valacc_DNN01 = max(DNN_hist01.history['val_accuracy'])
valacc_CONV01 = max(CONV_hist01.history['val_accuracy'])
valacc_CONVLSTM01 = max(CONV_LSTM_hist01.history['val_accuracy'])
valacc_LSTM01 = max(LSTM_hist01.history['val_accuracy'])
valacc_RNN01 = max(RNN_hist01.history['val_accuracy'])

print(valacc_DNN01)
print(valacc_CONV01)
print(valacc_CONVLSTM01)
print(valacc_LSTM01)
print(valacc_RNN01)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

0.8606771230697632
0.90625
0.5
0.9375
0.8515625


In [18]:
# FILTERED
#0.05s
fx_train005,fx_test005,fy_train005,fy_test005 = split_train_test(fX005,fy005)
#0.1s
fx_train01,fx_test01,fy_train01,fy_test01 = split_train_test(fX01,fy01)

In [19]:
#FILTERED
# 0.05s 
fdataset005 = tf.data.Dataset.from_tensor_slices((fx_train005,fy_train005))
fdevset005  = tf.data.Dataset.from_tensor_slices((fx_test005,fy_test005))
fdataset005b = fdataset005.batch(40)
fdevset005b = fdevset005.batch(40)

# 0.1s 
fdataset01 = tf.data.Dataset.from_tensor_slices((fx_train01,fy_train01))
fdevset01  = tf.data.Dataset.from_tensor_slices((fx_test01,fy_test01))
fdataset01b = fdataset01.batch(40)
fdevset01b = fdevset01.batch(40)

In [20]:
#### FILTERED ####
# 0.05s 
# Testing different models, epochs=10, batchsize = 1 
sampLen = 6 
DNN_model005 = DNN_model(sampLen)
CONV_model005 = CONV_model(sampLen)
CONV_LSTM_model005 = CONV_LSTM_model(sampLen)
LSTM_model005 = LSTM_model(sampLen)
RNN_model005 = RNN_model(sampLen)
#Histories 
DNN_hist005f = test(DNN_model005,fdataset005,fdevset005,1,10)
CONV_hist005f = test(CONV_model005,fdataset005,fdevset005,1,10)
CONV_LSTM_hist005f = test(CONV_LSTM_model005,fdataset005,fdevset005,1,10)
LSTM_hist005f = test(LSTM_model005,fdataset005,fdevset005,1,10)
RNN_hist005f = test(RNN_model005,fdataset005,fdevset005,1,10)
#Max validation accuracy 
valacc_DNN005f = max(DNN_hist005f.history['val_accuracy'])
valacc_CONV005f = max(CONV_hist005f.history['val_accuracy'])
valacc_CONVLSTM005f = max(CONV_LSTM_hist005f.history['val_accuracy'])
valacc_LSTM005f = max(LSTM_hist005f.history['val_accuracy'])
valacc_RNN005f = max(RNN_hist005f.history['val_accuracy'])

print("Filtered validation accuracies")
print(valacc_DNN005f)
print(valacc_CONV005f)
print(valacc_CONVLSTM005f)
print(valacc_LSTM005f)
print(valacc_RNN005f)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Filtered validation accuracies
0.8522133827209473
0.90234375
0.90625
0.89453125
0.828125


In [21]:
##### FILTERED ########
#0.1s
# Testing different models, epochs=10, batchsize = 1 
sampLen = 12
DNN_model01 = DNN_model(sampLen)
CONV_model01 = CONV_model(sampLen)
CONV_LSTM_model01 = CONV_LSTM_model(sampLen)
LSTM_model01 = LSTM_model(sampLen)
RNN_model01 = RNN_model(sampLen)
#Histories 
DNN_hist01f = test(DNN_model01,fdataset01,fdevset01,1,10)
CONV_hist01f = test(CONV_model01,fdataset01,fdevset01,1,10)
CONV_LSTM_hist01f = test(CONV_LSTM_model01,fdataset01,fdevset01,1,10)
LSTM_hist01f = test(LSTM_model01,fdataset01,fdevset01,1,10)
RNN_hist01f = test(RNN_model01,fdataset01,fdevset01,1,10)
#Max validation accuracy 
valacc_DNN01f = max(DNN_hist01f.history['val_accuracy'])
valacc_CONV01f = max(CONV_hist01f.history['val_accuracy'])
valacc_CONVLSTM01f = max(CONV_LSTM_hist01f.history['val_accuracy'])
valacc_LSTM01f = max(LSTM_hist01f.history['val_accuracy'])
valacc_RNN01f = max(RNN_hist01f.history['val_accuracy'])

print("Filtered validation accuracies")
print(valacc_DNN01f)
print(valacc_CONV01f)
print(valacc_CONVLSTM01f)
print(valacc_LSTM01f)
print(valacc_RNN01f)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Filtered validation accuracies
0.8736978769302368
0.8984375
0.5
0.9296875
0.796875


In [30]:
# Testing different batch sizes 
def test_batchsizes(model,dataset,devset,batchsizes,epochs):
    histories = []
    for batchsize in batchsizes:
        histories.append(test(model,dataset,devset,batchsize,epochs))
    return histories 

In [33]:
# 0.1s 
batchsizes = list(range(1,40,10))

DNN01_histories = test_batchsizes(DNN_model01,dataset01,devset01,batchsizes,10)
CONV01_histories = test_batchsizes(CONV_model01,dataset01,devset01,batchsizes,10)
CONV_LSTM_01_histories = test_batchsizes(CONV_LSTM_model01,dataset01,devset01,batchsizes,10)
LSTM_01_histories = test_batchsizes(LSTM_model01,dataset01,devset01,batchsizes,10)
RNN01_histories = test_batchsizes(RNN_model01,dataset01,devset01,batchsizes,10)

KeyboardInterrupt: 

In [32]:
hists_maxvalacc = lambda histories: max([history.history['val_accuracy'][-1] for history in histories])
hists_maxvalacc_batch = lambda histories,batchsizes: batchsizes[np.argmax(np.array([history.history['val_accuracy'][-1] for history in histories]))]

print("batch size: {}, val acc: {}".format(hists_maxvalacc_batch(DNN01_histories,batchsizes),hists_maxvalacc(DNN01_histories)))
print("batch size: {}, val acc: {}".format(hists_maxvalacc_batch(CONV01_histories,batchsizes),hists_maxvalacc(CONV01_histories)))
print("batch size: {}, val acc: {}".format(hists_maxvalacc_batch(CONV_LSTM_01_histories,batchsizes),hists_maxvalacc(CONV_LSTM_01_histories)))
print("batch size: {}, val acc: {}".format(hists_maxvalacc_batch(LSTM_01_histories,batchsizes),hists_maxvalacc(LSTM_01_histories)))
print("batch size: {}, val acc: {}".format(hists_maxvalacc_batch(RNN01_histories,batchsizes),hists_maxvalacc(RNN01_histories)))

batch size: 31, val acc: 0.8860676884651184
batch size: 1, val acc: 0.9140625
batch size: 1, val acc: 0.5
batch size: 11, val acc: 0.90625
batch size: 21, val acc: 0.890625


## FILTERING


In [2]:
#Import raw recordings

think_df = pd.read_pickle("F:\EEG-data\\think-read\\raw/think.pkl")
read_df = pd.read_pickle("F:\EEG-data\\think-read\\raw/read.pkl")

print(think_df.shape)
print(read_df.shape)

(3840, 32)
(3840, 32)


In [3]:
#Bandpass (BP) filter helper functions

#Creates butterworth BP filter
def butter_bandpass(lowcut, highcut, fs, order=5):
    nyq = 0.5*fs  # Nyquist frequency, which is half of fs
    low = lowcut/nyq  # Digital butterworth filter cutoffs must be normalized to Nyquist frequency
    high = highcut/nyq
    b, a = signal.butter(order, [low, high], btype="bandpass")
    return b, a

def butter_lowpass(cutFreq,fs,order=5):
    nyq = 0.5*fs
    cutFreq = cutFreq/nyq
    b,a = signal.butter(order,cutFreq,btype="lowpass")
    return b,a 

def butter_highpass(cutFreq,fs,order=5):
    nyq = 0.5*fs
    cutFreq = cutFreq/nyq
    b,a = signal.butter(order,cutFreq,btype="highpass")
    return b,a 

#Applies butterworth BP filter
def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    b, a = butter_bandpass(lowcut, highcut, fs, order=order)
#     filtered_data = signal.lfilter(b, a, data)
    filtered_data = signal.filtfilt(b,a,data)
    return filtered_data

#Applies butterworth lowpass filter
def butter_lowpass_filter(data, cutFreq, fs, order=5):
    b, a = butter_lowpass(cutFreq,fs,order=5)
    filtered_data = signal.filtfilt(b,a,data)
    return filtered_data

#Applies butterworth lowpass filter
def butter_highpass_filter(data, cutFreq, fs, order=5):
    b, a = butter_highpass(cutFreq,fs,order=5)
    filtered_data = signal.filtfilt(b,a,data)
    return filtered_data

#Applies butterworth BP filter to Pandas dataframe 
def bp_filter_df(df, lowcut, highcut, fs, order):
    rows, cols = df.shape  # Get no. of rows and cols in df
    new_index = range(1, rows+1)
    new_cols = range(1, cols+1)
    # Create new df with same no. of rows and cols
    new_df = pd.DataFrame(index=new_index, columns=new_cols)
    # new_df = new_df.fillna(0) #Fill in 0 for all values
    for i in range(cols):  # Apply bp filter each column (channel) and saves in new_df
        filt_col = butter_bandpass_filter(
            df.iloc[:, i].values, lowcut, highcut, fs, order)
        new_df[i+1] = filt_col
    return new_df

#Applies butterworth lowpass filter to Pandas dataframe 
def lp_filter_df(df, cutFreq, fs, order):
    rows, cols = df.shape  # Get no. of rows and cols in df
    new_index = range(1, rows+1)
    new_cols = range(1, cols+1)
    # Create new df with same no. of rows and cols
    new_df = pd.DataFrame(index=new_index, columns=new_cols)
    # new_df = new_df.fillna(0) #Fill in 0 for all values
    for i in range(cols):  # Apply bp filter each column (channel) and saves in new_df
        filt_col = butter_lowpass_filter(
            df.iloc[:, i].values, cutFreq, fs, order)
        new_df[i+1] = filt_col
    return new_df

In [4]:
filt_think_df = lp_filter_df(think_df,40.5,128,6)
filt_read_df = lp_filter_df(read_df,40.5,128,6)


In [5]:
#Splits a single dataframe into list of equally sized arrays
#Each element in list is nx32 array, where n= sample length 
def split_df(df,fs,sample_t,check=False):
    rows,_ = df.shape #get no. of rows
    sample_len = int(sample_t*fs) #find no. of recorded samples required for each sample time length
    Ns = int(rows/sample_len) #find total no. of samples
    df_cut = df.iloc[:Ns*sample_len] #truncate dataframe to exact multiple of sample length
    # print(Ns*sample_len)
    # print(Ns)
    df_split_list = np.vsplit(df_cut,Ns) #split dataframe row-wise, returns a list
    
    if check:
        print("Total no. of recorded samples: "+str(rows))
        print("Sample length: "+str(sample_len))
        print("Total no. of samples: "+str(Ns))
        print("Length of df_split_list: "+str(len(df_split_list)))
        
        if all(isinstance(x.shape,tuple) for x in df_split_list):
            print("Shape of each element in df_split_list: "+str(df_split_list[0].shape))
        else:
            print("Shapes are wrong.")
            for x in df_split_list:
                print(x.shape)
    return df_split_list,Ns

#Apply split_df() function to list of dataframes, reshape dataframe such that each element is an array 
#for the appropriate sample time length 
def split_bands_list(bands_list,fs,sample_t,check=False,checkSD=False):
    df_list_rFE = [0]*len(bands_list) #dataframes list ready for feature extraction 
    for df_no in range(len(bands_list)):
        df_split_list,Ns = split_df(bands_list[df_no],fs,sample_t,check=checkSD)
        list_of_series = [0]*Ns
        for i in range(len(df_split_list)):
            #New dataframe will have shape Nsx32, each element is a 1xsample_len array 
            new_row = [0]*32 
            #Each df_split_list[i] is a dataframe
            for j in range(len(df_split_list[i].columns)):
                new_row[j] = df_split_list[i].iloc[:,j].values 
            list_of_series[i] = new_row
        df_list_rFE[df_no] = pd.DataFrame(list_of_series)
    if check:
        print("Length of bands_list: "+str(len(bands_list)))
        print("Length of df_list_rFE: "+str(len(df_list_rFE)))
        if (all(isinstance(x.shape,tuple) for x in df_list_rFE)) and (Ns==len(df_list_rFE[0].index)):
            print("Shape of each dataframe in df_list_rFE: "+str(df_list_rFE[0].shape))
    return df_list_rFE
            

In [6]:
#0.05s filt
fsplit005_t_df=split_bands_list([filt_think_df],128,0.05)[0]
fsplit005_r_df=split_bands_list([filt_read_df],128,0.05)[0]

#0.1s filt
fsplit01_t_df=split_bands_list([filt_think_df],128,0.1)[0]
fsplit01_r_df=split_bands_list([filt_read_df],128,0.1)[0]


In [7]:
print(fsplit005_t_df.shape)
print(fsplit005_r_df.shape)

print(fsplit01_t_df.shape)
print(fsplit01_r_df.shape)


(640, 32)
(640, 32)
(320, 32)
(320, 32)


In [8]:
#Save datasets
# Filt 
fsplit005_t_df.to_pickle("F:\EEG-data\\think-read\\filtered\\0.05s/lp40.5_think.pkl")
fsplit005_r_df.to_pickle("F:\EEG-data\\think-read\\filtered\\0.05s/lp40.5_read.pkl")

fsplit01_t_df.to_pickle("F:\EEG-data\\think-read\\filtered\\0.1s/lp40.5_think.pkl")
fsplit01_r_df.to_pickle("F:\EEG-data\\think-read\\filtered\\0.1s/lp40.5_read.pkl")
