In [1]:
## Five finger (5F) EEG Classification using LSTM-based RNN

In [5]:
## This file is used to split data into series of arrays and their corresponding MI task.
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np
import scipy.linalg as la
file = sio.loadmat('../../../matDown/5F_Data/5F-SubjectB-160311-5St-SGLHand-HFREQ.mat') #replace with .mat file name
header=file['__header__']
version=file['__version__']
glob=file['__globals__']
#ans=file['ans']


#x=file['x']
o=file['o'][0][0]
data=o['data']
data = np.transpose(data)
data = data[0:21,:];
print(data)
nS=o['nS'][0][0]
#values of structure seem to be 2D numpy arrays, if originally a scalar in Matlab.
#use [0][0] to get scalar.
print("Number of samples: {numSamples}".format(numSamples=nS))
test=o['id'][0] #id value became a 1D array of size 1 for some reason. use [0] to get value
print("Dataset ID: {id}".format(id=test))
chnames=o['chnames'][:,0] #[:,0] converts from 2D array back to 1D array
print("Channel names: {channelNames}".format(channelNames=chnames))
markers = o['marker']
## The markers are all still individual arrays of size 1x1, so we convert them to an array with single values
markersArray = []
for marker in markers:
    markersArray.append(marker[0])
markersArray = np.asarray(markersArray)

#################################
#################################
#5F interaction paradigm
#1-thumb MI, 2-index finger MI, 3-middle finger MI, 4-ring finger MI, 5-pinkie finger MI

#all paradigms
#99-initial relaxation period
#91-inter-session rest break period
#92-experiment end
#################################

[[ -0.    -0.    -0.   ...  42.98 -11.25 -54.89]
 [ -0.    -0.    -0.   ...  37.77  -3.2  -57.64]
 [ -0.    -0.    -0.   ...  36.85  -3.78 -50.66]
 ...
 [ -0.    -0.    -0.   ...   7.51 -19.17 -41.75]
 [ -0.    -0.    -0.   ...  23.26  -0.6  -15.94]
 [ -0.    -0.    -0.   ...  24.24   6.41 -14.74]]
Number of samples: 3596000
Dataset ID: 201603111905.D091BB44
Channel names: [array(['Fp1'], dtype='<U3') array(['Fp2'], dtype='<U3')
 array(['F3'], dtype='<U2') array(['F4'], dtype='<U2')
 array(['C3'], dtype='<U2') array(['C4'], dtype='<U2')
 array(['P3'], dtype='<U2') array(['P4'], dtype='<U2')
 array(['O1'], dtype='<U2') array(['O2'], dtype='<U2')
 array(['A1'], dtype='<U2') array(['A2'], dtype='<U2')
 array(['F7'], dtype='<U2') array(['F8'], dtype='<U2')
 array(['T3'], dtype='<U2') array(['T4'], dtype='<U2')
 array(['T5'], dtype='<U2') array(['T6'], dtype='<U2')
 array(['Fz'], dtype='<U2') array(['Cz'], dtype='<U2')
 array(['Pz'], dtype='<U2') array(['X5'], dtype='<U2')]


In [6]:
## CSP File from: https://github.com/spolsley/common-spatial-patterns

# CSP takes any number of arguments, but each argument must be a collection of trials associated with a task
# That is, for N tasks, N arrays are passed to CSP each with dimensionality (# of trials of task N) x (feature vector)
# Trials may be of any dimension, provided that each trial for each task has the same dimensionality,
# otherwise there can be no spatial filtering since the trials cannot be compared
def CSP(*tasks):
	if len(tasks) < 2:
		print("Must have at least 2 tasks for filtering.")
		return (None,) * len(tasks)
	else:
		filters = ()
		# CSP algorithm
		# For each task x, find the mean variances Rx and not_Rx, which will be used to compute spatial filter SFx
		iterator = range(0,len(tasks))
		for x in iterator:
			# Find Rx
			Rx = covarianceMatrix(tasks[x][0])
			for t in range(1,len(tasks[x])):
				Rx += covarianceMatrix(tasks[x][t])
			Rx = Rx / len(tasks[x])

			# Find not_Rx
			count = 0
			not_Rx = Rx * 0
			for not_x in [element for element in iterator if element != x]:
				for t in range(0,len(tasks[not_x])):
					not_Rx += covarianceMatrix(tasks[not_x][t])
					count += 1
			not_Rx = not_Rx / count

			# Find the spatial filter SFx
			SFx = spatialFilter(Rx,not_Rx)
			filters += (SFx,)

			# Special case: only two tasks, no need to compute any more mean variances
			if len(tasks) == 2:
				filters += (spatialFilter(not_Rx,Rx),)
				break
		return filters

# covarianceMatrix takes a matrix A and returns the covariance matrix, scaled by the variance
def covarianceMatrix(A):
	Ca = np.dot(A,np.transpose(A))/np.trace(np.dot(A,np.transpose(A)))
	return Ca

# spatialFilter returns the spatial filter SFa for mean covariance matrices Ra and Rb
def spatialFilter(Ra,Rb):
	R = Ra + Rb
	E,U = la.eig(R)

	# CSP requires the eigenvalues E and eigenvector U be sorted in descending order
	ord = np.argsort(E)
	ord = ord[::-1] # argsort gives ascending order, flip to get descending
	E = E[ord]
	U = U[:,ord]

	# Find the whitening transformation matrix
	P = np.dot(np.sqrt(la.inv(np.diag(E))),np.transpose(U))

	# The mean covariance matrices may now be transformed
	Sa = np.dot(P,np.dot(Ra,np.transpose(P)))
	Sb = np.dot(P,np.dot(Rb,np.transpose(P)))

	# Find and sort the generalized eigenvalues and eigenvector
	E1,U1 = la.eig(Sa,Sb)
	ord1 = np.argsort(E1)
	ord1 = ord1[::-1]
	E1 = E1[ord1]
	U1 = U1[:,ord1]

	# The projection matrix (the spatial filter) may now be obtained
	SFa = np.dot(np.transpose(U1),P)
	return SFa.astype(np.float32)

In [7]:
## Find the starting indeces where the marker changes
changeIdxs = np.where(np.transpose(markersArray)[:-1] != np.transpose(markersArray)[1:])[0]
print("Number of index changes: {idxChanges}".format(idxChanges=changeIdxs.shape[0]))
## Split the data so that it has its matching marker
dataSplit = np.array_split(data, changeIdxs[:-1], axis=1)
splitCount = 0
for splitData in dataSplit:
    splitCount += 1
print("Number of arrays in data split: {num}".format(num=splitCount))
## Retrieve the marker values for each of the change indeces (changeIdxs)
markerTargets = markersArray[changeIdxs];
print("Number of marker targets: {numTargets}".format(numTargets=markerTargets.shape[0]))

Number of index changes: 1934
Number of arrays in data split: 1934
Number of marker targets: 1934


In [15]:
## To Apply CSP, we first only get the indeces for MI tasks 1 and 2 (left and right hand, respectively.)
tIdx = np.where(markerTargets == 1)[0]
iIdx = np.where(markerTargets == 2)[0]
mIdx = np.where(markerTargets == 3)[0]
rIdx = np.where(markerTargets == 4)[0]
pIdx = np.where(markerTargets == 5)[0]
tCount = tIdx.shape
print("Thumb Marker Count: {}\tSize of First: ({},{})".format(tCount, dataSplit[tIdx[0]].shape[0],dataSplit[tIdx[0]].shape[1]))
print("Thumb Marker Count: {}\tSize of Second: ({},{})".format(tCount, dataSplit[tIdx[1]].shape[0],dataSplit[tIdx[1]].shape[1]))
print("Thumb Marker Count: {}\tSize of Third: ({},{})".format(tCount, dataSplit[tIdx[2]].shape[0],dataSplit[tIdx[2]].shape[1]))

Thumb Marker Count: (209,)	Size of First: (21,4635)
Thumb Marker Count: (209,)	Size of Second: (21,1308)
Thumb Marker Count: (209,)	Size of Third: (21,1297)


In [29]:
def GetMinSteps(indeces, data):
    minVal = 9999;
    for index in indeces:
        length = data[index].shape[1];
        if(length < minVal):
            minVal = length;
    return minVal

tIdxMin = GetMinSteps(tIdx, dataSplit)
iIdxMin = GetMinSteps(iIdx, dataSplit)
mIdxMin = GetMinSteps(mIdx, dataSplit)
rIdxMin = GetMinSteps(rIdx, dataSplit)
pIdxMin = GetMinSteps(pIdx, dataSplit)
minValues = [tIdxMin, iIdxMin, mIdxMin, rIdxMin, pIdxMin]
minValues

#Truncate the data to the min size
minValue = np.min(minValues)
print(minValue)

1275


In [40]:
currentData = dataSplit[11]

In [44]:
currentDataTrunc = currentData[:,0:1275]

In [45]:
currentDataTrunc.shape

(21, 1275)

In [48]:
def GetData(indeces, data, truncateValue):
    data = []
    truncate = True;
    if truncateValue == 0:
        truncate = False;
    for idx in indeces:
        try:
            currentData = np.transpose(data[idx])
            if truncate:
                data.append(currentData[:,0:truncateValue])
            else:
                data.append(currentData)
        except:
            print("Current index is: {}".format(idx))
    return data

tData = GetData(tIdx, dataSplit, minValue)
iData = GetData(iIdx, dataSplit, minValue)
mData = GetData(mIdx, dataSplit, minValue)
rData = GetData(rIdx, dataSplit, minValue)
pData = GetData(pIdx, dataSplit, minValue)

Current index is: 11


UnboundLocalError: local variable 'currentData' referenced before assignment

In [5]:
leftData = [];
for leftIndex in LeftIdxs[0]:
    #print(leftIndex)
    #print("Dimensions of index: {ind}".format(ind=dataSplit[leftIndex].shape))
    if(dataSplit[leftIndex].shape[1] != 170):
        continue
    else:
        leftData.append(np.transpose(dataSplit[leftIndex]))
leftData = np.asarray(leftData)
leftData.shape

(288, 170, 22)

In [6]:
rightData = [];
for rightIndex in RightIdxs[0]:
    #print(leftIndex)
    #print("Dimensions of index: {ind}".format(ind=dataSplit[leftIndex].shape))
    if(dataSplit[rightIndex].shape[1] != 170):
        continue
    else:
        rightData.append(np.transpose(dataSplit[rightIndex]))
rightData = np.asarray(rightData)
rightData.shape

(327, 170, 22)

In [7]:
## Only keep the top 288 samples, so that left and right data are equal
rightDataSub = rightData[1:289]

In [8]:
#Construct the target array and merge the data
leftTargets = np.tile(np.array([1,0]),(288,1))
rightTargets = np.tile(np.array([0,1]), (288,1))
markerTargets = np.vstack((leftTargets, rightTargets))
lrData = np.vstack((leftData, rightDataSub))

#Sanity Check
print("lrData Shape: {arg1}\tmarkerTargets Shape: {arg2}".format(arg1=lrData.shape, arg2=markerTargets.shape))

lrData Shape: (576, 170, 22)	markerTargets Shape: (576, 2)


In [9]:
## Construct LSTM using Tensorflow + Keras
# Import Libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow.keras import optimizers

In [10]:
## Shuffle the data
lrData, markerTargets = shuffle(lrData, markerTargets, random_state=0)

In [11]:
## Split into train and test sets
lrDataTrain, lrDataTest, markerTargetsTrain, markerTargetsTest = train_test_split(lrData, markerTargets, test_size=0.3, random_state=1)
markerTargetsTrain.shape

(403, 2)

In [12]:
## Reshape the data for time-series processing
## Syntax np.reshape((numExamples, numTimeSteps, numInputs/numFeatures))
lrDataTrainRe = lrDataTrain.reshape((lrDataTrain.shape[0], lrDataTrain.shape[1], lrDataTrain.shape[2]))
lrDataTestRe = lrDataTest.reshape((lrDataTest.shape[0], lrDataTest.shape[1], lrDataTest.shape[2]))

In [13]:
## Construct the model
LSTM_EEG = Sequential()
LSTM_EEG.add(LSTM((100),batch_input_shape=(None,lrDataTrainRe.shape[1], lrDataTrainRe.shape[2]), return_sequences=True))
LSTM_EEG.add(LSTM((50), return_sequences=False))
LSTM_EEG.add(Dense((2),activation='sigmoid'))

In [14]:
LSTM_EEG.summary()
sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
LSTM_EEG.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 170, 100)          49200     
_________________________________________________________________
lstm_1 (LSTM)                (None, 50)                30200     
_________________________________________________________________
dense (Dense)                (None, 2)                 102       
Total params: 79,502
Trainable params: 79,502
Non-trainable params: 0
_________________________________________________________________


In [15]:
history = LSTM_EEG.fit(lrDataTrain, markerTargetsTrain, epochs=30,verbose=2, batch_size=16)

Epoch 1/30
26/26 - 5s - loss: 0.6775 - accuracy: 0.5980
Epoch 2/30
26/26 - 5s - loss: 0.6248 - accuracy: 0.6650
Epoch 3/30
26/26 - 4s - loss: 0.5491 - accuracy: 0.7146
Epoch 4/30
26/26 - 4s - loss: 0.5258 - accuracy: 0.7320
Epoch 5/30
26/26 - 4s - loss: 0.4521 - accuracy: 0.8238
Epoch 6/30
26/26 - 4s - loss: 0.4264 - accuracy: 0.7940
Epoch 7/30
26/26 - 4s - loss: 0.3616 - accuracy: 0.8437
Epoch 8/30
26/26 - 3s - loss: 0.3180 - accuracy: 0.8462
Epoch 9/30
26/26 - 3s - loss: 0.2930 - accuracy: 0.8883
Epoch 10/30
26/26 - 3s - loss: 0.2680 - accuracy: 0.8983
Epoch 11/30
26/26 - 3s - loss: 0.2678 - accuracy: 0.8983
Epoch 12/30
26/26 - 3s - loss: 0.2198 - accuracy: 0.9007
Epoch 13/30
26/26 - 4s - loss: 0.1466 - accuracy: 0.9454
Epoch 14/30
26/26 - 3s - loss: 0.1805 - accuracy: 0.9305
Epoch 15/30
26/26 - 2s - loss: 0.2859 - accuracy: 0.8759
Epoch 16/30
26/26 - 2s - loss: 0.2113 - accuracy: 0.9181
Epoch 17/30
26/26 - 2s - loss: 0.2795 - accuracy: 0.8834
Epoch 18/30
26/26 - 2s - loss: 0.1470 - 

In [16]:
predictionsTest = LSTM_EEG.predict(lrDataTest)

In [17]:
predictionsTest[predictionsTest>0.5] = 1

In [18]:
predictionsTest[predictionsTest <= 0.5] = 0

In [19]:
comparisonArrayTest = predictionsTest == markerTargetsTest

In [20]:
correctCountTest = 0
for boolValues in comparisonArrayTest:
    if(boolValues[0] & boolValues[1]):
        correctCountTest += 1
falseCountTest = lrDataTest.shape[0] - correctCountTest

predictionsTrain = LSTM_EEG.predict(lrDataTrain)
predictionsTrain[predictionsTrain>0.5] = 1;
predictionsTrain[predictionsTrain<=0.5] = 0;
comparisonArrayTrain = predictionsTrain == markerTargetsTrain;

correctCountTrain = 0
for boolValues in comparisonArrayTrain:
    if(boolValues[0] & boolValues[1]):
        correctCountTrain += 1
falseCountTrain = lrDataTrain.shape[0] - correctCountTrain

In [21]:
print("#################################")
print("#################################")
print("Training Performance:\nCorrect MI Prediction: {}\nIncorrect MI Prediction: {}\nPercent Accuracy: {:.3f}%".format(correctCountTrain, falseCountTrain, (correctCountTrain*100/lrDataTrain.shape[0])))
print("#################################")
print("#################################")
print("Testing Performance:\nCorrect MI Prediction: {}\nIncorrect MI Prediction: {}\nPercent Accuracy: {:.3f}%".format(correctCountTest, falseCountTest, (correctCountTest*100/lrDataTest.shape[0])))
print("#################################")
print("#################################")

#################################
#################################
Training Performance:
Correct MI Prediction: 384
Incorrect MI Prediction: 19
Percent Accuracy: 95.285%
#################################
#################################
Test Performance:
Correct MI Prediction: 110
Incorrect MI Prediction: 63
Percent Accuracy: 63.584%
#################################
#################################
