In [1]:
## Train a linear SVM classifier using scikit-learn
# Import Libraries
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras import optimizers
from sklearn.utils import shuffle
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
import numpy as np

#Import the helper functions
#from helperFunctions import CSP
from helperFunctions import GetCombinedData_HaLT as GetHALT
from helperFunctions import GetCombinedData_CLA as GetCLA
from helperFunctions import GetCombinedData_5F as Get5F
from helperFunctions import GetCombinedData_FreeForm as GetFree

# Ask which paradigm to load and where the data lives, then load the matching
# combined dataset through the helperFunctions loaders.
print("Available paradigms:\nHand, Leg, Tongue (HALT)\nClassic - L/R Hand (CLA)\nFingers (5F)")
# Normalise the answers so stray whitespace or lower-case input (" cla ", "5f")
# still selects a paradigm.
paradigm = input('Enter desired paradigm: ').strip().upper()
directoryPath = input('Enter the relative path to data: ').strip()

# Each loader is unpacked into (Data, Targets, DataCSP, TargetsCSP). The second
# argument (True) is passed through unchanged; its meaning is defined in
# helperFunctions. numOutputs is presumably the number of target classes for
# the paradigm -- TODO confirm against helperFunctions.
if paradigm == 'HALT':
    Data, Targets, DataCSP, TargetsCSP = GetHALT(directoryPath, True)
    numOutputs = 5
elif paradigm == 'CLA':
    Data, Targets, DataCSP, TargetsCSP = GetCLA(directoryPath, True)
    print("############")
    print("Processing FreeForm data for further testing")
    # FreeForm data is only loaded for CLA; the final cell evaluates the
    # classifier on it when paradigm == 'CLA'.
    FreeData, FreeTargets, FreeDataCSP, FreeTargetsCSP = GetFree(directoryPath, True)
    numOutputs = 2
elif paradigm == '5F':
    Data, Targets, DataCSP, TargetsCSP = Get5F(directoryPath, True)
    numOutputs = 5
else:
    # Raise instead of calling quit(): inside Jupyter, quit() terminates the
    # kernel rather than reporting the problem, whereas an exception stops
    # "Run All" at this cell with a readable message.
    raise ValueError(
        "Error: Invalid paradigm {} (expected HALT, CLA or 5F)".format(paradigm))

# NOTE(review): these two settings are not referenced by any visible cell of
# this notebook -- presumably left over from a neural-network variant. Kept so
# that anything relying on them still finds them; TODO confirm and remove.
enableDropout = True
dropoutPercentage = 0.3

Available paradigms:
Hand, Leg, Tongue (HALT)
Classic - L/R Hand (CLA)
Fingers (5F)
Enter desired paradigm: 5F
Enter the relative path to data: ../../Project/dataset
Processing dataset 1 of 10
Processing dataset 2 of 10
Processing dataset 3 of 10
Processing dataset 4 of 10
Processing dataset 5 of 10
Processing dataset 6 of 10
Processing dataset 7 of 10
Processing dataset 8 of 10
Processing dataset 9 of 10
Processing dataset 10 of 10


  return SFa.astype(np.float32)


## Building and training the model
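After processing the data, we build the classifier: a linear support vector machine (scikit-learn's `LinearSVC`).
Because the SVM expects a 2-D feature matrix and a 1-D label vector, each trial is flattened into a single row containing all channels and voltages, and the one-hot targets are decoded to integer class labels.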

We first train the classifier on non-CSP data.

In [2]:
# Prepare the raw (non-CSP) data for the SVM.
# Fresh names are used instead of overwriting Data/Targets so that this cell
# can be re-run safely: decoding an already-decoded 1-D Targets with
# np.argmax(..., axis=1) would raise on the second run.

# Decode the one-hot targets to integer class labels, as a flat vector.
TargetsDecoded = np.argmax(Targets, axis=1).reshape(-1)

# Flatten every trial into one row so that each row is an action and contains
# all the channels and voltages (the SVM needs a 2-D feature matrix).
DataFlat = Data.reshape(Data.shape[0], -1)

## Shuffle the data (rows and labels are permuted together; seeded)
DataShuffled, TargetsShuffled = shuffle(DataFlat, TargetsDecoded, random_state=0)

## Split into train and test sets (70% train / 30% test; seeded)
DataTrain, DataTest, TargetsTrain, TargetsTest = train_test_split(
    DataShuffled, TargetsShuffled, test_size=0.3, random_state=0)

In [3]:
# SVM Classifier
# Build a linear support-vector classifier with a fixed random_state and fit it
# on the training split produced by the previous cell.
# The commented-out line below is an alternative RBF-kernel SVC that is not used.
#clf = SVC(kernel = 'rbf', random_state = 42)
clf = LinearSVC(random_state=42)
# The fit call is the last expression of the cell, so its return value (the
# estimator) is what the notebook displays as this cell's output.
clf.fit(DataTrain, TargetsTrain)



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
          verbose=0)

## Evaluating the model
Now, we use the model to predict the outputs for the test set, and compare them to the target outputs.

In [4]:
# Score the fitted classifier on both splits; the scores are multiplied by 100
# below to print them as percentages.
trainPerc = clf.score(DataTrain, TargetsTrain)
testPerc = clf.score(DataTest, TargetsTest)

# Assemble the whole report first, then emit it with a single print call.
separator = "#################################"
trainLine = "Training Performance:\nPercent Accuracy: {:.3f}%".format(trainPerc*100)
testLine = "Testing Performance:\nPercent Accuracy: {:.3f}%".format(testPerc*100)
reportLines = [
    separator, separator,
    trainLine,
    separator, separator,
    testLine,
    separator, separator,
]
print("\n".join(reportLines))

#################################
#################################
Training Performance:
Percent Accuracy: 99.933%
#################################
#################################
Testing Performance:
Percent Accuracy: 36.171%
#################################
#################################


In [5]:
# Confusion matrix for the current test split: true labels are passed first,
# the classifier's predictions second.
testPredictions = clf.predict(DataTest)
cm = confusion_matrix(TargetsTest, testPredictions)
print(cm)

[[234 145  53  49  45]
 [121 185 115  48  45]
 [ 74 109 149 107  82]
 [ 41  64  99 159 142]
 [ 30  50  83 125 195]]


## Using CSP Data
Now we retrain the same classifier on the CSP data and evaluate it on a held-out test split.

In [6]:
# Prepare the CSP data for the SVM, refit the classifier and report accuracy.
# TargetsCSP and DataCSP are left untouched (fresh Re* names are derived from
# them) so this cell can be re-run safely: decoding an already-decoded 1-D
# TargetsCSP with np.argmax(..., axis=1) would raise on the second run.

# Decode the one-hot targets to integer class labels, as a flat vector.
ReTargetsCSP = np.argmax(TargetsCSP, axis=1).reshape(-1)

# Flatten every trial into one row so that each row is an action and contains
# all the channels and voltages (the SVM needs a 2-D feature matrix).
ReDataCSP = DataCSP.reshape(DataCSP.shape[0], -1)

## Shuffle the data (rows and labels are permuted together; seeded)
ReDataCSP, ReTargetsCSP = shuffle(ReDataCSP, ReTargetsCSP, random_state=0)

## Split into train and test sets (70% train / 30% test; seeded)
# NOTE: this rebinds DataTrain/DataTest/TargetsTrain/TargetsTest, replacing
# the raw-data split; the confusion-matrix cell below reads these names.
DataTrain, DataTest, TargetsTrain, TargetsTest = train_test_split(
    ReDataCSP, ReTargetsCSP, test_size=0.3, random_state=0)

# SVM Classifier
# Refit the existing estimator on the CSP training split; from here on clf is
# the CSP-trained model and the earlier raw-data fit is discarded.
clf.fit(DataTrain, TargetsTrain)

trainPerc = clf.score(DataTrain, TargetsTrain)
testPerc = clf.score(DataTest, TargetsTest)

print("#################################")
print("#################################")
print("Training Performance:\nPercent Accuracy: {:.3f}%".format(trainPerc*100))
print("#################################")
print("#################################")
print("Testing Performance:\nPercent Accuracy: {:.3f}%".format(testPerc*100))
print("#################################")
print("#################################")



#################################
#################################
Training Performance:
Percent Accuracy: 99.966%
#################################
#################################
Testing Performance:
Percent Accuracy: 74.107%
#################################
#################################


In [7]:
# Confusion matrix for the current test split: true labels are passed first,
# the classifier's predictions second.
testPredictions = clf.predict(DataTest)
cm = confusion_matrix(TargetsTest, testPredictions)
print(cm)

[[377  48  45  27  29]
 [ 28 398  31  36  21]
 [ 33  30 372  48  38]
 [ 39  34  42 356  34]
 [ 18  27  26  26 386]]


## Optional testing of small FreeForm Datasets
The FreeForm datasets are a collection of three self-paced datasets. They may not contain enough data to train a model on their own; a model trained only on FreeForm is likely to overfit. Because the CLA paradigm is similar to FreeForm, when the selected paradigm is CLA we also evaluate the CLA-trained model on the FreeForm data.

In [8]:
# Evaluate the classifier on the FreeForm data; this only runs for the CLA
# paradigm, because FreeForm data is only loaded in that case.
if paradigm == 'CLA':
    ## Have to check and make sure that the FreeData is the same dimensions as the CLA data.
    # Trials are flattened before being given to clf, so FreeForm trials must
    # have the same extent along axis 2 as the DataCSP trials the classifier
    # was trained on (assumes the CSP training cell has been run).
    targetLen = DataCSP.shape[2]
    lenFree = FreeDataCSP.shape[2]
    if lenFree < targetLen:
        # Too short to be made compatible by truncation; fail with a clear
        # message instead of a feature-count error from inside scikit-learn.
        raise ValueError(
            "FreeForm trials have length {} along axis 2 but the classifier "
            "was trained on length {}".format(lenFree, targetLen))
    # Truncate to the training length (a no-op when the lengths already
    # match). The previous check compared lenFree with len(DataCSP), i.e. the
    # number of trials (axis 0), and only truncated when FreeForm was
    # *shorter* than that count, which is the wrong axis and the wrong
    # direction. A new name is used so FreeDataCSP is not overwritten.
    FreeDataCSPTrimmed = FreeDataCSP[:, :, 0:targetLen]

    # Decode one-hot targets to integer labels (flat vector) and flatten each
    # trial into one row. FreeTargetsCSP itself is left untouched so the cell
    # can be re-run.
    ReFreeTargetsCSP = np.argmax(FreeTargetsCSP, axis=1).reshape(-1)
    ReFreeDataCSP = FreeDataCSPTrimmed.reshape(FreeDataCSPTrimmed.shape[0], -1)

    # Predict once and reuse the predictions for both the accuracy and the
    # confusion matrix.
    freePredictions = clf.predict(ReFreeDataCSP)
    testPerc = accuracy_score(ReFreeTargetsCSP, freePredictions)

    print("#################################")
    print("#################################")
    print("FreeForm Performance:\nPercent Accuracy: {:.3f}%".format(testPerc*100))
    print("#################################")
    print("#################################")

    cm = confusion_matrix(ReFreeTargetsCSP, freePredictions)
    print(cm)