In [1]:
import numpy as np
from scipy.io import loadmat

import matplotlib.pyplot as plt
%matplotlib inline 

from sklearn.linear_model import LogisticRegression
# https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html
    
# confusion matrix
from sklearn import metrics
# http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html




#from tqdm import tqdm # for progressive bar

from sklearn.externals import joblib # to save model

In [2]:
def load_one_epoch(subject, data_type, epoch_num):
    if data_type is 'train':
        epoch_data=loadmat("data/{}{}-allfilt10.mat".format(subject, epoch_num))

        x=epoch_data['x']
        y=epoch_data['y']
        code=epoch_data['code']

    elif data_type is 'test':
        epoch_data=loadmat("data/{}t{}-allfilt10.mat".format(subject, epoch_num))

        x=epoch_data['x']
        #y=epoch_data['y']
        code=epoch_data['code']
        
        # decode y 
        #print('decode y')
        true_code = np.loadtxt("data/{}t_{}_true_code.txt".format(subject, epoch_num))

        y = -np.ones(code.shape)
        idx = (code == true_code[0]) | (code == true_code[1])
        y[idx] = 1

    else:
        pass
            


    return x, y, code

In [3]:
def load_data(subject, data_type, num_epoches):
    X = list()
    Y = list()
    C = list()
    for epoch_num in range(num_epoches):
        #print(epoch_num)
        x, y, code = load_one_epoch(subject, data_type, 1+epoch_num)

        X.append(x)
        Y.append(y)
        C.append(code)
        


    X = np.array(X)
    Y = np.array(Y)
    C = np.array(C)

    print('loaded:')
    print(X.shape)
    print(Y.shape)
    print(C.shape)    

    num_trials = X.shape[1]
    data_dim = X.shape[2]

    # stack epoches
    X = X.reshape(-1,data_dim)
    Y = Y.ravel()
    C = C.ravel()    

    print('stacked:')
    print(X.shape)
    print(Y.shape)
    print(C.shape)    
    
    return X, Y, C

In [4]:
subject='A'
#subject='B'
epoch_num = 1
x, y, code = load_one_epoch(subject, 'train', 1)
#x, y, code = load_one_epoch(subject, 'test', 1)

print(x.shape)
print(y.shape)
print(code.shape)

(180, 896)
(180, 1)
(180, 1)


In [5]:
#load test data
epoch_num = 1
x, y, code = load_one_epoch(subject, 'test', epoch_num)

print(x.shape)
print(y.shape)
print(code.shape)


(180, 896)
(180, 1)
(180, 1)


In [6]:
# for subject A
trueChar='WQXPLZCOMRKO97YFZDEZ1DPI9NNVGRQDJCUVRMEUOOOJD2UFYPOO6J7LDGYEGOA5VHNEHBTXOO1TDOILUEE5BFAEEXAW-K4R3MRU'
# for subject B
#trueChar='MERMIROOMUHJPXJOHUVLEORZP3GLOO7AUFDKEFTWEOOALZOP9ROCGZET1Y19EWX65QUYU7NAK-4YCJDVDNGQXODBEV2B5EFDIDNR';
len(trueChar)

100

In [7]:
max(code)

array([12], dtype=uint8)

In [8]:
min(code)

array([1], dtype=uint8)

In [9]:
subject = 'A'
#data_type = 'train'
X_train, Y_train, Code_train = load_data(subject, 'train', 85)

X_test, Y_test, Code_test = load_data(subject, 'test', 100)


loaded:
(85, 180, 896)
(85, 180, 1)
(85, 180, 1)
stacked:
(15300, 896)
(15300,)
(15300,)
loaded:
(100, 180, 896)
(100, 180, 1)
(100, 180, 1)
stacked:
(18000, 896)
(18000,)
(18000,)


In [10]:
sum(Y_test==1), sum(Y_test==-1)

(3000, 15000)

## Logistic regression

In [11]:
clf = LogisticRegression(class_weight='balanced', n_jobs=-1)
clf.fit(X_train, Y_train)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


LogisticRegression(C=1.0, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='warn', n_jobs=-1, penalty='l2', random_state=None,
          solver='warn', tol=0.0001, verbose=0, warm_start=False)

In [12]:
num_ch = 64
joblib.dump(clf, "{}_LR_ch{}.model".format(subject,num_ch))

['A_LR_ch64.model']

In [13]:
# predict
Y_est=clf.predict(X_test)

#accuracy = 
sum(Y_test==Y_est) / len(Y_test)

0.7347777777777778

In [14]:
# confusion matrix
from sklearn import metrics
# http://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html

metrics.confusion_matrix(Y_test,Y_est,labels=(1,-1))

array([[ 1901,  1099],
       [ 3675, 11325]])

## subject B

In [15]:
subject = 'B'
X_train, Y_train, Code_train = load_data(subject, 'train', 85)
X_test, Y_test, Code_test = load_data(subject, 'test', 100)

loaded:
(85, 180, 896)
(85, 180, 1)
(85, 180, 1)
stacked:
(15300, 896)
(15300,)
(15300,)
loaded:
(100, 180, 896)
(100, 180, 1)
(100, 180, 1)
stacked:
(18000, 896)
(18000,)
(18000,)


In [16]:
# train
clf = LogisticRegression(class_weight='balanced', n_jobs=-1)
clf.fit(X_train, Y_train)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


LogisticRegression(C=1.0, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='warn', n_jobs=-1, penalty='l2', random_state=None,
          solver='warn', tol=0.0001, verbose=0, warm_start=False)

In [17]:
# save model
num_ch = 64
joblib.dump(clf, "{}_LR_ch{}.model".format(subject,num_ch))

['B_LR_ch64.model']

In [18]:
# test
Y_est=clf.predict(X_test)

#accuracy = 
sum(Y_test==Y_est) / len(Y_test)

0.7665555555555555

In [19]:
# confusion matrix
metrics.confusion_matrix(Y_test,Y_est,labels=(1,-1))

array([[ 2124,   876],
       [ 3326, 11674]])

## Joint

In [20]:
subject = 'A'
X_train_A, Y_train_A, Code_train_A = load_data(subject, 'train', 85)
X_test_A, Y_test_A, Code_test_A = load_data(subject, 'test', 100)

subject = 'B'
X_train_B, Y_train_B, Code_train_B = load_data(subject, 'train', 85)
X_test_B, Y_test_B, Code_test_B = load_data(subject, 'test', 100)


loaded:
(85, 180, 896)
(85, 180, 1)
(85, 180, 1)
stacked:
(15300, 896)
(15300,)
(15300,)
loaded:
(100, 180, 896)
(100, 180, 1)
(100, 180, 1)
stacked:
(18000, 896)
(18000,)
(18000,)
loaded:
(85, 180, 896)
(85, 180, 1)
(85, 180, 1)
stacked:
(15300, 896)
(15300,)
(15300,)
loaded:
(100, 180, 896)
(100, 180, 1)
(100, 180, 1)
stacked:
(18000, 896)
(18000,)
(18000,)


In [21]:
# combine
X_train = np.vstack((X_train_A,X_train_B))
print(X_train.shape)
Y_train = np.concatenate((Y_train_A, Y_train_B))
print(Y_train.shape)

X_test = np.vstack((X_test_A, X_test_B))
print(X_test.shape)
Y_test = np.concatenate((Y_test_A, Y_test_B))
print(Y_test.shape)




(30600, 896)
(30600,)
(36000, 896)
(36000,)


In [22]:
# train
clf = LogisticRegression(class_weight='balanced', n_jobs=-1)
clf.fit(X_train, Y_train)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


LogisticRegression(C=1.0, class_weight='balanced', dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='warn', n_jobs=-1, penalty='l2', random_state=None,
          solver='warn', tol=0.0001, verbose=0, warm_start=False)

In [23]:
# save model
subject = 'AandB'
num_ch = 64
joblib.dump(clf, "{}_LR_ch{}.model".format(subject,num_ch)) 
    

['AandB_LR_ch64.model']

In [24]:
# predict
Y_est=clf.predict(X_test)

#accuracy = 
sum(Y_test==Y_est) / len(Y_test)

0.7188611111111111

In [25]:
# confusion matrix
metrics.confusion_matrix(Y_test,Y_est,labels=(1,-1))

array([[ 3962,  2038],
       [ 8083, 21917]])

In [26]:
# predict A using joint model
Y_est_A=clf.predict(X_test_A)

#accuracy = 
sum(Y_test_A==Y_est_A) / len(Y_test_A)



0.7098333333333333

In [27]:
# predict B using joint model

Y_est_B=clf.predict(X_test_B)

#accuracy = 
sum(Y_test_B==Y_est_B) / len(Y_test_B)



0.7278888888888889