In [1]:
import numpy as np
from scipy.io import loadmat

import matplotlib.pyplot as plt
%matplotlib inline 

## general SVM 
from sklearn.svm import SVC

## Let's start from linear SVM 
from sklearn.svm import LinearSVC
# http://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html
from sklearn import svm

from tqdm import tqdm # for progressive bar

from sklearn.externals import joblib # to save model

from P300.data import load_data


## load training data

In [2]:
# Load the training data for the selected subject(s).
num_epoches = 85

# Which subject(s) to train on: 'A', 'B' or 'AandB'.
subject_train = 'AandB'

# Strings are compared with `==`: `is` tests object identity and only appears
# to work when the interpreter happens to intern both literals.
if subject_train == 'A':
    X_train, Y_train, C_train = load_data('A', 'train', num_epoches)

elif subject_train == 'B':
    X_train, Y_train, C_train = load_data('B', 'train', num_epoches)

elif subject_train == 'AandB':
    X_train_A, Y_train_A, C_train_A = load_data('A', 'train', num_epoches)
    X_train_B, Y_train_B, C_train_B = load_data('B', 'train', num_epoches)

    # Pool both subjects into one training set (subject A rows first).
    X_train = np.vstack((X_train_A, X_train_B))
    print(X_train.shape)
    Y_train = np.concatenate((Y_train_A, Y_train_B))
    print(Y_train.shape)
    # Keep the third array in step with X/Y, as the single-subject branches do.
    C_train = np.concatenate((C_train_A, C_train_B))

else:
    # Fail here rather than with a NameError in a later cell.
    raise ValueError(
        "subject_train must be 'A', 'B' or 'AandB', got {!r}".format(subject_train)
    )


loaded:
(85, 180, 896)
(85, 180, 1)
(85, 180, 1)
stacked:
(15300, 896)
(15300,)
(15300,)
loaded:
(85, 180, 896)
(85, 180, 1)
(85, 180, 1)
stacked:
(15300, 896)
(15300,)
(15300,)


(30600, 896)
(30600,)


## train an SVM

In [None]:
# Fit a linear-kernel SVC on the training set loaded above.
# (An svm.LinearSVC variant with loss="hinge" and max_iter=5000 was tried
# earlier; note that LinearSVC does not accept a `probability` argument.)
clf = svm.SVC(
    kernel='linear',
    C=1.0,
    class_weight='balanced',
    probability=True,
    max_iter=-1,
    random_state=42,
)
clf.fit(X_train, Y_train)

In [None]:
# Score of the fitted classifier on the data it was trained on
# (for scikit-learn classifiers `score` is the mean accuracy).
clf.score(X=X_train, y=Y_train)

In [None]:
# Persist the fitted classifier to disk. The file name encodes the training
# subject(s) and `num_ch` (presumably the number of EEG channels -- confirm).
num_ch = 64
joblib.dump(
    value=clf,
    filename="{}_SVC_linear_ch{}.model".format(subject_train, num_ch),
)

## test

In [None]:
# Load the 'test' split for subject A, then subject B. Each load_data call
# returns three arrays, unpacked here as X, Y and C.
(X_test_A, Y_test_A, C_test_A), (X_test_B, Y_test_B, C_test_B) = (
    load_data(subject_id, 'test', num_epoches) for subject_id in ('A', 'B')
)

In [None]:
# Classifier score on subject A's test split.
clf.score(X=X_test_A, y=Y_test_A)

In [None]:
# Classifier score on subject B's test split.
clf.score(X=X_test_B, y=Y_test_B)

## Let's understand it

In [None]:
# Reload the classifier from disk so the analysis below can start from here
# without retraining.
subject_train = 'AandB'
num_ch = 64
# Must be the same file name pattern the "save model" cell writes
# ("{}_SVC_linear_ch{}.model"); the previous "model/{}_SVM_ch{}.model" path
# pointed at a file this notebook never creates.
clf = joblib.load("{}_SVC_linear_ch{}.model".format(subject_train, num_ch))

In [None]:
# SVM decision values for every training epoch.
f = clf.decision_function(X_train)      # pooled A+B; used by the Platt-scaling cell below
f_A = clf.decision_function(X_train_A)  # subject A only
f_B = clf.decision_function(X_train_B)  # subject B only

# One row of panels per subject:
#   column 0: histogram of f where Y > 0
#   column 1: histogram of f where Y < 0
#   column 2: f plotted against Y
fig, axes = plt.subplots(2, 3, figsize=(8, 6), dpi=120)
per_subject = (('A', f_A, Y_train_A), ('B', f_B, Y_train_B))

for (ax_pos, ax_neg, ax_scatter), (subj, f_subj, Y_subj) in zip(axes, per_subject):
    ax_pos.hist(f_subj[Y_subj > 0])
    ax_pos.set(xlabel='f', ylabel='count', title='subject {} (Y > 0)'.format(subj))

    ax_neg.hist(f_subj[Y_subj < 0])
    ax_neg.set(xlabel='f', ylabel='count', title='subject {} (Y < 0)'.format(subj))

    # Heavy transparency because many points share the same Y value.
    ax_scatter.plot(Y_subj, f_subj, 'o', alpha=0.05)
    ax_scatter.set(xlabel='Y', ylabel='f', title='subject {}'.format(subj))

# Keep axis labels and titles of neighbouring panels from overlapping.
fig.tight_layout()


In [None]:
# Train a logistic regression that maps the SVM decision value f to the
# label Y, using subject A's training data.
from sklearn.linear_model import LogisticRegression

clf2 = LogisticRegression(class_weight='balanced', n_jobs=-1)
# X must be 2-D (n_samples, 1 feature); y must be 1-D. Passing y as a column
# vector makes scikit-learn emit a DataConversionWarning.
clf2.fit(f_A.reshape(-1, 1), np.ravel(Y_train_A))


In [None]:
# Score of the logistic regression on the same (f, Y) pairs it was fitted on.
clf2.score(
    X=f_A.reshape(-1, 1),
    y=Y_train_A.reshape(-1, 1),
)

In [None]:
# Display subject A's training labels for a quick visual check.
Y_train_A

In [None]:
# Squash subject A's decision values through a plain logistic function
# (no fitted slope or offset) and plot the result against the labels.
plt.plot(1/(1+np.exp(-f_A.reshape(-1,1))), Y_train_A, 'o', alpha=0.05)
plt.xlabel('1 / (1 + exp(-f))')
plt.ylabel('Y')
plt.title('subject A');

In [None]:
# Project-local helper used below to fit the Platt-scaling parameters.
from platt import platt 

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. platt.py) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Fit the two Platt-scaling parameters on the pooled (A+B) decision values and
# labels.
# NOTE(review): the 3rd and 4th arguments are the counts of Y<0 and Y>0, in
# that order -- confirm this matches the parameter order platt() expects.
a, b = platt(f, Y_train, sum(Y_train<0), sum(Y_train>0))
print(a, b)

# Save the two parameters as plain text.
np.savetxt("AandB_SVM_ab.txt", (a,b))

# Plot the fitted sigmoid 1/(1+exp(a*f+b)) evaluated at subject A's decision values.
# NOTE(review): a, b were fitted on the pooled f but only f_A is plotted -- confirm intended.
plt.plot(f_A, 1.0/(1+np.exp(f_A*a+b)), 'o')


In [None]:
# Fit the two Platt-scaling parameters on subject A's decision values and
# labels only. This overwrites the a, b fitted in the previous cell.
# NOTE(review): the 3rd and 4th arguments are the counts of Y<0 and Y>0, in
# that order -- confirm this matches the parameter order platt() expects.
a, b = platt(f_A, Y_train_A, sum(Y_train_A<0), sum(Y_train_A>0))
print(a, b)

# Save the two parameters as plain text.
np.savetxt("A_SVM_ab.txt", (a,b))

# Plot the fitted sigmoid 1/(1+exp(a*f+b)) evaluated at subject A's decision values.
plt.plot(f_A, 1.0/(1+np.exp(f_A*a+b)), 'o')




In [None]:
# Class balance of subject A's training labels: (count of Y > 0, count of Y < 0).
# Summing the boolean masks along the first axis counts the True entries.
np.sum(Y_train_A > 0, axis=0), np.sum(Y_train_A < 0, axis=0)

In [None]:
# Write subject A's decision values and labels to plain-text files.
np.savetxt(fname="f.txt", X=f_A)
np.savetxt(fname="Y.txt", X=Y_train_A)