In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score
import sklearn.tree as tree
from sklearn.metrics import plot_confusion_matrix

# Classifiers 
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

# Output directory (relative to the notebook) where every figure is saved.
dir_out = "../plots/"

In [3]:
# Input directory (relative to the notebook) holding the per-gesture CSV files.
dir_in = "../data/emg-4/"

# Column layout: 8 readings x 8 sensors, reading-major order, followed by the
# class label as the final column.
col_names = [
    f"muscle reading {reading} sensor {sensor}"
    for reading in range(1, 9)
    for sensor in range(1, 9)
] + ["gestures"]

# Gesture classes were : rock - 0, scissors - 1, paper - 2, ok - 3
# Files '0.csv' .. '3.csv' are read from dir_in (presumably one file per
# gesture class -- verify against the dataset description).

# Read every file first and concatenate once at the end. DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0, and growing a frame inside the
# loop re-copies all previously loaded rows on each iteration.
frames = []
for i in range(4):
    fn = dir_in + str(i) + '.csv'
    # The files carry no header row, so column names come from col_names.
    df_i = pd.read_csv(fn, header=None, index_col=False, names=col_names)
    print(len(df_i))  # per-file row count
    frames.append(df_i)

# Row labels are kept as read (each file restarts at 0), exactly as the
# previous append-based code did; downstream splitting is positional.
df = pd.concat(frames)
display(df.head())

print(len(df))  # total row count across all files

2910
2903
2943
2922


Unnamed: 0,muscle reading 1 sensor 1,muscle reading 1 sensor 2,muscle reading 1 sensor 3,muscle reading 1 sensor 4,muscle reading 1 sensor 5,muscle reading 1 sensor 6,muscle reading 1 sensor 7,muscle reading 1 sensor 8,muscle reading 2 sensor 1,muscle reading 2 sensor 2,...,muscle reading 7 sensor 8,muscle reading 8 sensor 1,muscle reading 8 sensor 2,muscle reading 8 sensor 3,muscle reading 8 sensor 4,muscle reading 8 sensor 5,muscle reading 8 sensor 6,muscle reading 8 sensor 7,muscle reading 8 sensor 8,gestures
0,26.0,4.0,5.0,8.0,-1.0,-13.0,-109.0,-66.0,-9.0,2.0,...,-28.0,61.0,4.0,8.0,5.0,4.0,-7.0,-59.0,16.0,0
1,-47.0,-6.0,-5.0,-7.0,13.0,-1.0,35.0,-10.0,10.0,-4.0,...,-25.0,47.0,6.0,6.0,5.0,13.0,21.0,111.0,15.0,0
2,-19.0,-8.0,-8.0,-8.0,-21.0,-6.0,-79.0,12.0,0.0,5.0,...,-83.0,7.0,7.0,1.0,-8.0,7.0,21.0,114.0,48.0,0
3,2.0,3.0,0.0,2.0,0.0,22.0,106.0,-14.0,-16.0,-2.0,...,-38.0,-11.0,4.0,7.0,11.0,33.0,39.0,119.0,43.0,0
4,6.0,0.0,0.0,-2.0,-14.0,10.0,-51.0,5.0,7.0,0.0,...,38.0,-35.0,-8.0,2.0,6.0,-13.0,-24.0,-112.0,-69.0,0


11678


In [4]:
# Seed for the train/test partition, so the split is reproducible.
RANDOM_STATE_DATA = 0

# Target is the 'gestures' column; every other column is a feature.
y = df['gestures']
X = df.drop(columns=['gestures'])

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE_DATA,
)


In [5]:
# KNeighborsClassifier: 5-fold grid search over the neighbourhood size and the
# vote weighting scheme, fitted on the training split only.
RANDOM_STATE_MODEL = 42
clf = KNeighborsClassifier()
param_grid = dict(
    n_neighbors=[5, 10, 15],
    weights=['uniform', 'distance'],
)
CV_clf = GridSearchCV(clf, param_grid, cv=5, verbose=3)

CV_clf.fit(X_train, y_train)

# Report the winning parameter combination and its mean cross-validated score.
print(CV_clf.best_params_, CV_clf.best_score_, sep='\n')

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] n_neighbors=5, weights=uniform ..................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ...... n_neighbors=5, weights=uniform, score=0.659, total=   1.5s
[CV] n_neighbors=5, weights=uniform ..................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.5s remaining:    0.0s


[CV] ...... n_neighbors=5, weights=uniform, score=0.678, total=   1.4s
[CV] n_neighbors=5, weights=uniform ..................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    2.9s remaining:    0.0s


[CV] ...... n_neighbors=5, weights=uniform, score=0.669, total=   1.5s
[CV] n_neighbors=5, weights=uniform ..................................
[CV] ...... n_neighbors=5, weights=uniform, score=0.689, total=   1.3s
[CV] n_neighbors=5, weights=uniform ..................................
[CV] ...... n_neighbors=5, weights=uniform, score=0.671, total=   1.5s
[CV] n_neighbors=5, weights=distance .................................
[CV] ..... n_neighbors=5, weights=distance, score=0.667, total=   1.5s
[CV] n_neighbors=5, weights=distance .................................
[CV] ..... n_neighbors=5, weights=distance, score=0.679, total=   1.4s
[CV] n_neighbors=5, weights=distance .................................
[CV] ..... n_neighbors=5, weights=distance, score=0.676, total=   1.5s
[CV] n_neighbors=5, weights=distance .................................
[CV] ..... n_neighbors=5, weights=distance, score=0.695, total=   1.4s
[CV] n_neighbors=5, weights=distance .................................
[CV] .

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:   43.6s finished


In [11]:
RANDOM_STATE_MODEL = 42

# Build a fresh KNN from the parameters selected by the grid search and fit it
# on the full training split.
best_knn_params = CV_clf.best_params_
clf = KNeighborsClassifier(
    weights=best_knn_params['weights'],
    n_neighbors=best_knn_params['n_neighbors'],
)
clf.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=10, p=2,
                     weights='distance')

In [12]:
labels = [0,1,2,3]
label_names = ['rock-0', 'scissors-1', 'paper-2', 'ok-3']

# Figure defaults; these rcParams changes persist for later plots as well.
plt.rcParams["figure.figsize"] = (8, 8)
plt.rcParams.update({'font.size': 18})


def save_confusion_matrix(estimator, features, targets, split_name):
    """Plot the confusion matrix of `estimator` on one data split and save it.

    The figure is written to dir_out as 'CM_KNN_<split_name lower-cased>.png'
    and then closed, so nothing is rendered inline.

    Parameters
    ----------
    estimator : fitted classifier passed through to plot_confusion_matrix.
    features, targets : inputs and true labels of the split to evaluate.
    split_name : str
        Human-readable split name used in the title, e.g. 'Train' or 'Test'.
    """
    # NOTE(review): plot_confusion_matrix was removed in scikit-learn 1.2;
    # ConfusionMatrixDisplay.from_estimator is the replacement once the
    # environment is upgraded.
    plot_confusion_matrix(estimator, features, targets, labels=labels,
                          display_labels=label_names, cmap=plt.cm.Blues)
    plt.title('Confusion Matrix:\n KNN (' + split_name + ')', fontsize=20)
    plt.tight_layout()
    plt.savefig(dir_out + 'CM_KNN_' + split_name.lower() + '.png')
    # Close the current figure so repeated calls do not accumulate open figures.
    plt.close()


save_confusion_matrix(clf, X_train, y_train, 'Train')
save_confusion_matrix(clf, X_test, y_test, 'Test')


In [16]:
from plot_learning_curve import plot_learning_curve
from sklearn.model_selection import ShuffleSplit

In [15]:
# 20 random 80/20 re-splits of the full data set, seeded like the main split.
cv = ShuffleSplit(
    n_splits=20,
    test_size=0.2,
    random_state=RANDOM_STATE_DATA,
)
title = 'Learning Curve of KNN'

# Draw the learning curve for the tuned KNN, save it to dir_out and close the
# current figure.
plot_learning_curve(
    clf, title, X, y,
    ylim=(0.5, 1.01),
    cv=cv,
    n_jobs=4,
)
plt.savefig(dir_out + 'LC_KNN.png')
plt.close()