In [1]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE: this also hides deprecation / convergence warnings raised by the
# libraries imported below, so problems they report will not be visible.
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from glob import glob
import os

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

In [3]:
################ function to read data ################

def prepare_data(fname):
    """Read one recording and the event labels that belong to it.

    Parameters
    ----------
    fname : str
        Path to a ``*_data.csv`` file. The events file is looked up in the
        same directory, under the same file name with its last ``_data``
        replaced by ``_events``.

    Returns
    -------
    clean : pandas.DataFrame
        Contents of ``fname`` without the ``id`` column.
    labels : pandas.DataFrame
        Contents of the events file without the ``id`` column.
    """
    # Rewrite only the file name, and only its last '_data' marker.  A plain
    # str.replace on the whole path would also rewrite directory names such
    # as './my_data/' and point at a file that does not exist.
    folder, base = os.path.split(fname)
    events_fname = os.path.join(folder, '_events'.join(base.rsplit('_data', 1)))

    data = pd.read_csv(fname)
    labels = pd.read_csv(events_fname)

    # The 'id' column is a row identifier, not a feature / label.
    clean = data.drop(['id'], axis=1)
    labels = labels.drop(['id'], axis=1)
    return clean, labels


# One shared scaler for the whole notebook, so statistics fitted on one data
# set can be reused to transform another.
scaler = StandardScaler()


def data_preprocess(X, fit=True):
    """Standardise ``X`` with the module-level ``scaler``.

    Parameters
    ----------
    X : array-like
        Feature matrix handed straight to the scaler.
    fit : bool, default True
        When True (the original behaviour) the scaler is re-fitted on ``X``
        before transforming it.  Pass False to transform ``X`` with the
        statistics of the previous fit, e.g. for held-out data.

    Returns
    -------
    The scaled data, as returned by the scaler.
    """
    if fit:
        return scaler.fit_transform(X)
    return scaler.transform(X)

In [4]:
################ column names of the event labels ################

# Listed in the order in which the label columns are indexed further down.
cols = [
    'HandStart',
    'FirstDigitTouch',
    'BothStartLoadPhase',
    'LiftOff',
    'Replace',
    'BothReleased',
]

In [5]:
################ subject ids for training / evaluation ################

# range() excludes its end point: subjects 1..10 and 11..12 respectively.
train_subjects = range(1, 11)
test_subjects = range(11, 13)


In [None]:
def _load_subjects(subjects, pattern='./train/subj%d_series*_data.csv'):
    """Read every series of every subject in ``subjects``.

    Returns ``(X, y)`` as float numpy arrays: the concatenated rows of all
    matching data files and the rows of their event files, in the same order.
    Raises FileNotFoundError when no file matches for any subject.
    """
    frames, label_frames = [], []
    for subject in subjects:
        # sorted() keeps the row order independent of the directory listing.
        for fname in sorted(glob(pattern % subject)):
            data, labels = prepare_data(fname)
            frames.append(data)
            label_frames.append(labels)

    if not frames:
        raise FileNotFoundError(
            "no files matching %r for subjects %s" % (pattern, list(subjects)))

    X = np.asarray(pd.concat(frames).astype(float))
    y = np.asarray(pd.concat(label_frames).astype(float))
    return X, y


################ READ DATA ################

# Accumulate over ALL subjects of each group.  Previously the arrays were
# overwritten on every loop iteration, so only the last subject of each range
# was actually used.  NOTE: this keeps every training subject in memory at once.
# NOTE(review): the evaluation subjects are read from ./train as before --
# presumably because the event labels are needed; confirm.
X_train, y = _load_subjects(train_subjects)
X_test, y_test = _load_subjects(test_subjects)


################ Train classifiers ################

# Despite its name, `lr` is a k-nearest-neighbours classifier.
lr = KNeighborsClassifier()

# Fit the scaler on the training data only, then apply those same statistics
# to the evaluation data; re-fitting on the evaluation set would scale the two
# sets differently.
X_train = data_preprocess(X_train)
X_test = scaler.transform(X_test)

# One independent binary model per event column.
for i, col in enumerate(cols):
    y_train_subj = y[:, i]
    y_test_subj = y_test[:, i]

    lr.fit(X_train, y_train_subj)

    # Second predict_proba column, i.e. the probability of the larger class label.
    y_train_pred = lr.predict_proba(X_train)[:, 1]
    y_test_pred = lr.predict_proba(X_test)[:, 1]

    # NOTE(review): the value printed as "accuracy" is 1 - mean squared error
    # of the predicted probabilities, not the fraction of correct predictions.
    print("\nTrain accuracy for " + col + " : " + str(1-mean_squared_error(y_train_subj, y_train_pred)))
    print("Test accuracy for " + col + " : " + str(1-mean_squared_error(y_test_subj, y_test_pred)))
