In [1]:
import json

import numpy as np
import pandas as pd
import sklearn.svm as svm
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso, LassoCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

import script as sc

In [2]:
## Name of the person running the notebook; it is passed to sc.get_complete
## below -- change it to your own name so the data is read from your path
user = 'Hans'

## Build test and train Datasets

In [3]:
## Load the complete dataset for `user` through the project helper and wrap it in a DataFrame
data = pd.DataFrame(sc.get_complete(user))

In [4]:
## Add "previous row" features: for every row, the seven summary columns of the
## row immediately before it, stored in new columns prefixed with 'last.'.
## The first row has no predecessor, so it receives a copy of its own values.
## NOTE(review): the label arithmetic below assumes `data` has a default
## 0..n-1 integer index -- confirm against sc.get_complete.
## NOTE(review): `data` is reassigned here, so re-running this cell appends a
## second set of 'last.*' columns; reload `data` before re-running.
copy = data[['mean.vm','sd.vm','mean.ang','sd.ang','p625','dfreq','ratio.df']].copy(deep=True)
copy.loc[-1] = copy.loc[0]  # append a duplicate of row 0 under label -1
copy.index = copy.index + 1  # shift labels by one: old row i -> i+1, duplicate -> 0
copy.sort_index(inplace=True)  # move the duplicate (label 0) to the front
copy.columns = 'last.' + copy.columns
# concat aligns on index labels; `copy` has one extra trailing label, which
# produces a final row with no values in the original columns
data = pd.concat([data, copy], axis = 1)
# drop that trailing row (selected by the label of the last position)
data = data.drop(data.index[len(data)-1])

In [5]:
## Split the full table (all activity types) into train/test features and labels.
## NOTE(review): the meaning of the second argument (0) is defined in
## script.get_test_train -- confirm what it controls.
trainX, trainY, testX, testY = sc.get_test_train(data, 0)

In [8]:
## One subset per activity-type letter: keep the rows whose 'type' string
## contains that letter (str.contains treats the pattern as a regex by default)
dataH = data.loc[data['type'].str.contains("H")]
dataE = data.loc[data['type'].str.contains("E")]
dataL = data.loc[data['type'].str.contains("L")]
dataA = data.loc[data['type'].str.contains("A")]
dataW = data.loc[data['type'].str.contains("W")]

In [9]:
## Train/test split for each per-type subset, using the same helper and the
## same second argument as the overall split.
## NOTE(review): these splits are computed independently of the overall
## trainX/testX split. The models below are fit on trainX and scored on the
## per-type test sets; confirm in script.get_test_train that per-type test
## rows cannot also appear in trainX, otherwise those scores are optimistic.
trainXH, trainYH, testXH, testYH = sc.get_test_train(dataH, 0)
trainXE, trainYE, testXE, testYE = sc.get_test_train(dataE, 0)
trainXL, trainYL, testXL, testYL = sc.get_test_train(dataL, 0)
trainXA, trainYA, testXA, testYA = sc.get_test_train(dataA, 0)
trainXW, trainYW, testXW, testYW = sc.get_test_train(dataW, 0)

## Angle Cut Point Method

In [25]:
## create the cut point
def cut_point_classify(angle, cut_point=15):
    """Label a single angle as 'sedentary' or 'non-sedentary'.

    An angle strictly inside the open interval (-cut_point, cut_point) is
    'sedentary'. Everything else -- including the two boundary values and
    NaN, for which both comparisons are false -- is 'non-sedentary'.

    Parameters
    ----------
    angle : number
        The angle to classify, in the same units as ``cut_point``.
    cut_point : number, default 15
        Half-width of the symmetric 'sedentary' band around zero. The
        default reproduces the fixed -15/15 thresholds.

    Returns
    -------
    str
        'sedentary' or 'non-sedentary'.
    """
    if -cut_point < angle < cut_point:
        return 'sedentary'
    return 'non-sedentary'

In [67]:
## label every row of the full table from its mean angle
pred = data['mean.ang'].apply(cut_point_classify)
## accuracy: share of rows whose cut-point label equals the recorded coding
(pred == data['coding']).mean()

0.51798949632894087

In [71]:
## label every row of the "H" subset from its mean angle
predH = dataH['mean.ang'].apply(cut_point_classify)
## accuracy: share of rows whose cut-point label equals the recorded coding
(predH == dataH['coding']).mean()

0.74814402375649591

In [72]:
## label every row of the "E" subset from its mean angle
predE = dataE['mean.ang'].apply(cut_point_classify)
## accuracy: share of rows whose cut-point label equals the recorded coding
(predE == dataE['coding']).mean()

0.61997719655875339

In [74]:
## label every row of the "L" subset from its mean angle
predL = dataL['mean.ang'].apply(cut_point_classify)
## accuracy: share of rows whose cut-point label equals the recorded coding
(predL == dataL['coding']).mean()

0.2467454528667713

In [73]:
## label every row of the "A" subset from its mean angle
predA = dataA['mean.ang'].apply(cut_point_classify)
## accuracy: share of rows whose cut-point label equals the recorded coding
(predA == dataA['coding']).mean()

0.61755601755601752

In [75]:
## label every row of the "W" subset from its mean angle
predW = dataW['mean.ang'].apply(cut_point_classify)
## accuracy: share of rows whose cut-point label equals the recorded coding
(predW == dataW['coding']).mean()

0.31853438020461577

## SVM

In [8]:
## Support-vector classifier with library defaults except for the kernel cache;
## scikit-learn documents cache_size in MB, so this requests roughly 7 GB
clf = svm.SVC(cache_size=7000)

In [9]:
## Fit on the overall training split (all activity types together)
clf.fit(trainX,trainY)

SVC(C=1.0, cache_size=7000, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [10]:
## Mean accuracy of the SVM on the "H" test split
clf.score(testXH,testYH)

0.76466221232368226

In [11]:
## Mean accuracy of the SVM on the "E" test split
clf.score(testXE,testYE)

0.66155334438916524

In [12]:
## Mean accuracy of the SVM on the "L" test split
clf.score(testXL,testYL)

0.57034711964549478

In [13]:
## Mean accuracy of the SVM on the "A" test split
clf.score(testXA,testYA)

0.81892091648189214

In [14]:
## Mean accuracy of the SVM on the "W" test split
clf.score(testXW,testYW)

0.59935287400076132

In [15]:
## Mean accuracy of the SVM on the overall test split (all activity types)
clf.score(testX,testY)

0.78707542240027872

## Neural Network

In [None]:
## Multi-layer perceptron with two hidden layers (2000 and 1000 units),
## optimised with L-BFGS; random_state is pinned so the fit is repeatable
n_net = MLPClassifier(
    hidden_layer_sizes=(2000, 1000),
    solver='lbfgs',
    alpha=1e-5,
    random_state=1,
    verbose=True
)
## Fit on the overall training split (all activity types together)
n_net.fit(trainX, trainY)

In [None]:
## Mean accuracy of the neural network on the "H" test split
n_net.score(testXH, testYH)

In [None]:
## Mean accuracy of the neural network on the "E" test split.
## Features and labels must come from the same split (testXE with testYE)
## so that each prediction is compared with the label of the same row.
n_net.score(testXE, testYE)

In [None]:
## Mean accuracy of the neural network on the "L" test split.
## Features and labels must come from the same split (testXL with testYL)
## so that each prediction is compared with the label of the same row.
n_net.score(testXL, testYL)

In [None]:
## Mean accuracy of the neural network on the "A" test split
n_net.score(testXA, testYA)

In [None]:
## Mean accuracy of the neural network on the "W" test split, completing the
## H, E, L, A, W sequence evaluated for every other model in this notebook
n_net.score(testXW, testYW)

In [8]:
## Predict once on the overall test split and reuse the predictions below
predictedVals = n_net.predict(testX)
## Confusion matrix: rows are true classes, columns are predicted classes,
## both ordered as sedentary, non-sedentary
print(confusion_matrix(testY, predictedVals, labels=['sedentary', 'non-sedentary']))
## F1 with 'sedentary' as the positive class. Both inputs are boolean masks
## (True == sedentary), so no `labels` argument is passed: string class names
## do not occur in boolean inputs and binary averaging uses pos_label only.
f1_score(testY == 'sedentary', predictedVals == 'sedentary')

[[ 9444  3302]
 [ 2096 13863]]


0.777732026682039

## Random Forest

In [16]:
## Random forest limited to depth-5 trees; random_state is pinned so the fit
## is repeatable. RandomForestClassifier is imported in the notebook's import cell.
rf = RandomForestClassifier(max_depth=5, random_state=0)
## Fit on the overall training split (all activity types together)
rf.fit(trainX,trainY)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=5, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=0, verbose=0, warm_start=False)

In [17]:
## Mean accuracy of the random forest on the "H" test split
rf.score(testXH,testYH)

0.7548255382331106

In [18]:
## Mean accuracy of the random forest on the "E" test split
rf.score(testXE,testYE)

0.66680486456605859

In [19]:
## Mean accuracy of the random forest on the "L" test split
rf.score(testXL,testYL)

0.34490398818316098

In [20]:
## Mean accuracy of the random forest on the "A" test split
rf.score(testXA,testYA)

0.7889874353288987

In [21]:
## Mean accuracy of the random forest on the "W" test split
rf.score(testXW,testYW)

0.59326227636086792

In [22]:
## Mean accuracy of the random forest on the overall test split (all activity types)
rf.score(testX,testY)

0.77672879289322416

## KNN

In [23]:
## k-nearest-neighbours classifier voting over the 200 closest training rows.
## KNeighborsClassifier is imported in the notebook's import cell.
knn = KNeighborsClassifier(n_neighbors=200)
## Fit on the overall training split (all activity types together)
knn.fit(trainX,trainY)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=200, p=2,
           weights='uniform')

In [24]:
## Mean accuracy of the KNN classifier on the "H" test split
knn.score(testXH,testYH)

0.75668151447661469

In [25]:
## Mean accuracy of the KNN classifier on the "E" test split
knn.score(testXE,testYE)

0.65823659480375896

In [26]:
## Mean accuracy of the KNN classifier on the "L" test split
knn.score(testXL,testYL)

0.61983013293943867

In [27]:
## Mean accuracy of the KNN classifier on the "A" test split
knn.score(testXA,testYA)

0.81596452328159641

In [28]:
## Mean accuracy of the KNN classifier on the "W" test split
knn.score(testXW,testYW)

0.57689379520365436

In [30]:
## Mean accuracy of the KNN classifier on the overall test split (all activity types)
knn.score(testX,testY)

0.78892179062881029