In [5]:
import pandas as pd
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

test_activity = test['Activity']
train_activity = train['Activity']
train = train.drop(['subject','Activity'],axis=1)
test = test.drop(['subject','Activity'],axis=1)
#print(data)
filter_train = train.filter(regex='meanFreq')
filter_test = test.filter(regex='meanFreq')
train = train.drop(filter_train,axis=1)
test = test.drop(filter_test,axis=1)
train=train.filter(regex='mean|std')
test=test.filter(regex='mean|std')

In [6]:
from sklearn.preprocessing import StandardScaler as scaler
scaler = scaler().fit(train)
train = scaler.transform(train)
test = scaler.transform(test)
#print(data)
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder().fit(['STANDING','SITTING','LAYING','WALKING','WALKING_UPSTAIRS','WALKING_DOWNSTAIRS'])
test_activity_enc = le.transform(test_activity)
train_activity_enc = le.transform(train_activity)
#print(test_activity_enc)

In [7]:
from sklearn.svm import SVC
model = SVC(kernel='linear')
model.fit(train, train_activity_enc)
prediction = model.predict(test)


In [8]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

result = confusion_matrix(prediction,test_activity_enc)
print("Confusion Matrix:")
print(result)

report = classification_report(prediction,test_activity_enc)
print(report)

print('Accuracy score is ', accuracy_score(prediction,test_activity_enc))

Confusion Matrix:
[[537   1   0   0   0   0]
 [  0 398  33   0   0   0]
 [  0  92 499   0   0   0]
 [  0   0   0 489   2  43]
 [  0   0   0   6 384  15]
 [  0   0   0   1  34 413]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       538
           1       0.81      0.92      0.86       431
           2       0.94      0.84      0.89       591
           3       0.99      0.92      0.95       534
           4       0.91      0.95      0.93       405
           5       0.88      0.92      0.90       448

    accuracy                           0.92      2947
   macro avg       0.92      0.93      0.92      2947
weighted avg       0.93      0.92      0.92      2947

Accuracy score is  0.9229725144214456


In [9]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
kf = KFold(n_splits=20,)
svm = SVC(kernel='linear')
score = cross_val_score(svm,train,train_activity_enc,cv=kf)


In [10]:
print(score)
print(score.mean())

[0.98369565 0.93206522 0.92934783 0.87228261 0.91032609 0.98097826
 0.70108696 0.98097826 0.73641304 0.96195652 0.91032609 0.97826087
 0.96730245 0.95640327 0.8719346  0.94822888 1.         0.89373297
 0.92643052 0.96730245]
0.9204526270584055


In [11]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier()
score = cross_val_score(classifier,train,train_activity_enc,cv=kf)
print(score)
print(score.mean())

[0.95108696 0.89673913 0.92663043 0.9076087  0.95108696 0.92934783
 0.8125     0.98369565 0.85326087 0.94293478 0.88586957 0.80163043
 0.95912807 0.91280654 0.82288828 0.96730245 0.98092643 0.90190736
 0.96185286 0.94822888]
0.9148716088141216


In [12]:
from sklearn.ensemble import ExtraTreesClassifier
classifier = ExtraTreesClassifier()
score = cross_val_score(classifier,train,train_activity_enc,cv=kf)
print(score)
print(score.mean())

[0.95108696 0.92663043 0.92119565 0.87228261 0.9375     0.92391304
 0.8125     0.98369565 0.89673913 0.96195652 0.87228261 0.9701087
 0.91280654 0.91280654 0.83923706 0.95640327 0.98910082 0.92915531
 0.97547684 0.92915531]
0.9237016496860561


In [13]:
from sklearn.tree import DecisionTreeClassifier as dtc
dt = dtc()
score = cross_val_score(dt,train,train_activity_enc,cv=kf)

In [14]:
print(score)
print(score.mean())

[0.89945652 0.86684783 0.85869565 0.8125     0.8423913  0.87771739
 0.66576087 0.91032609 0.69021739 0.83695652 0.8451087  0.76358696
 0.91008174 0.78474114 0.77384196 0.88555858 0.9373297  0.83106267
 0.85558583 0.8719346 ]
0.8359850728586661


In [30]:
from sklearn.model_selection import RandomizedSearchCV as rondomcv
import numpy as np
model = ExtraTreesClassifier()
parameters = {'n_estimators':np.arange(20,220,20),'min_samples_split':np.arange(2,12,2)}
search = rondomcv(model,param_distributions=parameters, n_iter=30,n_jobs=4)
search.fit(train,train_activity_enc)
print(search.best_score_)
print(search.best_estimator_)

0.9269617133053085
ExtraTreesClassifier(min_samples_split=6, n_estimators=140)


In [31]:
xt = search.best_estimator_
xt.fit(train,train_activity_enc)
prediction = xt.predict(test)

In [32]:
print("Accuracy is : ", accuracy_score(prediction,test_activity_enc))

Accuracy is :  0.9338310145911096


In [33]:
print(np.arange(20,210,20))

[ 20  40  60  80 100 120 140 160 180 200]
