References:
1. https://machinelearningmastery.com/how-to-model-human-activity-from-smartphone-data/
2. https://machinelearningmastery.com/evaluate-machine-learning-algorithms-for-human-activity-recognition/


## Step 1: Import Necessary Libraries

In [11]:

# spot check on engineered-features
from pandas import read_csv
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier

## Step 2: Load Dataset

In [4]:
# load dataset
from numpy import dstack
from pandas import read_csv

# load a single file as a numpy array
def load_file(filepath):
    dataframe = read_csv(filepath, header=None, delim_whitespace=True)
    return dataframe.values
 
def load_dataset_group(group, prefix=''):
    # load input data
    X = load_file(prefix + group + '/X_'+group+'.txt')
    # load class output
    y = load_file(prefix + group + '/y_'+group+'.txt')
    return X, y

In [7]:
# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
    # load all train
    X_train, y_train = load_dataset_group('train', prefix + 'HARDataset/')
    print(X_train.shape, y_train.shape)
    # load all test
    X_test, y_test = load_dataset_group('test', prefix + 'HARDataset/')
    print(X_test.shape, y_test.shape)
    # flatten y
    y_train, y_test = y_train[:,0], y_test[:,0]
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    return X_train, y_train, X_test, y_test

# load dataset
X_train, y_train, X_test, y_test = load_dataset()
# load all train
print(X_train.shape, y_train.shape)
# load all test
print(X_test.shape, y_test.shape)

(7352, 561) (7352, 1)
(2947, 561) (2947, 1)
(7352, 561) (7352,) (2947, 561) (2947,)
(7352, 561) (7352,)
(2947, 561) (2947,)


## Step 3: Using different models

## Step 5. SVM model

In [15]:
# SVM model (Dan Li)
from sklearn import svm
from pandas import read_csv
import time
from sklearn.metrics import confusion_matrix,accuracy_score, recall_score, precision_score, f1_score
def svm_model(trainX, trainy, testX, testy):
    # Step1: finding best params
    # ml = svm.SVC()
    # # setting different kernel by change the value of parameter 'kernel'
    # param_grid = {'C': [1, 10, 100, 1000, 10000],
    #               'kernel': ['sigmoid']}
    # grid = GridSearchCV(ml, param_grid, refit=True, verbose=1, cv=15)
    # # fitting the model for grid search
    # grid_search = grid.fit(trainX, trainy)
    # best_params = grid_search.best_params_
    # print(grid_search.best_params_)


    # Step2: train and predict model by best params
    # setting different kernel by change the value of parameter 'kernel'
    model = svm.SVC(kernel='linear', C=1)
    model.fit(trainX, trainy)
    train_y_predict = model.predict(trainX)
    print("training accuracy: ", end=" ")
    print(accuracy_score(trainy, train_y_predict))
    print("training recall: ", end=" ")
    print(recall_score(trainy, train_y_predict, average='macro'))
    print("training precision: ", end=" ")
    print(precision_score(trainy, train_y_predict, average='macro'))
    print("training f1 score: ", end=" ")
    print(f1_score(trainy, train_y_predict, average='macro'))
    print("___________________________________________________________________________________")
    test_y_predict = model.predict(testX)
    print("test accuracy: ", end=" ")
    print(accuracy_score(testy, test_y_predict))
    print("test recall: ", end=" ")
    print(recall_score(testy, test_y_predict, average='macro'))
    print("test precision: ", end=" ")
    print(precision_score(testy, test_y_predict, average='macro'))
    print("test f1 score: ", end=" ")
    print(f1_score(testy, test_y_predict, average='macro'))

    # Step3: plot confusion matrix
    # cm = confusion_matrix(testy, test_y_predict, labels=model.classes_)
    # disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
    # disp.plot()
    # plt.show()
start = time.time()
svm_model(trainX, trainy, testX, testy)
end = time.time()
print("The time of execution of above program is :", end - start)

training accuracy:  0.9938792165397171
training recall:  0.9943671744302657
training precision:  0.9943439381310409
training f1 score:  0.9943551382036183
___________________________________________________________________________________
test accuracy:  0.9640312181879878
test recall:  0.9635891623206801
test precision:  0.9658952454847611
test f1 score:  0.9641983433163016
The time of execution of above program is : 5.095134973526001


In [2]:
# Logistic Regression Model Fitting
from sklearn.linear_model import LogisticRegression

lr_clf = LogisticRegression(solver='liblinear', random_state=0)
lr_clf.fit(X_train, trainy)

train_y_predict = lr_clf.predict(X_train)
print("training accuracy: ", end=" ")
print(accuracy_score(trainy, train_y_predict))
print("training recall: ", end=" ")
print(recall_score(trainy, train_y_predict, average='macro'))
print("training precision: ", end=" ")
print(precision_score(trainy, train_y_predict, average='macro'))
print("training f1 score: ", end=" ")
print(f1_score(trainy, train_y_predict, average='macro'))
print("___________________________________________________________________________________")
test_y_predict = lr_clf.predict(testX)
print("test accuracy: ", end=" ")
print(accuracy_score(testy, test_y_predict))
print("test recall: ", end=" ")
print(recall_score(testy, test_y_predict, average='macro'))
print("test precision: ", end=" ")
print(precision_score(testy, test_y_predict, average='macro'))
print("test f1 score: ", end=" ")
print(f1_score(testy, test_y_predict, average='macro'))

NameError: name 'trainX' is not defined

In [1]:
from sklearn.metrics import classification_report
import numpy as np
target_names = ["WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS", "SITTING","STANDING","LAYING"]

print(classification_report(testy, test_y_predict, target_names=target_names))

NameError: name 'classification_report' is not defined

In [None]:
# create a dict of standard models to evaluate {name:object}
def define_models(models=dict()):
    # nonlinear models
    models['knn'] = KNeighborsClassifier(n_neighbors=7)
    models['cart'] = DecisionTreeClassifier()
    models['svm'] = SVC()
    models['bayes'] = GaussianNB()
    # ensemble models
    models['bag'] = BaggingClassifier(n_estimators=100)
    models['rf'] = RandomForestClassifier(n_estimators=100)
    models['et'] = ExtraTreesClassifier(n_estimators=100)
    models['gbm'] = GradientBoostingClassifier(n_estimators=100)
    print('Defined %d models' % len(models))
    return models

In [None]:
# evaluate a single model
def evaluate_model(trainX, trainy, testX, testy, model):
    # fit the model
    model.fit(trainX, trainy)
    # make predictions
    yhat = model.predict(testX)
    # evaluate predictions
    accuracy = accuracy_score(testy, yhat)
    return accuracy * 100.0

In [None]:
# evaluate a dict of models {name:object}, returns {name:score}
def evaluate_models(trainX, trainy, testX, testy, models):
    results = dict()
    for name, model in models.items():
        # evaluate the model
        results[name] = evaluate_model(trainX, trainy, testX, testy, model)
        # show process
        print('>%s: %.3f' % (name, results[name]))
    return results

In [None]:
# print and plot the results
def summarize_results(results, maximize=True):
    # create a list of (name, mean(scores)) tuples
    mean_scores = [(k,v) for k,v in results.items()]
    # sort tuples by mean score
    mean_scores = sorted(mean_scores, key=lambda x: x[1])
    # reverse for descending order (e.g. for accuracy)
    if maximize:
        mean_scores = list(reversed(mean_scores))
    print()
    for name, score in mean_scores:
        print('Name=%s, Score=%.3f' % (name, score))

In [None]:
# get model list
models = define_models()
# evaluate models
results = evaluate_models(trainX, trainy, testX, testy, models)
# summarize results
summarize_results(results)