# Human activity detector by smartphone sensor readings

In [1]:
#importing dependancies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense

## Training and Testing data

In [2]:
#Training data
X_train = pd.read_csv("Dataset/Train/X_train.txt", sep = " ", header = None)
y_train = pd.read_csv("Dataset/Train/y_train.txt", sep = " ", header = None)

#Testing data
X_test = pd.read_csv("Dataset/Test/X_test.txt", sep = " ", header = None)
y_test = pd.read_csv("Dataset/Test/y_test.txt", sep = " ", header = None)

#activity labels
y_labels = pd.read_csv("Dataset/activity_labels.txt", header = None)

#feature labels
features = pd.read_csv("Dataset/features.txt", header = None)

In [3]:
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,551,552,553,554,555,556,557,558,559,560
0,0.04358,-0.00597,-0.035054,-0.995381,-0.988366,-0.937382,-0.995007,-0.988816,-0.953325,-0.794796,...,-0.012236,-0.314848,-0.713308,-0.112754,0.0304,-0.464761,-0.018446,-0.841559,0.179913,-0.051718
1,0.03948,-0.002131,-0.029067,-0.998348,-0.982945,-0.971273,-0.998702,-0.983315,-0.974,-0.802537,...,0.202804,-0.603199,-0.860677,0.053477,-0.007435,-0.732626,0.703511,-0.845092,0.180261,-0.047436
2,0.039978,-0.005153,-0.022651,-0.995482,-0.977314,-0.98476,-0.996415,-0.975835,-0.985973,-0.798477,...,0.440079,-0.404427,-0.761847,-0.118559,0.177899,0.100699,0.808529,-0.84923,0.18061,-0.042271
3,0.039785,-0.011809,-0.028916,-0.996194,-0.988569,-0.993256,-0.996994,-0.988526,-0.993135,-0.798477,...,0.430891,-0.138373,-0.491604,-0.036788,-0.012892,0.640011,-0.485366,-0.848947,0.181907,-0.040826
4,0.038758,-0.002289,-0.023863,-0.998241,-0.986774,-0.993115,-0.998216,-0.986479,-0.993825,-0.801982,...,0.137735,-0.366214,-0.70249,0.12332,0.122542,0.693578,-0.615971,-0.848164,0.185124,-0.03708


In [4]:
X_train.shape

(7767, 561)

In [5]:
X_test.shape

(3162, 561)

## Random Forest Classifier

In [6]:
from sklearn.ensemble import RandomForestClassifier
rf_classifier = RandomForestClassifier(n_estimators = 50)

In [7]:
#fitting the training data into random forest classifier
rf_classifier.fit(X_train, y_train.values.ravel())

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [8]:
print("Training Data Score: {}".format(rf_classifier.score(X_train, y_train)))
print("Testing Data Score: {}".format(rf_classifier.score(X_test, y_test)))

Training Data Score: 1.0
Testing Data Score: 0.9067046173308033


In [9]:
predictions = rf_classifier.predict(X_test)
print(f"First 10 Predictions:   {predictions[:10]}")
print(f"First 10 Actual labels: {y_test[:10]}")

First 10 Predictions:   [5 5 5 5 5 5 5 5 5 5]
First 10 Actual labels:    0
0  5
1  5
2  5
3  5
4  5
5  5
6  5
7  5
8  5
9  5


In [10]:
# Classification metrics
from sklearn.metrics import classification_report
y_pred = rf_classifier.predict(X_test)
target_names = np.arange(1,13)

print(classification_report(y_test.values, y_pred))

              precision    recall  f1-score   support

           1       0.88      0.97      0.92       496
           2       0.89      0.89      0.89       471
           3       0.96      0.86      0.91       420
           4       0.89      0.87      0.88       508
           5       0.88      0.91      0.90       556
           6       1.00      1.00      1.00       545
           7       0.74      0.74      0.74        23
           8       0.91      1.00      0.95        10
           9       0.70      0.88      0.78        32
          10       0.67      0.64      0.65        25
          11       0.79      0.61      0.69        49
          12       0.64      0.52      0.57        27

    accuracy                           0.91      3162
   macro avg       0.83      0.82      0.82      3162
weighted avg       0.91      0.91      0.91      3162



## Naive Bayes

In [11]:
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import ComplementNB
from sklearn.naive_bayes import BernoulliNB

In [12]:
#Gaussian Naive Bayes
gnb = GaussianNB()
gnb.fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)
print(classification_report(y_test, y_pred_gnb))

              precision    recall  f1-score   support

           1       0.83      0.84      0.83       496
           2       0.78      0.94      0.85       471
           3       0.83      0.61      0.70       420
           4       0.55      0.90      0.68       508
           5       0.86      0.40      0.54       556
           6       1.00      0.86      0.92       545
           7       0.27      0.65      0.38        23
           8       0.53      0.90      0.67        10
           9       0.52      0.75      0.62        32
          10       0.41      0.84      0.55        25
          11       0.63      0.55      0.59        49
          12       0.53      0.30      0.38        27

    accuracy                           0.75      3162
   macro avg       0.64      0.71      0.64      3162
weighted avg       0.79      0.75      0.74      3162



  y = column_or_1d(y, warn=True)


In [13]:
#Multinomial Naive Bayes
#Transform data from [-1, 1] to [0, 1]
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

mnb = MultinomialNB()
mnb.fit(X_train_scaled, y_train)
y_pred_mnb = mnb.predict(X_test_scaled)
print(classification_report(y_test, y_pred_mnb))

              precision    recall  f1-score   support

           1       0.77      0.57      0.65       496
           2       0.82      0.55      0.66       471
           3       0.51      0.89      0.65       420
           4       0.81      0.72      0.77       508
           5       0.79      0.81      0.80       556
           6       1.00      0.96      0.98       545
           7       0.41      0.61      0.49        23
           8       0.82      0.90      0.86        10
           9       0.44      0.56      0.49        32
          10       0.32      0.48      0.39        25
          11       0.60      0.57      0.58        49
          12       0.56      0.67      0.61        27

    accuracy                           0.75      3162
   macro avg       0.65      0.69      0.66      3162
weighted avg       0.78      0.75      0.75      3162



  y = column_or_1d(y, warn=True)


In [14]:
#Complement Naive Bayes
#Transform data from [-1, 1] to [0, 1]
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

cnb = ComplementNB()
cnb.fit(X_train_scaled, y_train)
y_pred_cnb = cnb.predict(X_test_scaled)
print(classification_report(y_test, y_pred_cnb))

              precision    recall  f1-score   support

           1       0.38      0.01      0.01       496
           2       0.02      0.00      0.00       471
           3       0.28      1.00      0.44       420
           4       1.00      0.00      0.01       508
           5       0.65      0.98      0.78       556
           6       0.71      1.00      0.83       545
           7       0.00      0.00      0.00        23
           8       0.00      0.00      0.00        10
           9       0.00      0.00      0.00        32
          10       0.00      0.00      0.00        25
          11       0.00      0.00      0.00        49
          12       0.00      0.00      0.00        27

    accuracy                           0.48      3162
   macro avg       0.25      0.25      0.17      3162
weighted avg       0.50      0.48      0.34      3162



  y = column_or_1d(y, warn=True)
  'precision', 'predicted', average, warn_for)


In [15]:
#Bernoulli Naive Bayes
bnb = BernoulliNB()
bnb.fit(X_train, y_train)
y_pred_bnb = bnb.predict(X_test)
print(classification_report(y_test, y_pred_bnb))

              precision    recall  f1-score   support

           1       0.79      0.84      0.82       496
           2       0.77      0.93      0.84       471
           3       0.84      0.60      0.70       420
           4       0.87      0.74      0.80       508
           5       0.80      0.87      0.83       556
           6       1.00      0.98      0.99       545
           7       0.41      0.83      0.55        23
           8       0.62      0.80      0.70        10
           9       0.51      0.78      0.62        32
          10       0.58      0.72      0.64        25
          11       0.62      0.37      0.46        49
          12       0.67      0.59      0.63        27

    accuracy                           0.82      3162
   macro avg       0.71      0.75      0.71      3162
weighted avg       0.83      0.82      0.82      3162



  y = column_or_1d(y, warn=True)


## Deep Neural Network

### Pre-processing

In [16]:
# Convert the dataframe to a numpy array for Keras
X_train_arr = X_train.values
X_test_arr = X_test.values

# One-hot encoding the labels
y_train_arr =  to_categorical(y_train)
y_test_arr = to_categorical(y_test)

In [17]:
# An empty sequential model
model = Sequential()

In [18]:
# add the first layer of training data
model.add(Dense(100, activation="relu", input_dim = X_train_arr.shape[1]))

Instructions for updating:
Colocations handled automatically by placer.


In [19]:
# Adding a dropout layer to prevent overfitting
from tensorflow.keras.layers import Dropout

model.add(Dropout(0.5))

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [20]:
# Adding second hidden layer
model.add(Dense(100, activation = "relu"))

In [21]:
# Adding third hidden layer
# model.add(Dense(100, activation = "relu"))

In [22]:
# Add output layer
model.add(Dense(y_train_arr.shape[1], activation = "softmax"))

In [23]:
# Compile the model
# Using categorical_crossentropy for the loss function, adam optimizer and accuracy metric

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics = ["accuracy"])

In [24]:
# Train the model
model.fit(X_train_arr, y_train_arr, epochs = 100, shuffle = True, verbose =2)

Instructions for updating:
Use tf.cast instead.
Epoch 1/100
 - 2s - loss: 0.8964 - acc: 0.6401
Epoch 2/100
 - 1s - loss: 0.4497 - acc: 0.8199
Epoch 3/100
 - 1s - loss: 0.3552 - acc: 0.8593
Epoch 4/100
 - 1s - loss: 0.2892 - acc: 0.8857
Epoch 5/100
 - 1s - loss: 0.2625 - acc: 0.8971
Epoch 6/100
 - 1s - loss: 0.2423 - acc: 0.9007
Epoch 7/100
 - 1s - loss: 0.2133 - acc: 0.9127
Epoch 8/100
 - 1s - loss: 0.2342 - acc: 0.9051
Epoch 9/100
 - 1s - loss: 0.2143 - acc: 0.9139
Epoch 10/100
 - 1s - loss: 0.1912 - acc: 0.9191
Epoch 11/100
 - 2s - loss: 0.1987 - acc: 0.9185
Epoch 12/100
 - 1s - loss: 0.2047 - acc: 0.9226
Epoch 13/100
 - 2s - loss: 0.1882 - acc: 0.9209
Epoch 14/100
 - 2s - loss: 0.1688 - acc: 0.9316
Epoch 15/100
 - 1s - loss: 0.1785 - acc: 0.9287
Epoch 16/100
 - 2s - loss: 0.1737 - acc: 0.9284
Epoch 17/100
 - 2s - loss: 0.1641 - acc: 0.9360
Epoch 18/100
 - 1s - loss: 0.1693 - acc: 0.9333
Epoch 19/100
 - 2s - loss: 0.1653 - acc: 0.9340
Epoch 20/100
 - 2s - loss: 0.1686 - acc: 0.9289
E

<tensorflow.python.keras.callbacks.History at 0x1a4a552dd8>

In [25]:
# save the trained model
model.save("deep_learning_model.h5")

### Evaluate the model with test data

In [26]:
# Load the saved model
from tensorflow.keras.models import load_model
model = load_model("deep_learning_model.h5")

In [27]:
# Load the saved model
from tensorflow.keras.models import load_model
model = load_model("deep_learning_model.h5")

In [28]:
# Evaluate
model_loss, model_accuracy = model.evaluate(X_test_arr, y_test_arr, verbose =3)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Loss: 0.24422230586862947, Accuracy: 0.9307400584220886


In [29]:
# Run this cell to specify the row numbers you want to test/compare

# Predict the class of given data

def predictor(data):
    return model.predict_classes(data)

# Decoding the label integer into its actual class name
def decoder(num):
    return y_labels[0][int(num)].strip().split()[1]

def compare(row_num):
    test_data = np.expand_dims(X_test_arr[row_num], axis = 0)
    print (f" OBSERVED: {decoder(y_test.iloc[row_num,0])}\t\t   PREDICTED: {decoder(predictor(test_data))}")


begin = int(input("Which row you want to start from?(Range 1-3162) \n"))-1
end = int(input("Which row you want it to end?\n"))
for i in range(begin, end):
    compare(i)

Which row you want to start from?(Range 1-3162) 
100
Which row you want it to end?
200
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WALKING_UPSTAIRS		   PREDICTED: WALKING_UPSTAIRS
 OBSERVED: WA