## Human Activity Recognition Using Accelerometer Data

Dataset Link: http://www.cis.fordham.edu/wisdm/dataset.php

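The WISDM dataset contains six activity labels (Downstairs, Jogging, Sitting, Standing, Upstairs, Walking). Note that the code below reads local per-user CSV files, drops rows labelled `0`, and assumes seven activity classes (`num_activities = 7`).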

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
print(tf.__version__)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [None]:
# Number of activity classes; the balancing step iterates over labels
# 1..num_activities.
# NOTE(review): the introduction lists six WISDM labels — confirm that 7 is
# the right count for the CSV files loaded below.
num_activities = 7

In [None]:
import pandas as pd
import glob

# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
train_path = 'C:/Users/uwu/Documents/CMU/2022 Spring/18743/18-743_Project/data'
# glob returns matches in arbitrary order; sorting keeps the user ids assigned
# below reproducible from run to run.
source_files = sorted(glob.glob(train_path + '/*.csv'))
if not source_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError('No CSV files found in ' + train_path)

dataframes = []
for num, file in enumerate(source_files, start=1):
    df = pd.read_csv(file) # additional arguments up to your needs
    df['user'] = num                      # one synthetic user id per file
    df['time'] = range(0, len(df))        # running sample index within the file
    df = df.rename(columns={'label': 'activity'})
    df = df[df.activity != 0]             # rows with activity 0 are discarded
    dataframes.append(df)

# Stack all recordings and fix the column order used by the rest of the notebook.
df_all = pd.concat(dataframes, axis=0)
df_all = df_all[['user', 'activity', 'time', 'x', 'y', 'z']]
data = df_all
df_all

In [None]:
# Alternative hold-out recording:
#predict_path = 'C:/Users/uwu/Documents/CMU/2022 Spring/18743/18-743_Project/03_a.csv'
predict_path = 'C:/Users/uwu/Documents/CMU/2022 Spring/18743/18-743_Project/data/15.csv'
# NOTE(review): this file lives in the directory that the training cell globs
# with '*.csv', so the hold-out recording also appears to be part of the
# training data — confirm.

df_predict = pd.read_csv(predict_path) # additional arguments up to your needs
# Tag the recording with its own user id and a running sample index; the index
# is taken before rows with activity 0 are removed.
df_predict = df_predict.assign(user=16, time=range(0, len(df_predict)))
df_predict = df_predict.rename(columns={'label': 'activity'})
has_label = df_predict.activity != 0
df_predict = df_predict.loc[has_label, ['user', 'activity', 'time', 'x', 'y', 'z']]
df_predict


#### Load the Dataset 

In [None]:
# Column names of the combined frames, in the order selected when loading.
columns = ['user', 'activity', 'time', 'x', 'y', 'z']

In [None]:
# (rows, columns) of the combined training frame.
data.shape

In [None]:
# (rows, columns) of the hold-out frame.
df_predict.shape

### Balance this data 

In [None]:
# Make sure the three acceleration axes of the training frame are floats.
for axis_name in ('x', 'y', 'z'):
    data[axis_name] = data[axis_name].astype('float')

In [None]:
# Make sure the three acceleration axes of the hold-out frame are floats.
for axis_name in ('x', 'y', 'z'):
    df_predict[axis_name] = df_predict[axis_name].astype('float')

In [None]:
# Summarise columns, dtypes and non-null counts of the training frame.
data.info()

In [None]:
# Summarise columns, dtypes and non-null counts of the hold-out frame.
df_predict.info()

In [None]:
# Sampling-rate constant used to size the plot window (Fs*10 rows) below.
# NOTE(review): presumably samples per second (Hz) — confirm against the recordings.
Fs = 20

In [None]:
# Distinct activity codes present in the data; value_counts orders them from
# most to least frequent.
activities = data['activity'].value_counts().index

In [None]:
def plot_activity(activity, data):
    """Draw the x, y and z signals of one activity as three stacked plots.

    activity -- value used as the figure title
    data     -- frame with 'time', 'x', 'y' and 'z' columns to plot
    """
    fig, axes = plt.subplots(nrows=3, figsize=(15, 7), sharex=True)
    panels = (('x', 'X-Axis'), ('y', 'Y-Axis'), ('z', 'Z-Axis'))
    for ax, (column, title) in zip(axes, panels):
        plot_axis(ax, data['time'], data[column], title)
    plt.subplots_adjust(hspace=0.2)
    fig.suptitle(activity)
    # Leave head-room so the figure title does not overlap the first panel.
    plt.subplots_adjust(top=0.90)
    plt.show()

def plot_axis(ax, x, y, title):
    """Plot y against x in green on `ax`, with padded y-limits and a grid.

    The y-range is widened by one standard deviation of y on each side; the
    x-range is clamped to the data and the x-axis itself is hidden.
    """
    ax.plot(x, y, 'g')
    ax.set_title(title)
    ax.xaxis.set_visible(False)
    margin = np.std(y)
    lower, upper = min(y) - margin, max(y) + margin
    ax.set_ylim([lower, upper])
    ax.set_xlim([min(x), max(x)])
    ax.grid(True)

# One figure per activity, showing only its first Fs*10 rows.
for activity in activities:
    is_current = data['activity'] == activity
    data_for_plot = data[is_current].head(Fs * 10)
    plot_activity(activity, data_for_plot)

In [None]:
# Keep only the activity label and the three axes; work on a copy so later
# edits do not touch `data`.
df = data.drop(columns=['user', 'time']).copy()
df.head()

In [None]:
# Same reduction for the hold-out frame: activity label plus the three axes.
df_pre = df_predict.drop(columns=['user', 'time']).copy()
df_pre.head()

In [None]:
# Down-sample every activity to the size of the rarest one so that all classes
# contribute the same number of rows.
# The minimum is loop-invariant, so compute it once.
min_count = df['activity'].value_counts().min()

# Take the first `min_count` rows of each activity 1..num_activities and
# concatenate them in a single call (avoids repeated concat onto an empty frame).
balanced_data = pd.concat(
    [df[df['activity'] == i].head(min_count) for i in range(1, num_activities + 1)]
)

print(balanced_data.shape)
print(balanced_data['activity'].value_counts())

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Learn the mapping from activity codes to consecutive integer class ids on the
# balanced training data, then store the encoded ids in a 'label' column.
label = LabelEncoder()
label.fit(balanced_data['activity'])
balanced_data['label'] = label.transform(balanced_data['activity'])
balanced_data.head()

In [None]:
# Encode the hold-out labels with the mapping learned on the training data.
# Refitting here (fit_transform) would overwrite that mapping, and if the
# hold-out file lacks an activity the integer ids would no longer line up with
# the classes the model is trained on. transform() raises ValueError on an
# activity the encoder has not seen.
df_pre['label'] = label.transform(df_pre['activity'])
df_pre.head()

In [None]:
# Classes held by the encoder after its most recent fit; position i is the
# activity encoded as integer i.
label.classes_

### Standardize the data

In [None]:
# Features are the three acceleration axes; the target is the encoded label.
X = balanced_data.loc[:, ['x', 'y', 'z']]
y = balanced_data.loc[:, 'label']
X.shape

In [None]:
# Hold-out features and true labels (`y_pred` holds the ground truth here,
# not model output).
X_pred = df_pre.loc[:, ['x', 'y', 'z']]
y_pred = df_pre.loc[:, 'label']
X_pred.shape

In [None]:
# Standardize each axis with statistics estimated on the balanced data.
# NOTE(review): the scaler is fitted before the train/test split further down,
# so test frames influence the scaling statistics.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Rebuild a frame from the scaled array and re-attach the labels by position.
scaled_X = pd.DataFrame(X, columns=['x', 'y', 'z']).assign(label=y.values)

scaled_X

In [None]:
# Scale the hold-out data with the scaler fitted on the training data.
# Fitting a second scaler on the hold-out set would standardize it with its own
# mean/std, giving the model inputs on a different scale than it was trained on.
X_pred = scaler.transform(X_pred)

# Rebuild a frame from the scaled array and re-attach the true labels by position.
scaled_X_pred = pd.DataFrame(data = X_pred, columns = ['x', 'y', 'z'])
scaled_X_pred['label'] = y_pred.values

scaled_X_pred

### Frame Preparation 

In [None]:
import scipy.stats as stats

In [None]:
# Framing parameters, expressed as multiples of the sampling constant Fs.
Fs = 20
frame_size = Fs*4 # 80 samples per frame
hop_size = Fs*2 # 40 samples between consecutive frame starts (frames overlap by half)

In [None]:
def get_frames(df, frame_size, hop_size):
    """Cut a labelled 3-axis signal into fixed-length, possibly overlapping frames.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'x', 'y', 'z' and 'label'; rows are taken in
        their stored order.
    frame_size : int
        Number of consecutive rows in every frame.
    hop_size : int
        Number of rows between the starts of two consecutive frames.

    Returns
    -------
    frames : numpy.ndarray, shape (n_frames, frame_size, 3)
        frames[k, t] holds the (x, y, z) values of row k * hop_size + t.
    labels : numpy.ndarray, shape (n_frames,)
        Most frequent label inside each frame (smallest label on ties).
    """
    N_FEATURES = 3

    # Pull the data out of pandas once instead of on every loop iteration.
    signal = df[['x', 'y', 'z']].to_numpy()
    label_values = df['label'].to_numpy()

    frames = []
    labels = []
    # NOTE(review): the stop value excludes a final frame that would end exactly
    # on the last row; kept as-is so the number of frames does not change.
    for start in range(0, len(df) - frame_size, hop_size):
        stop = start + frame_size

        # Slice rows so each frame is already (frame_size, 3). Stacking the
        # three axis vectors as [x, y, z] and reshaping afterwards does not
        # transpose — it scrambles samples across axes.
        frames.append(signal[start:stop])

        # Majority label of the frame. np.unique returns sorted values, so
        # argmax picks the smallest label on ties; this avoids indexing the
        # result of scipy.stats.mode, whose shape differs between SciPy versions.
        values, counts = np.unique(label_values[start:stop], return_counts=True)
        labels.append(values[np.argmax(counts)])

    # The reshape keeps the (0, frame_size, 3) shape when no frame fits.
    frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
    labels = np.asarray(labels)

    return frames, labels

In [None]:
# Frame the standardized training data; X and y are rebound to the framed arrays.
X, y = get_frames(scaled_X, frame_size, hop_size)

In [None]:
# Frame the standardized hold-out data the same way.
X_pred, y_pred = get_frames(scaled_X_pred, frame_size, hop_size)

In [None]:
# Shapes of the framed training inputs and their labels.
X.shape, y.shape

In [None]:
# Shapes of the framed hold-out inputs and their labels.
X_pred.shape, y_pred.shape

In [None]:
# Hold out 20% of the frames for testing; stratify=y keeps the class mix equal
# in both parts and random_state fixes the shuffle.
# NOTE(review): hop_size is smaller than frame_size, so neighbouring frames share
# samples and can end up on both sides of this split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0, stratify = y)

In [None]:
# Number of frames in each split.
X_train.shape, X_test.shape

In [None]:
# Shape of a single frame before the channel axis is added.
X_train[0].shape, X_test[0].shape

In [None]:
# Conv2D expects a trailing channel axis: (samples, frame_size, 3, 1).
# Let numpy infer the sample count (-1) instead of hard-coding it, so the cell
# keeps working when the amount of data changes; re-running it is harmless.
X_train = X_train.reshape(-1, frame_size, 3, 1)
X_test = X_test.reshape(-1, frame_size, 3, 1)

In [None]:
# Shape of a single frame after the channel axis has been added.
X_train[0].shape, X_test[0].shape

### 2D CNN Model 

In [None]:
# Small 2D CNN: two 2x2 convolutions with dropout, then a dense classifier head.
model = Sequential()
# Input shape is taken from one training frame, i.e. after the channel axis was added.
model.add(Conv2D(16, (2, 2), activation = 'relu', input_shape = X_train[0].shape))
model.add(Dropout(0.1))

model.add(Conv2D(32, (2, 2), activation='relu'))
model.add(Dropout(0.2))

model.add(Flatten())

model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.5))

# One softmax output per activity class; tied to num_activities instead of a
# second hard-coded 7 so the two cannot drift apart.
model.add(Dense(num_activities, activation='softmax'))


In [None]:
# Adam optimizer with learning rate 0.001; the sparse categorical loss takes the
# integer class ids in y directly, so no one-hot encoding is needed.
model.compile(optimizer=Adam(learning_rate = 0.001), loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [None]:
# Train for 10 epochs. The test split doubles as validation data here, so the
# val_* metrics in `history` are computed on the same frames used for the
# confusion matrix later on.
history = model.fit(X_train, y_train, epochs = 10, validation_data= (X_test, y_test), verbose=1)

In [None]:
def plot_learningCurve(history, epochs):
    """Show training vs. validation curves for accuracy and then for loss.

    history -- object whose `.history` dict has the keys 'accuracy',
               'val_accuracy', 'loss' and 'val_loss'
    epochs  -- number of epochs to put on the x-axis (1..epochs)
    """
    epoch_range = range(1, epochs + 1)
    # (training key, validation key, figure title, y-axis label)
    figures = (
        ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model loss', 'Loss'),
    )
    for train_key, val_key, title, y_label in figures:
        plt.plot(epoch_range, history.history[train_key])
        plt.plot(epoch_range, history.history[val_key])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Val'], loc='upper left')
        plt.show()

In [None]:
# Read the epoch count from the recorded history instead of repeating the
# literal passed to model.fit, so the x-axis always matches the curves.
plot_learningCurve(history, len(history.history['loss']))

### Confusion Matrix 

In [None]:
# Per-class probabilities for every test frame, reduced to the index of the
# most probable class.
y_p = model.predict(X_test)
y_classes = np.argmax(y_p, axis=-1)
y_classes

In [None]:
# The model was built on frames that carry a trailing channel axis, while
# get_frames returns (n, frame_size, 3). Give the hold-out frames the same
# per-sample shape as the training input before predicting; the reshape is a
# no-op if X_pred already has that shape.
y_predict_1 = model.predict(X_pred.reshape((-1,) + X_train.shape[1:]))
y_predict_classes  = y_predict_1.argmax(axis=-1)
y_predict_classes

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix on the test split (true labels first, predictions second).
cf_matrix = confusion_matrix(y_test, y_classes)

# Number of misclassified test frames, counted with one vectorized comparison.
num1 = int(np.sum(y_classes != y_test))
# Accuracy = 1 - error rate.
print(1-num1/len(y_classes))

print(cf_matrix)

In [None]:
# Confusion matrix on the hold-out recording. `y_pred` holds the TRUE hold-out
# labels; `y_predict_classes` holds the model's predictions.
cf_matrix = confusion_matrix(y_pred, y_predict_classes)

# Number of misclassified hold-out frames, counted with one vectorized comparison.
num1 = int(np.sum(y_pred != y_predict_classes))
# Accuracy = 1 - error rate.
print(1-num1/len(y_predict_classes))

print(cf_matrix)

In [None]:
# Persist only the trained weights (not the architecture) to the working directory.
# NOTE(review): newer Keras releases appear to require weight files to end in
# '.weights.h5' — confirm against the installed TensorFlow/Keras version.
model.save_weights('model.h5')