In [1]:
# Importing Libraries

In [4]:
import pandas as pd
import numpy as np

In [5]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

### Data

In [6]:
# Data directory
DATADIR = 'UCI_HAR_Dataset'

In [7]:
# Raw data signals
# Signals are from Accelerometer and Gyroscope
# The signals are in x,y,z directions
# Sensor signals are filtered to have only body acceleration
# excluding the acceleration due to gravity
# Triaxial acceleration from the accelerometer is total acceleration
SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

In [8]:
# Utility function to read the data from csv file
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True, header=None)

# Utility function to load the load
def load_signals(subset):
    signals_data = []

    for signal in SIGNALS:
        filename = f'UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        signals_data.append(
            _read_csv(filename).as_matrix()
        ) 

    # Transpose is used to change the dimensionality of the output,
    # aggregating the signals by combination of sample/timestep.
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))

In [9]:

def load_y(subset):
    """
    The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """
    filename = f'UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    return pd.get_dummies(y).as_matrix()

In [10]:
def load_data():
    """
    Obtain the dataset from multiple files.
    Returns: X_train, X_test, y_train, y_test
    """
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')

    return X_train, X_test, y_train, y_test

In [11]:
# Importing tensorflow
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)

In [12]:
# Configuring a session
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

In [13]:
# Import Keras
from keras import backend as K
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

In [39]:
# Importing libraries
from keras.models import Sequential
from keras.layers import LSTM,Conv1D,BatchNormalization,MaxPooling1D
from keras.layers.wrappers import Bidirectional
from keras.layers.core import Dense, Dropout

In [31]:
# Initializing parameters
epochs = 30
batch_size = 128
n_hidden = 32


In [18]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [19]:
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

  if sys.path[0] == '':


In [20]:
X_train.shape,Y_train.shape

((7352, 128, 9), (7352, 6))

In [35]:
Y_train[0]

array([0, 0, 0, 0, 1, 0], dtype=uint8)

In [23]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = _count_classes(Y_train)

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


- Defining the Architecture of LSTM

In [24]:
# Initiliazing the sequential model
model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden,recurrent_dropout= 0.3,return_sequences=True ,input_shape=(timesteps, input_dim)))
model.add(LSTM(n_hidden,recurrent_dropout = 0.3)) #since we added another lstm we got to increase epochs..
# Adding a dropout layer
model.add(Dropout(0.5))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary() 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128, 32)           5376      
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 198       
Total params: 13,894
Trainable params: 13,894
Non-trainable params: 0
_________________________________________________________________


### THAT'S MORE THAN TWICE THE NUMBER OF MODELS 

# here i wanted to share one idea...We could take the initial layer conv1d ...before passing onto rnn layer i remember trying this out with char rnn when i first heard about them.The intuition is simple:
## -> say in this question we have 128 dimensional vectors laid out temporally ->7352 of them what if we take a simple a linear combination of them say with a kernel(3) this decreases the width....one advantage is now we could use larger rnn's ( decreased parameters due to the above operation.)Most importantly we expect the device recordings to vary smoothly -> extracting features and then passing on to rnn's for sequence information seems to be a good idea thought i haven't seen that widely used....

In [None]:
# model.add(Conv1D(n_conv, k_conv, activation='relu'))
# model.add(MaxPooling1D(mp_size))
# model.add(Bidirectional(LSTM(n_lstm, dropout=drop_lstm)))

In [25]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [36]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128, 32)           5376      
_________________________________________________________________
lstm_2 (LSTM)                (None, 32)                8320      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 198       
Total params: 13,894
Trainable params: 13,894
Non-trainable params: 0
_________________________________________________________________


In [33]:
# Training the model
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)
model.save('HAR_BASIC.h5')

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [51]:
epochs = 50
mp_size = 2
model2 = Sequential()
n_conv = 80
k_conv = 5
n_lstm = 32
drop_lstm = 0.3

model2.add(Conv1D(n_conv, k_conv, activation='relu',input_shape = (timesteps, input_dim)))
model2.add(MaxPooling1D(mp_size))
model2.add(Bidirectional(LSTM(n_lstm, dropout=drop_lstm)))
model2.add(Dense(n_classes, activation='sigmoid'))

In [52]:

model2.compile(optimizer='Adam',loss = 'categorical_crossentropy',metrics = ['accuracy'])

In [53]:
model2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_6 (Conv1D)            (None, 124, 80)           3680      
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 62, 80)            0         
_________________________________________________________________
bidirectional_4 (Bidirection (None, 64)                28928     
_________________________________________________________________
dense_5 (Dense)              (None, 6)                 390       
Total params: 32,998
Trainable params: 32,998
Non-trainable params: 0
_________________________________________________________________


In [54]:
# Training the model
model2.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)
model2.save('HAR_advanced.h5')

Train on 7352 samples, validate on 2947 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# I intentionally tried to make the model overfit(train for more epochs...) as it is a general practice to over-fit and  than regularize....(but it seems i could as well run for more epochs).95.88 (better than handcrafted features) is cool enough .....
## AS FAR AS ARCHITECTURE IS CONSIDERED I WON'T PRETEND TO HAVE SOME DEEP INSIGHTNS...IT'S ALMOST ALWAYS LARGELY BASED ON TRYING COUPLE OF SENSIBLE ONES.....BUT AS FAR AS TRAINING METHOD IS CONCERNED
## -> [CYCLICAL LEARNING RATES - LESLIE SMITH](https://arxiv.org/abs/1506.01186)
## ->[SUPER CONVERGANCE](https://arxiv.org/abs/1708.07120)

HERE I WON'T GO INTO THE DETAILS BUT IN LATER NOTEBOOK I WOULD TRY TO IMPLEMENT THEM(AS THEY ARE NOT READILY 
AVAILABLE) IN PYTORCH....BY FAR THESE ARE THE MOST AMAZING(PRACTICAL) PAPERS I HAVE READ....IT'S CRAZY THAT ALMOST NOBODY SEEMS TO TALK ABOUT IT .... THOSE TECHNIQUES COMBINED WITH FEW OTHER HACKS (I WILL SHARE THEM IN LATER NOTEBOOKS) HAVE GIVEN ME BETTER RESULTS(IMDB DATASET) THEN ANY CURRENTLY PUBLISHED ONES..

In [27]:
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

In [28]:
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))

In [29]:
sess

<tensorflow.python.client.session.Session at 0x7f3ec72fbf98>

In [24]:
print(confusion_matrix(Y_test, model.predict(X_test)))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 512        0        25        0                   0   
SITTING                  3      410        75        0                   0   
STANDING                 0       87       445        0                   0   
WALKING                  0        0         0      481                   2   
WALKING_DOWNSTAIRS       0        0         0        0                 382   
WALKING_UPSTAIRS         0        0         0        2                  18   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                             0  
SITTING                            3  
STANDING                           0  
WALKING                           13  
WALKING_DOWNSTAIRS                38  
WALKING_UPSTAIRS                 451  


In [27]:
score = model.evaluate(X_test, Y_test)



In [28]:
score

[0.3087582236972612, 0.9097387173396675]

- With a simple 2 layer architecture we got 90.09% accuracy and a loss of 0.30
- We can further imporve the performace with Hyperparameter tuning